"""MkDocs plugin that publishes documentation pages to Confluence.

The plugin does nothing unless the ``MKDOCS_TO_CONFLUENCE`` environment
variable is set.  When ``MKDOCS_TO_CONFLUENCE_DRY_RUN`` (or the ``dryrun``
option) is set, pages are still looked up but no create/update/upload request
is sent.
"""
import contextlib
import glob
import hashlib
import logging
import mimetypes
import os
import re
import shutil
import sys
import tempfile
import time
from os import environ
from pathlib import Path
from time import sleep
from xml.sax.saxutils import quoteattr

import mistune
import requests
from md2cf.confluence_renderer import ConfluenceRenderer
from mkdocs.config import config_options
from mkdocs.plugins import BasePlugin

logger = logging.getLogger('mkdocs')

ENABLE_ENV_VAR = "MKDOCS_TO_CONFLUENCE"
DRY_RUN_ENV_VAR = "MKDOCS_TO_CONFLUENCE_DRY_RUN"

# Page bodies are sent with representation "storage" (see add_page), i.e. as
# XHTML, so the placeholder texts are wrapped in a paragraph element.
TEMPLATE_BODY = "<p> TEMPLATE </p>"
HEADER_WARNING = "‼️ This page is created automatically, all you changes will be overwritten during the next MKDocs deployment. Do not edit a page here ‼️"
SECTION_PAGE_CONTENT = "<p> It's just a Section Page </p>"

# Markdown image: ![caption](image){options}
# -- TODO: support named picture
MD_IMAGE_RE = re.compile(r"(?:[!]\[(?P<caption>.*?)\])\((?P<image>.*?)\)(?P<options>\{.*\})?")
# Anything that starts with "<scheme>:" (http:, https:, data:, ...) is not a local file.
URL_SCHEME_RE = re.compile(r"[a-zA-Z][a-zA-Z0-9+.-]*:")

# Content type used when the type of an attachment cannot be guessed.
FALLBACK_CONTENT_TYPE = "multipart/form-data"


# -- Kept for backward compatibility; nothing in this module uses it any more.
@contextlib.contextmanager
def nostdout():
    """Silence ``sys.stdout`` for the duration of the ``with`` block.

    The previous stream is restored even when the block raises.
    """
    save_stdout = sys.stdout
    sys.stdout = DummyFile()
    try:
        yield
    finally:
        sys.stdout = save_stdout


# -- Kept for backward compatibility; only nostdout() uses it.
class DummyFile(object):
    """Write-only stream that discards everything written to it."""

    def write(self, x):
        pass

    def flush(self):
        pass


class MkdocsWithConfluence(BasePlugin):
    """Create or update one Confluence page per MkDocs page.

    For every page, a chain of placeholder "section" pages is created from the
    page URL, then the page itself is rendered with the md2cf renderer and
    stored under the last section page.  Local images referenced by the page
    are uploaded as attachments.

    NOTE(review): ``host_url`` is used as the base of every request
    (``host_url?title=...``, ``host_url/<id>/child/attachment``), so it
    presumably has to point at the Confluence content REST resource -- confirm
    against the deployment.
    """

    config_scheme = (
        ("host_url", config_options.Type(str, default=None)),
        ("space", config_options.Type(str, default=None)),
        ("parent_page_name", config_options.Type(str, default=None)),
        ("username", config_options.Type(str, default=environ.get("JIRA_USERNAME", None))),
        ("password", config_options.Type(str, default=environ.get("JIRA_PASSWORD", None))),
        ("dryrun", config_options.Type(bool, default=False)),
        ("header_message", config_options.Type(str, default=None)),
        ("upstream_url", config_options.Type(str, default=None)),
        ("header_warning", config_options.Type(str, default=HEADER_WARNING)),
    )

    def __init__(self):
        super().__init__()
        self.enabled = False
        # Defined here so that methods called before on_config() do not fail
        # with AttributeError.
        self.dryrun = False
        self.confluence_renderer = ConfluenceRenderer(use_xhtml=True)
        self.confluence_mistune = mistune.Markdown(renderer=self.confluence_renderer)
        self.simple_log = False
        self.flen = 1
        self.session = requests.Session()
        self.page_attachments = {}
        self.repo_url = None
        self.header_message = None
        self.upstream_url = None

    # ------------------------------------------------------------------
    # MkDocs events
    # ------------------------------------------------------------------
    def on_config(self, config):
        """Read the environment and plugin options once the config is loaded.

        Args:
            config: The global MkDocs configuration.

        Returns:
            The unmodified ``config``.
        """
        # -- Enable the plugin by setting environment variable
        if os.environ.get(ENABLE_ENV_VAR):
            logger.info("MKDocs with Confluence is enabled")
            self.enabled = True
        else:
            logger.info(
                f"MKDocs with Confluence is disabled, set the {ENABLE_ENV_VAR} to enable the plugin"
            )

        # -- Set the dry-run mode
        if self.config["dryrun"] or os.environ.get(DRY_RUN_ENV_VAR):
            logger.info("dry-run mode is turned on, your changes won't be synced with Confluence")
            self.dryrun = True
        else:
            logger.info("dry-run mode is turned off, your changes will be synced with Confluence")
            self.dryrun = False

        # -- Set git url to add to a confluence page
        if config["repo_url"]:
            self.repo_url = config["repo_url"]
            logger.info(f"git url is set to {self.repo_url}")

        # -- Set a custom header to add to a confluence page
        if self.config["header_message"]:
            self.header_message = self.config["header_message"]
            logger.info(f"header message is set to {self.header_message}")

        # -- Set an upstream url to add to a confluence page
        if self.config["upstream_url"]:
            self.upstream_url = self.config["upstream_url"]
            logger.info(f"upstream url is set to {self.upstream_url}")

        return config

    def on_files(self, files, config):
        """Record how many documentation pages are part of the build.

        Args:
            files: The MkDocs file collection.
            config: The global MkDocs configuration (unused).

        Returns:
            The unmodified ``files`` collection.
        """
        pages = files.documentation_pages()
        self.flen = len(pages)
        if pages:
            logger.debug(f"number of Files in directory tree: {self.flen}")
        else:
            logger.error("no files found to be synced")
        return files

    def on_page_markdown(self, markdown, page, config, files):
        """Publish one page (and its local images) to Confluence.

        The Markdown handed back to MkDocs is never modified; all changes
        (header lines, image macros) only affect the copy sent to Confluence.
        Any failure is logged and swallowed so that a Confluence problem does
        not break the documentation build.

        Args:
            markdown: Markdown source of the page.
            page: The MkDocs page; ``page.url`` and ``page.title`` are read.
            config: The global MkDocs configuration (unused).
            files: The MkDocs file collection (unused).

        Returns:
            ``markdown`` unchanged.
        """
        if not self.enabled:
            # Nothing may be written while disabled, so skip the lookups too.
            return markdown

        # TODO: Modify pages here
        try:
            self.session.auth = (self.config["username"], self.config["password"])

            root_page = self.config["parent_page_name"]
            if root_page is None:
                root_page = self.config["space"]

            # Create empty pages for sections only
            logger.info("preparing emtpy pages for sections")
            parent_page = self._ensure_section_pages(page.url, root_page)
            parent_id = self.find_page_id(parent_page)
            confluence_page_name = parent_page + " " + page.title

            body_markdown, attachments = self._replace_local_images(markdown)
            logger.debug(f"attachments: {attachments}")
            new_markdown = self._build_header(page) + body_markdown

            confluence_body = self.confluence_mistune(new_markdown)
            self.add_page(confluence_page_name, parent_id, confluence_body)

            if attachments:
                logger.debug(f"UPLOADING ATTACHMENTS TO CONFLUENCE FOR {page.title}, DETAILS:")
                logger.debug(f"FILES: {attachments}")
            for attachment in attachments:
                logger.debug(f"trying to upload {attachment} to {confluence_page_name}")
                try:
                    self.add_or_update_attachment(confluence_page_name, attachment)
                except Exception as exc:
                    # Best effort: one broken image must not stop the others.
                    logger.warning(exc)
        except Exception as exp:
            # Deliberate catch-all: syncing is best effort, the build goes on.
            logger.error(exp)

        return markdown

    def on_post_page(self, output, page, config):
        """Leave the rendered page untouched.

        Returns:
            ``output`` unchanged.
        """
        logger.info("The author was uploading images here, maybe there was a reason for that")
        return output

    def on_page_content(self, html, page, config, files):
        """Leave the rendered HTML untouched."""
        return html

    # ------------------------------------------------------------------
    # Helpers for on_page_markdown
    # ------------------------------------------------------------------
    def _ensure_section_pages(self, page_url, parent_page):
        """Create the placeholder page chain for ``page_url``.

        Each path component becomes a page named after all components up to
        and including it, joined by spaces ("a", "a b", ...), nested under the
        previous one.

        NOTE(review): for a single-component URL such as ``"c/"`` the split
        below yields ``["c"]``, i.e. the page's own slug is treated as a
        section -- kept as-is to preserve existing page names; confirm that
        this is intended.

        Args:
            page_url: URL of the MkDocs page.
            parent_page: Title of the page the first section is nested under.

        Returns:
            The title of the deepest section page, or ``parent_page`` when the
            URL has no sections.
        """
        page_name = ""
        for path in page_url.rsplit("/", 2)[0].split("/"):
            logger.debug(f"path is {path}")
            if not path:
                continue
            parent_id = self.find_page_id(parent_page)
            page_name = page_name + " " + path if page_name else path
            logger.info(f"Will create a page {page_name} under the {parent_page}")
            self.add_page(page_name, parent_id, SECTION_PAGE_CONTENT)
            parent_page = page_name
        return parent_page

    def _build_header(self, page):
        """Return the block-quote lines that are put in front of the page body.

        Order, top to bottom: header warning, repository URL, custom header
        message, upstream URL.
        """
        lines = [f">{self.config['header_warning']}"]
        if self.repo_url:
            lines.append(f">You can edit documentation here: {self.repo_url}")
        if self.header_message:
            lines.append(f">{self.header_message}")
        if self.upstream_url:
            lines.append(f">Original page is here: {self.upstream_url}/{page.url}")
        return "".join(line + "\n\n" for line in lines)

    def _replace_local_images(self, markdown):
        """Swap local Markdown images for Confluence attachment macros.

        Images whose target has a URL scheme are left alone.  Every other
        target is resolved below ``./docs`` and replaced by an ``ac:image``
        macro that refers to the attachment by file name.

        Args:
            markdown: Markdown source of the page.

        Returns:
            A ``(markdown, attachments)`` tuple: the rewritten Markdown and the
            de-duplicated list of local file paths to upload.
        """
        attachments = []

        def to_macro(match):
            target = match.group("image").strip()
            if not target or URL_SCHEME_RE.match(target):
                return match.group(0)
            # -- TODO: I'm sure it can be done better
            attachment_path = os.path.join("./docs", target.lstrip("/"))
            logger.info(f"found image: {attachment_path}")
            if attachment_path not in attachments:
                attachments.append(attachment_path)
            filename = quoteattr(os.path.basename(attachment_path))
            return f"<p><ac:image><ri:attachment ri:filename={filename}/></ac:image></p>"

        return MD_IMAGE_RE.sub(to_macro, markdown), attachments

    # ------------------------------------------------------------------
    # Parsers for textual nav entries (not called from this module)
    # ------------------------------------------------------------------
    def __get_page_url(self, section):
        """Return the ``url='...'`` value of ``section`` with a ``.md`` suffix.

        The last character of the captured URL is dropped before the suffix is
        added.  Raises ``AttributeError`` when ``section`` has no ``url='...')``.
        """
        return re.search(r"url='(.*)'\)", section).group(1)[:-1] + ".md"

    def __get_page_name(self, section):
        """Return the base name of the ``url='...'`` value of ``section``."""
        return os.path.basename(re.search(r"url='(.*)'\)", section).group(1)[:-1])

    def __get_section_name(self, section):
        """Return the base name of the URL found in a section entry."""
        logger.debug(f"SECTION name: {section}")
        return os.path.basename(re.search(r"url='(.*)'\/", section).group(1)[:-1])

    def __get_section_title(self, section):
        """Return the ``Section(title='...')`` value, or the section name."""
        logger.debug(f"SECTION title: {section}")
        found = re.search(r"Section\(title='(.*)'\)", section)
        if found is not None:
            return found.group(1)
        name = self.__get_section_name(section)
        logger.warning(f"Section '{name}' doesn't exist in the mkdocs.yml nav section!")
        return name

    def __get_page_title(self, section):
        """Return the ``Page(title='...',`` value, or the page URL."""
        found = re.search(r"\s*Page\(title='(.*)',", section)
        if found is not None:
            return found.group(1)
        name = self.__get_page_url(section)
        logger.warning(f"Page '{name}' doesn't exist in the mkdocs.yml nav section!")
        return name

    # ------------------------------------------------------------------
    # Attachments
    # ------------------------------------------------------------------
    # Adapted from https://stackoverflow.com/a/3431838
    def get_file_sha1(self, file_path):
        """Return the hexadecimal SHA-1 digest of the file at ``file_path``."""
        hash_sha1 = hashlib.sha1()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_sha1.update(chunk)
        return hash_sha1.hexdigest()

    def add_or_update_attachment(self, page_name, filepath):
        """Upload ``filepath`` to the page unless an identical copy is there.

        The SHA-1 of the file is stored in the attachment's version message;
        an existing attachment whose message carries the same hash is skipped.

        Args:
            page_name: Title of the Confluence page.
            filepath: Path of the local file.
        """
        # Routine progress message: logged at INFO, not WARNING.
        logger.info(f"Mkdocs With Confluence * {page_name} *ADD/Update ATTACHMENT if required* {filepath}")
        logger.debug(f"Mkdocs With Confluence: Add Attachment: PAGE NAME: {page_name}, FILE: {filepath}")
        page_id = self.find_page_id(page_name)
        if not page_id:
            logger.debug("PAGE DOES NOT EXISTS")
            return

        file_hash = self.get_file_sha1(filepath)
        attachment_message = f"MKDocsWithConfluence [v{file_hash}]"
        existing_attachment = self.get_attachment(page_id, filepath)
        if not existing_attachment:
            self.create_attachment(page_id, filepath, attachment_message)
            return

        # Attachments uploaded by other means may have no version message.
        stored_message = (existing_attachment.get("version") or {}).get("message") or ""
        existing_match = re.search(r"\[v([a-f0-9]{40})]$", stored_message)
        if existing_match is not None and existing_match.group(1) == file_hash:
            logger.debug(f" * Mkdocs With Confluence * {page_name} * Existing attachment skipping * {filepath}")
        else:
            self.update_attachment(page_id, filepath, existing_attachment, attachment_message)

    def get_attachment(self, page_id, filepath):
        """Return the attachment of ``page_id`` named like ``filepath``.

        Returns:
            The first matching result of the attachment listing, or ``None``.

        Raises:
            requests.HTTPError: If the lookup fails.
        """
        name = os.path.basename(filepath)
        logger.debug(f" * Mkdocs With Confluence: Get Attachment: PAGE ID: {page_id}, FILE: {filepath}")

        url = self.config["host_url"] + "/" + page_id + "/child/attachment"
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")

        r = self.session.get(url, headers=headers, params={"filename": name, "expand": "version"})
        r.raise_for_status()
        results = r.json().get("results") or []
        return results[0] if results else None

    def update_attachment(self, page_id, filepath, existing_attachment, message):
        """Upload a new version of ``existing_attachment`` (no-op in dry-run).

        Raises:
            requests.HTTPError: If Confluence rejects the upload.
        """
        logger.debug(f" * Mkdocs With Confluence: Update Attachment: PAGE ID: {page_id}, FILE: {filepath}")
        url = self.config["host_url"] + "/" + page_id + "/child/attachment/" + existing_attachment["id"] + "/data"
        if self._post_attachment(url, filepath, message):
            logger.info("OK!")

    def create_attachment(self, page_id, filepath, message):
        """Upload ``filepath`` as a new attachment (no-op in dry-run).

        Raises:
            requests.HTTPError: If Confluence rejects the upload.
        """
        logger.debug(f" * Mkdocs With Confluence: Create Attachment: PAGE ID: {page_id}, FILE: {filepath}")
        url = self.config["host_url"] + "/" + page_id + "/child/attachment"
        if self._post_attachment(url, filepath, message):
            logger.debug("OK!")

    def _post_attachment(self, url, filepath, message):
        """POST ``filepath`` to ``url`` as a multipart upload.

        Returns:
            ``True`` when the file was sent, ``False`` in dry-run mode.

        Raises:
            requests.HTTPError: If the response status signals an error.
        """
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")

        filename = os.path.basename(filepath)
        # determine content-type
        content_type, _encoding = mimetypes.guess_type(filepath)
        if content_type is None:
            content_type = FALLBACK_CONTENT_TYPE

        if self.dryrun:
            return False

        # The handle is closed as soon as the request has been sent.
        with open(filepath, "rb") as handle:
            files = {"file": (filename, handle, content_type), "comment": message}
            r = self.session.post(url, headers=headers, files=files)
        # Check the status first: an error body is not necessarily JSON.
        r.raise_for_status()
        logger.debug(r.text)
        return True

    # ------------------------------------------------------------------
    # Pages
    # ------------------------------------------------------------------
    def _find_page(self, page_name, expand):
        """Return the first page titled ``page_name`` in the space, or ``None``.

        The query is passed through ``params`` so that titles containing
        characters such as ``&`` or ``#`` are URL-encoded.

        Raises:
            requests.HTTPError: If the lookup fails.
        """
        query = {"title": page_name, "spaceKey": self.config["space"], "expand": expand}
        r = self.session.get(self.config["host_url"], params=query)
        logger.debug(f"URL: {r.url}")
        r.raise_for_status()
        results = r.json().get("results") or []
        return results[0] if results else None

    def find_page_id(self, page_name):
        """Return the id of the page titled ``page_name``, or ``None``.

        Raises:
            requests.HTTPError: If the lookup fails.
        """
        logger.info(f"looking for a page id of the page: {page_name}")
        found = self._find_page(page_name, "history")
        if found is None:
            logger.debug(f"page {page_name} doens't exist")
            return None
        logger.debug(f"response: {found}")
        return found["id"]

    def add_page(self, page_name, parent_page_id, page_content_in_storage_format):
        """Create the page, or update it when a page with that title exists.

        Does nothing while the plugin is disabled; in dry-run mode the page is
        looked up but nothing is written.  Request failures are logged, not
        raised.

        Args:
            page_name: Title of the page.
            parent_page_id: Id of the page the new page is nested under.
            page_content_in_storage_format: Page body in storage format.
        """
        if not self.enabled:
            return

        if self.find_page_id(page_name):
            self.update_page(page_name, page_content_in_storage_format)
            return

        logger.info(f"Creating a new page: {page_name} under page with ID: {parent_page_id}")
        url = self.config["host_url"] + "/"
        logger.debug(f"URL: {url}")
        headers = {"Content-Type": "application/json"}
        data = {
            "type": "page",
            "title": page_name,
            "space": {"key": self.config["space"]},
            "ancestors": [{"id": parent_page_id}],
            "body": {"storage": {"value": page_content_in_storage_format, "representation": "storage"}},
        }
        logger.debug(f"DATA: {data}")
        if self.dryrun:
            return

        try:
            r = self.session.post(url, json=data, headers=headers)
            r.raise_for_status()
        except requests.RequestException as exp:
            logger.error(exp)
            logger.error(f"page can't be created: {page_name}")
        else:
            logger.info(f"page created: {page_name}")

    def update_page(self, page_name, page_content_in_storage_format):
        """Replace the body of an existing page, bumping its version by one.

        Request failures are logged, not raised.  Nothing is written in
        dry-run mode.

        Args:
            page_name: Title of the page.
            page_content_in_storage_format: New page body in storage format.
        """
        page_id = self.find_page_id(page_name)
        logger.debug(f"updating page {page_name}")
        if not page_id:
            logger.warning(f"page {page_name} doesn't exist")
            return

        current_version = self.find_page_version(page_name)
        if current_version is None:
            logger.error(f"page can't be updated, version is unknown: {page_name}")
            return

        url = self.config["host_url"] + "/" + page_id
        headers = {"Content-Type": "application/json"}
        data = {
            "id": page_id,
            "title": page_name,
            "type": "page",
            "space": {"key": self.config["space"]},
            "body": {"storage": {"value": page_content_in_storage_format, "representation": "storage"}},
            "version": {"number": current_version + 1},
        }
        if self.dryrun:
            return

        try:
            r = self.session.put(url, json=data, headers=headers)
            r.raise_for_status()
        except requests.RequestException as exp:
            logger.error(exp)
            logger.error(f"page can't be updated: {page_name}")
        else:
            logger.info(f"page updated: {page_name}")

    def find_page_version(self, page_name):
        """Return the current version number of the page, or ``None``.

        Raises:
            requests.HTTPError: If the lookup fails.
        """
        logger.debug(f"INFO - * Mkdocs With Confluence: Find PAGE VERSION, PAGE NAME: {page_name}")
        found = self._find_page(page_name, "version")
        if found is None:
            logger.debug("PAGE DOES NOT EXISTS")
            return None
        number = found["version"]["number"]
        logger.debug(f"VERSION: {number}")
        return number

    def find_parent_name_of_page(self, name):
        """Return the title of the closest ancestor of the page, or ``None``.

        ``None`` is returned when the page does not exist or has no ancestors.

        Raises:
            requests.HTTPError: If a lookup fails.
        """
        logger.debug(f"INFO - * Mkdocs With Confluence: Find PARENT OF PAGE, PAGE NAME: {name}")
        page_id = self.find_page_id(name)
        if not page_id:
            logger.debug("PAGE DOES NOT HAVE PARENT")
            return None

        url = self.config["host_url"] + "/" + page_id
        r = self.session.get(url, params={"expand": "ancestors"})
        r.raise_for_status()
        ancestors = r.json().get("ancestors") or []
        if not ancestors:
            logger.debug("PAGE DOES NOT HAVE PARENT")
            return None
        logger.debug(f"PARENT NAME: {ancestors[-1]['title']}")
        return ancestors[-1]["title"]

    def wait_until(self, condition, interval=0.1, timeout=1):
        """Poll ``condition`` until it holds or ``timeout`` seconds elapsed.

        Args:
            condition: A zero-argument callable that is re-evaluated on every
                poll, or a plain value that is tested for truthiness.
            interval: Seconds to sleep between two polls.
            timeout: Maximum number of seconds to wait.

        Returns:
            ``True`` if the condition held, ``False`` on timeout.
        """
        def holds():
            return bool(condition() if callable(condition) else condition)

        start = time.monotonic()
        while not holds():
            if time.monotonic() - start >= timeout:
                return False
            time.sleep(interval)
        return True