"""MkDocs plugin that mirrors documentation pages and their images to Confluence."""

import os
import hashlib
import sys
import glob
import re
import tempfile
import shutil
import requests
import mimetypes
import mistune
import contextlib
import time
from time import sleep
from xml.sax.saxutils import quoteattr
from mkdocs.config import config_options
from mkdocs.plugins import BasePlugin
from md2cf.confluence_renderer import ConfluenceRenderer
from os import environ
from pathlib import Path
from loguru import logger

ENABLE_ENV_VAR = "MKDOCS_TO_CONFLUENCE"
DRY_RUN_ENV_VAR = "MKDOCS_TO_CONFLUENCE_DRY_RUN"

# NOTE(review): the three page bodies below are wrapped in <p> so that they are
# well-formed storage-format fragments -- confirm this is the intended markup.
TEMPLATE_BODY = "<p> TEMPLATE </p>"
HEADER_MESSAGE = "‼️ This page is created automatically, all you changes will be overwritten during the next MKDocs deployment. Do not edit a page here ‼️"
SECTION_PAGE_CONTENT = "<p> It's juat a Section Page </p>"
_SECTION_PLACEHOLDER_BODY = (
    "<p> I want to make sections pages better: "
    "https://git.badhouseplants.net/allanger/mkdocs-with-confluence/issues/2 </p>"
)

# Directory that image paths found in the markdown are resolved against.
_DOCS_DIR = "./docs"

# Markdown image: ![caption](image){options}
# TODO: support named pictures, i.e. ![caption](image "title")
_MD_IMAGE_RE = re.compile(r"(?:[!]\[(?P<caption>.*?)\])\((?P<image>.*?)\)(?P<options>\{.*\})?")

# Trailing "[v<sha1>]" marker stored in an attachment's version message.
_ATTACHMENT_HASH_RE = re.compile(r"\[v([a-f0-9]{40})]$")


# -- I don't know why it's here
@contextlib.contextmanager
def nostdout():
    """Temporarily replace ``sys.stdout`` with a sink that discards output."""
    save_stdout = sys.stdout
    sys.stdout = DummyFile()
    try:
        yield
    finally:
        # Restore stdout even when the wrapped block raises.
        sys.stdout = save_stdout


# -- I don't know why it's here
class DummyFile(object):
    """Minimal write-only file object that throws everything away."""

    def write(self, x):
        pass

    def flush(self):
        pass


class MkdocsWithConfluence(BasePlugin):
    """Create or update a Confluence page for every MkDocs documentation page.

    The plugin only talks to Confluence when the ``MKDOCS_TO_CONFLUENCE``
    environment variable is set. In dry-run mode it still performs look-ups
    but skips every request that would modify Confluence.
    """

    config_scheme = (
        ("host_url", config_options.Type(str, default=None)),
        ("space", config_options.Type(str, default=None)),
        ("parent_page_name", config_options.Type(str, default=None)),
        ("username", config_options.Type(str, default=environ.get("JIRA_USERNAME", None))),
        ("password", config_options.Type(str, default=environ.get("JIRA_PASSWORD", None))),
        ("dryrun", config_options.Type(bool, default=False)),
        ("header_message", config_options.Type(str, default=HEADER_MESSAGE)),
    )

    def __init__(self):
        self.enabled = False
        # Defined up-front so the attribute exists even before on_config runs.
        self.dryrun = False
        self.confluence_renderer = ConfluenceRenderer(use_xhtml=True)
        self.confluence_mistune = mistune.Markdown(renderer=self.confluence_renderer)
        self.simple_log = False
        self.flen = 1
        self.session = requests.Session()
        self.page_attachments = {}
        self.repo_url = None

    # ------------------------------------------------------------------
    # -- MkDocs event hooks
    # ------------------------------------------------------------------
    def on_config(self, config):
        """Read the enable / dry-run switches and remember the repository URL."""
        logger.info(config)
        # ------------------------------------------------------
        # -- Enable the plugin by setting environment variable
        # ------------------------------------------------------
        if os.environ.get(ENABLE_ENV_VAR):
            logger.info("MKDocs with Confluence is enabled")
            self.enabled = True
        else:
            logger.info(
                f"MKDocs with Confluence is disabled, set the {ENABLE_ENV_VAR} to enable the plugin"
            )
        # ------------------------------------------------------
        # -- Set the dry-run mode
        # ------------------------------------------------------
        if self.config["dryrun"] or os.environ.get(DRY_RUN_ENV_VAR):
            logger.info("dry-run mode is turned on, your changes won't be synced with Confluence")
            self.dryrun = True
        else:
            logger.info("dry-run mode is turned off, your changes will be synced with Confluence")
            self.dryrun = False
        # ------------------------------------------------------
        # -- Set git url to add to a confluence page
        # ------------------------------------------------------
        if config["repo_url"]:
            self.repo_url = config["repo_url"]
            logger.info(f"git url is set to {self.repo_url}")

    def on_files(self, files, config):
        """Record how many documentation pages are going to be processed."""
        pages = files.documentation_pages()
        self.flen = len(pages)
        if pages:
            logger.debug(f"number of Files in directory tree: {self.flen}")
        else:
            logger.error("no files found to be synced")

    def on_page_markdown(self, markdown, page, config, files):
        """Publish one page (and the images it references) to Confluence.

        Returns the markdown unchanged, so the generated site is not affected.
        """
        # TODO: Modify pages here (page should be modified in this block)
        if not self.enabled:
            # A disabled plugin must not contact Confluence at all.
            return markdown

        self.session.auth = (self.config["username"], self.config["password"])
        if self.config["parent_page_name"] is not None:
            parent_page = self.config["parent_page_name"]
        else:
            parent_page = self.config["space"]

        parent_page = self._ensure_section_pages(page, parent_page)
        parent_id = self.find_page_id(parent_page)
        confluence_page_name = parent_page + " " + page.title

        # -------------------------------------------------
        # -- Sync attachments
        # -------------------------------------------------
        new_markdown, attachments = self._extract_images(markdown)
        logger.debug(f"attachments: {attachments}")

        # Blank lines after each quote keep the page's first block out of it.
        if self.repo_url:
            new_markdown = f">You can edit documentation here: {self.repo_url}\n\n" + new_markdown
        new_markdown = f">{self.config['header_message']}\n\n" + new_markdown

        confluence_body = self.confluence_mistune(new_markdown)
        self.add_page(confluence_page_name, parent_id, confluence_body)

        if attachments:
            logger.debug(f"UPLOADING ATTACHMENTS TO CONFLUENCE FOR {page.title}, DETAILS:")
            logger.debug(f"FILES: {attachments}")
            for attachment in attachments:
                logger.debug(f"trying to upload {attachment} to {confluence_page_name}")
                try:
                    self.add_or_update_attachment(confluence_page_name, attachment)
                except Exception as exc:
                    # Deliberately best-effort: one bad image must not abort the build.
                    logger.warning(exc)
        return markdown

    def _ensure_section_pages(self, page, parent_page):
        """Create a placeholder page per URL path segment; return the deepest page name."""
        logger.info("preparing emtpy pages for sections")
        section_name = ""
        for path in page.url.rsplit("/", 2)[0].split("/"):
            logger.debug(f"path is {path}")
            if not path:
                continue
            section_name = f"{section_name} {path}" if section_name else path
            logger.info(f"Will create a page {section_name} under the {parent_page}")
            self.add_page(section_name, self.find_page_id(parent_page), _SECTION_PLACEHOLDER_BODY)
            parent_page = section_name
        return parent_page

    def _extract_images(self, markdown):
        """Swap markdown images for attachment references.

        Returns ``(rewritten_markdown, attachment_paths)``.
        """
        attachments = []

        def to_attachment(match):
            # Each image gets its own file name; a plain-string re.sub would
            # have rewritten every image with the first one's name.
            attachment_path = os.path.join(_DOCS_DIR, match.group("image").lstrip("/"))
            logger.info(f"found image: {attachment_path}")
            attachments.append(attachment_path)
            filename = quoteattr(os.path.basename(attachment_path))
            # NOTE(review): markup reconstructed from the Confluence storage
            # format for attached images -- confirm against a rendered page.
            return f"<p><ac:image><ri:attachment ri:filename={filename}/></ac:image></p>"

        return _MD_IMAGE_RE.sub(to_attachment, markdown), attachments

    def on_post_page(self, output, page, config):
        logger.info("The author was uploading images here, maybe there was a reason for that")

    def on_page_content(self, html, page, config, files):
        return html

    # ------------------------------------------------------------------
    # -- Helpers that parse the textual representation of nav items
    # ------------------------------------------------------------------
    def __get_page_url(self, section):
        return re.search(r"url='(.*)'\)", section).group(1)[:-1] + ".md"

    def __get_page_name(self, section):
        return os.path.basename(re.search(r"url='(.*)'\)", section).group(1)[:-1])

    def __get_section_name(self, section):
        logger.debug(f"SECTION name: {section}")
        return os.path.basename(re.search(r"url='(.*)'\/", section).group(1)[:-1])

    def __get_section_title(self, section):
        logger.debug(f"SECTION title: {section}")
        found = re.search(r"Section\(title='(.*)'\)", section)
        if found:
            return found.group(1)
        name = self.__get_section_name(section)
        logger.warning(f"Section '{name}' doesn't exist in the mkdocs.yml nav section!")
        return name

    def __get_page_title(self, section):
        found = re.search(r"\s*Page\(title='(.*)',", section)
        if found:
            return found.group(1)
        name = self.__get_page_url(section)
        logger.warning(f"Page '{name}' doesn't exist in the mkdocs.yml nav section!")
        return name

    # ------------------------------------------------------------------
    # -- Attachments
    # ------------------------------------------------------------------
    # Adapted from https://stackoverflow.com/a/3431838
    def get_file_sha1(self, file_path):
        """Return the hex SHA-1 digest of the file, read in 4 KiB chunks."""
        hash_sha1 = hashlib.sha1()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_sha1.update(chunk)
        return hash_sha1.hexdigest()

    def add_or_update_attachment(self, page_name, filepath):
        """Upload ``filepath`` to the page unless the same content is already attached.

        The file's SHA-1 is stored in the attachment's version message and
        compared on the next run to skip unchanged files.
        """
        logger.warning(f"Mkdocs With Confluence * {page_name} *ADD/Update ATTACHMENT if required* {filepath}")
        logger.debug(f"Mkdocs With Confluence: Add Attachment: PAGE NAME: {page_name}, FILE: {filepath}")
        page_id = self.find_page_id(page_name)
        if not page_id:
            logger.debug("PAGE DOES NOT EXISTS")
            return

        file_hash = self.get_file_sha1(filepath)
        attachment_message = f"MKDocsWithConfluence [v{file_hash}]"
        existing_attachment = self.get_attachment(page_id, filepath)
        if not existing_attachment:
            self.create_attachment(page_id, filepath, attachment_message)
            return

        # An attachment without a version message is treated as changed.
        version_message = (existing_attachment.get("version") or {}).get("message") or ""
        existing_match = _ATTACHMENT_HASH_RE.search(version_message)
        if existing_match is not None and existing_match.group(1) == file_hash:
            logger.debug(f" * Mkdocs With Confluence * {page_name} * Existing attachment skipping * {filepath}")
        else:
            self.update_attachment(page_id, filepath, existing_attachment, attachment_message)

    def get_attachment(self, page_id, filepath):
        """Return the page's attachment named like ``filepath``, or ``None``."""
        name = os.path.basename(filepath)
        logger.debug(f" * Mkdocs With Confluence: Get Attachment: PAGE ID: {page_id}, FILE: {filepath}")
        url = self.config["host_url"] + "/" + page_id + "/child/attachment"
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")
        r = self.session.get(url, headers=headers, params={"filename": name, "expand": "version"})
        r.raise_for_status()
        results = r.json().get("results") or []
        return results[0] if results else None

    def update_attachment(self, page_id, filepath, existing_attachment, message):
        """Upload a new version of an existing attachment."""
        logger.debug(f" * Mkdocs With Confluence: Update Attachment: PAGE ID: {page_id}, FILE: {filepath}")
        url = self.config["host_url"] + "/" + page_id + "/child/attachment/" + existing_attachment["id"] + "/data"
        self._upload_attachment(url, filepath, message)

    def create_attachment(self, page_id, filepath, message):
        """Attach a new file to the page."""
        logger.debug(f" * Mkdocs With Confluence: Create Attachment: PAGE ID: {page_id}, FILE: {filepath}")
        url = self.config["host_url"] + "/" + page_id + "/child/attachment"
        self._upload_attachment(url, filepath, message)

    def _upload_attachment(self, url, filepath, message):
        """POST the file to ``url``; does nothing in dry-run mode.

        Raises ``requests.HTTPError`` when Confluence rejects the upload.
        """
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")
        if self.dryrun:
            return
        # determine content-type
        content_type, _ = mimetypes.guess_type(filepath)
        if content_type is None:
            content_type = "multipart/form-data"
        # The context manager closes the handle once the request was sent.
        with open(filepath, "rb") as fh:
            files = {"file": (os.path.basename(filepath), fh, content_type), "comment": message}
            r = self.session.post(url, headers=headers, files=files)
        r.raise_for_status()
        logger.debug(r.json())
        logger.info("OK!")

    # ------------------------------------------------------------------
    # -- Pages
    # ------------------------------------------------------------------
    def _query_pages(self, page_name, expand):
        """Return the list of pages titled ``page_name`` in the configured space."""
        # Passing params lets requests URL-encode titles with special characters.
        params = {"title": page_name, "spaceKey": self.config["space"], "expand": expand}
        r = self.session.get(self.config["host_url"], params=params)
        logger.debug(f"URL: {r.url}")
        r.raise_for_status()
        return r.json().get("results") or []

    def find_page_id(self, page_name):
        """Return the id of the page called ``page_name``, or ``None`` if absent."""
        logger.info(f"looking for a page id of the page: {page_name}")
        results = self._query_pages(page_name, "history")
        if results:
            logger.debug(f"response: {results}")
            return results[0]["id"]
        logger.debug(f"page {page_name} doens't exist")
        return None

    def add_page(self, page_name, parent_page_id, page_content_in_storage_format):
        """Create the page under ``parent_page_id`` or update it when it already exists.

        Does nothing when the plugin is disabled; request failures are logged,
        not raised.
        """
        logger.info(f"Creating a new page: {page_name} under page with ID: {parent_page_id}")
        if not self.enabled:
            return
        if self.find_page_id(page_name):
            self.update_page(page_name, page_content_in_storage_format)
            return

        url = self.config["host_url"] + "/"
        logger.debug(f"URL: {url}")
        headers = {"Content-Type": "application/json"}
        data = {
            "type": "page",
            "title": page_name,
            "space": {"key": self.config["space"]},
            "ancestors": [{"id": parent_page_id}],
            "body": {"storage": {"value": page_content_in_storage_format, "representation": "storage"}},
        }
        logger.debug(f"DATA: {data}")
        if self.dryrun:
            return
        try:
            r = self.session.post(url, json=data, headers=headers)
            r.raise_for_status()
        except requests.RequestException as exp:
            logger.error(exp)
            logger.error(f"page can't be created: {page_name}")
        else:
            logger.info(f"page created: {page_name}")

    def update_page(self, page_name, page_content_in_storage_format):
        """Replace the body of an existing page, bumping its version number by one."""
        logger.debug(f"updating page {page_name}")
        # One query yields both the id and the current version.
        results = self._query_pages(page_name, "version")
        if not results:
            logger.warning(f"page {page_name} doesn't exist")
            return

        page_id = results[0]["id"]
        url = self.config["host_url"] + "/" + page_id
        headers = {"Content-Type": "application/json"}
        data = {
            "id": page_id,
            "title": page_name,
            "type": "page",
            "space": {"key": self.config["space"]},
            "body": {"storage": {"value": page_content_in_storage_format, "representation": "storage"}},
            "version": {"number": results[0]["version"]["number"] + 1},
        }
        if self.dryrun:
            return
        try:
            r = self.session.put(url, json=data, headers=headers)
            r.raise_for_status()
        except requests.RequestException as exp:
            logger.error(exp)
            logger.error(f"page can't be updated: {page_name}")
        else:
            logger.info(f"page updated: {page_name}")

    def find_page_version(self, page_name):
        """Return the current version number of the page, or ``None`` if absent."""
        logger.debug(f"INFO - * Mkdocs With Confluence: Find PAGE VERSION, PAGE NAME: {page_name}")
        results = self._query_pages(page_name, "version")
        # An unknown title yields an empty list, not None.
        if results:
            version = results[0]["version"]["number"]
            logger.debug(f"VERSION: {version}")
            return version
        logger.debug("PAGE DOES NOT EXISTS")
        return None

    def find_parent_name_of_page(self, name):
        """Return the title of the page's direct ancestor, or ``None`` if there is none."""
        logger.debug(f"INFO - * Mkdocs With Confluence: Find PARENT OF PAGE, PAGE NAME: {name}")
        idp = self.find_page_id(name)
        if not idp:
            logger.debug("PAGE DOES NOT EXISTS")
            return None
        url = self.config["host_url"] + "/" + idp
        r = self.session.get(url, params={"expand": "ancestors"})
        r.raise_for_status()
        ancestors = r.json().get("ancestors") or []
        if ancestors:
            logger.debug(f"PARENT NAME: {ancestors[-1]['title']}")
            return ancestors[-1]["title"]
        logger.debug("PAGE DOES NOT HAVE PARENT")
        return None

    def wait_until(self, condition, interval=0.1, timeout=1):
        """Poll ``condition`` until it is truthy or ``timeout`` seconds have passed.

        ``condition`` may be a zero-argument callable (re-evaluated on every
        poll) or a plain value. Returns ``True`` if the condition was met.
        """
        deadline = time.monotonic() + timeout
        while True:
            if condition() if callable(condition) else condition:
                return True
            if time.monotonic() >= deadline:
                return False
            time.sleep(interval)