mkdocs-with-confluence/mkdocs_with_confluence/plugin.py

import os
import hashlib
import sys
import glob
import re
import tempfile
import shutil
import requests
import mimetypes
import mistune
import contextlib
import time

from time import sleep
from mkdocs.config import config_options
from mkdocs.plugins import BasePlugin
from md2cf.confluence_renderer import ConfluenceRenderer
from os import environ
from pathlib import Path
from loguru import logger

ENABLE_ENV_VAR = "MKDOCS_TO_CONFLUENCE"
DRY_RUN_ENV_VAR = "MKDOCS_TO_CONFLUENCE_DRY_RUN"
TEMPLATE_BODY = "<p> TEMPLATE </p>"
HEADER_MESSAGE = "‼️ This page is created automatically, all you changes will be overwritten during the next MKDocs deployment. Do not edit a page here ‼️"

SECTION_PAGE_CONTENT =  "<p> It's juat a Section Page </p>"

# -- I don't know why it's here
@contextlib.contextmanager
def nostdout():
    save_stdout = sys.stdout
    sys.stdout = DummyFile()
    yield
    sys.stdout = save_stdout

# -- I don't know why it's here
class DummyFile(object):
    def write(self, x):
        pass


class MkdocsWithConfluence(BasePlugin):
    config_scheme = (
        ("host_url", config_options.Type(str, default=None)),
        ("space", config_options.Type(str, default=None)),
        ("parent_page_name", config_options.Type(str, default=None)),
        ("username", config_options.Type(str, default=environ.get("JIRA_USERNAME", None))),
        ("password", config_options.Type(str, default=environ.get("JIRA_PASSWORD", None))),
        ("dryrun", config_options.Type(bool, default=False)),
        ("header_message", config_options.Type(str, default=HEADER_MESSAGE)),
    )

    def __init__(self):
        self.enabled = False
        self.confluence_renderer = ConfluenceRenderer(use_xhtml=True)
        self.confluence_mistune = mistune.Markdown(renderer=self.confluence_renderer)
        self.simple_log = False
        self.flen = 1
        self.session = requests.Session()
        self.page_attachments = {}
        self.repo_url = None

    def on_config(self, config):
        logger.info(config)
        # ------------------------------------------------------
        # -- Enable the plugin by setting environment variable
        # ------------------------------------------------------
        if os.environ.get(ENABLE_ENV_VAR):
            logger.info("MKDocs with Confluence is enabled")
            self.enabled = True
        else:
            logger.info(
                f"MKDocs with Confluence is disabled, set the {ENABLE_ENV_VAR} to enable the plugin"
            )
        # ------------------------------------------------------
        # -- Set the dry-run mode
        # ------------------------------------------------------
        if self.config["dryrun"]  or os.environ.get(DRY_RUN_ENV_VAR):
            logger.info("dry-run mode is turned on, your changes won't be synced with Confluence")
            self.dryrun = True
        else:
            logger.info("dry-run mode is turned off, your changes will be synced with Confluence")
            self.dryrun = False
        # ------------------------------------------------------
        # -- Set git url to add to a confluence page
        # ------------------------------------------------------
        if config["repo_url"]:
            self.repo_url = config["repo_url"]
            logger.info(f"git url is set to {self.repo_url}")

    def on_files(self, files, config):
        pages = files.documentation_pages()
        try:
            self.flen = len(pages)
            logger.debug(f"number of Files in directory tree: {self.flen}")
        except 0:
            logger.error("no files found to be synced")

    def on_page_markdown(self, markdown, page, config, files):
        # TODO: Modify pages here
        logger.warning("TODO: page should be modified in this block")
        self.session.auth = (self.config["username"], self.config["password"])
        confluencePageName = page.url[0:-1]
        #.replace("/", "-")
        if self.config["parent_page_name"] is not None:
            parent_page = self.config["parent_page_name"]
        else:
            parent_page = self.config["space"]
        page_name = ""

        # TODO: Refactor
        if confluencePageName.rsplit('/',1)[0]:
            confluencePageName = (f"{confluencePageName.rsplit('/',1)[0]}+{page.title.replace(' ', ' ')}")
        else:
            confluencePageName = (f"{page.title.replace(' ', ' ')}")
            # Create empty pages for sections only
        logger.info("preparing emtpy pages for sections")
        for path in page.url.rsplit("/", 2)[0].split("/"):
            logger.debug(f"path is {path}")
            parent_id = self.find_page_id(parent_page)
            if path:
                if page_name:
                    page_name = page_name + " " + path
                else:
                    page_name = path
                logger.info(f"Will create a page {page_name} under the {parent_page}")
                self.add_page(page_name, parent_id, "<p> I want to make sections pages better: https://git.badhouseplants.net/allanger/mkdocs-with-confluence/issues/2 </p>")
                parent_page = page_name
        parent_id = self.find_page_id(parent_page)
        confluencePageName = parent_page + " " + page.title
        new_markdown = markdown
        if self.repo_url:
            new_markdown = f">You can edit documentation here: {self.repo_url}\n" + new_markdown
        new_markdown = f">{self.config['header_message']}\n\n" + new_markdown
        # -------------------------------------------------
        # -- Sync attachments
        # -------------------------------------------------
        attachments = []
        # -- TODO: support named picture
        md_image_reg = "(?:[!]\[(?P<caption>.*?)\])\((?P<image>.*?)\)(?P<options>\{.*\})?"
        try:
            for match in re.finditer(md_image_reg, markdown):
                # -- TODO: I'm sure it can be done better
                attachment_path = "./docs" + match.group(2)
                logger.info(f"found image: ./docs{match.group(2)}")
                images = re.search(md_image_reg, new_markdown)
                # -- TODO: Options maybe the reason why page is invalid, but I'm not sure about it yet
                # new_markdown = new_markdown.replace(images.group("options"), "")
                new_markdown = re.sub(md_image_reg, f"<p><ac:image><ri:attachment ri:filename=\"{os.path.basename(attachment_path)}\"/></ac:image></p>", new_markdown)
                attachments.append(attachment_path)
        except AttributeError as e:
            logger.warning(e)
        logger.debug(f"attachments: {attachments}")
        confluence_body = self.confluence_mistune(new_markdown)
        self.add_page(confluencePageName, parent_id, confluence_body)
        if attachments:
            logger.debug(f"UPLOADING ATTACHMENTS TO CONFLUENCE FOR {page.title}, DETAILS:")
            logger.debug(f"FILES: {attachments}")
        for attachment in attachments:
            logger.debug(f"trying to upload {attachment} to {confluencePageName}")
            if self.enabled:
                try:
                    self.add_or_update_attachment(confluencePageName, attachment)
                except Exception as Argument:
                    logger.warning(Argument)
        return markdown

    def on_post_page(self, output, page, config):
        logger.info("The author was uploading images here, maybe there was a reason for that")

    def on_page_content(self, html, page, config, files):
        return html

    def __get_page_url(self, section):
        return re.search("url='(.*)'\\)", section).group(1)[:-1] + ".md"

    def __get_page_name(self, section):
        return os.path.basename(re.search("url='(.*)'\\)", section).group(1)[:-1])

    def __get_section_name(self, section):
        logger.debug(f"SECTION name: {section}")
        return os.path.basename(re.search("url='(.*)'\\/", section).group(1)[:-1])

    def __get_section_title(self, section):
        logger.debug(f"SECTION title: {section}")
        try:
            r = re.search("Section\\(title='(.*)'\\)", section)
            return r.group(1)
        except AttributeError:
            name = self.__get_section_name(section)
            logger.warning(f"Section '{name}' doesn't exist in the mkdocs.yml nav section!")
            return name

    def __get_page_title(self, section):
        try:
            r = re.search("\\s*Page\\(title='(.*)',", section)
            return r.group(1)
        except AttributeError:
            name = self.__get_page_url(section)
            logger.warning(f"Page '{name}' doesn't exist in the mkdocs.yml nav section!")
            return name

    # Adapted from https://stackoverflow.com/a/3431838
    def get_file_sha1(self, file_path):
        hash_sha1 = hashlib.sha1()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_sha1.update(chunk)
        return hash_sha1.hexdigest()

    def add_or_update_attachment(self, page_name, filepath):
        logger.warning(f"Mkdocs With Confluence * {page_name} *ADD/Update ATTACHMENT if required* {filepath}")
        logger.debug(f"Mkdocs With Confluence: Add Attachment: PAGE NAME: {page_name}, FILE: {filepath}")
        page_id = self.find_page_id(page_name)
        if page_id:
            file_hash = self.get_file_sha1(filepath)
            attachment_message = f"MKDocsWithConfluence [v{file_hash}]"
            existing_attachment = self.get_attachment(page_id, filepath)
            if existing_attachment:
                file_hash_regex = re.compile(r"\[v([a-f0-9]{40})]$")
                existing_match = file_hash_regex.search(existing_attachment["version"]["message"])
                if existing_match is not None and existing_match.group(1) == file_hash:
                    logger.debug(f" * Mkdocs With Confluence * {page_name} * Existing attachment skipping * {filepath}")
                else:
                    self.update_attachment(page_id, filepath, existing_attachment, attachment_message)
            else:
                self.create_attachment(page_id, filepath, attachment_message)
        else:
            logger.debug("PAGE DOES NOT EXISTS")

    def get_attachment(self, page_id, filepath):
        name = os.path.basename(filepath)
        logger.debug(f" * Mkdocs With Confluence: Get Attachment: PAGE ID: {page_id}, FILE: {filepath}")

        url = self.config["host_url"] + "/" + page_id + "/child/attachment"
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")

        r = self.session.get(url, headers=headers, params={"filename": name, "expand": "version"})
        r.raise_for_status()
        with nostdout():
            response_json = r.json()
        if response_json["size"]:
            return response_json["results"][0]

    def update_attachment(self, page_id, filepath, existing_attachment, message):
        logger.debug(f" * Mkdocs With Confluence: Update Attachment: PAGE ID: {page_id}, FILE: {filepath}")

        url = self.config["host_url"] + "/" + page_id + "/child/attachment/" + existing_attachment["id"] + "/data"
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")
        filename = os.path.basename(filepath)

        # determine content-type
        content_type, encoding = mimetypes.guess_type(filepath)
        if content_type is None:
            content_type = "multipart/form-data"
        files = {"file": (filename, open(Path(filepath), "rb"), content_type), "comment": message}

        if not self.dryrun:
            r = self.session.post(url, headers=headers, files=files)
            r.raise_for_status()
            logger.debug(r.json())
            if r.status_code == 200:
                logger.info("OK!")
            else:
                print("ERR!")

    def create_attachment(self, page_id, filepath, message):
        logger.debug(f" * Mkdocs With Confluence: Create Attachment: PAGE ID: {page_id}, FILE: {filepath}")

        url = self.config["host_url"] + "/" + page_id + "/child/attachment"
        headers = {"X-Atlassian-Token": "no-check"}  # no content-type here!
        logger.debug(f"URL: {url}")

        filename = os.path.basename(filepath)

        # determine content-type
        content_type, encoding = mimetypes.guess_type(filepath)
        if content_type is None:
            content_type = "multipart/form-data"
        files = {"file": (filename, open(filepath, "rb"), content_type), "comment": message}
        if not self.dryrun:
            r = self.session.post(url, headers=headers, files=files)
            logger.debug(r.json())
            r.raise_for_status()
            if r.status_code == 200:
                logger.debug("OK!")
            else:
                logger.debug("ERR!")

    def find_page_id(self, page_name):
        logger.info(f"looking for a page id of the page: {page_name}")
        name_confl = page_name.replace(" ", "+")
        url = self.config["host_url"] + "?title=" + name_confl + "&spaceKey=" + self.config["space"] + "&expand=history"
        logger.debug(f"URL: {url}")

        r = self.session.get(url)
        r.raise_for_status()
        with nostdout():
            response_json = r.json()
        if response_json["results"]:
            logger.debug(f"response: {response_json}")
            return response_json["results"][0]["id"]
        else:
            logger.debug(f"page {page_name} doens't exist")
            return None

    def add_page(self, page_name, parent_page_id, page_content_in_storage_format):
        logger.info(f"Creating a new page: {page_name} under page with ID: {parent_page_id}")
        if self.enabled:
            if self.find_page_id(page_name):
                self.update_page(page_name, page_content_in_storage_format)
            else:
                logger.info(f"Creating a new page: {page_name} under page with ID: {parent_page_id}")
                url = self.config["host_url"] + "/"
                logger.debug(f"URL: {url}")
                headers = {"Content-Type": "application/json"}
                space = self.config["space"]
                data = {
                    "type": "page",
                    "title": page_name,
                    "space": {"key": space},
                    "ancestors": [{"id": parent_page_id}],
                    "body": {"storage": {"value": page_content_in_storage_format, "representation": "storage"}},
                }
                logger.debug(f"DATA: {data}")
                if not self.dryrun:
                    try:
                        r = self.session.post(url, json=data, headers=headers)
                        r.raise_for_status()
                    except Exception as exp:
                        logger.error(exp)
                    if r.status_code == 200:
                        logger.info(f"page created: {page_name}")
                    else:
                        logger.error(f"page can't be created: {page_name}")

    def update_page(self, page_name, page_content_in_storage_format):
        page_id = self.find_page_id(page_name)
        logger.debug(f"updating page {page_name}")
        if page_id:
            page_version = self.find_page_version(page_name)
            page_version = page_version + 1
            url = self.config["host_url"] + "/" + page_id
            headers = {"Content-Type": "application/json"}
            space = self.config["space"]
            data = {
                "id": page_id,
                "title": page_name,
                "type": "page",
                "space": {"key": space},
                "body": {"storage": {"value": page_content_in_storage_format, "representation": "storage"}},
                "version": {"number": page_version},
            }

            if not self.dryrun:
                try:
                    r = self.session.put(url, json=data, headers=headers)
                    r.raise_for_status()
                except Exception as exp:
                    logger.error(exp)
                if r.status_code == 200:
                    logger.info(f"page created: {page_name}")
                else:
                    logger.error(f"page can't be created: {page_name}")
        else:
            logger.warning("page {page_name} doesn't exist")

    def find_page_version(self, page_name):
        logger.debug(f"INFO    -   * Mkdocs With Confluence: Find PAGE VERSION, PAGE NAME: {page_name}")
        name_confl = page_name.replace(" ", "+")
        url = self.config["host_url"] + "?title=" + name_confl + "&spaceKey=" + self.config["space"] + "&expand=version"
        r = self.session.get(url)
        r.raise_for_status()
        with nostdout():
            response_json = r.json()
        if response_json["results"] is not None:
            logger.debug(f"VERSION: {response_json['results'][0]['version']['number']}")
            return response_json["results"][0]["version"]["number"]
        else:
            logger.debug("PAGE DOES NOT EXISTS")
            return None

    def find_parent_name_of_page(self, name):
        logger.debug(f"INFO    -   * Mkdocs With Confluence: Find PARENT OF PAGE, PAGE NAME: {name}")
        idp = self.find_page_id(name)
        url = self.config["host_url"] + "/" + idp + "?expand=ancestors"

        r = self.session.get(url)
        r.raise_for_status()
        with nostdout():
            response_json = r.json()
        if response_json:
            logger.debug(f"PARENT NAME: {response_json['ancestors'][-1]['title']}")
            return response_json["ancestors"][-1]["title"]
        else:
            logger.debug("PAGE DOES NOT HAVE PARENT")
            return None

    def wait_until(self, condition, interval=0.1, timeout=1):
        start = time.time()
        while not condition and time.time() - start < timeout:
            time.sleep(interval)