From e2019c49d4a0b6f4d4b34aab814a8922e94c9445 Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 18 Jan 2023 15:31:41 +0100 Subject: [PATCH] nixos/make-options-doc: use markdown-it-py for rendering only whitespace changes (mostly empty descriptions rendered as literal line breaks and trailing space toPretty generates, but that were dropped by mistune). --- nixos/lib/make-options-doc/default.nix | 15 +- nixos/lib/make-options-doc/mergeJSON.py | 307 +++++++++++++----------- 2 files changed, 184 insertions(+), 138 deletions(-) diff --git a/nixos/lib/make-options-doc/default.nix b/nixos/lib/make-options-doc/default.nix index 3a5e1f2023d5..335217703c82 100644 --- a/nixos/lib/make-options-doc/default.nix +++ b/nixos/lib/make-options-doc/default.nix @@ -118,7 +118,20 @@ in rec { inherit self; includeSiteCustomize = true; }); - in self.withPackages (p: [ p.mistune ])) + in self.withPackages (p: + let + # TODO add our own small test suite when rendering is split out into a new tool + markdown-it-py = p.markdown-it-py.override { + disableTests = true; + }; + mdit-py-plugins = p.mdit-py-plugins.override { + inherit markdown-it-py; + disableTests = true; + }; + in [ + markdown-it-py + mdit-py-plugins + ])) ]; options = builtins.toFile "options.json" (builtins.unsafeDiscardStringContext (builtins.toJSON optionsNix)); diff --git a/nixos/lib/make-options-doc/mergeJSON.py b/nixos/lib/make-options-doc/mergeJSON.py index 686c57ef7be0..2de0bbae1d91 100644 --- a/nixos/lib/make-options-doc/mergeJSON.py +++ b/nixos/lib/make-options-doc/mergeJSON.py @@ -3,9 +3,17 @@ import json import os import sys from typing import Any, Dict, List +from collections.abc import MutableMapping, Sequence +import inspect # for MD conversion -import mistune +import markdown_it +import markdown_it.renderer +from markdown_it.token import Token +from markdown_it.utils import OptionsDict +from mdit_py_plugins.container import container_plugin +from mdit_py_plugins.deflist import deflist_plugin +from 
mdit_py_plugins.myst_role import myst_role_plugin import re from xml.sax.saxutils import escape, quoteattr @@ -49,149 +57,174 @@ def unpivot(options: Dict[Key, Option]) -> Dict[str, JSON]: manpage_urls = json.load(open(os.getenv('MANPAGE_URLS'))) -admonitions = { - '.warning': 'warning', - '.important': 'important', - '.note': 'note' -} -class Renderer(mistune.renderers.BaseRenderer): - def _get_method(self, name): - try: - return super(Renderer, self)._get_method(name) - except AttributeError: - def not_supported(*args, **kwargs): - raise NotImplementedError("md node not supported yet", name, args, **kwargs) - return not_supported - - def text(self, text): - return escape(text) - def paragraph(self, text): - return f"{text}" - def newline(self): - return "\n" - def codespan(self, text): - return f"{escape(text)}" - def block_code(self, text, info=None): - info = f" language={quoteattr(info)}" if info is not None else "" - return f"{escape(text)}" - def link(self, link, text=None, title=None): - tag = "link" - if link[0:1] == '#': - if text == "": - tag = "xref" - attr = "linkend" - link = link[1:] - else: - # try to faithfully reproduce links that were of the form - # in docbook format - if text == link: - text = "" - attr = "xlink:href" - return f"<{tag} {attr}=\"{link}\">{text}" - def list(self, text, ordered, level, start=None): - if ordered: - raise NotImplementedError("ordered lists not supported yet") - return f"\n{text}\n" - def list_item(self, text, level): - return f"{text}\n" - def block_text(self, text): - return self.paragraph(text) - def emphasis(self, text): - return f"{text}" - def strong(self, text): - return f"{text}" - def admonition(self, text, kind): - if kind not in admonitions: - raise NotImplementedError(f"admonition {kind} not supported yet") - tag = admonitions[kind] - return f"<{tag}>{text.rstrip()}" - def block_quote(self, text): - return f"
{text}
" - def command(self, text): - return f"{escape(text)}" - def option(self, text): - return f"" - def file(self, text): - return f"{escape(text)}" - def var(self, text): - return f"{escape(text)}" - def env(self, text): - return f"{escape(text)}" - def manpage(self, page, section): - man = f"{page}({section})" - title = f"{escape(page)}" - vol = f"{escape(section)}" - ref = f"{title}{vol}" - if man in manpage_urls: - return self.link(manpage_urls[man], text=ref) - else: - return ref - - def finalize(self, data): - return "".join(data) - -def p_command(md): - COMMAND_PATTERN = r'\{command\}`(.*?)`' - def parse(self, m, state): - return ('command', m.group(1)) - md.inline.register_rule('command', COMMAND_PATTERN, parse) - md.inline.rules.append('command') - -def p_file(md): - FILE_PATTERN = r'\{file\}`(.*?)`' - def parse(self, m, state): - return ('file', m.group(1)) - md.inline.register_rule('file', FILE_PATTERN, parse) - md.inline.rules.append('file') - -def p_var(md): - VAR_PATTERN = r'\{var\}`(.*?)`' - def parse(self, m, state): - return ('var', m.group(1)) - md.inline.register_rule('var', VAR_PATTERN, parse) - md.inline.rules.append('var') - -def p_env(md): - ENV_PATTERN = r'\{env\}`(.*?)`' - def parse(self, m, state): - return ('env', m.group(1)) - md.inline.register_rule('env', ENV_PATTERN, parse) - md.inline.rules.append('env') - -def p_option(md): - OPTION_PATTERN = r'\{option\}`(.*?)`' - def parse(self, m, state): - return ('option', m.group(1)) - md.inline.register_rule('option', OPTION_PATTERN, parse) - md.inline.rules.append('option') - -def p_manpage(md): - MANPAGE_PATTERN = r'\{manpage\}`(.*?)\((.+?)\)`' - def parse(self, m, state): - return ('manpage', m.group(1), m.group(2)) - md.inline.register_rule('manpage', MANPAGE_PATTERN, parse) - md.inline.rules.append('manpage') - -def p_admonition(md): - ADMONITION_PATTERN = re.compile(r'^::: \{([^\n]*?)\}\n(.*?)^:::$\n*', flags=re.MULTILINE|re.DOTALL) - def parse(self, m, state): - return { - 'type': 
'admonition', - 'children': self.parse(m.group(2), state), - 'params': [ m.group(1) ], +class Renderer(markdown_it.renderer.RendererProtocol): + __output__ = "docbook" + def __init__(self, parser=None): + self.rules = { + k: v + for k, v in inspect.getmembers(self, predicate=inspect.ismethod) + if not (k.startswith("render") or k.startswith("_")) + } | { + "container_{.note}_open": self._note_open, + "container_{.note}_close": self._note_close, + "container_{.important}_open": self._important_open, + "container_{.important}_close": self._important_close, + "container_{.warning}_open": self._warning_open, + "container_{.warning}_close": self._warning_close, } - md.block.register_rule('admonition', ADMONITION_PATTERN, parse) - md.block.rules.append('admonition') + def render(self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping) -> str: + assert '-link-tag-stack' not in env + env['-link-tag-stack'] = [] + assert '-deflist-stack' not in env + env['-deflist-stack'] = [] + def do_one(i, token): + if token.type == "inline": + assert token.children is not None + return self.renderInline(token.children, options, env) + elif token.type in self.rules: + return self.rules[token.type](tokens[i], tokens, i, options, env) + else: + raise NotImplementedError("md token not supported yet", token) + return "".join(map(lambda arg: do_one(*arg), enumerate(tokens))) + def renderInline(self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping) -> str: + # HACK to support docbook links and xrefs. link handling is only necessary because the docbook + # manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect. 
+ for i, token in enumerate(tokens): + if token.type != 'link_open': + continue + token.tag = 'link' + # turn [](#foo) into xrefs + if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close': + token.tag = "xref" + # turn into links without contents + if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']: + tokens[i + 1].content = '' -md = mistune.create_markdown(renderer=Renderer(), plugins=[ - p_command, p_file, p_var, p_env, p_option, p_manpage, p_admonition -]) + def do_one(i, token): + if token.type in self.rules: + return self.rules[token.type](tokens[i], tokens, i, options, env) + else: + raise NotImplementedError("md node not supported yet", token) + return "".join(map(lambda arg: do_one(*arg), enumerate(tokens))) + + def text(self, token, tokens, i, options, env): + return escape(token.content) + def paragraph_open(self, token, tokens, i, options, env): + return "" + def paragraph_close(self, token, tokens, i, options, env): + return "" + def hardbreak(self, token, tokens, i, options, env): + return "\n" + def softbreak(self, token, tokens, i, options, env): + # should check options.breaks() and emit hard break if so + return "\n" + def code_inline(self, token, tokens, i, options, env): + return f"{escape(token.content)}" + def code_block(self, token, tokens, i, options, env): + return f"{escape(token.content)}" + def link_open(self, token, tokens, i, options, env): + env['-link-tag-stack'].append(token.tag) + (attr, start) = ('linkend', 1) if token.attrs['href'][0] == '#' else ('xlink:href', 0) + return f"<{token.tag} {attr}={quoteattr(token.attrs['href'][start:])}>" + def link_close(self, token, tokens, i, options, env): + return f"" + def list_item_open(self, token, tokens, i, options, env): + return "" + def list_item_close(self, token, tokens, i, options, env): + return "\n" + # HACK open and close para for docbook change size. remove soon. 
+ def bullet_list_open(self, token, tokens, i, options, env): + return "\n" + def bullet_list_close(self, token, tokens, i, options, env): + return "\n" + def em_open(self, token, tokens, i, options, env): + return "" + def em_close(self, token, tokens, i, options, env): + return "" + def strong_open(self, token, tokens, i, options, env): + return "" + def strong_close(self, token, tokens, i, options, env): + return "" + def fence(self, token, tokens, i, options, env): + info = f" language={quoteattr(token.info)}" if token.info != "" else "" + return f"{escape(token.content)}" + def blockquote_open(self, token, tokens, i, options, env): + return "
" + def blockquote_close(self, token, tokens, i, options, env): + return "
" + def _note_open(self, token, tokens, i, options, env): + return "" + def _note_close(self, token, tokens, i, options, env): + return "" + def _important_open(self, token, tokens, i, options, env): + return "" + def _important_close(self, token, tokens, i, options, env): + return "" + def _warning_open(self, token, tokens, i, options, env): + return "" + def _warning_close(self, token, tokens, i, options, env): + return "" + # markdown-it emits tokens based on the html syntax tree, but docbook is + # slightly different. html has
<dl>{<dt/>{<dd/>}}</dl>
, + # docbook has {} + # we have to reject multiple definitions for the same term for time being. + def dl_open(self, token, tokens, i, options, env): + env['-deflist-stack'].append({}) + return "" + def dl_close(self, token, tokens, i, options, env): + env['-deflist-stack'].pop() + return "" + def dt_open(self, token, tokens, i, options, env): + env['-deflist-stack'][-1]['has-dd'] = False + return "" + def dt_close(self, token, tokens, i, options, env): + return "" + def dd_open(self, token, tokens, i, options, env): + if env['-deflist-stack'][-1]['has-dd']: + raise Exception("multiple definitions per term not supported") + env['-deflist-stack'][-1]['has-dd'] = True + return "" + def dd_close(self, token, tokens, i, options, env): + return "" + def myst_role(self, token, tokens, i, options, env): + if token.meta['name'] == 'command': + return f"{escape(token.content)}" + if token.meta['name'] == 'file': + return f"{escape(token.content)}" + if token.meta['name'] == 'var': + return f"{escape(token.content)}" + if token.meta['name'] == 'env': + return f"{escape(token.content)}" + if token.meta['name'] == 'option': + return f"" + if token.meta['name'] == 'manpage': + [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ] + section = section[:-1] + man = f"{page}({section})" + title = f"{escape(page)}" + vol = f"{escape(section)}" + ref = f"{title}{vol}" + if man in manpage_urls: + return f"{ref}" + else: + return ref + raise NotImplementedError("md node not supported yet", token) + +md = ( + markdown_it.MarkdownIt(renderer_cls=Renderer) + # TODO maybe fork the plugin and have only a single rule for all? + .use(container_plugin, name="{.note}") + .use(container_plugin, name="{.important}") + .use(container_plugin, name="{.warning}") + .use(deflist_plugin) + .use(myst_role_plugin) +) # converts in-place! 
def convertMD(options: Dict[str, Any]) -> str: def convertString(path: str, text: str) -> str: try: - rendered = md(text) + rendered = md.render(text) return rendered except: print(f"error in {path}")