diff --git a/nixos/doc/manual/default.nix b/nixos/doc/manual/default.nix index 8c71e5108799..714b3efca20a 100644 --- a/nixos/doc/manual/default.nix +++ b/nixos/doc/manual/default.nix @@ -158,7 +158,7 @@ let '@NIXOS_TEST_OPTIONS_JSON@' \ ${testOptionsDoc.optionsJSON}/share/doc/nixos/options.json - nixos-render-docs manual docbook \ + nixos-render-docs -j $NIX_BUILD_CORES manual docbook \ --manpage-urls ${manpageUrls} \ --revision ${lib.escapeShellArg revision} \ ./manual.md \ @@ -285,7 +285,7 @@ in rec { '' else '' mkdir -p $out/share/man/man5 - nixos-render-docs options manpage \ + nixos-render-docs -j $NIX_BUILD_CORES options manpage \ --revision ${lib.escapeShellArg revision} \ ${optionsJSON}/share/doc/nixos/options.json \ $out/share/man/man5/configuration.nix.5 diff --git a/nixos/lib/make-options-doc/default.nix b/nixos/lib/make-options-doc/default.nix index 09b0191d2bb8..50fb9ede08de 100644 --- a/nixos/lib/make-options-doc/default.nix +++ b/nixos/lib/make-options-doc/default.nix @@ -152,7 +152,7 @@ in rec { pkgs.nixos-render-docs ]; } '' - nixos-render-docs options docbook \ + nixos-render-docs -j $NIX_BUILD_CORES options docbook \ --manpage-urls ${pkgs.path + "/doc/manpage-urls.json"} \ --revision ${lib.escapeShellArg revision} \ --document-type ${lib.escapeShellArg documentType} \ diff --git a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py index 56b68ba27a53..1c58accb4166 100644 --- a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py +++ b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/__init__.py @@ -10,6 +10,7 @@ from typing import Any, Dict from .md import Converter from . import manual from . import options +from . 
import parallel def pretty_print_exc(e: BaseException, *, _desc_text: str = "error") -> None: print(f"\x1b[1;31m{_desc_text}:\x1b[0m", file=sys.stderr) @@ -35,6 +36,7 @@ def pretty_print_exc(e: BaseException, *, _desc_text: str = "error") -> None: def main() -> None: parser = argparse.ArgumentParser(description='render nixos manual bits') + parser.add_argument('-j', '--jobs', type=int, default=None) commands = parser.add_subparsers(dest='command', required=True) @@ -43,6 +45,7 @@ def main() -> None: args = parser.parse_args() try: + parallel.pool_processes = args.jobs if args.command == 'options': options.run_cli(args) elif args.command == 'manual': diff --git a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/options.py b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/options.py index 3cba36140bb4..8282d7493249 100644 --- a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/options.py +++ b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/options.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import argparse import json @@ -10,6 +12,7 @@ from xml.sax.saxutils import escape, quoteattr import markdown_it +from . import parallel from .docbook import DocBookRenderer, make_xml_id from .manpage import ManpageRenderer, man_escape from .md import Converter, md_escape @@ -148,15 +151,33 @@ class BaseConverter(Converter): return [ l for part in blocks for l in part ] + # this could return a TState parameter, but that does not allow dependent types and + # will cause headaches when using BaseConverter as a type bound anywhere. Any is the + # next best thing we can use, and since this is internal it will be mostly safe. + @abstractmethod + def _parallel_render_prepare(self) -> Any: raise NotImplementedError() + # this should return python 3.11's Self instead to ensure that a prepare+finish + # round-trip ends up with an object of the same type. for now we'll use BaseConverter + # since it's good enough so far. 
+ @classmethod + @abstractmethod + def _parallel_render_init_worker(cls, a: Any) -> BaseConverter: raise NotImplementedError() + def _render_option(self, name: str, option: dict[str, Any]) -> RenderedOption: try: return RenderedOption(option['loc'], self._convert_one(option)) except Exception as e: raise Exception(f"Failed to render option {name}") from e + @classmethod + def _parallel_render_step(cls, s: BaseConverter, a: Any) -> RenderedOption: + return s._render_option(*a) + def add_options(self, options: dict[str, Any]) -> None: - for (name, option) in options.items(): - self._options[name] = self._render_option(name, option) + mapped = parallel.map(self._parallel_render_step, options.items(), 100, + self._parallel_render_init_worker, self._parallel_render_prepare()) + for (name, option) in zip(options.keys(), mapped): + self._options[name] = option @abstractmethod def finalize(self) -> str: raise NotImplementedError() @@ -194,6 +215,13 @@ class DocBookConverter(BaseConverter): self._varlist_id = varlist_id self._id_prefix = id_prefix + def _parallel_render_prepare(self) -> Any: + return (self._manpage_urls, self._revision, self._markdown_by_default, self._document_type, + self._varlist_id, self._id_prefix) + @classmethod + def _parallel_render_init_worker(cls, a: Any) -> DocBookConverter: + return cls(*a) + def _render_code(self, option: dict[str, Any], key: str) -> list[str]: if lit := option_is(option, key, 'literalDocBook'): return [ f"{key.capitalize()}: {lit['text']}" ] @@ -283,10 +311,19 @@ class ManpageConverter(BaseConverter): _options_by_id: dict[str, str] _links_in_last_description: Optional[list[str]] = None - def __init__(self, revision: str, markdown_by_default: bool): - self._options_by_id = {} + def __init__(self, revision: str, markdown_by_default: bool, + *, + # only for parallel rendering + _options_by_id: Optional[dict[str, str]] = None): + self._options_by_id = _options_by_id or {} super().__init__({}, revision, markdown_by_default) + def 
_parallel_render_prepare(self) -> Any: + return ((self._revision, self._markdown_by_default), { '_options_by_id': self._options_by_id }) + @classmethod + def _parallel_render_init_worker(cls, a: Any) -> ManpageConverter: + return cls(*a[0], **a[1]) + def _render_option(self, name: str, option: dict[str, Any]) -> RenderedOption: assert isinstance(self._md.renderer, OptionsManpageRenderer) links = self._md.renderer.link_footnotes = [] diff --git a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/parallel.py b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/parallel.py new file mode 100644 index 000000000000..c968d3a1322c --- /dev/null +++ b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/parallel.py @@ -0,0 +1,58 @@ +# this module only has to exist because cpython has a global interpreter lock +# and markdown-it is pure python code. ideally we'd just use thread pools, but +# the GIL prohibits this. + +import multiprocessing + +from typing import Any, Callable, ClassVar, Iterable, Optional, TypeVar + +R = TypeVar('R') +S = TypeVar('S') +T = TypeVar('T') +A = TypeVar('A') + +pool_processes: Optional[int] = None + +# this thing is impossible to type because there's so much global state involved. +# wrapping in a class to get access to Generic[] parameters is not sufficient +# because mypy is too weak, and unnecessarily obscures how much global state is +# needed in each worker to make this whole brouhaha work. +_map_worker_fn: Any = None +_map_worker_state_fn: Any = None +_map_worker_state_arg: Any = None + +def _map_worker_init(*args: Any) -> None: + global _map_worker_fn, _map_worker_state_fn, _map_worker_state_arg + (_map_worker_fn, _map_worker_state_fn, _map_worker_state_arg) = args + +# NOTE: the state argument is never passed by any caller, we only use it as a localized +# cache for the created state in lieu of another global. it is effectively a global though. 
+def _map_worker_step(arg: Any, state: Any = []) -> Any: + global _map_worker_fn, _map_worker_state_fn, _map_worker_state_arg + # if a Pool initializer throws it'll just be retried, leading to endless loops. + # doing the proper initialization only on first use avoids this. + if not state: + state.append(_map_worker_state_fn(_map_worker_state_arg)) + return _map_worker_fn(state[0], arg) + +def map(fn: Callable[[S, T], R], d: Iterable[T], chunk_size: int, + state_fn: Callable[[A], S], state_arg: A) -> list[R]: + """ + `[ fn(state, i) for i in d ]` where `state = state_fn(state_arg)`, but using multiprocessing + if `pool_processes` is not `None`. when multiprocessing is used the state function will + be run once in every worker process and `multiprocessing.Pool.imap` will be used. + + **NOTE:** neither `state_fn` nor `fn` are allowed to mutate global state! doing so will cause + discrepancies if `pool_processes` is not None, since each worker will have its own copy. + + **NOTE**: all data types that potentially cross a process boundary (so, all of them) must be + pickle-able. this excludes lambdas, bound functions, local functions, and a number of other + types depending on their exact internal structure. *theoretically* the pool constructor + can transfer non-pickleable data to worker processes, but this only works when using the + `fork` spawn method (and is thus not available on darwin or windows). + """ + if pool_processes is None: + state = state_fn(state_arg) + return [ fn(state, i) for i in d ] + with multiprocessing.Pool(pool_processes, _map_worker_init, (fn, state_fn, state_arg)) as p: + return list(p.imap(_map_worker_step, d, chunk_size))