Merge pull request #273773 from nbraud/scripts/doc/check-manpage-urls

maintainers: Add new script to check URLs in `doc/manpage-urls.json`
This commit is contained in:
Silvan Mosberger 2023-12-20 19:33:32 +01:00 committed by GitHub
commit f27bb1216c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 133 additions and 2 deletions

View File

@ -29,4 +29,4 @@ jobs:
name: nixpkgs-ci
signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}'
- name: Building Nixpkgs manual
run: NIX_PATH=nixpkgs=$(pwd) nix-build --option restrict-eval true pkgs/top-level/release.nix -A manual
run: NIX_PATH=nixpkgs=$(pwd) nix-build --option restrict-eval true pkgs/top-level/release.nix -A manual -A manual.tests

View File

@ -149,4 +149,26 @@ in pkgs.stdenv.mkDerivation {
echo "doc manual $dest ${common.indexPath}" >> $out/nix-support/hydra-build-products
echo "doc manual $dest nixpkgs-manual.epub" >> $out/nix-support/hydra-build-products
'';
passthru.tests.manpage-urls = with pkgs; testers.invalidateFetcherByDrvHash
({ name ? "manual_check-manpage-urls"
, script
, urlsFile
}: runCommand name {
nativeBuildInputs = [
cacert
(python3.withPackages (p: with p; [
aiohttp
rich
structlog
]))
];
outputHash = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; # Empty output
} ''
python3 ${script} ${urlsFile}
touch $out
'') {
script = ./tests/manpage-urls.py;
urlsFile = ./manpage-urls.json;
};
}

View File

@ -1,5 +1,5 @@
{
"gnunet.conf(5)": "https://docs.gnunet.org/users/configuration.html",
"gnunet.conf(5)": "https://docs.gnunet.org/latest/users/configuration.html",
"mpd(1)": "https://mpd.readthedocs.io/en/latest/mpd.1.html",
"mpd.conf(5)": "https://mpd.readthedocs.io/en/latest/mpd.conf.5.html",
"nix.conf(5)": "https://nixos.org/manual/nix/stable/command-ref/conf-file.html",

109
doc/tests/manpage-urls.py Executable file
View File

@ -0,0 +1,109 @@
#! /usr/bin/env nix-shell
#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
from argparse import ArgumentParser, Namespace
from collections import defaultdict
from collections.abc import Mapping, Sequence
from enum import IntEnum
from http import HTTPStatus
from pathlib import Path
from typing import Optional
import asyncio, json, logging
import aiohttp, structlog
from structlog.contextvars import bound_contextvars as log_context
LogLevel = IntEnum('LogLevel', {
lvl: getattr(logging, lvl)
for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
})
LogLevel.__str__ = lambda self: self.name
EXPECTED_STATUS=frozenset((
HTTPStatus.OK, HTTPStatus.FOUND,
HTTPStatus.NOT_FOUND,
))
async def check(session: aiohttp.ClientSession, manpage: str, url: str) -> HTTPStatus:
with log_context(manpage=manpage, url=url):
logger.debug("Checking")
async with session.head(url) as resp:
st = HTTPStatus(resp.status)
match st:
case HTTPStatus.OK | HTTPStatus.FOUND:
logger.debug("OK!")
case HTTPStatus.NOT_FOUND:
logger.error("Broken link!")
case _ if st < 400:
logger.info("Unexpected code", status=st)
case _ if 400 <= st < 600:
logger.warn("Unexpected error", status=st)
return st
async def main(urls_path: Path) -> Mapping[HTTPStatus, int]:
logger.info(f"Parsing {urls_path}")
with urls_path.open() as urls_file:
urls = json.load(urls_file)
count: defaultdict[HTTPStatus, int] = defaultdict(lambda: 0)
logger.info(f"Checking URLs from {urls_path}")
async with aiohttp.ClientSession() as session:
for status in asyncio.as_completed([
check(session, manpage, url)
for manpage, url in urls.items()
]):
count[await status]+=1
ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
broken = count[HTTPStatus.NOT_FOUND]
unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
logger.info(f"Done: {broken} broken links, "
f"{ok} correct links, and {unknown} unexpected status")
return count
def parse_args(args: Optional[Sequence[str]] = None) -> Namespace:
parser = ArgumentParser(
prog = 'check-manpage-urls',
description = 'Check the validity of the manpage URLs linked in the nixpkgs manual',
)
parser.add_argument(
'-l', '--log-level',
default = os.getenv('LOG_LEVEL', 'INFO'),
type = lambda s: LogLevel[s],
choices = list(LogLevel),
)
parser.add_argument(
'file',
type = Path,
nargs = '?',
)
return parser.parse_args(args)
if __name__ == "__main__":
import os, sys
args = parse_args()
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
)
logger = structlog.getLogger("check-manpage-urls.py")
urls_path = args.file
if urls_path is None:
REPO_ROOT = Path(__file__).parent.parent.parent.parent
logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")
urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'
count = asyncio.run(main(urls_path))
sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)