Merge pull request #303388 from SuperSandro2000/paperless-only-enabled-languages

nixos/paperless: override the enabled tesseract languages with the ones configured in paperless
This commit is contained in:
Leona Maroni 2024-04-14 14:59:33 +02:00 committed by GitHub
commit 025d3a2f65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 18 deletions

View File

@ -3,7 +3,6 @@
with lib; with lib;
let let
cfg = config.services.paperless; cfg = config.services.paperless;
pkg = cfg.package;
defaultUser = "paperless"; defaultUser = "paperless";
defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf"; defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf";
@ -25,7 +24,7 @@ let
} // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) { } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) {
PAPERLESS_NLTK_DIR = pkgs.symlinkJoin { PAPERLESS_NLTK_DIR = pkgs.symlinkJoin {
name = "paperless_ngx_nltk_data"; name = "paperless_ngx_nltk_data";
paths = pkg.nltkData; paths = cfg.package.nltkData;
}; };
} // optionalAttrs (cfg.openMPThreadingWorkaround) { } // optionalAttrs (cfg.openMPThreadingWorkaround) {
OMP_NUM_THREADS = "1"; OMP_NUM_THREADS = "1";
@ -38,7 +37,7 @@ let
manage = pkgs.writeShellScript "manage" '' manage = pkgs.writeShellScript "manage" ''
set -o allexport # Export the following env vars set -o allexport # Export the following env vars
${lib.toShellVars env} ${lib.toShellVars env}
exec ${pkg}/bin/paperless-ngx "$@" exec ${cfg.package}/bin/paperless-ngx "$@"
''; '';
# Secure the services # Secure the services
@ -200,7 +199,17 @@ in
description = "User under which Paperless runs."; description = "User under which Paperless runs.";
}; };
package = mkPackageOption pkgs "paperless-ngx" { }; package = mkPackageOption pkgs "paperless-ngx" { } // {
apply = pkg: pkg.override {
tesseract5 = pkg.tesseract5.override {
# always enable detection modules
enableLanguages = if cfg.settings ? PAPERLESS_OCR_LANGUAGE then
[ "equ" "osd" ]
++ lib.splitString "+" cfg.settings.PAPERLESS_OCR_LANGUAGE
else null;
};
};
};
openMPThreadingWorkaround = mkEnableOption '' openMPThreadingWorkaround = mkEnableOption ''
a workaround for document classifier timeouts. a workaround for document classifier timeouts.
@ -237,7 +246,7 @@ in
wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ]; wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ];
serviceConfig = defaultServiceConfig // { serviceConfig = defaultServiceConfig // {
User = cfg.user; User = cfg.user;
ExecStart = "${pkg}/bin/celery --app paperless beat --loglevel INFO"; ExecStart = "${cfg.package}/bin/celery --app paperless beat --loglevel INFO";
Restart = "on-failure"; Restart = "on-failure";
LoadCredential = lib.optionalString (cfg.passwordFile != null) "PAPERLESS_ADMIN_PASSWORD:${cfg.passwordFile}"; LoadCredential = lib.optionalString (cfg.passwordFile != null) "PAPERLESS_ADMIN_PASSWORD:${cfg.passwordFile}";
}; };
@ -250,8 +259,8 @@ in
versionFile="${cfg.dataDir}/src-version" versionFile="${cfg.dataDir}/src-version"
version=$(cat "$versionFile" 2>/dev/null || echo 0) version=$(cat "$versionFile" 2>/dev/null || echo 0)
if [[ $version != ${pkg.version} ]]; then if [[ $version != ${cfg.package.version} ]]; then
${pkg}/bin/paperless-ngx migrate ${cfg.package}/bin/paperless-ngx migrate
# Parse old version string format for backwards compatibility # Parse old version string format for backwards compatibility
version=$(echo "$version" | grep -ohP '[^-]+$') version=$(echo "$version" | grep -ohP '[^-]+$')
@ -264,10 +273,10 @@ in
if versionLessThan 1.12.0; then if versionLessThan 1.12.0; then
# Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1 # Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1
echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade." echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade."
${pkg}/bin/paperless-ngx document_index reindex ${cfg.package}/bin/paperless-ngx document_index reindex
fi fi
echo ${pkg.version} > "$versionFile" echo ${cfg.package.version} > "$versionFile"
fi fi
'' ''
+ optionalString (cfg.passwordFile != null) '' + optionalString (cfg.passwordFile != null) ''
@ -277,7 +286,7 @@ in
superuserStateFile="${cfg.dataDir}/superuser-state" superuserStateFile="${cfg.dataDir}/superuser-state"
if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then
${pkg}/bin/paperless-ngx manage_superuser ${cfg.package}/bin/paperless-ngx manage_superuser
echo "$superuserState" > "$superuserStateFile" echo "$superuserState" > "$superuserStateFile"
fi fi
''; '';
@ -290,7 +299,7 @@ in
after = [ "paperless-scheduler.service" ]; after = [ "paperless-scheduler.service" ];
serviceConfig = defaultServiceConfig // { serviceConfig = defaultServiceConfig // {
User = cfg.user; User = cfg.user;
ExecStart = "${pkg}/bin/celery --app paperless worker --loglevel INFO"; ExecStart = "${cfg.package}/bin/celery --app paperless worker --loglevel INFO";
Restart = "on-failure"; Restart = "on-failure";
# The `mbind` syscall is needed for running the classifier. # The `mbind` syscall is needed for running the classifier.
SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ]; SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ];
@ -308,7 +317,7 @@ in
after = [ "paperless-scheduler.service" ]; after = [ "paperless-scheduler.service" ];
serviceConfig = defaultServiceConfig // { serviceConfig = defaultServiceConfig // {
User = cfg.user; User = cfg.user;
ExecStart = "${pkg}/bin/paperless-ngx document_consumer"; ExecStart = "${cfg.package}/bin/paperless-ngx document_consumer";
Restart = "on-failure"; Restart = "on-failure";
}; };
environment = env; environment = env;
@ -340,8 +349,8 @@ in
echo "PAPERLESS_SECRET_KEY is empty, refusing to start." echo "PAPERLESS_SECRET_KEY is empty, refusing to start."
exit 1 exit 1
fi fi
exec ${pkg.python.pkgs.gunicorn}/bin/gunicorn \ exec ${cfg.package.python.pkgs.gunicorn}/bin/gunicorn \
-c ${pkg}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application -c ${cfg.package}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application
''; '';
serviceConfig = defaultServiceConfig // { serviceConfig = defaultServiceConfig // {
User = cfg.user; User = cfg.user;
@ -357,7 +366,7 @@ in
CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ]; CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ];
}; };
environment = env // { environment = env // {
PYTHONPATH = "${pkg.python.pkgs.makePythonPath pkg.propagatedBuildInputs}:${pkg}/lib/paperless-ngx/src"; PYTHONPATH = "${cfg.package.python.pkgs.makePythonPath cfg.package.propagatedBuildInputs}:${cfg.package}/lib/paperless-ngx/src";
}; };
# Allow the web interface to access the private /tmp directory of the server. # Allow the web interface to access the private /tmp directory of the server.
# This is required to support uploading files via the web interface. # This is required to support uploading files via the web interface.

View File

@ -37,8 +37,11 @@ let
# https://github.com/NixOS/nixpkgs/issues/298719 # https://github.com/NixOS/nixpkgs/issues/298719
# https://github.com/paperless-ngx/paperless-ngx/issues/5494 # https://github.com/paperless-ngx/paperless-ngx/issues/5494
python = python3.override { python = python3.override {
packageOverrides = self: super: { packageOverrides = final: prev: {
uvicorn = super.uvicorn.overridePythonAttrs (oldAttrs: { # tesseract5 may be overridden in the paperless module, and we need to propagate that to make the closure reduction effective
ocrmypdf = prev.ocrmypdf.override { tesseract = tesseract5; };
uvicorn = prev.uvicorn.overridePythonAttrs (_: {
version = "0.25.0"; version = "0.25.0";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "encode"; owner = "encode";
@ -245,7 +248,7 @@ python.pkgs.buildPythonApplication rec {
doCheck = !stdenv.isDarwin; doCheck = !stdenv.isDarwin;
passthru = { passthru = {
inherit python path frontend; inherit python path frontend tesseract5;
nltkData = with nltk-data; [ punkt snowball_data stopwords ]; nltkData = with nltk-data; [ punkt snowball_data stopwords ];
tests = { inherit (nixosTests) paperless; }; tests = { inherit (nixosTests) paperless; };
}; };