Merge pull request #29141 from danielfullmer/k2pdfopt
k2pdfopt: 2.32 -> 2.42
This commit is contained in:
commit
0504dd6fb0
@ -8,7 +8,18 @@ stdenv.mkDerivation rec {
|
||||
sha256 = "1dgmcpapy7h68d53q2c5d0bpgzgfb2nw2blndnx9qhc7z12149mw";
|
||||
};
|
||||
|
||||
buildFlags = [ "all" "libs" ];
|
||||
installFlags = [ "libdir=/lib/" ]; # Specify libdir so Makefile will also install library.
|
||||
|
||||
preInstall = "mkdir -p $out/lib";
|
||||
|
||||
# After the regular install, also ship the two GOCR headers (pgm2asc.h and
# gocr.h from src/) under $out/include/gocr, each installed with mode 644.
postInstall = ''
|
||||
for i in pgm2asc.h gocr.h; do
|
||||
install -D -m644 src/$i $out/include/gocr/$i
|
||||
done
|
||||
'';
|
||||
|
||||
# Rewrite the "exec wish" invocation in the installed gocr.tcl script so it
# calls wish by its absolute path under ${tk}/bin instead of by bare name.
preFixup = ''
|
||||
sed -i -e 's|exec wish|exec ${tk}/bin/wish|' $out/bin/gocr.tcl
|
||||
'';
|
||||
|
||||
|
@ -1,105 +1,92 @@
|
||||
# Build procedure lifted from https://aur.archlinux.org/packages/k2/k2pdfopt/PKGBUILD
|
||||
{ stdenv, fetchzip, fetchurl, writeScript, libX11, libXext, autoconf, automake, libtool
|
||||
, leptonica, libpng, libtiff, zlib, openjpeg, freetype, jbig2dec, djvulibre
|
||||
, openssl }:
|
||||
# Package arguments. zlib and libpng are always added to buildInputs; each
# enable* flag gates the packages listed after it on the same line, which are
# only added to buildInputs (directly or as a *_modded override) when the
# flag is true.
{ stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig
|
||||
, zlib, libpng
|
||||
, enableGSL ? true, gsl
|
||||
, enableGhostScript ? true, ghostscript
|
||||
, enableMuPDF ? true, jbig2dec, openjpeg, freetype, harfbuzz, mupdf
|
||||
, enableJPEG2K ? true, jasper
|
||||
, enableDJVU ? true, djvulibre
|
||||
, enableGOCR ? false, gocr # Disabled by default due to crashes
|
||||
, enableTesseract ? true, leptonica, tesseract
|
||||
}:
|
||||
|
||||
let
|
||||
mupdf_src = fetchurl {
|
||||
url = http://www.mupdf.com/downloads/archive/mupdf-1.6-source.tar.gz;
|
||||
sha256 = "0qx51rj6alzcagcixm59rvdpm54w6syrwr4184v439jh14ryw4wq";
|
||||
};
|
||||
with stdenv.lib;
|
||||
|
||||
tess_src = fetchurl {
|
||||
url = http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz;
|
||||
sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96";
|
||||
};
|
||||
|
||||
gocr_src = fetchurl {
|
||||
url = http://www-e.uni-magdeburg.de/jschulen/ocr/gocr-0.49.tar.gz;
|
||||
sha256 = "06hpzp7rkkwfr1fvmc8kcfz9v490i9yir7f7imh13gmka0fr6afc";
|
||||
};
|
||||
|
||||
in stdenv.mkDerivation rec {
|
||||
stdenv.mkDerivation rec {
|
||||
name = "k2pdfopt-${version}";
|
||||
version = "2.32";
|
||||
version = "2.42";
|
||||
|
||||
src = fetchzip {
|
||||
url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v${version}_src.zip";
|
||||
sha256 = "1v3cj5bwpjvy7s66sfqcmkxs91f7nxaykjpdjm2wn87vn6q7n19m";
|
||||
sha256 = "1zag4jmkr0qrcpqqb5davmvdrabhdyz87q4zz0xpfkl6xw2dn9bk";
|
||||
};
|
||||
|
||||
buildInputs = [ libX11 libXext autoconf automake libtool leptonica libpng libtiff zlib
|
||||
openjpeg freetype jbig2dec djvulibre openssl ];
|
||||
NIX_LDFLAGS = "-lX11 -lXext";
|
||||
patches = [ ./k2pdfopt.patch ];
|
||||
|
||||
hardeningDisable = [ "format" ];
|
||||
nativeBuildInputs = [ cmake pkgconfig ];
|
||||
|
||||
k2_pa = ./k2pdfopt.patch;
|
||||
tess_pa = ./tesseract.patch;
|
||||
buildInputs =
|
||||
let
|
||||
mupdf_modded = mupdf.overrideAttrs (attrs: {
|
||||
name = "mupdf-1.10a";
|
||||
src = fetchurl {
|
||||
url = "http://mupdf.com/downloads/archive/mupdf-1.10a-source.tar.gz";
|
||||
sha256 = "0dm8wcs8i29aibzkqkrn8kcnk4q0kd1v66pg48h5c3qqp4v1zk5a";
|
||||
};
|
||||
# Copy k2pdfopt's modified mupdf sources over the stock ones before patching.
# NOTE: this override set is not `rec`, so ${src} below resolves to the src of
# the enclosing `rec` derivation (the k2pdfopt source tree), not to the mupdf
# tarball assigned to `src` just above.
# Excluded the pdf-*.c files, since they mostly just broke the #includes
|
||||
prePatch = ''
|
||||
cp ${src}/mupdf_mod/{font,stext-device,string}.c source/fitz/
|
||||
cp ${src}/mupdf_mod/font-win32.c source/pdf/
|
||||
'';
|
||||
# Patches from previous 1.10a version in nixpkgs
|
||||
patches = [
|
||||
# Compatibility with new openjpeg
|
||||
(fetchpatch {
|
||||
name = "mupdf-1.9a-openjpeg-2.1.1.patch";
|
||||
url = "https://git.archlinux.org/svntogit/community.git/plain/mupdf/trunk/0001-mupdf-openjpeg.patch?id=5a28ad0a8999a9234aa7848096041992cc988099";
|
||||
sha256 = "1i24qr4xagyapx4bijjfksj4g3bxz8vs5c2mn61nkm29c63knp75";
|
||||
})
|
||||
|
||||
builder = writeScript "builder.sh" ''
|
||||
. ${stdenv}/setup
|
||||
set -e
|
||||
(fetchurl {
|
||||
name = "CVE-2017-5896.patch";
|
||||
url = "http://git.ghostscript.com/?p=mupdf.git;a=patch;h=2c4e5867ee699b1081527bc6c6ea0e99a35a5c27";
|
||||
sha256 = "14k7x47ifx82sds1c06ibzbmcparfg80719jhgwjk6w1vkh4r693";
|
||||
})
|
||||
];
|
||||
});
|
||||
# Leptonica built with k2pdfopt's modifications: prePatch copies every file
# from leptonica_mod/ in the k2pdfopt source tree (${src} of the enclosing
# derivation) into leptonica's src/ directory.
leptonica_modded = leptonica.overrideAttrs (attrs: {
|
||||
prePatch = ''
|
||||
cp ${src}/leptonica_mod/* src/
|
||||
'';
|
||||
});
|
||||
# Tesseract built with k2pdfopt's modifications: prePatch copies the
# replacement sources from tesseract_mod/ and the tesseract.h / leptonica.h
# headers from include_mod/ (both in the k2pdfopt source tree, ${src} of the
# enclosing derivation) into the tesseract subdirectories named on each line.
# NOTE(review): dawg.cpp is copied to api/ here, whereas the builder script
# this replaces copied it to dict/ — confirm api/ is the intended target.
# NOTE(review): `patches` below presumably replaces, rather than extends, any
# patches defined by the base tesseract derivation — confirm that is intended.
tesseract_modded = tesseract.overrideAttrs (attrs: {
|
||||
prePatch = ''
|
||||
cp ${src}/tesseract_mod/{ambigs.cpp,ccutil.h,ccutil.cpp} ccutil/
|
||||
cp ${src}/tesseract_mod/dawg.cpp api/
|
||||
cp ${src}/tesseract_mod/{imagedata.cpp,tessdatamanager.cpp} ccstruct/
|
||||
cp ${src}/tesseract_mod/openclwrapper.h opencl/
|
||||
cp ${src}/tesseract_mod/{tessedit.cpp,thresholder.cpp} ccmain/
|
||||
cp ${src}/tesseract_mod/tess_lang_mod_edge.h cube/
|
||||
cp ${src}/tesseract_mod/tesscapi.cpp api/
|
||||
cp ${src}/include_mod/{tesseract.h,leptonica.h} api/
|
||||
'';
|
||||
patches = [ ./tesseract.patch ];
|
||||
});
|
||||
in
|
||||
# zlib and libpng are unconditional; every other input is appended only when
# its enable* flag is set. MuPDF, leptonica and tesseract are taken from the
# *_modded overrides defined above rather than from the stock packages.
[ zlib libpng ] ++
|
||||
optional enableGSL gsl ++
|
||||
optional enableGhostScript ghostscript ++
|
||||
optionals enableMuPDF [ jbig2dec openjpeg freetype harfbuzz mupdf_modded ] ++
|
||||
optionals enableJPEG2K [ jasper ] ++
|
||||
optional enableDJVU djvulibre ++
|
||||
optional enableGOCR gocr ++
|
||||
optionals enableTesseract [ leptonica_modded tesseract_modded ];
|
||||
|
||||
plibs=`pwd`/patched_libraries
|
||||
dontUseCmakeBuildDir = true;
|
||||
|
||||
tar zxf ${mupdf_src}
|
||||
cp $src/mupdf_mod/font.c $src/mupdf_mod/string.c mupdf-1.6-source/source/fitz/
|
||||
cp $src/mupdf_mod/pdf-* mupdf-1.6-source/source/pdf
|
||||
cmakeFlags = [ "-DCMAKE_C_FLAGS=-I${src}/include_mod" ];
|
||||
|
||||
tar zxf ${tess_src}
|
||||
cp $src/tesseract_mod/dawg.cpp tesseract-ocr/dict
|
||||
cp $src/tesseract_mod/tessdatamanager.cpp tesseract-ocr/ccutil
|
||||
cp $src/tesseract_mod/tessedit.cpp tesseract-ocr/ccmain
|
||||
cp $src/tesseract_mod/tesscapi.cpp tesseract-ocr/api
|
||||
cp $src/include_mod/tesseract.h $src/include_mod/leptonica.h tesseract-ocr/api
|
||||
|
||||
cp -a $src k2pdfopt_v2.21
|
||||
chmod -R +w k2pdfopt_v2.21
|
||||
|
||||
patch -p0 -i $tess_pa
|
||||
patch -p0 -i $k2_pa
|
||||
|
||||
cd tesseract-ocr
|
||||
./autogen.sh
|
||||
substituteInPlace "configure" \
|
||||
--replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \
|
||||
'LIBLEPT_HEADERSDIR=${leptonica}/include'
|
||||
./configure --prefix=$plibs --disable-shared
|
||||
make install
|
||||
|
||||
cd ..
|
||||
tar zxf ${gocr_src}
|
||||
cd gocr-0.49
|
||||
./configure
|
||||
cp src/{gocr.h,pnm.h,unicode.h,list.h} $plibs/include
|
||||
cp include/config.h $plibs/include
|
||||
make libs
|
||||
cp src/libPgm2asc.a $plibs/lib
|
||||
|
||||
cd ../mupdf-1.6-source
|
||||
make prefix=$plibs install
|
||||
install -Dm644 build/debug/libmujs.a $plibs/lib
|
||||
|
||||
cd ../k2pdfopt_v2.21/k2pdfoptlib
|
||||
gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include \
|
||||
-I . -I ../willuslib
|
||||
ar rcs libk2pdfopt.a *.o
|
||||
|
||||
cd ../willuslib
|
||||
gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include
|
||||
ar rcs libwillus.a *.o
|
||||
|
||||
cd ..
|
||||
gcc -Wall -Ofast -o k2pdfopt.o -c k2pdfopt.c -I k2pdfoptlib/ -I willuslib/ \
|
||||
-I include_mod/ -I $plibs/include
|
||||
g++ -Ofast k2pdfopt.o -o k2pdfopt -I willuslib/ -I k2pdfoptlib/ -I include_mod/ \
|
||||
-I $plibs/include -L $plibs/lib/ \
|
||||
-L willuslib/ -L k2pdfoptlib/ -lk2pdfopt -lwillus -ldjvulibre -lz -lmupdf \
|
||||
-ljbig2dec -ljpeg -lopenjp2 -lpng -lfreetype -lpthread -lmujs \
|
||||
-lPgm2asc -llept -ltesseract -lcrypto
|
||||
|
||||
mkdir -p $out/bin
|
||||
cp k2pdfopt $out/bin
|
||||
# Install only the k2pdfopt executable from the build directory, as
# $out/bin/k2pdfopt with mode 755 (install -D creates $out/bin as needed).
installPhase = ''
|
||||
install -D -m 755 k2pdfopt $out/bin/k2pdfopt
|
||||
'';
|
||||
|
||||
meta = with stdenv.lib; {
|
||||
@ -107,7 +94,7 @@ in stdenv.mkDerivation rec {
|
||||
homepage = http://www.willus.com/k2pdfopt;
|
||||
license = licenses.gpl3;
|
||||
platforms = platforms.linux;
|
||||
maintainers = [ maintainers.bosu ];
|
||||
maintainers = with maintainers; [ bosu danielfullmer ];
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1,95 +1,99 @@
|
||||
diff -aur k2pdfopt_v2.21/willuslib/array.c k2pdfopt_v2.21.new/willuslib/array.c
|
||||
--- k2pdfopt_v2.21/willuslib/array.c 2014-05-23 16:29:58.000000000 -0300
|
||||
+++ k2pdfopt_v2.21.new/willuslib/array.c 2014-07-26 11:35:49.829825567 -0300
|
||||
@@ -1055,7 +1055,7 @@
|
||||
void arrayf_sort(float *a,int n)
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 4a2378b..502c477 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -52,6 +52,7 @@ endif(JPEG_FOUND)
|
||||
include(FindJasper)
|
||||
if(JASPER_FOUND)
|
||||
set(HAVE_JASPER_LIB 1)
|
||||
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY})
|
||||
endif(JASPER_FOUND)
|
||||
|
||||
{
|
||||
- sort(a,(long)n);
|
||||
+ willus_sort(a,(long)n);
|
||||
}
|
||||
# paths from willuslib/wgs.c
|
||||
@@ -66,8 +67,12 @@ else()
|
||||
message(STATUS "Could NOT find ghostscript executable")
|
||||
endif(GHOSTSCRIPT_EXECUTABLE)
|
||||
|
||||
-# willus.h
|
||||
-# HAVE_GSL_LIB
|
||||
+pkg_check_modules(GSL gsl)
|
||||
+if(GSL_FOUND)
|
||||
+ set(HAVE_GSL_LIB 1)
|
||||
+ include_directories(SYSTEM ${GSL_INCLUDEDIR})
|
||||
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS})
|
||||
+endif(GSL_FOUND)
|
||||
|
||||
|
||||
diff -aur k2pdfopt_v2.21/willuslib/math.c k2pdfopt_v2.21.new/willuslib/math.c
|
||||
--- k2pdfopt_v2.21/willuslib/math.c 2013-08-15 21:33:50.000000000 -0300
|
||||
+++ k2pdfopt_v2.21.new/willuslib/math.c 2014-07-26 11:36:02.853170659 -0300
|
||||
@@ -532,7 +532,7 @@
|
||||
# libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0)
|
||||
@@ -80,7 +85,7 @@ if(MUPDF_FOUND)
|
||||
include_directories(SYSTEM ${MUPDF_INCLUDEDIR})
|
||||
message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}")
|
||||
set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS}
|
||||
- -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype
|
||||
+ -lopenjp2 -ljbig2dec -ljpeg -lfreetype -lharfbuzz
|
||||
)
|
||||
endif(MUPDF_FOUND)
|
||||
|
||||
@@ -91,9 +96,25 @@ if(DJVU_FOUND)
|
||||
set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${DJVU_LDFLAGS})
|
||||
endif(DJVU_FOUND)
|
||||
|
||||
-# HAVE_GOCR_LIB
|
||||
-# HAVE_LEPTONICA_LIB
|
||||
-# HAVE_TESSERACT_LIB
|
||||
+find_library(GOCR_LIB NAMES Pgm2asc)
|
||||
+if(GOCR_LIB)
|
||||
+ set(HAVE_GOCR_LIB 1)
|
||||
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GOCR_LIB})
|
||||
+endif(GOCR_LIB)
|
||||
+
|
||||
+pkg_check_modules(LEPTONICA lept)
|
||||
+if(LEPTONICA_FOUND)
|
||||
+ set(HAVE_LEPTONICA_LIB 1)
|
||||
+ include_directories(SYSTEM ${LEPTONICA_INCLUDEDIR})
|
||||
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${LEPTONICA_LDFLAGS})
|
||||
+endif(LEPTONICA_FOUND)
|
||||
+
|
||||
+pkg_check_modules(TESSERACT tesseract)
|
||||
+if(TESSERACT_FOUND)
|
||||
+ set(HAVE_TESSERACT_LIB 1)
|
||||
+ include_directories(SYSTEM ${TESSERACT_INCLUDEDIR})
|
||||
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${TESSERACT_LDFLAGS})
|
||||
+endif(TESSERACT_FOUND)
|
||||
|
||||
-void sort(float *x,int n)
|
||||
+void willus_sort(float *x,int n)
|
||||
# ---- Describe project
|
||||
|
||||
{
|
||||
int top,n1;
|
||||
diff -aur k2pdfopt_v2.21/willuslib/ocrjocr.c k2pdfopt_v2.21.new/willuslib/ocrjocr.c
|
||||
--- k2pdfopt_v2.21/willuslib/ocrjocr.c 2012-11-12 13:09:42.000000000 -0300
|
||||
+++ k2pdfopt_v2.21.new/willuslib/ocrjocr.c 2014-07-26 11:36:46.699837185 -0300
|
||||
diff --git a/willuslib/CMakeLists.txt b/willuslib/CMakeLists.txt
|
||||
index 463bbc9..8043db5 100644
|
||||
--- a/willuslib/CMakeLists.txt
|
||||
+++ b/willuslib/CMakeLists.txt
|
||||
@@ -6,7 +6,7 @@ include_directories(..)
|
||||
set(WILLUSLIB_SRC
|
||||
ansi.c array.c bmp.c bmpdjvu.c bmpmupdf.c dtcompress.c filelist.c
|
||||
fontdata.c fontrender.c gslpolyfit.c linux.c math.c mem.c ocr.c
|
||||
- ocrjocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c
|
||||
+ ocrgocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c
|
||||
token.c wfile.c wgs.c wgui.c willusversion.c win.c winbmp.c
|
||||
wincomdlg.c winmbox.c winshell.c wmupdf.c wmupdfinfo.c wpdf.c wsys.c
|
||||
wzfile.c wleptonica.c
|
||||
diff --git a/willuslib/ocrgocr.c b/willuslib/ocrgocr.c
|
||||
index 6027e9a..fbe10f0 100644
|
||||
--- a/willuslib/ocrgocr.c
|
||||
+++ b/willuslib/ocrgocr.c
|
||||
@@ -29,6 +29,8 @@
|
||||
#ifdef HAVE_GOCR_LIB
|
||||
#include <gocr.h>
|
||||
|
||||
+job_t *JOB;
|
||||
+job_t *OCR_JOB;
|
||||
+
|
||||
/*
|
||||
** bmp8 must be grayscale
|
||||
** (x1,y1) and (x2,y2) from top left of bitmap
|
||||
@@ -66,6 +68,7 @@
|
||||
@@ -63,6 +65,7 @@ void gocr_single_word_from_bmp8(char *text,int maxlen,WILLUSBITMAP *bmp8,
|
||||
h=y2-y1+1;
|
||||
dh=h+bw*2;
|
||||
job=&_job;
|
||||
+ JOB=job;
|
||||
+ OCR_JOB=job;
|
||||
job_init(job);
|
||||
job_init_image(job);
|
||||
// willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10);
|
||||
diff -aur k2pdfopt_v2.21/willuslib/string.c k2pdfopt_v2.21.new/willuslib/string.c
|
||||
--- k2pdfopt_v2.21/willuslib/string.c 2014-02-03 00:37:44.000000000 -0300
|
||||
+++ k2pdfopt_v2.21.new/willuslib/string.c 2014-07-26 11:37:01.766506277 -0300
|
||||
@@ -81,7 +81,7 @@
|
||||
** Returns NULL if EOF, otherwise returns pointer to the string.
|
||||
**
|
||||
*/
|
||||
-char *get_line(char *buf,int max,FILE *f)
|
||||
+char *willus_get_line(char *buf,int max,FILE *f)
|
||||
|
||||
{
|
||||
int i;
|
||||
diff -aur k2pdfopt_v2.21/willuslib/willus.h k2pdfopt_v2.21.new/willuslib/willus.h
|
||||
--- k2pdfopt_v2.21/willuslib/willus.h 2014-07-25 15:03:51.000000000 -0300
|
||||
+++ k2pdfopt_v2.21.new/willuslib/willus.h 2014-07-26 11:37:56.316506038 -0300
|
||||
@@ -214,9 +214,6 @@
|
||||
** CMAKE handles the defines, not this source
|
||||
** (Mod from Dirk Thierbach, 31-Dec-2013)
|
||||
*/
|
||||
-#ifdef USE_CMAKE
|
||||
-#include "config.h"
|
||||
-#else /* USE_CMAKE */
|
||||
|
||||
#ifndef HAVE_Z_LIB
|
||||
#define HAVE_Z_LIB
|
||||
@@ -268,7 +265,6 @@
|
||||
#undef HAVE_GSL_LIB
|
||||
#endif
|
||||
|
||||
-#endif /* USE_CMAKE */
|
||||
/*
|
||||
** Consistency check
|
||||
*/
|
||||
@@ -533,7 +529,7 @@
|
||||
int *n,FILE *err);
|
||||
int readxyz_ex (char *filename,double **x,double **y,double **z,
|
||||
int *n,FILE *err,int ignore_after_semicolon);
|
||||
-void sort (float *x,int n);
|
||||
+void willus_sort (float *x,int n);
|
||||
void sortd (double *x,int n);
|
||||
void sorti (int *x,int n);
|
||||
void sortxy (float *x,float *y,int n);
|
||||
@@ -602,7 +598,7 @@
|
||||
/* string.c */
|
||||
void clean_line (char *buf);
|
||||
void clean_line_end(char *buf);
|
||||
-char *get_line (char *buf,int max,FILE *f);
|
||||
+char *willus_get_line (char *buf,int max,FILE *f);
|
||||
char *get_line_cf (char *buf,int max,FILE *f);
|
||||
int mem_get_line_cf(char *buf,int maxlen,char *cptr,long *cindex,long csize);
|
||||
int in_string (char *buffer,char *pattern);
|
||||
|
@ -1,12 +1,13 @@
|
||||
diff -aur tesseract-ocr/api/Makefile.am tesseract-ocr.new/api/Makefile.am
|
||||
--- tesseract-ocr/api/Makefile.am 2012-10-09 14:18:39.000000000 -0300
|
||||
+++ tesseract-ocr.new/api/Makefile.am 2014-03-20 18:43:13.926030341 -0300
|
||||
@@ -36,7 +36,7 @@
|
||||
diff --git a/api/Makefile.am b/api/Makefile.am
|
||||
index d8c1e54..46ead13 100644
|
||||
--- a/api/Makefile.am
|
||||
+++ b/api/Makefile.am
|
||||
@@ -42,7 +42,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
if VISIBILITY
|
||||
libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS
|
||||
endif
|
||||
-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp
|
||||
+libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp tesscapi.cpp
|
||||
-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp
|
||||
+libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp tesscapi.cpp
|
||||
|
||||
lib_LTLIBRARIES += libtesseract.la
|
||||
libtesseract_la_LDFLAGS =
|
||||
|
Loading…
Reference in New Issue
Block a user