nixpkgs/pkgs/development/python-modules/ocrmypdf/default.nix

113 lines
2.2 KiB
Nix

# Package expression for OCRmyPDF, built as a pyproject-based Python package.
# All inputs are expected to be supplied by the caller (normally callPackage).
{
  lib,
  buildPythonPackage,
  deprecation,
  fetchFromGitHub,
  fpdf2,
  ghostscript_headless,
  hatch-vcs,
  hatchling,
  hypothesis,
  img2pdf,
  jbig2enc,
  packaging,
  pdfminer-six,
  pillow-heif,
  pikepdf,
  pillow,
  pluggy,
  pngquant,
  pydantic,
  pypdfium2,
  pytest-xdist,
  pytestCheckHook,
  rich,
  reportlab,
  replaceVars,
  tesseract,
  uharfbuzz,
  unpaper,
  installShellFiles,
}:

let
  inherit (lib) getExe;

  # Substitutions handed to replaceVars for ./paths.patch: each key is
  # replaced by the main executable of the corresponding package.
  externalTools = {
    gs = getExe ghostscript_headless;
    jbig2 = getExe jbig2enc;
    pngquant = getExe pngquant;
    tesseract = getExe tesseract;
    unpaper = getExe unpaper;
  };
in
buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "17.3.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "ocrmypdf";
    repo = "OCRmyPDF";
    tag = "v${version}";
    # .git_archival.txt gets its content substituted when the tarball is
    # created, so the fetched tree is no longer deterministic once master
    # stops pointing at the tag. Dropping the file keeps the hash stable.
    # Upstream report: https://github.com/ocrmypdf/OCRmyPDF/issues/841
    postFetch = ''
      rm "$out/.git_archival.txt"
    '';
    hash = "sha256-/R/W8TMBaFBTjPiOIroZ1CNQAKMTLJH+cQvY2177e0U=";
  };

  patches = [
    ./use-pillow-heif.patch
    (replaceVars ./paths.patch externalTools)
  ];

  build-system = [
    hatch-vcs
    hatchling
  ];

  # installShellFiles provides installShellCompletion, used in postInstall.
  nativeBuildInputs = [ installShellFiles ];

  dependencies = [
    deprecation
    fpdf2
    img2pdf
    packaging
    pdfminer-six
    pillow-heif
    pikepdf
    pillow
    pluggy
    pydantic
    pypdfium2
    rich
    uharfbuzz
  ];

  nativeCheckInputs = [
    hypothesis
    pytest-xdist
    pytestCheckHook
    reportlab
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  # Install the bash and fish completion scripts from the source tree.
  postInstall = ''
    installShellCompletion --cmd ocrmypdf \
    --bash misc/completion/ocrmypdf.bash \
    --fish misc/completion/ocrmypdf.fish
  '';

  meta = {
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    homepage = "https://github.com/ocrmypdf/OCRmyPDF";
    changelog = "https://github.com/ocrmypdf/OCRmyPDF/blob/${src.tag}/docs/release_notes.md";
    license = [
      lib.licenses.mpl20
      lib.licenses.mit
    ];
    maintainers = [
      lib.maintainers.dotlambda
    ];
    mainProgram = "ocrmypdf";
  };
}