mirror of
https://github.com/NixOS/nixpkgs.git
synced 2026-03-14 08:26:37 +01:00
112 lines
2.2 KiB
Nix
112 lines
2.2 KiB
Nix
# Package definition for markitdown, built from the `packages/markitdown`
# subdirectory of the upstream microsoft/markitdown repository.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatchling,

  # dependencies
  beautifulsoup4,
  defusedxml,
  ffmpeg-headless,
  lxml,
  magika,
  mammoth,
  markdownify,
  numpy,
  olefile,
  openai,
  openpyxl,
  pandas,
  pathvalidate,
  pdfminer-six,
  puremagic,
  pydub,
  python-pptx,
  requests,
  speechrecognition,
  xlrd,
  youtube-transcript-api,

  # tests
  pytestCheckHook,

  # passthru
  gitUpdater,
}:

let
  # True on every platform except aarch64-linux; used below to gate the
  # import check and the test suite.
  isNotAarch64Linux = !(stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64);
in
buildPythonPackage (finalAttrs: {
  pname = "markitdown";
  version = "0.1.4";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "markitdown";
    tag = "v${finalAttrs.version}";
    hash = "sha256-WKA2eY8wY3SM9xZ7Cek5eUcJbO5q6eMDx2aTKfQnFvE=";
  };

  # The Python project is not at the repository root; build from its
  # subdirectory inside the fetched source.
  sourceRoot = "${finalAttrs.src.name}/packages/markitdown";

  build-system = [ hatchling ];

  # Do not enforce upstream's version constraint on magika.
  pythonRelaxDeps = [
    "magika"
  ];

  dependencies = [
    beautifulsoup4
    defusedxml
    # NOTE(review): not a Python module; presumably needed at runtime by
    # pydub for audio handling -- confirm.
    ffmpeg-headless
    lxml
    magika
    mammoth
    markdownify
    numpy
    olefile
    openai
    openpyxl
    pandas
    pathvalidate
    pdfminer-six
    puremagic
    pydub
    python-pptx
    requests
    speechrecognition
    xlrd
    youtube-transcript-api
  ];

  # aarch64-linux fails the cpuinfo test because /sys/devices/system/cpu/
  # does not exist in the sandbox:
  #   terminate called after throwing an instance of 'onnxruntime::OnnxRuntimeException'
  #
  # -> Skip everything that requires importing markitdown on that platform
  #    (both the import check and the test suite).
  pythonImportsCheck = lib.optionals isNotAarch64Linux [ "markitdown" ];
  doCheck = isNotAarch64Linux;

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # Require network access
    "test_markitdown_remote"
    "test_module_vectors"
    "test_cli_vectors"
    "test_module_misc"
  ];

  # The attribute must be named `updateScript` (singular) to be picked up by
  # the nixpkgs update tooling. Upstream tags are prefixed with "v" (see
  # `src.tag`), so strip that prefix when deriving the version.
  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = {
    description = "Python tool for converting files and office documents to Markdown";
    homepage = "https://github.com/microsoft/markitdown";
    changelog = "https://github.com/microsoft/markitdown/releases/tag/${finalAttrs.src.tag}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ malik ];
  };
})