nixpkgs/pkgs/development/python-modules/pyarrow/default.nix
Robert Schütz 1a04744f74 treewide: remove superfluous disabled
There is no need to disable Python packages for Python versions that are
no longer in Nixpkgs.
This change was generated using the following script:

    pattern='^\s*disabled\s*=\s*pythonOlder\s*"3\.\([0-9]\|10\)"\s*;\s*$'
    for f in $(find -name '*.nix'); do
        grep -q "$pattern" "$f" || continue
        sed -i "/$pattern/d" "$f"
        if [ $(grep -c pythonOlder "$f") == 1 ]; then
            sed -i '/^\s*pythonOlder,\s*$/d' "$f"
        fi
        nixfmt "$f"
    done
2026-01-11 09:34:20 -08:00

189 lines
5.4 KiB
Nix

{
  lib,
  stdenv,
  buildPythonPackage,
  python,
  pythonAtLeast,
  arrow-cpp,
  cffi,
  cloudpickle,
  cmake,
  cython,
  fsspec,
  hypothesis,
  numpy,
  pandas,
  pytestCheckHook,
  pytest-lazy-fixture,
  pkg-config,
  setuptools,
  setuptools-scm,
  oldest-supported-numpy,
}:
let
  # Encode a boolean switch as an integer: 1 when it is true, 0 otherwise.
  zero_or_one = enabled: if enabled then 1 else 0;
in
buildPythonPackage rec {
pname = "pyarrow";
# Version and sources are taken verbatim from the arrow-cpp derivation.
version = arrow-cpp.version;
src = arrow-cpp.src;
pyproject = true;
# Build from the `python` subdirectory of the source tree.
sourceRoot = src.name + "/python";
# Non-Python build tools only; the Python-level build requirements are
# declared separately in `build-system`.
nativeBuildInputs = [
  cmake
  pkg-config
];
# PEP 517 build requirements of the pyproject build (`pyproject = true`).
build-system = [
  cython
  setuptools
  setuptools-scm
  oldest-supported-numpy
];
# The Arrow C++ libraries the extension modules are built against.
buildInputs = [ arrow-cpp ];
# Python packages needed at runtime.
dependencies = [
  cffi
  numpy
];
# Extra packages that are only needed by the test suite.
checkInputs = [
  cloudpickle
  fsspec
];
nativeCheckInputs = [
  hypothesis
  pandas
  pytestCheckHook
  pytest-lazy-fixture
];
# Settings passed to the build as derivation attributes.
# NOTE(review): presumably consumed by pyarrow's own build scripts — confirm.
PYARROW_BUILD_TYPE = "release";

# Components that are always switched on.
PYARROW_WITH_DATASET = zero_or_one true;
PYARROW_WITH_HDFS = zero_or_one true;
PYARROW_WITH_PARQUET = zero_or_one true;
PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;

# Components that follow the feature flags exposed by arrow-cpp.
PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;

# Bundling of the Arrow C++ headers is switched off.
PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;

# Both locations point at the arrow-cpp derivation, and the install RPATH is
# set to its lib directory.
ARROW_HOME = arrow-cpp;
PARQUET_HOME = arrow-cpp;
PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];

# The test data location is only set when the check phase is enabled.
doCheck = true;
ARROW_TEST_DATA = if doCheck then arrow-cpp.ARROW_TEST_DATA else "";

# cmake is listed as a build tool, but its configure step is turned off.
# NOTE(review): presumably because the Python build drives CMake itself — confirm.
dontUseCmakeConfigure = true;

__darwinAllowLocalNetworking = true;
# Before the build phase, export $NIX_BUILD_CORES as PYARROW_PARALLEL.
# NOTE(review): presumably this sets the parallelism of pyarrow's native build — confirm.
preBuild = ''
export PYARROW_PARALLEL=$NIX_BUILD_CORES
'';
# After installation, copy every `*.h` file found anywhere below
# `pyarrow/src/arrow` in the build directory into
# `$out/<site-packages>/pyarrow/include/arrow/python`. `cp` is given only the
# target directory, so the headers land there without their original
# subdirectory structure.
postInstall = ''
# copy the pyarrow C++ header files to the appropriate location
pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
mkdir -p "$pyarrow_include/arrow/python"
find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
'';
# Test node ids to skip: one unconditional group followed by the
# platform- and Python-version-specific additions, concatenated in order.
disabledTestPaths = lib.concatLists [
  [
    # Need to reach S3 over the internet.
    "pyarrow/tests/test_fs.py::test_resolve_s3_region"
    "pyarrow/tests/test_fs.py::test_s3_finalize"
    "pyarrow/tests/test_fs.py::test_s3_finalize_region_resolver"
    "pyarrow/tests/test_fs.py::test_s3_real_aws"
    "pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
    "pyarrow/tests/test_fs.py::test_s3_options"

    # Flaky tests.
    "pyarrow/tests/test_flight.py::test_roundtrip_errors"
    "pyarrow/tests/test_pandas.py::test_threaded_pandas_import"

    # Flaky: passes locally but not on Hydra.
    "pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"

    # Expects the arrow-cpp headers to be bundled.
    "pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"

    # Looks for TZDATA under /usr.
    "pyarrow/tests/test_orc.py::test_example_using_json"

    # AssertionError: assert 'Europe/Monaco' == 'Europe/Paris'
    "pyarrow/tests/test_types.py::test_dateutil_tzinfo_to_string"

    # Fail with "xxx_fixture not found", where xxx is one of
    # unary_func, unary_agg_func, varargs_agg_func.
    "pyarrow/tests/test_substrait.py::test_udf_via_substrait"
    "pyarrow/tests/test_substrait.py::test_scalar_aggregate_udf_basic"
    "pyarrow/tests/test_substrait.py::test_hash_aggregate_udf_basic"
    "pyarrow/tests/test_udf.py::test_hash_agg_basic"
    "pyarrow/tests/test_udf.py::test_hash_agg_empty"
    "pyarrow/tests/test_udf.py::test_input_lifetime"
    "pyarrow/tests/test_udf.py::test_scalar_agg_basic"
    "pyarrow/tests/test_udf.py::test_scalar_agg_empty"
    "pyarrow/tests/test_udf.py::test_scalar_agg_varargs"
    "pyarrow/tests/test_udf.py::test_scalar_input"
    "pyarrow/tests/test_udf.py::test_scalar_udf_context"
    "pyarrow/tests/test_udf.py::test_udf_array_unary"
  ]

  (lib.optionals stdenv.hostPlatform.isDarwin [
    # Need loopback networking.
    "pyarrow/tests/test_ipc.py::test_socket_"
    "pyarrow/tests/test_flight.py::test_never_sends_data"
    "pyarrow/tests/test_flight.py::test_large_descriptor"
    "pyarrow/tests/test_flight.py::test_large_metadata_client"
    "pyarrow/tests/test_flight.py::test_none_action_side_effect"

    # Fails to compile.
    "pyarrow/tests/test_cython.py::test_cython_api"
  ])

  # The repr prints the number instead of the enum name, so the test fails.
  (lib.optional (pythonAtLeast "3.11") "pyarrow/tests/test_fs.py::test_get_file_info")

  # Needs local networking.
  (lib.optional stdenv.hostPlatform.isLinux "pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs")
];
# NOTE(review): presumably skipped because the GCS tests need network access — confirm.
disabledTests = [ "GcsFileSystem" ];
# Preparation for the check phase:
#   1. export PARQUET_TEST_DATA with the path provided by arrow-cpp;
#   2. delete everything inside `pyarrow/` except `conftest.py` and `tests/`
#      (the `!(...)` pattern needs bash's extglob option);
#   3. move the package-level conftest into `tests/` as `parent_conftest.py`
#      and rewrite the relative import in `tests/conftest.py` to match.
# NOTE(review): removing the source package presumably makes the tests import
# the installed pyarrow rather than the source tree — confirm.
preCheck = ''
export PARQUET_TEST_DATA="${arrow-cpp.PARQUET_TEST_DATA}"
shopt -s extglob
rm -r pyarrow/!(conftest.py|tests)
mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
substituteInPlace pyarrow/tests/conftest.py --replace-fail ..conftest .parent_conftest
''
# On Darwin only, additionally set the open-file limit to 1024 to address the
# "Too many open files" error quoted in the script.
+ lib.optionalString stdenv.hostPlatform.isDarwin ''
# OSError: [Errno 24] Too many open files
ulimit -n 1024
'';
# Modules that must be importable from the installed package: `pyarrow`
# itself plus the listed submodules, each prefixed with `pyarrow.`.
pythonImportsCheck = [
  "pyarrow"
]
++ map (module: "pyarrow.${module}") (
  [
    "compute"
    "csv"
    "dataset"
    "feather"
  ]
  # PYARROW_WITH_FLIGHT follows arrow-cpp.enableFlight, so only require the
  # flight module when Flight support is actually enabled; otherwise the
  # import check would fail for an arrow-cpp built without Flight.
  ++ lib.optional arrow-cpp.enableFlight "flight"
  ++ [
    "fs"
    "json"
    "orc"
    "parquet"
  ]
);
# Package metadata.
meta = {
  homepage = "https://arrow.apache.org/";
  description = "Cross-language development platform for in-memory data";
  license = lib.licenses.asl20;
  maintainers = [
    lib.maintainers.veprbl
    lib.maintainers.cpcloud
  ];
  platforms = lib.platforms.unix;
};
}