author     Michał Górny <mgorny@gentoo.org>  2024-05-08 11:50:36 +0200
committer  Michał Górny <mgorny@gentoo.org>  2024-05-08 11:50:36 +0200
commit     e518dee37abe47fd08f4c2bfc7e0d20753b7697f (patch)
tree       91093ea0ba1e2cf6ee9aa986c44ec50803dfb362 /dev-python/pyarrow
parent     www-client/chromium: Stabilize 124.0.6367.155 arm64, #931558 (diff)
dev-python/pyarrow: Remove old
Signed-off-by: Michał Górny <mgorny@gentoo.org>
Diffstat (limited to 'dev-python/pyarrow')
-rw-r--r--  dev-python/pyarrow/Manifest                          |   1
-rw-r--r--  dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch  | 325
-rw-r--r--  dev-python/pyarrow/pyarrow-15.0.2.ebuild             |  87
3 files changed, 0 insertions, 413 deletions
diff --git a/dev-python/pyarrow/Manifest b/dev-python/pyarrow/Manifest
index 3b44275b17cd..6136d388636a 100644
--- a/dev-python/pyarrow/Manifest
+++ b/dev-python/pyarrow/Manifest
@@ -1,2 +1 @@
-DIST apache-arrow-15.0.2.tar.gz 21503812 BLAKE2B 5a42b3409515d7a09daff33d30e72e828e1df2e009ed746f101f4d8e6dcadb2e9c305a6cb9799d4003e1421ba666d2a2e9ba182c11b0c538fbd1aee4b3ba10ff SHA512 6c83e3be1e5840c30387f088315b74aca8e7c2d060793af70a156effb496a71e3e6af0693188c0f46f8a4a061a263a47095912ef04a5dc8141abd59075b14c78
 DIST apache-arrow-16.0.0.tar.gz 21695067 BLAKE2B aa5dfef3d8d46a53242075c165473635051d51ff28587ea8b80751232d5f75ee3ef89e0a027aa39bdc9dc03fa46ddb68e46ae2c7f40605258e47ff194f1d3979 SHA512 773f4f3eef603032c8ba0cfdc023bfd2a24bb5e41c82da354a22d7854ab153294ede1f4782cc32b27451cf1b58303f105bac61ceeb3568faea747b93e21d79e4
diff --git a/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch b/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch
deleted file mode 100644
index 0b54deaf2c33..000000000000
--- a/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch
+++ /dev/null
@@ -1,325 +0,0 @@
-diff --git a/pyarrow/array.pxi b/pyarrow/array.pxi
-index 1416f5f43..058e0eec0 100644
---- a/pyarrow/array.pxi
-+++ b/pyarrow/array.pxi
-@@ -1573,7 +1573,7 @@ cdef class Array(_PandasConvertible):
-         # decoding the dictionary will make sure nulls are correctly handled.
-         # Decoding a dictionary does imply a copy by the way,
-         # so it can't be done if the user requested a zero_copy.
--        c_options.decode_dictionaries = not zero_copy_only
-+        c_options.decode_dictionaries = True
-         c_options.zero_copy_only = zero_copy_only
-         c_options.to_numpy = True
- 
-@@ -1585,9 +1585,6 @@ cdef class Array(_PandasConvertible):
-         # always convert to numpy array without pandas dependency
-         array = PyObject_to_object(out)
- 
--        if isinstance(array, dict):
--            array = np.take(array['dictionary'], array['indices'])
--
-         if writable and not array.flags.writeable:
-             # if the conversion already needed to a copy, writeable is True
-             array = array.copy()
-diff --git a/pyarrow/io.pxi b/pyarrow/io.pxi
-index 1897e76ef..b57980b3d 100644
---- a/pyarrow/io.pxi
-+++ b/pyarrow/io.pxi
-@@ -1987,7 +1987,7 @@ def foreign_buffer(address, size, base=None):
-         Object that owns the referenced memory.
-     """
-     cdef:
--        intptr_t c_addr = address
-+        uintptr_t c_addr = address
-         int64_t c_size = size
-         shared_ptr[CBuffer] buf
- 
-diff --git a/pyarrow/lib.pxd b/pyarrow/lib.pxd
-index 58ec34add..91c7633a7 100644
---- a/pyarrow/lib.pxd
-+++ b/pyarrow/lib.pxd
-@@ -285,6 +285,8 @@ cdef class Tensor(_Weakrefable):
- 
-     cdef readonly:
-         DataType type
-+        bytes _ssize_t_shape
-+        bytes _ssize_t_strides
- 
-     cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
- 
-diff --git a/pyarrow/src/arrow/python/arrow_to_pandas.cc b/pyarrow/src/arrow/python/arrow_to_pandas.cc
-index e979342b8..8354812ea 100644
---- a/pyarrow/src/arrow/python/arrow_to_pandas.cc
-+++ b/pyarrow/src/arrow/python/arrow_to_pandas.cc
-@@ -2499,6 +2499,8 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options,
-                                    std::shared_ptr<ChunkedArray> arr, PyObject* py_ref,
-                                    PyObject** out) {
-   if (options.decode_dictionaries && arr->type()->id() == Type::DICTIONARY) {
-+    // XXX we should return an error as below if options.zero_copy_only
-+    // is true, but that would break compatibility with existing tests.
-     const auto& dense_type =
-         checked_cast<const DictionaryType&>(*arr->type()).value_type();
-     RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr));
-diff --git a/pyarrow/src/arrow/python/io.cc b/pyarrow/src/arrow/python/io.cc
-index 43f8297c5..197f8b9d3 100644
---- a/pyarrow/src/arrow/python/io.cc
-+++ b/pyarrow/src/arrow/python/io.cc
-@@ -92,9 +92,12 @@ class PythonFile {
-   Status Seek(int64_t position, int whence) {
-     RETURN_NOT_OK(CheckClosed());
- 
-+    // NOTE: `long long` is at least 64 bits in the C standard, the cast below is
-+    // therefore safe.
-+
-     // whence: 0 for relative to start of file, 2 for end of file
--    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(ni)",
--                                               static_cast<Py_ssize_t>(position), whence);
-+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(Li)",
-+                                               static_cast<long long>(position), whence);
-     Py_XDECREF(result);
-     PY_RETURN_IF_ERROR(StatusCode::IOError);
-     return Status::OK();
-@@ -103,16 +106,16 @@ class PythonFile {
-   Status Read(int64_t nbytes, PyObject** out) {
-     RETURN_NOT_OK(CheckClosed());
- 
--    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(n)",
--                                               static_cast<Py_ssize_t>(nbytes));
-+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(L)",
-+                                               static_cast<long long>(nbytes));
-     PY_RETURN_IF_ERROR(StatusCode::IOError);
-     *out = result;
-     return Status::OK();
-   }
- 
-   Status ReadBuffer(int64_t nbytes, PyObject** out) {
--    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(n)",
--                                               static_cast<Py_ssize_t>(nbytes));
-+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(L)",
-+                                               static_cast<long long>(nbytes));
-     PY_RETURN_IF_ERROR(StatusCode::IOError);
-     *out = result;
-     return Status::OK();
-diff --git a/pyarrow/tensor.pxi b/pyarrow/tensor.pxi
-index 1afce7f4a..c674663dc 100644
---- a/pyarrow/tensor.pxi
-+++ b/pyarrow/tensor.pxi
-@@ -15,6 +15,9 @@
- # specific language governing permissions and limitations
- # under the License.
- 
-+# Avoid name clash with `pa.struct` function
-+import struct as _struct
-+
- 
- cdef class Tensor(_Weakrefable):
-     """
-@@ -31,7 +34,6 @@ cdef class Tensor(_Weakrefable):
-     shape: (2, 3)
-     strides: (12, 4)
-     """
--
-     def __init__(self):
-         raise TypeError("Do not call Tensor's constructor directly, use one "
-                         "of the `pyarrow.Tensor.from_*` functions instead.")
-@@ -40,6 +42,14 @@ cdef class Tensor(_Weakrefable):
-         self.sp_tensor = sp_tensor
-         self.tp = sp_tensor.get()
-         self.type = pyarrow_wrap_data_type(self.tp.type())
-+        self._ssize_t_shape = self._make_shape_or_strides_buffer(self.shape)
-+        self._ssize_t_strides = self._make_shape_or_strides_buffer(self.strides)
-+
-+    def _make_shape_or_strides_buffer(self, values):
-+        """
-+        Make a bytes object holding an array of `values` cast to `Py_ssize_t`.
-+        """
-+        return _struct.pack(f"{len(values)}n", *values)
- 
-     def __repr__(self):
-         return """<pyarrow.Tensor>
-@@ -282,10 +292,8 @@ strides: {0.strides}""".format(self)
-             buffer.readonly = 0
-         else:
-             buffer.readonly = 1
--        # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
--        # and strides arrays lifetime is tied to the tensor's
--        buffer.shape = <Py_ssize_t *> &self.tp.shape()[0]
--        buffer.strides = <Py_ssize_t *> &self.tp.strides()[0]
-+        buffer.shape = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_shape)
-+        buffer.strides = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_strides)
-         buffer.suboffsets = NULL
- 
- 
-diff --git a/pyarrow/tests/test_gdb.py b/pyarrow/tests/test_gdb.py
-index d0d241cc5..0d12d710d 100644
---- a/pyarrow/tests/test_gdb.py
-+++ b/pyarrow/tests/test_gdb.py
-@@ -885,32 +885,61 @@ def test_arrays_heap(gdb_arrow):
-         ("arrow::DurationArray of type arrow::duration"
-          "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
-          "[0] = null, [1] = -1234567890123456789ns}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_s",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
--         "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
--         "[2] = -2203932304s [1900-02-28 12:34:56], "
--         "[3] = 63730281600s [3989-07-14 00:00:00]}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_ms",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
--         "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
--         "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_us",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
--         "[0] = null, "
--         "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
--         "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_ns",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
--         "[0] = null, "
--         "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
-+    if sys.maxsize > 2**32:
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_s",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
-+             "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
-+             "[2] = -2203932304s [1900-02-28 12:34:56], "
-+             "[3] = 63730281600s [3989-07-14 00:00:00]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ms",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
-+             "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
-+             "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_us",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
-+             "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ns",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
-+    else:
-+        # Python's datetime is limited to smaller timestamps on 32-bit platforms
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_s",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
-+             "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
-+             "[2] = -2203932304s [too large to represent], "
-+             "[3] = 63730281600s [too large to represent]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ms",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
-+             "[0] = null, [1] = -2203932303877ms [too large to represent], "
-+             "[2] = 63730281600789ms [too large to represent]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_us",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303345679us [too large to represent], "
-+             "[2] = 63730281600456789us [too large to represent]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ns",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303012345679ns [too large to represent]}"))
- 
-     # Decimal
-     check_heap_repr(
-diff --git a/pyarrow/tests/test_io.py b/pyarrow/tests/test_io.py
-index 5a495aa80..17eab871a 100644
---- a/pyarrow/tests/test_io.py
-+++ b/pyarrow/tests/test_io.py
-@@ -36,7 +36,7 @@ from pyarrow import Codec
- import pyarrow as pa
- 
- 
--def check_large_seeks(file_factory):
-+def check_large_seeks(file_factory, expected_error=None):
-     if sys.platform in ('win32', 'darwin'):
-         pytest.skip("need sparse file support")
-     try:
-@@ -45,11 +45,16 @@ def check_large_seeks(file_factory):
-         f.truncate(2 ** 32 + 10)
-         f.seek(2 ** 32 + 5)
-         f.write(b'mark\n')
--    with file_factory(filename) as f:
--        assert f.seek(2 ** 32 + 5) == 2 ** 32 + 5
--        assert f.tell() == 2 ** 32 + 5
--        assert f.read(5) == b'mark\n'
--        assert f.tell() == 2 ** 32 + 10
-+    if expected_error:
-+        with expected_error:
-+            file_factory(filename)
-+    else:
-+        with file_factory(filename) as f:
-+            assert f.size() == 2 ** 32 + 10
-+            assert f.seek(2 ** 32 + 5) == 2 ** 32 + 5
-+            assert f.tell() == 2 ** 32 + 5
-+            assert f.read(5) == b'mark\n'
-+            assert f.tell() == 2 ** 32 + 10
-     finally:
-         os.unlink(filename)
- 
-@@ -1137,7 +1142,14 @@ def test_memory_zero_length(tmpdir):
- 
- 
- def test_memory_map_large_seeks():
--    check_large_seeks(pa.memory_map)
-+    if sys.maxsize >= 2**32:
-+        expected_error = None
-+    else:
-+        expected_error = pytest.raises(
-+            pa.ArrowCapacityError,
-+            match="Requested memory map length 4294967306 "
-+                  "does not fit in a C size_t")
-+    check_large_seeks(pa.memory_map, expected_error=expected_error)
- 
- 
- def test_memory_map_close_remove(tmpdir):
-diff --git a/pyarrow/tests/test_pandas.py b/pyarrow/tests/test_pandas.py
-index 8fd4b3041..168ed7e42 100644
---- a/pyarrow/tests/test_pandas.py
-+++ b/pyarrow/tests/test_pandas.py
-@@ -2601,8 +2601,9 @@ class TestConvertStructTypes:
-                                         ('yy', np.bool_)])),
-                        ('y', np.int16),
-                        ('z', np.object_)])
--        # Note: itemsize is not a multiple of sizeof(object)
--        assert dt.itemsize == 12
-+        # Note: itemsize is not necessarily a multiple of sizeof(object)
-+        # object_ is 8 bytes on 64-bit systems, 4 bytes on 32-bit systems
-+        assert dt.itemsize == (12 if sys.maxsize > 2**32 else 8)
-         ty = pa.struct([pa.field('x', pa.struct([pa.field('xx', pa.int8()),
-                                                  pa.field('yy', pa.bool_())])),
-                         pa.field('y', pa.int16()),
-diff --git a/pyarrow/tests/test_schema.py b/pyarrow/tests/test_schema.py
-index fa75fcea3..8793c9e77 100644
---- a/pyarrow/tests/test_schema.py
-+++ b/pyarrow/tests/test_schema.py
-@@ -681,7 +681,8 @@ def test_schema_sizeof():
-         pa.field('bar', pa.string()),
-     ])
- 
--    assert sys.getsizeof(schema) > 30
-+    # Note: pa.schema is twice as large on 64-bit systems
-+    assert sys.getsizeof(schema) > (30 if sys.maxsize > 2**32 else 15)
- 
-     schema2 = schema.with_metadata({"key": "some metadata"})
-     assert sys.getsizeof(schema2) > sys.getsizeof(schema)
diff --git a/dev-python/pyarrow/pyarrow-15.0.2.ebuild b/dev-python/pyarrow/pyarrow-15.0.2.ebuild
deleted file mode 100644
index 8f358f46c970..000000000000
--- a/dev-python/pyarrow/pyarrow-15.0.2.ebuild
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2023-2024 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-DISTUTILS_EXT=1
-DISTUTILS_USE_PEP517=setuptools
-PYTHON_COMPAT=( python3_{10..12} )
-
-inherit distutils-r1 multiprocessing
-
-DESCRIPTION="Python library for Apache Arrow"
-HOMEPAGE="
-	https://arrow.apache.org/
-	https://github.com/apache/arrow/
-	https://pypi.org/project/pyarrow/
-"
-SRC_URI="mirror://apache/arrow/arrow-${PV}/apache-arrow-${PV}.tar.gz"
-S="${WORKDIR}/apache-arrow-${PV}/python"
-
-LICENSE="Apache-2.0"
-SLOT="0"
-KEYWORDS="amd64 ~arm64 ~hppa ~riscv ~x86"
-IUSE="+parquet +snappy ssl"
-
-RDEPEND="
-	~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,re2,snappy?,ssl?]
-	<dev-python/numpy-2:=[${PYTHON_USEDEP}]
-"
-BDEPEND="
-	test? (
-		dev-python/hypothesis[${PYTHON_USEDEP}]
-		dev-python/pandas[${PYTHON_USEDEP}]
-		dev-libs/apache-arrow[lz4,zlib]
-	)
-"
-
-EPYTEST_XDIST=1
-distutils_enable_tests pytest
-
-PATCHES=(
-	# upstream backports
-	"${FILESDIR}/${PN}-15.0.1-32bit.patch"
-)
-
-src_prepare() {
-	# cython's -Werror
-	sed -i -e '/--warning-errors/d' CMakeLists.txt || die
-	distutils-r1_src_prepare
-}
-
-src_compile() {
-	export PYARROW_PARALLEL="$(makeopts_jobs)"
-	export PYARROW_BUILD_VERBOSE=1
-	export PYARROW_CXXFLAGS="${CXXFLAGS}"
-	export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
-	export PYARROW_CMAKE_GENERATOR=Ninja
-	export PYARROW_WITH_HDFS=1
-	if use parquet; then
-		export PYARROW_WITH_DATASET=1
-		export PYARROW_WITH_PARQUET=1
-		use ssl && export PYARROW_WITH_PARQUET_ENCRYPTION=1
-	fi
-	if use snappy; then
-		export PYARROW_WITH_SNAPPY=1
-	fi
-
-	distutils-r1_src_compile
-}
-
-python_test() {
-	local EPYTEST_DESELECT=(
-		# wtf?
-		tests/test_fs.py::test_localfs_errors
-		# these require apache-arrow with jemalloc that doesn't seem
-		# to be supported by the Gentoo package
-		tests/test_memory.py::test_env_var
-		tests/test_memory.py::test_specific_memory_pools
-		tests/test_memory.py::test_supported_memory_backends
-		# pandas changed, i guess
-		tests/test_pandas.py::test_array_protocol_pandas_extension_types
-		tests/test_table.py::test_table_factory_function_args_pandas
-	)
-
-	cd "${T}" || die
-	epytest --pyargs pyarrow
-}
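
Note: the removed backport leans on two small, reusable idioms — branching on sys.maxsize to tell 32-bit from 64-bit Python builds, and packing shape/stride values into a buffer of native Py_ssize_t via struct.pack with the "n" format code. The following is a minimal standalone sketch of those idioms only, not code shipped by the ebuild or the patch; the shape/strides values are illustrative (borrowed from the Tensor docstring example above).

import struct
import sys

# 64-bit CPython reports sys.maxsize == 2**63 - 1; 32-bit builds report 2**31 - 1.
IS_64BIT = sys.maxsize > 2**32

def pack_ssize_t(values):
    # "n" is the struct format code for a native Py_ssize_t, so the packed
    # buffer is 4 bytes per element on 32-bit builds and 8 bytes on 64-bit.
    return struct.pack(f"{len(values)}n", *values)

shape, strides = (2, 3), (12, 4)   # illustrative values only
packed_shape = pack_ssize_t(shape)
assert len(packed_shape) == len(shape) * (8 if IS_64BIT else 4)
print(IS_64BIT, packed_shape.hex())

This mirrors the _make_shape_or_strides_buffer() helper in the deleted patch, which keeps the buffer-protocol shape/strides pointers valid on platforms where Py_ssize_t is not 64 bits wide.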