From e8b237d60a823386d8636a8bb21e3dc825d70320 Mon Sep 17 00:00:00 2001 From: Cho Date: Wed, 18 Jan 2023 14:34:02 -0500 Subject: [PATCH] PyArrow Version Update & v0.8.0 Bump --- CMakeLists.txt | 4 ++-- doc/build.md | 6 +++--- packaging/conda/recipe/meta.yaml | 2 +- src/mlio-py/mlio/integ/arrow/module.cc | 2 +- src/mlio-py/setup.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a950ce9..4e42380 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake) # Project # ------------------------------------------------------------ -project(MLIO VERSION 0.7.1 LANGUAGES C CXX) +project(MLIO VERSION 0.8.0 LANGUAGES C CXX) if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY VALUE RelWithDebInfo) @@ -309,7 +309,7 @@ if(MLIO_INCLUDE_PYTHON_EXTENSION) endif() if(MLIO_INCLUDE_ARROW_INTEGRATION) - find_package(Arrow 1.0 REQUIRED CONFIG) + find_package(Arrow 10.0.1 REQUIRED CONFIG) endif() include(PythonExtension) diff --git a/doc/build.md b/doc/build.md index 65db30e..d997874 100644 --- a/doc/build.md +++ b/doc/build.md @@ -19,7 +19,7 @@ * Intel TBB 2019.0 or higher * Python 3.6 or higher * AWS C++ SDK 1.7 or higher (optional) -* Apache Arrow 1.0 (optional) +* Apache Arrow 10.0.1 (optional) ## Build Requirements * A C++17-enabled compiler. On Linux gcc 7.0 or higher; on macOS clang 6 or higher should be sufficient. @@ -158,10 +158,10 @@ $ pip install -e . With this mode changes made in Python files will be immediately reflected when the `mlio` package gets imported. ## Building the Apache Arrow Integration -Please refer to Arrow's official install instructions [here](https://arrow.apache.org/install/) first. As Arrow does not guarantee API compatibility (yet) you have to make sure that your environment has the right version. As of today MLIO works with Arrow v0.15.1. Once you have it installed, turn on the `MLIO_INCLUDE_ARROW_INTEGRATION` flag as follows: +Please refer to Arrow's official install instructions [here](https://arrow.apache.org/install/) first. As Arrow does not guarantee API compatibility (yet) you have to make sure that your environment has the right version. As of today MLIO works with Arrow v10.0.1. Once you have it installed, turn on the `MLIO_INCLUDE_ARROW_INTEGRATION` flag as follows: ```bash -$ conda install pyarrow=0.16.0 +$ conda install pyarrow=10.0.1 $ cmake DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DMLIO_INCLUDE_ARROW_INTEGRATION=ON ../.. $ cmake --build . --target mlio-py $ cmake --build . --target mlio-arrow diff --git a/packaging/conda/recipe/meta.yaml b/packaging/conda/recipe/meta.yaml index 2a4b63d..09ac0f8 100644 --- a/packaging/conda/recipe/meta.yaml +++ b/packaging/conda/recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = '0.7.1.dev' %} +{% set version = '0.8.0' %} {% if version.endswith('.dev') %} {% set version = version + os.getenv('MLIO_BUILD_NUMBER', '0') %} diff --git a/src/mlio-py/mlio/integ/arrow/module.cc b/src/mlio-py/mlio/integ/arrow/module.cc index 62050ac..8a03522 100644 --- a/src/mlio-py/mlio/integ/arrow/module.cc +++ b/src/mlio-py/mlio/integ/arrow/module.cc @@ -38,6 +38,7 @@ namespace pymlio { // The memory layout of Arrow's Cython NativeFile type. struct Py_arrow_native_file { PyObject_HEAD void *vtable; + PyObject *weakref; std::shared_ptr input_stream; std::shared_ptr random_access; std::shared_ptr output_stream; @@ -54,7 +55,6 @@ static py::object make_py_arrow_native_file(Intrusive_ptr &&stream auto nf_inst = nf_type(); auto *obj = reinterpret_cast(nf_inst.ptr()); - obj->random_access = std::make_shared(std::move(stream)); obj->input_stream = obj->random_access; obj->output_stream = nullptr; diff --git a/src/mlio-py/setup.py b/src/mlio-py/setup.py index 99db883..cf2336b 100644 --- a/src/mlio-py/setup.py +++ b/src/mlio-py/setup.py @@ -55,7 +55,7 @@ def stamp_dev_version(version): # Metadata name='mlio', - version=stamp_dev_version('0.7.1.dev'), + version=stamp_dev_version('0.8.0'), description='A high performance data access library for machine learning tasks', # noqa: E501 long_description=long_description, long_description_content_type='text/markdown', @@ -87,7 +87,7 @@ def stamp_dev_version(version): 'tensorflow': ['tensorflow>=1.9.0'], 'torch': ['torch>=1.0.0'], 'mxnet': ['mxnet>=1.4.1'], - 'pyarrow': ['pyarrow==0.16.0'], + 'pyarrow': ['pyarrow==10.0.1'], }, # Add our externally-built extension modules to the package.