diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 46a054e0e51c..1b6eeb15dca4 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -41,6 +41,7 @@ on: - arrow-avro/** - arrow-ord/** - arrow-pyarrow-integration-testing/** + - arrow-pyarrow/** - arrow-schema/** - arrow-select/** - arrow-sort/** @@ -164,7 +165,7 @@ jobs: - name: Run Rust tests run: | source venv/bin/activate - cargo test -p arrow --test pyarrow --features pyarrow + cargo test -p arrow-pyarrow - name: Run tests run: | source venv/bin/activate diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 80fef2674aae..a20575391b48 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -51,7 +51,8 @@ jobs: run: | # do not produce debug symbols to keep memory usage down export RUSTFLAGS="-C debuginfo=0" - cargo test + # PyArrow tests happen in integration.yml. + cargo test --workspace --exclude arrow-pyarrow # Check workspace wide compile and test with default features for @@ -83,7 +84,8 @@ jobs: # do not produce debug symbols to keep memory usage down export RUSTFLAGS="-C debuginfo=0" export PATH=$PATH:/d/protoc/bin - cargo test + # PyArrow tests happen in integration.yml. + cargo test --workspace --exclude arrow-pyarrow # Run cargo fmt for all crates diff --git a/Cargo.toml b/Cargo.toml index c5fda34751ad..b44618eed45d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "arrow-ipc", "arrow-json", "arrow-ord", + "arrow-pyarrow", "arrow-row", "arrow-schema", "arrow-select", @@ -88,6 +89,7 @@ arrow-data = { version = "55.1.0", path = "./arrow-data" } arrow-ipc = { version = "55.1.0", path = "./arrow-ipc" } arrow-json = { version = "55.1.0", path = "./arrow-json" } arrow-ord = { version = "55.1.0", path = "./arrow-ord" } +arrow-pyarrow = { version = "55.1.0", path = "./arrow-pyarrow" } arrow-row = { version = "55.1.0", path = "./arrow-row" } arrow-schema = { version = "55.1.0", path = "./arrow-schema" } arrow-select = { version = "55.1.0", path = "./arrow-select" } diff --git a/arrow-pyarrow/Cargo.toml b/arrow-pyarrow/Cargo.toml new file mode 100644 index 000000000000..e0dc3137d5f5 --- /dev/null +++ b/arrow-pyarrow/Cargo.toml @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "arrow-pyarrow" +version = { workspace = true } +description = "Pyarrow bindings" +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +include = { workspace = true } +edition = { workspace = true } +rust-version = { workspace = true } + +[lib] +name = "arrow_pyarrow" +bench = false + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +arrow-array = { workspace = true, features = ["ffi"] } +arrow-data = { workspace = true } +arrow-schema = { workspace = true } +pyo3 = { version = "0.24.1", default-features = false } diff --git a/arrow/src/pyarrow.rs b/arrow-pyarrow/src/lib.rs similarity index 98% rename from arrow/src/pyarrow.rs rename to arrow-pyarrow/src/lib.rs index 4ccbd0541d3f..566aa7402c6e 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow-pyarrow/src/lib.rs @@ -60,7 +60,15 @@ use std::convert::{From, TryFrom}; use std::ptr::{addr_of, addr_of_mut}; use std::sync::Arc; -use arrow_array::{RecordBatchIterator, RecordBatchOptions, RecordBatchReader, StructArray}; +use arrow_array::ffi; +use arrow_array::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; +use arrow_array::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}; +use arrow_array::{ + make_array, RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, + StructArray, +}; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType, Field, Schema}; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi::Py_uintptr_t; use pyo3::import_exception; @@ -68,14 +76,6 @@ use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; use pyo3::types::{PyCapsule, PyList, PyTuple}; -use crate::array::{make_array, ArrayData}; -use crate::datatypes::{DataType, Field, Schema}; -use crate::error::ArrowError; -use crate::ffi; -use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; -use crate::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}; -use crate::record_batch::RecordBatch; - import_exception!(pyarrow, ArrowException); /// Represents an exception raised by PyArrow. pub type PyArrowException = ArrowException; diff --git a/arrow/tests/pyarrow.rs b/arrow-pyarrow/tests/pyarrow.rs similarity index 95% rename from arrow/tests/pyarrow.rs rename to arrow-pyarrow/tests/pyarrow.rs index d9ebd0daa1cd..8ed21f5d8ae4 100644 --- a/arrow/tests/pyarrow.rs +++ b/arrow-pyarrow/tests/pyarrow.rs @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ArrayRef, Int32Array, StringArray}; -use arrow::pyarrow::{FromPyArrow, ToPyArrow}; -use arrow::record_batch::RecordBatch; use arrow_array::builder::{BinaryViewBuilder, StringViewBuilder}; -use arrow_array::{Array, BinaryViewArray, StringViewArray}; +use arrow_array::{ + Array, ArrayRef, BinaryViewArray, Int32Array, RecordBatch, StringArray, StringViewArray, +}; +use arrow_pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::Python; use std::sync::Arc; diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index e19981f76b69..0be22561a50c 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -48,13 +48,13 @@ arrow-data = { workspace = true } arrow-ipc = { workspace = true, optional = true } arrow-json = { workspace = true, optional = true } arrow-ord = { workspace = true } +arrow-pyarrow = { workspace = true, optional = true } arrow-row = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } arrow-string = { workspace = true } rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true } -pyo3 = { version = "0.24.1", default-features = false, optional = true } half = { version = "2.1", default-features = false, optional = true } [package.metadata.docs.rs] @@ -72,7 +72,7 @@ prettyprint = ["arrow-cast/prettyprint"] # an optional dependency for supporting compile to wasm32-unknown-unknown # target without assuming an environment containing JavaScript. test_utils = ["dep:rand", "dep:half"] -pyarrow = ["pyo3", "ffi"] +pyarrow = ["ffi", "dep:arrow-pyarrow"] # force_validate runs full data validation for all arrays that are created # this is not enabled by default as it is too computationally expensive # but is run as part of our CI checks @@ -299,10 +299,6 @@ required-features = ["test_utils"] name = "csv" required-features = ["csv", "chrono-tz"] -[[test]] -name = "pyarrow" -required-features = ["pyarrow"] - [[test]] name = "array_cast" required-features = ["chrono-tz", "prettyprint"] diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 98599634df56..e9e94540d9e7 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -397,7 +397,7 @@ pub use arrow_ipc as ipc; #[cfg(feature = "json")] pub use arrow_json as json; #[cfg(feature = "pyarrow")] -pub mod pyarrow; +pub use arrow_pyarrow as pyarrow; /// Contains the `RecordBatch` type and associated traits pub mod record_batch {