Skip to content

Commit

Permalink
Implement AsRef on PyArray and PyChunkedArray (#33)
Browse files Browse the repository at this point in the history
* Implement AsRef on PyArray and PyChunkedArray

* comment out python output

* Add arrow-select as dev dependency
  • Loading branch information
kylebarron authored Jun 28, 2024
1 parent f2423dc commit 1e8dd9f
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 21 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions arro3-compute/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ use pyo3::prelude::*;
use pyo3_arrow::error::PyArrowResult;
use pyo3_arrow::PyArray;

/// Take elements by index from an Array, creating a new Array from those
/// indexes.
#[pyfunction]
pub fn take(py: Python, values: PyArray, indices: PyArray) -> PyArrowResult<PyObject> {
let (values_array, values_field) = values.into_inner();
let (indices, _) = indices.into_inner();
let output_array =
py.allow_threads(|| arrow_select::take::take(&values_array, &indices, None))?;
PyArray::new(output_array, values_field).to_arro3(py)
py.allow_threads(|| arrow_select::take::take(values.as_ref(), indices.as_ref(), None))?;
PyArray::new(output_array, values.field().clone()).to_arro3(py)
}
3 changes: 3 additions & 0 deletions pyo3-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ pyo3 = { workspace = true, features = ["abi3-py38"] }
numpy = { workspace = true, features = ["half"] }
thiserror = { workspace = true }

[dev-dependencies]
arrow-select = { workspace = true }

[lib]
crate-type = ["rlib"]
23 changes: 10 additions & 13 deletions pyo3-arrow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ We can wrap a function to be used in Python with just a few lines of code.

When you use a struct defined in `pyo3_arrow` as an argument to your function, it will automatically convert user input to a Rust `arrow` object via zero-copy FFI. Then once you're done, call `to_arro3` or `to_pyarrow` to export the data back to Python.

```rs
```rust
use pyo3::prelude::*;
use pyo3_arrow::error::PyArrowResult;
use pyo3_arrow::PyArray;
Expand All @@ -27,18 +27,15 @@ use pyo3_arrow::PyArray;
/// indexes.
#[pyfunction]
pub fn take(py: Python, values: PyArray, indices: PyArray) -> PyArrowResult<PyObject> {
let (values_array, values_field) = values.into_inner();
let (indices, _) = indices.into_inner();

// We can call py.allow_threads to ensure the GIL is released during our
// operations
// This example just wraps `arrow_select::take::take`
let output_array =
py.allow_threads(|| arrow_select::take::take(&values_array, &indices, None))?;
py.allow_threads(|| arrow_select::take::take(values.as_ref(), indices.as_ref(), None))?;

// Construct a PyArray and export it to the arro3 Python Arrow
// implementation
PyArray::new(output_array, values_field).to_arro3(py)
PyArray::new(output_array, values.field().clone()).to_arro3(py)
}
```

Expand All @@ -53,13 +50,13 @@ output = take(arr, arr)
output
# <arro3.core._rust.Array at 0x10787b510>
pa.array(output)
<pyarrow.lib.Int64Array object at 0x10aa11000>
[
0,
1,
2,
3
]
# <pyarrow.lib.Int64Array object at 0x10aa11000>
# [
# 0,
# 1,
# 2,
# 3
# ]
```

In this example, we use pyarrow to create the original array and to view the result, but the use of pyarrow is not required. It does, at least, show how the Arrow PyCapsule Interface makes it seamless to share these Arrow objects between Python Arrow implementations.
Expand Down
17 changes: 15 additions & 2 deletions pyo3-arrow/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::ffi::CString;
use std::sync::Arc;

use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use arrow_array::ArrayRef;
use arrow_schema::FieldRef;
use arrow_array::{Array, ArrayRef};
use arrow_schema::{Field, FieldRef};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyCapsule, PyTuple, PyType};
Expand Down Expand Up @@ -72,6 +72,19 @@ impl PyArray {
}
}

impl From<ArrayRef> for PyArray {
fn from(value: ArrayRef) -> Self {
let field = Field::new("", value.data_type().clone(), true);
Self::new(value, Arc::new(field))
}
}

impl AsRef<ArrayRef> for PyArray {
fn as_ref(&self) -> &ArrayRef {
&self.array
}
}

#[pymethods]
impl PyArray {
/// An implementation of the Array interface, for interoperability with numpy and other
Expand Down
34 changes: 32 additions & 2 deletions pyo3-arrow/src/chunked.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use std::ffi::CString;
use std::sync::Arc;

use arrow_array::{Array, ArrayRef};
use arrow_schema::FieldRef;
use arrow_schema::{ArrowError, Field, FieldRef};
use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyCapsule, PyTuple, PyType};

use crate::error::PyArrowResult;
use crate::error::{PyArrowError, PyArrowResult};
use crate::ffi::from_python::ffi_stream::ArrowArrayStreamReader;
use crate::ffi::from_python::utils::import_stream_pycapsule;
use crate::ffi::to_python::chunked::ArrayIterator;
Expand Down Expand Up @@ -70,6 +71,35 @@ impl PyChunkedArray {
}
}

impl TryFrom<Vec<ArrayRef>> for PyChunkedArray {
type Error = PyArrowError;

fn try_from(value: Vec<ArrayRef>) -> Result<Self, Self::Error> {
if value.is_empty() {
return Err(ArrowError::SchemaError(
"Cannot infer data type from empty Vec<ArrayRef>".to_string(),
)
.into());
}

if !value
.windows(2)
.all(|w| w[0].data_type() == w[1].data_type())
{
return Err(ArrowError::SchemaError("Mismatched data types".to_string()).into());
}

let field = Field::new("", value.first().unwrap().data_type().clone(), true);
Ok(Self::new(value, Arc::new(field)))
}
}

impl AsRef<[ArrayRef]> for PyChunkedArray {
fn as_ref(&self) -> &[ArrayRef] {
&self.chunks
}
}

#[pymethods]
impl PyChunkedArray {
/// An implementation of the Array interface, for interoperability with numpy and other
Expand Down
6 changes: 6 additions & 0 deletions pyo3-arrow/src/record_batch_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ impl PyRecordBatchReader {
}
}

impl From<Box<dyn RecordBatchReader + Send>> for PyRecordBatchReader {
fn from(value: Box<dyn RecordBatchReader + Send>) -> Self {
Self::new(value)
}
}

#[pymethods]
impl PyRecordBatchReader {
/// An implementation of the [Arrow PyCapsule
Expand Down

0 comments on commit 1e8dd9f

Please sign in to comment.