From b67bc9c76390e3baeb766e8d73caff90be4086ca Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 18 Nov 2024 10:28:07 +0100 Subject: [PATCH 1/4] chore: Add proper tests for row encoding --- Cargo.lock | 1 + crates/polars-python/Cargo.toml | 1 + crates/polars-python/src/dataframe/general.rs | 42 +++++++++++++ crates/polars-python/src/datatypes.rs | 1 + crates/polars-python/src/series/general.rs | 59 +++++++++++++++++++ py-polars/polars/dataframe/frame.py | 17 ++++++ py-polars/polars/series/series.py | 18 ++++++ 7 files changed, 139 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index d00cfa7ff0a6..c284821461d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3351,6 +3351,7 @@ dependencies = [ "polars-ops", "polars-parquet", "polars-plan", + "polars-row", "polars-time", "polars-utils", "pyo3", diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index 1f1624fa7b0f..71bf8b369d9b 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -18,6 +18,7 @@ polars-mem-engine = { workspace = true } polars-ops = { workspace = true, features = ["bitwise"] } polars-parquet = { workspace = true, optional = true } polars-plan = { workspace = true } +polars-row = { workspace = true } polars-time = { workspace = true } polars-utils = { workspace = true } diff --git a/crates/polars-python/src/dataframe/general.rs b/crates/polars-python/src/dataframe/general.rs index e866e7db1004..f7c0e7a5e1ec 100644 --- a/crates/polars-python/src/dataframe/general.rs +++ b/crates/polars-python/src/dataframe/general.rs @@ -710,4 +710,46 @@ impl PyDataFrame { let cap = md_cols.capacity(); (ptr as usize, len, cap) } + + // Utility functions to work with polars-row + #[pyo3(signature = (fields))] + fn _row_encode<'py>( + &'py self, + py: Python<'py>, + fields: Vec<(bool, bool, bool)>, + ) -> PyResult { + let rows = py.allow_threads(|| { + let mut df = self.df.clone(); + df.rechunk_mut(); + + assert_eq!(df.width(), fields.len()); + + let chunks = df + .get_columns() + .iter() + .map(|c| c.as_materialized_series().chunks()[0].to_boxed()) + .collect::>(); + let fields = fields + .into_iter() + .map( + |(descending, nulls_last, no_order)| polars_row::EncodingField { + descending, + nulls_last, + no_order, + }, + ) + .collect::>(); + + polars_row::convert_columns(&chunks, &fields) + }); + + Ok(unsafe { + Series::from_chunks_and_dtype_unchecked( + PlSmallStr::from_static("row_enc"), + vec![rows.into_array().boxed()], + &DataType::BinaryOffset, + ) + } + .into()) + } } diff --git a/crates/polars-python/src/datatypes.rs b/crates/polars-python/src/datatypes.rs index ea7686a29ec6..ff7446fc3252 100644 --- a/crates/polars-python/src/datatypes.rs +++ b/crates/polars-python/src/datatypes.rs @@ -10,6 +10,7 @@ use crate::{PyExpr, Wrap}; // Don't change the order of these! #[repr(u8)] +#[derive(Clone)] pub(crate) enum PyDataType { Int8, Int16, diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index 3134f5354f09..648c4a7f97ec 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -10,6 +10,7 @@ use pyo3::Python; use super::PySeries; use crate::dataframe::PyDataFrame; +use crate::datatypes::PyDataType; use crate::error::PyPolarsErr; use crate::prelude::*; use crate::py_modules::POLARS; @@ -534,6 +535,64 @@ impl PySeries { .map_err(PyPolarsErr::from)?; Ok(out.into()) } + + // Utility functions to work with polars-row + #[pyo3(signature = (dtypes, fields))] + fn _row_decode<'py>( + &'py self, + py: Python<'py>, + dtypes: Vec<(String, PyDataType)>, + fields: Vec<(bool, bool, bool)>, + ) -> PyResult { + assert_eq!(dtypes.len(), fields.len()); + + let arrow_dtypes = dtypes + .iter() + .map(|(_, dt)| DataType::from(dt.clone()).to_arrow(CompatLevel::newest())) + .collect::>(); + let fields = fields + .into_iter() + .map( + |(descending, nulls_last, no_order)| polars_row::EncodingField { + descending, + nulls_last, + no_order, + }, + ) + .collect::>(); + let columns = py.allow_threads(|| { + let arr = self.series.binary_offset().map_err(PyPolarsErr::from)?; + assert_eq!(arr.chunks().len(), 1); + let mut values = arr + .downcast_iter() + .next() + .unwrap() + .values_iter() + .collect::>(); + let columns = PyResult::Ok(unsafe { + polars_row::decode::decode_rows(&mut values, &fields, &arrow_dtypes) + })?; + + PyResult::Ok( + columns + .into_iter() + .zip(dtypes) + .map(|(arr, (name, dtype))| { + unsafe { + Series::from_chunks_and_dtype_unchecked( + PlSmallStr::from(name), + vec![arr], + &DataType::from(dtype), + ) + } + .into_column() + }) + .collect(), + ) + })?; + + Ok(DataFrame::new(columns).map_err(PyPolarsErr::from)?.into()) + } } macro_rules! impl_set_with_mask { diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 49c2e2470534..f0db77f7f7f5 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -11304,6 +11304,23 @@ def _to_metadata( return md + def _row_encode( + self, + fields: list[tuple[bool, bool, bool]], + ) -> Series: + """ + Row encode the given DataFrame. + + This is an internal function not meant for outside consumption and can + be changed or removed at any point in time. + + fields have order: + - descending + - nulls_last + - no_order + """ + return pl.Series._from_pyseries(self._df._row_encode(fields)) + def _prepare_other_arg(other: Any, length: int | None = None) -> Series: # if not a series create singleton series such that it will broadcast diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index d86c9d29cd0f..e270fb4c9322 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -7517,6 +7517,24 @@ def plot(self) -> SeriesPlot: raise ModuleUpgradeRequiredError(msg) return SeriesPlot(self) + def _row_decode( + self, + dtypes: list[tuple[str, DataType]], + fields: list[tuple[bool, bool, bool]], + ) -> DataFrame: + """ + Row decode the given Series. + + This is an internal function not meant for outside consumption and can + be changed or removed at any point in time. + + fields have order: + - descending + - nulls_last + - no_order + """ + return pl.DataFrame._from_pydf(self._s._row_decode(dtypes, fields)) + def _resolve_temporal_dtype( dtype: PolarsDataType | None, From 9241c27ef64482172241ee05cb8009b153271125 Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 18 Nov 2024 10:30:39 +0100 Subject: [PATCH 2/4] add actual tests --- py-polars/tests/unit/test_row_encoding.py | 143 ++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 py-polars/tests/unit/test_row_encoding.py diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py new file mode 100644 index 000000000000..0b4d7ddc8e7c --- /dev/null +++ b/py-polars/tests/unit/test_row_encoding.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import numpy as np +import pytest +from hypothesis import given + +import polars as pl +from polars.testing import assert_frame_equal +from polars.testing.parametric import dataframes + +# @TODO: At the moment no_order=True breaks roundtripping for some reason +FIELD_COMBS = [ + (descending, nulls_last, False) + for descending in [False, True] + for nulls_last in [False, True] +] + + +def roundtrip_re( + df: pl.DataFrame, fields: list[tuple[bool, bool, bool]] | None = None +) -> None: + if fields is None: + fields = [(False, False, False)] * df.width + + row_encoded = df._row_encode(fields) + dtypes = [(c, df.get_column(c).dtype) for c in df.columns] + result = row_encoded._row_decode(dtypes, fields) + + assert_frame_equal(df, result) + + +@given( + df=dataframes( + excluded_dtypes=[ + pl.List, + pl.Array, + pl.Struct, + pl.Categorical, + pl.Enum, + pl.Time, + pl.Datetime, + pl.Date, + pl.Duration, + pl.Null, + pl.Decimal, + ] + ) +) +@pytest.mark.parametrize("field", FIELD_COMBS) +def test_row_encoding_parametric( + df: pl.DataFrame, field: tuple[bool, bool, bool] +) -> None: + roundtrip_re(df, [field] * df.width) + + +@pytest.mark.parametrize("field", FIELD_COMBS) +def test_bool(field: tuple[bool, bool, bool]) -> None: + roundtrip_re(pl.Series("a", [], pl.Boolean).to_frame(), [field]) + roundtrip_re(pl.Series("a", [False], pl.Boolean).to_frame(), [field]) + roundtrip_re(pl.Series("a", [True], pl.Boolean).to_frame(), [field]) + roundtrip_re(pl.Series("a", [False, True], pl.Boolean).to_frame(), [field]) + roundtrip_re(pl.Series("a", [True, False], pl.Boolean).to_frame(), [field]) + + +@pytest.mark.parametrize( + "dtype", + [ + pl.Int8, + pl.Int16, + pl.Int32, + pl.Int64, + pl.UInt8, + pl.UInt16, + pl.UInt32, + pl.UInt64, + ], +) +@pytest.mark.parametrize("field", FIELD_COMBS) +def test_int(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None: + min = pl.select(x=dtype.min()).item() # type: ignore[attr-defined] + max = pl.select(x=dtype.max()).item() # type: ignore[attr-defined] + + roundtrip_re(pl.Series("a", [], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [0], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [min], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [max], dtype).to_frame(), [field]) + + roundtrip_re(pl.Series("a", [1, 2, 3], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [0, 1, 2, 3], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [min, 0, max], dtype).to_frame(), [field]) + + +@pytest.mark.parametrize( + "dtype", + [ + pl.Float32, + pl.Float64, + ], +) +@pytest.mark.parametrize("field", FIELD_COMBS) +def test_float(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None: + roundtrip_re(pl.Series("a", [], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [0.0], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [np.Infinity], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [-np.Infinity], dtype).to_frame(), [field]) + + roundtrip_re(pl.Series("a", [1.0, 2.0, 3.0], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [0.0, 1.0, 2.0, 3.0], dtype).to_frame(), [field]) + roundtrip_re( + pl.Series("a", [np.Infinity, 0, -np.Infinity], dtype).to_frame(), [field] + ) + + +@pytest.mark.parametrize("field", FIELD_COMBS) +def test_str(field: tuple[bool, bool, bool]) -> None: + roundtrip_re(pl.Series("a", [], pl.String).to_frame(), [field]) + roundtrip_re(pl.Series("a", [""], pl.String).to_frame(), [field]) + + roundtrip_re(pl.Series("a", ["a", "b", "c"], pl.String).to_frame(), [field]) + roundtrip_re(pl.Series("a", ["", "a", "b", "c"], pl.String).to_frame(), [field]) + + roundtrip_re( + pl.Series("a", ["different", "length", "strings"], pl.String).to_frame(), + [field], + ) + roundtrip_re( + pl.Series( + "a", ["different", "", "length", "", "strings"], pl.String + ).to_frame(), + [field], + ) + + +# def test_struct() -> None: +# # @TODO: How do we deal with zero-field structs? +# # roundtrip_re(pl.Series('a', [], pl.Struct({})).to_frame()) +# # roundtrip_re(pl.Series('a', [{}], pl.Struct({})).to_frame()) +# roundtrip_re(pl.Series("a", [{"x": 1}], pl.Struct({"x": pl.Int32})).to_frame()) +# roundtrip_re( +# pl.Series( +# "a", [{"x": 1}, {"y": 2}], pl.Struct({"x": pl.Int32, "y": pl.Int32}) +# ).to_frame() +# ) From 569a82780fe04dcce95be17cb21eaab44c79263f Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 18 Nov 2024 13:19:43 +0100 Subject: [PATCH 3/4] fix tests --- crates/polars-python/src/dataframe/general.rs | 2 +- crates/polars-python/src/series/general.rs | 11 ++++--- py-polars/polars/series/series.py | 6 ++-- py-polars/tests/unit/test_row_encoding.py | 29 ++++++++++--------- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/crates/polars-python/src/dataframe/general.rs b/crates/polars-python/src/dataframe/general.rs index f7c0e7a5e1ec..9172b79a3007 100644 --- a/crates/polars-python/src/dataframe/general.rs +++ b/crates/polars-python/src/dataframe/general.rs @@ -727,7 +727,7 @@ impl PyDataFrame { let chunks = df .get_columns() .iter() - .map(|c| c.as_materialized_series().chunks()[0].to_boxed()) + .map(|c| c.as_materialized_series().to_physical_repr().chunks()[0].to_boxed()) .collect::>(); let fields = fields .into_iter() diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index 648c4a7f97ec..d8697ed991ff 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -10,7 +10,6 @@ use pyo3::Python; use super::PySeries; use crate::dataframe::PyDataFrame; -use crate::datatypes::PyDataType; use crate::error::PyPolarsErr; use crate::prelude::*; use crate::py_modules::POLARS; @@ -541,14 +540,18 @@ impl PySeries { fn _row_decode<'py>( &'py self, py: Python<'py>, - dtypes: Vec<(String, PyDataType)>, + dtypes: Vec<(String, Wrap)>, fields: Vec<(bool, bool, bool)>, ) -> PyResult { assert_eq!(dtypes.len(), fields.len()); let arrow_dtypes = dtypes .iter() - .map(|(_, dt)| DataType::from(dt.clone()).to_arrow(CompatLevel::newest())) + .map(|(_, dt)| { + DataType::from(dt.0.clone()) + .to_physical() + .to_arrow(CompatLevel::newest()) + }) .collect::>(); let fields = fields .into_iter() @@ -582,7 +585,7 @@ impl PySeries { Series::from_chunks_and_dtype_unchecked( PlSmallStr::from(name), vec![arr], - &DataType::from(dtype), + &DataType::from(dtype.0), ) } .into_column() diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index e270fb4c9322..a559c7a3ce44 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -7519,8 +7519,8 @@ def plot(self) -> SeriesPlot: def _row_decode( self, - dtypes: list[tuple[str, DataType]], - fields: list[tuple[bool, bool, bool]], + dtypes: Iterable[tuple[str, DataType]], # type: ignore[valid-type] + fields: Iterable[tuple[bool, bool, bool]], ) -> DataFrame: """ Row decode the given Series. @@ -7533,7 +7533,7 @@ def _row_decode( - nulls_last - no_order """ - return pl.DataFrame._from_pydf(self._s._row_decode(dtypes, fields)) + return pl.DataFrame._from_pydf(self._s._row_decode(list(dtypes), list(fields))) def _resolve_temporal_dtype( diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py index 0b4d7ddc8e7c..3c94b53a1bf2 100644 --- a/py-polars/tests/unit/test_row_encoding.py +++ b/py-polars/tests/unit/test_row_encoding.py @@ -1,6 +1,5 @@ from __future__ import annotations -import numpy as np import pytest from hypothesis import given @@ -8,7 +7,7 @@ from polars.testing import assert_frame_equal from polars.testing.parametric import dataframes -# @TODO: At the moment no_order=True breaks roundtripping for some reason +# @TODO: Deal with no_order FIELD_COMBS = [ (descending, nulls_last, False) for descending in [False, True] @@ -37,12 +36,6 @@ def roundtrip_re( pl.Struct, pl.Categorical, pl.Enum, - pl.Time, - pl.Datetime, - pl.Date, - pl.Duration, - pl.Null, - pl.Decimal, ] ) ) @@ -53,6 +46,15 @@ def test_row_encoding_parametric( roundtrip_re(df, [field] * df.width) +@pytest.mark.parametrize("field", FIELD_COMBS) +def test_nulls(field: tuple[bool, bool, bool]) -> None: + roundtrip_re(pl.Series("a", [], pl.Null).to_frame(), [field]) + roundtrip_re(pl.Series("a", [None], pl.Null).to_frame(), [field]) + roundtrip_re(pl.Series("a", [None] * 2, pl.Null).to_frame(), [field]) + roundtrip_re(pl.Series("a", [None] * 13, pl.Null).to_frame(), [field]) + roundtrip_re(pl.Series("a", [None] * 42, pl.Null).to_frame(), [field]) + + @pytest.mark.parametrize("field", FIELD_COMBS) def test_bool(field: tuple[bool, bool, bool]) -> None: roundtrip_re(pl.Series("a", [], pl.Boolean).to_frame(), [field]) @@ -99,16 +101,17 @@ def test_int(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None: ) @pytest.mark.parametrize("field", FIELD_COMBS) def test_float(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None: + inf = float("inf") + inf_b = float("-inf") + roundtrip_re(pl.Series("a", [], dtype).to_frame(), [field]) roundtrip_re(pl.Series("a", [0.0], dtype).to_frame(), [field]) - roundtrip_re(pl.Series("a", [np.Infinity], dtype).to_frame(), [field]) - roundtrip_re(pl.Series("a", [-np.Infinity], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [inf], dtype).to_frame(), [field]) + roundtrip_re(pl.Series("a", [-inf_b], dtype).to_frame(), [field]) roundtrip_re(pl.Series("a", [1.0, 2.0, 3.0], dtype).to_frame(), [field]) roundtrip_re(pl.Series("a", [0.0, 1.0, 2.0, 3.0], dtype).to_frame(), [field]) - roundtrip_re( - pl.Series("a", [np.Infinity, 0, -np.Infinity], dtype).to_frame(), [field] - ) + roundtrip_re(pl.Series("a", [inf, 0, -inf_b], dtype).to_frame(), [field]) @pytest.mark.parametrize("field", FIELD_COMBS) From f195eb228b923d7f108303dacd3a694fb7e95a38 Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Mon, 18 Nov 2024 14:06:42 +0100 Subject: [PATCH 4/4] clean up impls --- crates/polars-python/src/dataframe/general.rs | 24 +++--- crates/polars-python/src/series/general.rs | 82 +++++++++---------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/crates/polars-python/src/dataframe/general.rs b/crates/polars-python/src/dataframe/general.rs index 9172b79a3007..0494d80bacea 100644 --- a/crates/polars-python/src/dataframe/general.rs +++ b/crates/polars-python/src/dataframe/general.rs @@ -711,14 +711,14 @@ impl PyDataFrame { (ptr as usize, len, cap) } - // Utility functions to work with polars-row + /// Internal utility function to allow direct access to the row encoding from python. #[pyo3(signature = (fields))] fn _row_encode<'py>( &'py self, py: Python<'py>, fields: Vec<(bool, bool, bool)>, ) -> PyResult { - let rows = py.allow_threads(|| { + py.allow_threads(|| { let mut df = self.df.clone(); df.rechunk_mut(); @@ -740,16 +740,16 @@ impl PyDataFrame { ) .collect::>(); - polars_row::convert_columns(&chunks, &fields) - }); + let rows = polars_row::convert_columns(&chunks, &fields); - Ok(unsafe { - Series::from_chunks_and_dtype_unchecked( - PlSmallStr::from_static("row_enc"), - vec![rows.into_array().boxed()], - &DataType::BinaryOffset, - ) - } - .into()) + Ok(unsafe { + Series::from_chunks_and_dtype_unchecked( + PlSmallStr::from_static("row_enc"), + vec![rows.into_array().boxed()], + &DataType::BinaryOffset, + ) + } + .into()) + }) } } diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index d8697ed991ff..88afa077b2e6 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -535,7 +535,7 @@ impl PySeries { Ok(out.into()) } - // Utility functions to work with polars-row + /// Internal utility function to allow direct access to the row encoding from python. #[pyo3(signature = (dtypes, fields))] fn _row_decode<'py>( &'py self, @@ -543,28 +543,29 @@ impl PySeries { dtypes: Vec<(String, Wrap)>, fields: Vec<(bool, bool, bool)>, ) -> PyResult { - assert_eq!(dtypes.len(), fields.len()); + py.allow_threads(|| { + assert_eq!(dtypes.len(), fields.len()); - let arrow_dtypes = dtypes - .iter() - .map(|(_, dt)| { - DataType::from(dt.0.clone()) - .to_physical() - .to_arrow(CompatLevel::newest()) - }) - .collect::>(); - let fields = fields - .into_iter() - .map( - |(descending, nulls_last, no_order)| polars_row::EncodingField { - descending, - nulls_last, - no_order, - }, - ) - .collect::>(); - let columns = py.allow_threads(|| { - let arr = self.series.binary_offset().map_err(PyPolarsErr::from)?; + let fields = fields + .into_iter() + .map( + |(descending, nulls_last, no_order)| polars_row::EncodingField { + descending, + nulls_last, + no_order, + }, + ) + .collect::>(); + + // The polars-row crate expects the physical arrow types. + let arrow_dtypes = dtypes + .iter() + .map(|(_, dtype)| dtype.0.to_physical().to_arrow(CompatLevel::newest())) + .collect::>(); + + // Get the BinaryOffset array. + let arr = self.series.rechunk(); + let arr = arr.binary_offset().map_err(PyPolarsErr::from)?; assert_eq!(arr.chunks().len(), 1); let mut values = arr .downcast_iter() @@ -572,29 +573,28 @@ impl PySeries { .unwrap() .values_iter() .collect::>(); + let columns = PyResult::Ok(unsafe { polars_row::decode::decode_rows(&mut values, &fields, &arrow_dtypes) })?; - PyResult::Ok( - columns - .into_iter() - .zip(dtypes) - .map(|(arr, (name, dtype))| { - unsafe { - Series::from_chunks_and_dtype_unchecked( - PlSmallStr::from(name), - vec![arr], - &DataType::from(dtype.0), - ) - } - .into_column() - }) - .collect(), - ) - })?; - - Ok(DataFrame::new(columns).map_err(PyPolarsErr::from)?.into()) + // Construct a DataFrame from the result. + let columns = columns + .into_iter() + .zip(dtypes) + .map(|(arr, (name, dtype))| { + unsafe { + Series::from_chunks_and_dtype_unchecked( + PlSmallStr::from(name), + vec![arr], + &dtype.0, + ) + } + .into_column() + }) + .collect::>(); + Ok(DataFrame::new(columns).map_err(PyPolarsErr::from)?.into()) + }) } }