Skip to content

Commit e29e9df

Browse files
authored
fix: Use all chunks in Series from arrow struct (#19218)
1 parent df8699b commit e29e9df

File tree

4 files changed

+63 -32 lines changed

crates/polars-core/src/series/from.rs

+45 -30
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked;
1010
))]
1111
use arrow::temporal_conversions::*;
1212
use polars_error::feature_gated;
13+
use polars_utils::itertools::Itertools;
1314

1415
use crate::chunked_array::cast::{cast_chunks, CastOptions};
1516
#[cfg(feature = "object")]
@@ -575,39 +576,53 @@ unsafe fn to_physical_and_dtype(
575576
},
576577
ArrowDataType::Struct(_fields) => {
577578
feature_gated!("dtype-struct", {
578-
debug_assert_eq!(arrays.len(), 1);
579-
let arr = arrays[0].clone();
580-
let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
581-
let (values, dtypes): (Vec<_>, Vec<_>) = arr
582-
.values()
579+
let mut pl_fields = None;
580+
let arrays = arrays
583581
.iter()
584-
.zip(_fields.iter())
585-
.map(|(value, field)| {
586-
let mut out =
587-
to_physical_and_dtype(vec![value.clone()], Some(&field.metadata));
588-
(out.0.pop().unwrap(), out.1)
582+
.map(|arr| {
583+
let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
584+
let (values, dtypes): (Vec<_>, Vec<_>) = arr
585+
.values()
586+
.iter()
587+
.zip(_fields.iter())
588+
.map(|(value, field)| {
589+
let mut out = to_physical_and_dtype(
590+
vec![value.clone()],
591+
Some(&field.metadata),
592+
);
593+
(out.0.pop().unwrap(), out.1)
594+
})
595+
.unzip();
596+
597+
let arrow_fields = values
598+
.iter()
599+
.zip(_fields.iter())
600+
.map(|(arr, field)| {
601+
ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
602+
})
603+
.collect();
604+
let arrow_array = Box::new(StructArray::new(
605+
ArrowDataType::Struct(arrow_fields),
606+
arr.len(),
607+
values,
608+
arr.validity().cloned(),
609+
)) as ArrayRef;
610+
611+
if pl_fields.is_none() {
612+
pl_fields = Some(
613+
_fields
614+
.iter()
615+
.zip(dtypes)
616+
.map(|(field, dtype)| Field::new(field.name.clone(), dtype))
617+
.collect_vec(),
618+
)
619+
}
620+
621+
arrow_array
589622
})
590-
.unzip();
623+
.collect_vec();
591624

592-
let arrow_fields = values
593-
.iter()
594-
.zip(_fields.iter())
595-
.map(|(arr, field)| {
596-
ArrowField::new(field.name.clone(), arr.dtype().clone(), true)
597-
})
598-
.collect();
599-
let arrow_array = Box::new(StructArray::new(
600-
ArrowDataType::Struct(arrow_fields),
601-
arr.len(),
602-
values,
603-
arr.validity().cloned(),
604-
)) as ArrayRef;
605-
let polars_fields = _fields
606-
.iter()
607-
.zip(dtypes)
608-
.map(|(field, dtype)| Field::new(field.name.clone(), dtype))
609-
.collect();
610-
(vec![arrow_array], DataType::Struct(polars_fields))
625+
(arrays, DataType::Struct(pl_fields.unwrap()))
611626
})
612627
},
613628
// Use Series architecture to convert nested logical types to physical.

crates/polars-ops/src/chunked_array/array/to_struct.rs

-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@ pub trait ToStruct: AsArray {
2323
.as_deref()
2424
.unwrap_or(&arr_default_struct_name_gen);
2525

26-
polars_ensure!(n_fields != 0, ComputeError: "cannot create a struct with 0 fields");
2726
let fields = POOL.install(|| {
2827
(0..n_fields)
2928
.into_par_iter()

crates/polars-ops/src/chunked_array/list/to_struct.rs

-1
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@ pub trait ToStruct: AsList {
6767
.as_deref()
6868
.unwrap_or(&_default_struct_name_gen);
6969

70-
polars_ensure!(n_fields != 0, ComputeError: "cannot create a struct with 0 fields");
7170
let fields = POOL.install(|| {
7271
(0..n_fields)
7372
.into_par_iter()

py-polars/tests/unit/datatypes/test_struct.py

+18
Original file line number | Diff line number | Diff line change
@@ -1131,3 +1131,21 @@ def test_zfs_row_encoding(size: int) -> None:
11311131

11321132
# We need to ignore the order because the group_by is non-deterministic
11331133
assert_frame_equal(gb, df, check_row_order=False)
1134+
1135+
1136+
@pytest.mark.may_fail_auto_streaming
1137+
def test_list_to_struct_19208() -> None:
1138+
df = pl.DataFrame(
1139+
{
1140+
"nested": [
1141+
[{"a": 1}],
1142+
[],
1143+
[{"a": 3}],
1144+
]
1145+
}
1146+
)
1147+
assert pl.concat([df[0], df[1], df[2]]).select(
1148+
pl.col("nested").list.to_struct()
1149+
).to_dict(as_series=False) == {
1150+
"nested": [{"field_0": {"a": 1}}, {"field_0": None}, {"field_0": {"a": 3}}]
1151+
}

0 commit comments

Comments
 (0)