refactor: Implement nested row encoding / decoding (#19874)
coastalwhite authored Nov 22, 2024
1 parent 7b07c85 commit 132c64d
Showing 10 changed files with 1,459 additions and 553 deletions.
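Row encoding serializes each sort key into bytes such that comparing encoded rows byte by byte gives the same result as comparing the original values, including null placement and descending order; this commit extends that machinery so nested types are encoded recursively instead of having struct fields flattened into separate columns. The sketch below illustrates the general idea for a single nullable integer key. It is a minimal standalone example under assumed conventions (sentinel byte for nulls, sign-bit flip, byte inversion for descending), not Polars' actual byte format, and `encode_key` is a hypothetical helper.

```rust
// Minimal sketch of order-preserving key encoding (illustrative, not Polars'
// real format): encoded rows compare correctly with a plain byte-wise comparison.
fn encode_key(value: Option<i64>, descending: bool, nulls_last: bool) -> Vec<u8> {
    let mut out = Vec::with_capacity(9);
    match value {
        None => {
            // A sentinel byte places nulls before or after every valid value.
            out.push(if nulls_last { 0xFF } else { 0x00 });
        },
        Some(v) => {
            out.push(if nulls_last { 0x00 } else { 0x01 });
            // Flipping the sign bit makes the big-endian bytes of a signed
            // integer compare in the same order as the integer itself.
            let mut bytes = ((v as u64) ^ (1 << 63)).to_be_bytes();
            if descending {
                // Inverting the payload bytes reverses the order of valid values.
                for b in &mut bytes {
                    *b = !*b;
                }
            }
            out.extend_from_slice(&bytes);
        },
    }
    out
}

fn main() {
    let keys = [Some(-3_i64), None, Some(10), Some(-3)];
    let mut encoded: Vec<Vec<u8>> = keys.iter().map(|k| encode_key(*k, false, false)).collect();
    encoded.sort();
    // Sorted byte-wise: the null row comes first, then -3, -3, 10.
    assert_eq!(encoded[0], vec![0x00_u8]);
}
```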
1 change: 1 addition & 0 deletions crates/polars-arrow/src/array/dictionary/typed_iterator.rs
@@ -181,6 +181,7 @@ impl<'a, K: DictionaryKey, V: DictValue> Iterator for DictionaryIterTyped<'a, K,

unsafe impl<K: DictionaryKey, V: DictValue> TrustedLen for DictionaryIterTyped<'_, K, V> {}

+impl<K: DictionaryKey, V: DictValue> ExactSizeIterator for DictionaryIterTyped<'_, K, V> {}
impl<K: DictionaryKey, V: DictValue> DoubleEndedIterator for DictionaryIterTyped<'_, K, V> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
26 changes: 26 additions & 0 deletions crates/polars-arrow/src/array/fixed_size_list/mod.rs
@@ -225,6 +225,32 @@ impl FixedSizeListArray {
}
dims
}

pub fn propagate_nulls(&self) -> Self {
    let Some(validity) = self.validity() else {
        return self.clone();
    };

    let propagated_validity = if self.size == 1 {
        validity.clone()
    } else {
        Bitmap::from_trusted_len_iter(
            (0..self.size * validity.len())
                .map(|i| unsafe { validity.get_bit_unchecked(i / self.size) }),
        )
    };

    let propagated_validity = match self.values.validity() {
        None => propagated_validity,
        Some(val) => val & &propagated_validity,
    };
    Self::new(
        self.dtype().clone(),
        self.length,
        self.values.with_validity(Some(propagated_validity)),
        self.validity.clone(),
    )
}
}

// must use
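The new `FixedSizeListArray::propagate_nulls` pushes the outer validity down into the child values: each outer bit is repeated `size` times and intersected with the child's own validity, so a null list also masks every element it covers. Below is a minimal standalone sketch of the same bitmap arithmetic using plain `Vec<bool>` instead of arrow `Bitmap`s; the function name mirrors the method but the signature is otherwise hypothetical.

```rust
// Standalone sketch of the validity propagation done by propagate_nulls, using
// Vec<bool> in place of arrow Bitmaps. `outer` holds one bit per list,
// `inner` one bit per child value, and `size` is the fixed list width.
fn propagate_nulls(outer: &[bool], inner: Option<&[bool]>, size: usize) -> Vec<bool> {
    // Repeat every outer bit `size` times so it lines up with the child values.
    let expanded: Vec<bool> = (0..outer.len() * size).map(|i| outer[i / size]).collect();
    match inner {
        // No child validity yet: the expanded outer validity is the result.
        None => expanded,
        // Otherwise intersect: a child value stays valid only if both the list
        // and the value itself are valid.
        Some(inner) => expanded.iter().zip(inner).map(|(a, b)| *a && *b).collect(),
    }
}

fn main() {
    // Two lists of width 2; the second list is null.
    let outer = [true, false];
    let inner = [true, false, true, true];
    assert_eq!(
        propagate_nulls(&outer, Some(&inner[..]), 2),
        vec![true, false, false, false]
    );
}
```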
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/offset.rs
@@ -514,7 +514,7 @@ impl<O: Offset> OffsetsBuffer<O> {

/// Returns `(offset, len)` pairs.
#[inline]
-pub fn offset_and_length_iter(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
+pub fn offset_and_length_iter(&self) -> impl ExactSizeIterator<Item = (usize, usize)> + '_ {
self.windows(2).map(|x| {
let [l, r] = x else { unreachable!() };
let l = l.to_usize();
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/trusted_len.rs
@@ -1,6 +1,6 @@
//! Declares [`TrustedLen`].
use std::iter::Scan;
-use std::slice::Iter;
+use std::slice::{Iter, IterMut};

/// An iterator of known, fixed size.
///
@@ -14,6 +14,7 @@ use std::slice::Iter;
pub unsafe trait TrustedLen: Iterator {}

unsafe impl<T> TrustedLen for Iter<'_, T> {}
+unsafe impl<T> TrustedLen for IterMut<'_, T> {}

unsafe impl<'a, I, T: 'a> TrustedLen for std::iter::Copied<I>
where
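`TrustedLen` is an unsafe marker trait promising that an iterator reports its exact length, which lets consumers allocate once up front; this hunk extends the marker to `std::slice::IterMut`. The sketch below shows how such a marker can be exploited by a collector; `trusted_collect` is a hypothetical helper written for illustration, not part of the crate.

```rust
/// Marker trait: implementors promise that `size_hint` reports the exact length.
unsafe trait TrustedLen: Iterator {}

// Mirrors the slice-iterator impls in this file, including the new IterMut one.
unsafe impl<T> TrustedLen for std::slice::Iter<'_, T> {}
unsafe impl<T> TrustedLen for std::slice::IterMut<'_, T> {}

/// Collect into a Vec, allocating exactly once thanks to the trusted length.
fn trusted_collect<I: TrustedLen>(iter: I) -> Vec<I::Item> {
    let (_, upper) = iter.size_hint();
    let mut out = Vec::with_capacity(upper.expect("TrustedLen implies an exact upper bound"));
    out.extend(iter);
    out
}

fn main() {
    let mut data = [1u32, 2, 3];
    // The mutable slice iterator now carries the marker as well.
    for x in trusted_collect(data.iter_mut()) {
        *x *= 2;
    }
    assert_eq!(data, [2, 4, 6]);
}
```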
33 changes: 4 additions & 29 deletions crates/polars-core/src/chunked_array/ops/row_encode.rs
@@ -144,20 +144,8 @@ pub fn _get_rows_encoded_unordered(by: &[Series]) -> PolarsResult<RowsEncoded> {

let arr = _get_rows_encoded_compat_array(by)?;
let field = EncodingField::new_unsorted();
-match arr.dtype() {
-    // Flatten the struct fields.
-    ArrowDataType::Struct(_) => {
-        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
-        for arr in arr.values() {
-            cols.push(arr.clone() as ArrayRef);
-            fields.push(field)
-        }
-    },
-    _ => {
-        cols.push(arr);
-        fields.push(field)
-    },
-}
+cols.push(arr);
+fields.push(field);
}
Ok(convert_columns(num_rows, &cols, &fields))
}
@@ -187,21 +175,8 @@ pub fn _get_rows_encoded(
nulls_last: *null_last,
no_order: false,
};
-match arr.dtype() {
-    // Flatten the struct fields.
-    ArrowDataType::Struct(_) => {
-        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
-        let arr = arr.propagate_nulls();
-        for value_arr in arr.values() {
-            cols.push(value_arr.clone() as ArrayRef);
-            fields.push(sort_field);
-        }
-    },
-    _ => {
-        cols.push(arr);
-        fields.push(sort_field);
-    },
-}
+cols.push(arr);
+fields.push(sort_field);
}
Ok(convert_columns(num_rows, &cols, &fields))
}
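With nested encoding available, struct columns are now handed to the row encoder as a single column instead of being flattened into one sort field per struct field (the removed path above, which also had to call `propagate_nulls`, presumably so the outer struct validity reached the per-field encoders). One distinction flattening cannot preserve is the difference between a null struct and a struct whose fields are all null; the standalone sketch below illustrates that with plain `Option` values.

```rust
// A struct column with an outer validity resembles Option<Row>, while the
// flattened per-field representation is just Row with nullable fields.
// Flattening maps both values below to (None, None), losing the difference
// between "the struct is null" and "all of its fields are null".
#[derive(Debug, PartialEq)]
struct Row {
    a: Option<i32>,
    b: Option<i32>,
}

fn flatten(value: &Option<Row>) -> (Option<i32>, Option<i32>) {
    match value {
        None => (None, None),
        Some(row) => (row.a, row.b),
    }
}

fn main() {
    let outer_null: Option<Row> = None;
    let all_fields_null = Some(Row { a: None, b: None });

    // Distinct at the struct level...
    assert_ne!(outer_null, all_fields_null);
    // ...but identical once flattened into separate key columns.
    assert_eq!(flatten(&outer_null), flatten(&all_fields_null));
}
```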