Skip to content

Commit

Permalink
fix: Take into account scalar/partitioned columns in DataFrame::split…
Browse files Browse the repository at this point in the history
…_chunks (#21606)
  • Loading branch information
orlp authored Mar 5, 2025
1 parent 55be31f commit 4d2aa57
Showing 1 changed file with 20 additions and 4 deletions.
24 changes: 20 additions & 4 deletions crates/polars-core/src/frame/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,32 @@ impl DataFrame {
pub fn split_chunks(&mut self) -> impl Iterator<Item = DataFrame> + '_ {
self.align_chunks_par();

let first_series_col_idx = self
.columns
.iter()
.position(|col| col.as_series().is_some());
let df_height = self.height();
let mut prev_height = 0;
(0..self.first_col_n_chunks()).map(move |i| unsafe {
// There might still be scalar/partitioned columns after aligning,
// so we follow the size of the chunked column, if any.
let chunk_size = first_series_col_idx
.map(|c| self.get_columns()[c].as_series().unwrap().chunks()[i].len())
.unwrap_or(df_height);
let columns = self
.get_columns()
.iter()
.map(|column| column.as_materialized_series().select_chunk(i))
.map(Column::from)
.map(|col| match col {
Column::Series(s) => Column::from(s.select_chunk(i)),
Column::Partitioned(_) | Column::Scalar(_) => {
col.slice(prev_height as i64, chunk_size)
},
})
.collect::<Vec<_>>();

let height = Self::infer_height(&columns);
DataFrame::new_no_checks(height, columns)
prev_height += chunk_size;

DataFrame::new_no_checks(chunk_size, columns)
})
}

Expand Down

0 comments on commit 4d2aa57

Please sign in to comment.