diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index 9759988bb5cf..ad3df3c0bfed 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -1138,6 +1138,16 @@ impl LazyFrame { if let Expr::Column(name) = index_column { options.index_column = name; } else { + fn is_int_range(expr: &Expr) -> bool { + match expr { + Expr::Alias(input, _) => is_int_range(input), + Expr::Function { function, .. } => { + matches!(function, FunctionExpr::Range(f) if f.to_string() == "int_range") + }, + _ => false, + } + } + options.int_range = is_int_range(&index_column); let output_field = index_column .to_field(&self.collect_schema().unwrap(), Context::Default) .unwrap(); diff --git a/crates/polars-time/src/group_by/dynamic.rs b/crates/polars-time/src/group_by/dynamic.rs index 2ee6b6a0a793..4f8889ab9c30 100644 --- a/crates/polars-time/src/group_by/dynamic.rs +++ b/crates/polars-time/src/group_by/dynamic.rs @@ -34,6 +34,7 @@ pub struct DynamicGroupOptions { pub include_boundaries: bool, pub closed_window: ClosedWindow, pub start_by: StartBy, + pub int_range: bool, // Whether the index column is a range column } impl Default for DynamicGroupOptions { @@ -47,6 +48,7 @@ impl Default for DynamicGroupOptions { include_boundaries: false, closed_window: ClosedWindow::Left, start_by: Default::default(), + int_range: false, } } } @@ -311,16 +313,30 @@ impl Wrap<&DataFrame> { let groups = if by.is_empty() { let vals = dt.downcast_iter().next().unwrap(); let ts = vals.values().as_slice(); - let (groups, lower, upper) = group_by_windows( - w, - ts, - options.closed_window, - tu, - tz, - include_lower_bound, - include_upper_bound, - options.start_by, - ); + + let vanilla_start_step = (ts[0] == 0) && (ts[1] - ts[0] == 1); + let (groups, lower, upper) = match (options.int_range, vanilla_start_step) { + (true, true) => group_by_windows_int_range( + self.0.height() as IdxSize, + options.every.nanoseconds() as IdxSize, + options.period.nanoseconds() as IdxSize, + options.offset.nanoseconds() as IdxSize, + options.closed_window, + include_lower_bound, + include_upper_bound, + options.start_by, + ), + _ => group_by_windows( + w, + ts, + options.closed_window, + tu, + tz, + include_lower_bound, + include_upper_bound, + options.start_by, + ), + }; update_bounds(lower, upper); PolarsResult::Ok(GroupsProxy::Slice { groups, @@ -841,6 +857,7 @@ mod test { include_boundaries: true, closed_window: ClosedWindow::Both, start_by: Default::default(), + int_range: Default::default(), }, ) .unwrap(); @@ -961,6 +978,7 @@ mod test { include_boundaries: true, closed_window: ClosedWindow::Both, start_by: Default::default(), + int_range: Default::default(), }, ) .unwrap(); diff --git a/crates/polars-time/src/windows/group_by.rs b/crates/polars-time/src/windows/group_by.rs index af5c9f8ec492..3e9ad3ecfe4d 100644 --- a/crates/polars-time/src/windows/group_by.rs +++ b/crates/polars-time/src/windows/group_by.rs @@ -220,6 +220,104 @@ pub fn group_by_windows( (groups, lower_bound, upper_bound) } +/// Generate groups efficiently for a (vanilla) int-range index. +/// A vanilla int-range index is an index that starts at 0 and has a step of 1. +#[allow(clippy::too_many_arguments)] +pub(crate) fn group_by_windows_int_range( + len: IdxSize, + step: IdxSize, + mut window_size: IdxSize, + mut offset: IdxSize, + closed_window: ClosedWindow, + include_lower_bound: bool, + include_upper_bound: bool, + start_by: StartBy, +) -> (Vec<[IdxSize; 2]>, Vec, Vec) { + let orig_window_size = window_size; + + if start_by == StartBy::DataPoint { + offset = 0; + } + + if closed_window == ClosedWindow::Right { + offset += 1; + } else if closed_window == ClosedWindow::Both { + window_size += 1; + } else if closed_window == ClosedWindow::None { + offset += 1; + window_size -= 1; + } + + let mut groups: Vec<[IdxSize; 2]> = match window_size { + 0 => Vec::new(), + _ => generate_start_indices(offset, len, step) + .into_iter() + .map(|start| { + let end = std::cmp::min(window_size, len - start); + [start, end] + }) + .collect(), + }; + + if (start_by == StartBy::WindowBound) && (window_size > 0) { + while offset >= step { + offset -= step; + groups.insert(0, [offset, window_size]); + } + } + + let mut lower = match (include_lower_bound, window_size > 0) { + (true, true) => groups.iter().map(|&i| i[0] as i64).collect::>(), + _ => Vec::::new(), + }; + + let mut upper = match (include_upper_bound, window_size > 0) { + (true, true) => groups + .iter() + .map(|&i| (i[0] + orig_window_size) as i64) + .collect::>(), + _ => Vec::::new(), + }; + + if include_lower_bound + && (closed_window == ClosedWindow::Right) | (closed_window == ClosedWindow::None) + { + lower = lower.iter().map(|&i| i - 1).collect::>(); + } + if include_upper_bound + && (closed_window == ClosedWindow::Right) | (closed_window == ClosedWindow::None) + { + upper = upper.iter().map(|&i| i - 1).collect::>(); + } + if start_by == StartBy::WindowBound + && (offset > 0) + && (window_size >= offset) + && (step < window_size + offset) + { + groups.insert(0, [0, offset + window_size - step]); + if include_lower_bound { + lower.insert(0, offset as i64 - step as i64); + if (closed_window == ClosedWindow::Right) | (closed_window == ClosedWindow::None) { + lower[0] -= 1; + } + } + if include_upper_bound { + upper.insert(0, groups[0][1] as i64); + if (closed_window == ClosedWindow::Right) | (closed_window == ClosedWindow::Both) { + upper[0] -= 1; + } + } + } + + (groups, lower, upper) +} + +#[inline] +fn generate_start_indices(offset: IdxSize, len: IdxSize, stride: IdxSize) -> Vec { + let nb_idxs: IdxSize = (len - offset).div_ceil(stride); + (0..nb_idxs).map(|i| offset + i * stride).collect() +} + // t is right at the end of the window // ------t--- // [------]