Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Index Column of Type Duration in DataFrame.rolling #15999

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
52d0792
feat: Allow rolling windows over duration columns.
Nennia May 1, 2024
fb7430e
feat: Allow rolling windows over duration columns.
Nennia May 1, 2024
9b53a23
Merge branch 'pola-rs:main' into master
AlexanderNenninger May 1, 2024
6884a8c
fix(python): Add types to test and use Python 3.8 compatible string l…
Nennia May 1, 2024
62aebcb
fix(python): Python3.8 compatible f-strings.
Nennia May 1, 2024
aabdf56
fix(python): formatting
Nennia May 1, 2024
b7d9045
fix(python): retrigger CI
Nennia May 1, 2024
07bc9c8
fix(python): formatting
Nennia May 1, 2024
c5d92ec
feat: Additional `uint` datatype support for the SQL interface (#15993)
alexander-beedie May 2, 2024
1498a37
fix: Crash/incorrect group_by/n_unique on categoricals created by (q)…
nameexhaustion May 2, 2024
cbbb8c8
test(python): Improve hypothesis strategy for decimals (#16001)
stinodego May 2, 2024
63a2af6
docs(python): Improve user-guide doc of UDF (#15923)
May 2, 2024
aa46fef
refactor: Add some comments (#16008)
ritchie46 May 2, 2024
f0d81b7
feat: Improve dynamic supertypes (#16009)
ritchie46 May 2, 2024
d6306be
docs(python): correct default in rolling_* function examples (#16000)
MarcoGorelli May 2, 2024
19e7548
fix: Fix CSE case where upper plan has no projection (#16011)
ritchie46 May 2, 2024
24c8fe1
fix: properly handle nulls in DictionaryArray::iter_typed (#16013)
orlp May 2, 2024
28373f0
docs(python): Remove unwanted linebreaks from docstrings (#16002)
bertiewooster May 2, 2024
31a019f
feat: Convert concat during IR conversion (#16016)
ritchie46 May 2, 2024
4f8e1dc
feat: raise more informative error messages in rolling_* aggregations…
MarcoGorelli May 2, 2024
6ca8f79
refactor: Use UnionArgs for DSL side (#16017)
ritchie46 May 3, 2024
267e5b6
docs(python): Update reference to `apply` (#15982)
avimallu May 3, 2024
5cc6b27
fix(python): formatting
Nennia May 3, 2024
f66dba1
feat: Allow rolling windows over duration columns.
Nennia May 1, 2024
3008b1d
fix(python): Add types to test and use Python 3.8 compatible string l…
Nennia May 1, 2024
4276b70
fix(python): retrigger CI
Nennia May 1, 2024
396fd23
feat: raise more informative error messages in rolling_* aggregations…
MarcoGorelli May 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion crates/polars-time/src/group_by/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ impl Wrap<&DataFrame> {
TimeUnit::Milliseconds,
None,
),
Duration(tu) => {
let time_type_dt = Datetime(*tu, None);
let dt = time.cast(&time_type_dt).unwrap();
let (out, by, gt) =
self.impl_rolling(dt, group_by, options, *tu, None, &time_type_dt)?;
let out = out.cast(&Duration(*tu)).unwrap();
return Ok((out, by, gt));
},
UInt32 | UInt64 | Int32 => {
let time_type_dt = Datetime(TimeUnit::Nanoseconds, None);
let dt = time.cast(&Int64).unwrap().cast(&time_type_dt).unwrap();
Expand Down Expand Up @@ -182,7 +190,7 @@ impl Wrap<&DataFrame> {
},
dt => polars_bail!(
ComputeError:
"expected any of the following dtypes: {{ Date, Datetime, Int32, Int64, UInt32, UInt64 }}, got {}",
"expected any of the following dtypes: {{ Date, Datetime, Duration, Int32, Int64, UInt32, UInt64 }}, got {}",
dt
),
};
Expand Down
62 changes: 62 additions & 0 deletions crates/polars/tests/it/lazy/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,68 @@ fn test_special_group_by_schemas() -> PolarsResult<()> {
&[3, 5, 7, 9, 5]
);

// Duration index column - period has different units
let out = df
.clone()
.lazy()
.with_column(
col("a")
.cast(DataType::Duration(TimeUnit::Milliseconds))
.set_sorted_flag(IsSorted::Ascending),
)
.rolling(
col("a"),
[],
RollingGroupOptions {
period: Duration::parse("2ms"),
offset: Duration::parse("0ms"),
closed_window: ClosedWindow::Left,
..Default::default()
},
)
.agg([col("b").sum().alias("sum")])
.select([col("a"), col("sum")])
.collect()?;

assert_eq!(
out.column("sum")?
.i32()?
.into_no_null_iter()
.collect::<Vec<_>>(),
&[3, 5, 7, 9, 5]
);

// Datetime index column - period has same units
let out = df
.clone()
.lazy()
.with_column(
col("a")
.cast(DataType::Datetime(TimeUnit::Milliseconds, None))
.set_sorted_flag(IsSorted::Ascending),
)
.rolling(
col("a"),
[],
RollingGroupOptions {
period: Duration::parse("2ms"),
offset: Duration::parse("0ms"),
closed_window: ClosedWindow::Left,
..Default::default()
},
)
.agg([col("b").sum().alias("sum")])
.select([col("a"), col("sum")])
.collect()?;

assert_eq!(
out.column("sum")?
.i32()?
.into_no_null_iter()
.collect::<Vec<_>>(),
&[3, 5, 7, 9, 5]
);

let out = df
.lazy()
.with_column(col("a").set_sorted_flag(IsSorted::Ascending))
Expand Down
41 changes: 41 additions & 0 deletions py-polars/tests/unit/operations/rolling/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,3 +955,44 @@ def test_rolling_invalid() -> None:
.rolling("index", period="3000d")
.agg(pl.col("values").sum().alias("sum"))
)


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_rolling_duration(time_unit: str) -> None:
    """Check that rolling over a Duration index agrees with a Datetime index.

    The same integer index column is cast once to ``pl.Duration`` and once to
    ``pl.Datetime`` (both using ``time_unit``). A rolling sum with a period of
    ``2{time_unit}`` is computed on each frame, and the aggregated values are
    required to be identical. The Duration result must also keep its Duration
    dtype for the index column.

    Parameters
    ----------
    time_unit
        Time unit used for both casts and for the rolling period suffix.
    """
    # Note: Both datetime with Unit != ns and Duration have weird behavior.
    # Here we only test for consistency between the two, not absolute values.
    df = pl.DataFrame(
        {
            "index_column": [1, 2, 3, 4, 5],
            "value": [
                1,
                10,
                100,
                1000,
                10000,
            ],
        }
    )
    df_duration = df.select(
        pl.col("index_column").cast(pl.Duration(time_unit=time_unit)).set_sorted(),
        "value",
    )

    df_datetime = df.select(
        pl.col("index_column").cast(pl.Datetime(time_unit=time_unit)).set_sorted(),
        "value",
    )

    res_duration = df_duration.rolling(
        index_column="index_column", period=f"2{time_unit}"
    ).agg(pl.col("value").sum())

    res_datetime = df_datetime.rolling(
        index_column="index_column", period=f"2{time_unit}"
    ).agg(pl.col("value").sum())

    # Bind the lists to locals before formatting: reusing the enclosing quote
    # character inside an f-string replacement field is a SyntaxError on
    # Python < 3.12.
    duration_values = res_duration["value"].to_list()
    datetime_values = res_datetime["value"].to_list()
    assert (
        duration_values == datetime_values
    ), f"{duration_values=}, {datetime_values=}"

    assert res_duration["index_column"].dtype == pl.Duration(time_unit=time_unit)
Loading