Skip to content

Commit

Permalink
fix: Don't silently produce null values from invalid input to `pl.dat…
Browse files Browse the repository at this point in the history
…etime` and `pl.date` (pola-rs#21013)
  • Loading branch information
mcrumiller authored and anath2 committed Mar 5, 2025
1 parent 0e91b7b commit 1cca630
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 25 deletions.
16 changes: 9 additions & 7 deletions crates/polars-time/src/chunkedarray/date.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use arrow::temporal_conversions::{EPOCH_DAYS_FROM_CE, MILLISECONDS, SECONDS_IN_DAY};
use chrono::{Datelike, NaiveDate};
use polars_core::utils::CustomIterTools;

use super::*;

Expand Down Expand Up @@ -84,20 +83,23 @@ pub trait DateMethods: AsDate {
day: &Int8Chunked,
name: PlSmallStr,
) -> PolarsResult<DateChunked> {
let mut ca: Int32Chunked = year
let ca: Int32Chunked = year
.into_iter()
.zip(month)
.zip(day)
.map(|((y, m), d)| {
if let (Some(y), Some(m), Some(d)) = (y, m, d) {
NaiveDate::from_ymd_opt(y, m as u32, d as u32)
.map(|t| t.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
NaiveDate::from_ymd_opt(y, m as u32, d as u32).map_or_else(
// We have an invalid date.
|| Err(polars_err!(ComputeError: format!("Invalid date components ({}, {}, {}) supplied", y, m, d))),
// We have a valid date.
|date| Ok(Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)),
)
} else {
None
Ok(None)
}
})
.collect_trusted();
ca.rename(name);
.try_collect_ca_with_dtype(name, DataType::Int32)?;
Ok(ca.into_date())
}
}
Expand Down
39 changes: 26 additions & 13 deletions crates/polars-time/src/chunkedarray/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,22 +179,36 @@ pub trait DatetimeMethods: AsDatetime {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(ns)) =
(y, m, d, h, mnt, s, ns)
{
NaiveDate::from_ymd_opt(y, m as u32, d as u32)
.and_then(|nd| {
nd.and_hms_nano_opt(h as u32, mnt as u32, s as u32, ns as u32)
})
.map(|ndt| match time_unit {
TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(),
TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(),
TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(),
})
NaiveDate::from_ymd_opt(y, m as u32, d as u32).map_or_else(
// We have an invalid date.
|| Err(polars_err!(ComputeError: format!("Invalid date components ({}, {}, {}) supplied", y, m, d))),
// We have a valid date.
|date| {
date.and_hms_nano_opt(h as u32, mnt as u32, s as u32, ns as u32)
.map_or_else(
// We have invalid time components for the specified date.
|| Err(polars_err!(ComputeError: format!("Invalid time components ({}, {}, {}, {}) supplied", h, mnt, s, ns))),
// We have a valid time.
|ndt| {
let t = ndt.and_utc();
Ok(Some(match time_unit {
TimeUnit::Milliseconds => t.timestamp_millis(),
TimeUnit::Microseconds => t.timestamp_micros(),
TimeUnit::Nanoseconds => {
t.timestamp_nanos_opt().unwrap()
},
}))
},
)
},
)
} else {
None
Ok(None)
}
})
.collect_trusted();
.try_collect_ca_with_dtype(name, DataType::Int64)?;

let mut ca = match time_zone {
let ca = match time_zone {
#[cfg(feature = "timezones")]
Some(_) => {
let mut ca = ca.into_datetime(*time_unit, None);
Expand All @@ -209,7 +223,6 @@ pub trait DatetimeMethods: AsDatetime {
ca.into_datetime(*time_unit, None)
},
};
ca.rename(name);
Ok(ca)
}
}
Expand Down
47 changes: 47 additions & 0 deletions py-polars/tests/unit/functions/as_datatype/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,53 @@ def test_date_datetime() -> None:
assert_series_equal(out["h2"], df["hour"].rename("h2"))


@pytest.mark.parametrize(
"components",
[
[2025, 13, 1],
[2025, 1, 32],
[2025, 2, 29],
],
)
def test_date_invalid_component(components: list[int]) -> None:
y, m, d = components
msg = rf"Invalid date components \({y}, {m}, {d}\) supplied"
with pytest.raises(ComputeError, match=msg):
pl.select(pl.date(*components))


@pytest.mark.parametrize(
"components",
[
[2025, 13, 1, 0, 0, 0, 0],
[2025, 1, 32, 0, 0, 0, 0],
[2025, 2, 29, 0, 0, 0, 0],
],
)
def test_datetime_invalid_date_component(components: list[int]) -> None:
y, m, d = components[0:3]
msg = rf"Invalid date components \({y}, {m}, {d}\) supplied"
with pytest.raises(ComputeError, match=msg):
pl.select(pl.datetime(*components))


@pytest.mark.parametrize(
"components",
[
[2025, 1, 1, 25, 0, 0, 0],
[2025, 1, 1, 0, 60, 0, 0],
[2025, 1, 1, 0, 0, 60, 0],
[2025, 1, 1, 0, 0, 0, 2_000_000],
],
)
def test_datetime_invalid_time_component(components: list[int]) -> None:
h, mnt, s, us = components[3:]
ns = us * 1_000
msg = rf"Invalid time components \({h}, {mnt}, {s}, {ns}\) supplied"
with pytest.raises(ComputeError, match=msg):
pl.select(pl.datetime(*components))


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_time_unit(time_unit: TimeUnit) -> None:
result = pl.datetime(2022, 1, 2, time_unit=time_unit)
Expand Down
5 changes: 0 additions & 5 deletions py-polars/tests/unit/functions/range/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ def test_date_range_invalid_time_unit() -> None:
)


def test_date_range_invalid_time() -> None:
with pytest.raises(ComputeError, match="end is an out-of-range time"):
pl.date_range(pl.date(2024, 1, 1), pl.date(2024, 2, 30), eager=True)


def test_date_range_lazy_with_literals() -> None:
df = pl.DataFrame({"misc": ["x"]}).with_columns(
pl.date_ranges(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,58 @@ def test_replace_preserve_tu_and_tz(tu: TimeUnit, tzinfo: str) -> None:
result = s.dt.replace(year=2000)
assert result.dtype.time_unit == tu # type: ignore[attr-defined]
assert result.dtype.time_zone == tzinfo # type: ignore[attr-defined]


def test_replace_date_invalid_components() -> None:
df = pl.DataFrame({"a": [date(2025, 1, 1)]})

with pytest.raises(
ComputeError, match=r"Invalid date components \(2025, 13, 1\) supplied"
):
df.select(pl.col("a").dt.replace(month=13))
with pytest.raises(
ComputeError, match=r"Invalid date components \(2025, 1, 32\) supplied"
):
df.select(pl.col("a").dt.replace(day=32))


def test_replace_datetime_invalid_date_components() -> None:
df = pl.DataFrame({"a": [datetime(2025, 1, 1)]})

with pytest.raises(
ComputeError, match=r"Invalid date components \(2025, 13, 1\) supplied"
):
df.select(pl.col("a").dt.replace(month=13))
with pytest.raises(
ComputeError, match=r"Invalid date components \(2025, 1, 32\) supplied"
):
df.select(pl.col("a").dt.replace(day=32))


def test_replace_datetime_invalid_time_components() -> None:
df = pl.DataFrame({"a": [datetime(2025, 1, 1)]})

# hour
with pytest.raises(
ComputeError, match=r"Invalid time components \(25, 0, 0, 0\) supplied"
):
df.select(pl.col("a").dt.replace(hour=25))

# minute
with pytest.raises(
ComputeError, match=r"Invalid time components \(0, 61, 0, 0\) supplied"
):
df.select(pl.col("a").dt.replace(minute=61))

# second
with pytest.raises(
ComputeError, match=r"Invalid time components \(0, 0, 61, 0\) supplied"
):
df.select(pl.col("a").dt.replace(second=61))

# microsecond
with pytest.raises(
ComputeError,
match=r"Invalid time components \(0, 0, 0, 2000000000\) supplied",
):
df.select(pl.col("a").dt.replace(microsecond=2_000_000))

0 comments on commit 1cca630

Please sign in to comment.