Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add dt.replace #19708

Merged
merged 17 commits into from
Dec 20, 2024
30 changes: 30 additions & 0 deletions crates/polars-plan/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,4 +331,34 @@ impl DateLikeNameSpace {
TemporalFunction::TotalNanoseconds,
))
}

/// Build an expression that replaces individual date/time components of the
/// temporal values produced by this namespace's expression.
///
/// The wrapped expression and the eight component expressions are handed to
/// [`TemporalFunction::Replace`] as its inputs, in this order: `year`,
/// `month`, `day`, `hour`, `minute`, `second`, `microsecond`, `ambiguous`.
/// The function implementation reads its inputs positionally, so the order
/// below must not change.
///
/// For `Date` inputs the implementation only consumes `year`, `month` and
/// `day`; the remaining components are used for `Datetime` inputs.
// NOTE(review): how null component values are treated is decided by the
// `polars_time::replace` helpers, which are not visible here — confirm before
// documenting it for users.
#[allow(clippy::too_many_arguments)]
pub fn replace(
    self,
    year: Expr,
    month: Expr,
    day: Expr,
    hour: Expr,
    minute: Expr,
    second: Expr,
    microsecond: Expr,
    ambiguous: Expr,
) -> Expr {
    let function = FunctionExpr::TemporalExpr(TemporalFunction::Replace);
    // Positional inputs that follow the temporal expression itself.
    let components = [
        year,
        month,
        day,
        hour,
        minute,
        second,
        microsecond,
        ambiguous,
    ];
    self.0.map_many_private(function, &components, false, None)
}
}
48 changes: 48 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use polars_time::base_utc_offset as base_utc_offset_fn;
use polars_time::dst_offset as dst_offset_fn;
#[cfg(feature = "offset_by")]
use polars_time::impl_offset_by;
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
use polars_time::replace::{replace_date, replace_datetime};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -62,6 +64,7 @@ pub enum TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset,
Round,
Replace,
#[cfg(feature = "timezones")]
ReplaceTimeZone(Option<TimeZone>, NonExistent),
Combine(TimeUnit),
Expand Down Expand Up @@ -117,6 +120,7 @@ impl TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset => mapper.with_dtype(DataType::Duration(TimeUnit::Milliseconds)),
Round => mapper.with_same_dtype(),
Replace => mapper.with_same_dtype(),
#[cfg(feature = "timezones")]
ReplaceTimeZone(tz, _non_existent) => mapper.map_datetime_dtype_timezone(tz.as_ref()),
DatetimeFunction {
Expand Down Expand Up @@ -187,6 +191,7 @@ impl Display for TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset => "dst_offset",
Round => "round",
Replace => "replace",
#[cfg(feature = "timezones")]
ReplaceTimeZone(_, _) => "replace_time_zone",
DatetimeFunction { .. } => return write!(f, "dt.datetime"),
Expand Down Expand Up @@ -555,3 +560,46 @@ pub(super) fn round(s: &[Column]) -> PolarsResult<Column> {
dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"),
})
}

/// Replace selected date/time components of a temporal column.
///
/// `s` holds the inputs positionally:
///
/// | index | meaning       | strictly cast to |
/// |-------|---------------|------------------|
/// | 0     | temporal data | (not cast)       |
/// | 1     | year          | `Int32`          |
/// | 2     | month         | `Int8`           |
/// | 3     | day           | `Int8`           |
/// | 4     | hour          | `Int8`           |
/// | 5     | minute        | `Int8`           |
/// | 6     | second        | `Int8`           |
/// | 7     | microsecond   | `Int32`          |
/// | 8     | ambiguous     | `String`         |
///
/// * `Datetime` input: all components are forwarded to `replace_datetime`;
///   the microsecond column is multiplied by 1_000 to obtain nanoseconds.
/// * `Date` input: only year, month and day are forwarded to `replace_date`;
///   indices 4..=8 are not read.
///
/// # Errors
///
/// Returns an error if any strict cast fails, if the underlying
/// `replace_date` / `replace_datetime` call fails, or if the dtype of `s[0]`
/// is neither `Date` nor `Datetime`.
///
/// # Panics
///
/// Indexing panics if `s` contains fewer columns than the branch reads
/// (4 for `Date`, 9 for `Datetime`).
pub(super) fn replace(s: &[Column]) -> PolarsResult<Column> {
    let time_series = &s[0];
    // Year/month/day are needed by both branches, so cast them up front.
    let s_year = &s[1].strict_cast(&DataType::Int32)?;
    let s_month = &s[2].strict_cast(&DataType::Int8)?;
    let s_day = &s[3].strict_cast(&DataType::Int8)?;
    let year = s_year.i32()?;
    let month = s_month.i8()?;
    let day = s_day.i8()?;

    match time_series.dtype() {
        DataType::Datetime(_, _) => {
            let s_hour = &s[4].strict_cast(&DataType::Int8)?;
            let s_minute = &s[5].strict_cast(&DataType::Int8)?;
            let s_second = &s[6].strict_cast(&DataType::Int8)?;
            let s_microsecond = &s[7].strict_cast(&DataType::Int32)?;
            let hour = s_hour.i8()?;
            let minute = s_minute.i8()?;
            let second = s_second.i8()?;
            // The helper takes nanoseconds; callers supply microseconds.
            let nanosecond = &(s_microsecond.i32()? * 1_000);
            let s_ambiguous = &s[8].strict_cast(&DataType::String)?;
            let ambiguous = s_ambiguous.str()?;

            let out = replace_datetime(
                // The dtype was matched as `Datetime` above, so this cannot fail.
                time_series.datetime().unwrap(),
                year,
                month,
                day,
                hour,
                minute,
                second,
                nanosecond,
                ambiguous,
            );
            out.map(|s| s.into_column())
        },
        DataType::Date => {
            // The dtype was matched as `Date` above, so this cannot fail.
            let out = replace_date(time_series.date().unwrap(), year, month, day);
            out.map(|s| s.into_column())
        },
        // Report the operation as `replace`; this arm previously named `round`.
        dt => polars_bail!(opq = replace, got = dt, expected = "date/datetime"),
    }
}
85 changes: 23 additions & 62 deletions crates/polars-plan/src/dsl/function_expr/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
#[cfg(feature = "timezones")]
DSTOffset => map!(datetime::dst_offset),
Round => map_as_slice!(datetime::round),
Replace => map_as_slice!(datetime::replace),
#[cfg(feature = "timezones")]
ReplaceTimeZone(tz, non_existent) => {
map_as_slice!(dispatch::replace_time_zone, tz.as_deref(), non_existent)
Expand All @@ -73,14 +74,12 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
}
}

#[cfg(feature = "dtype-datetime")]
pub(super) fn datetime(
s: &[Column],
time_unit: &TimeUnit,
time_zone: Option<&str>,
) -> PolarsResult<Column> {
use polars_core::export::chrono::NaiveDate;
use polars_core::utils::CustomIterTools;

let col_name = PlSmallStr::from_static("datetime");

if s.iter().any(|s| s.is_empty()) {
Expand Down Expand Up @@ -123,91 +122,53 @@ pub(super) fn datetime(
}
let year = year.i32()?;

let mut month = month.cast(&DataType::UInt32)?;
let mut month = month.cast(&DataType::Int8)?;
if month.len() < max_len {
month = month.new_from_index(0, max_len);
}
let month = month.u32()?;
let month = month.i8()?;

let mut day = day.cast(&DataType::UInt32)?;
let mut day = day.cast(&DataType::Int8)?;
if day.len() < max_len {
day = day.new_from_index(0, max_len);
}
let day = day.u32()?;
let day = day.i8()?;

let mut hour = hour.cast(&DataType::UInt32)?;
let mut hour = hour.cast(&DataType::Int8)?;
if hour.len() < max_len {
hour = hour.new_from_index(0, max_len);
}
let hour = hour.u32()?;
let hour = hour.i8()?;

let mut minute = minute.cast(&DataType::UInt32)?;
let mut minute = minute.cast(&DataType::Int8)?;
if minute.len() < max_len {
minute = minute.new_from_index(0, max_len);
}
let minute = minute.u32()?;
let minute = minute.i8()?;

let mut second = second.cast(&DataType::UInt32)?;
let mut second = second.cast(&DataType::Int8)?;
if second.len() < max_len {
second = second.new_from_index(0, max_len);
}
let second = second.u32()?;
let second = second.i8()?;

let mut microsecond = microsecond.cast(&DataType::UInt32)?;
if microsecond.len() < max_len {
microsecond = microsecond.new_from_index(0, max_len);
let mut nanosecond = microsecond.cast(&DataType::Int32)? * 1_000;
if nanosecond.len() < max_len {
nanosecond = nanosecond.new_from_index(0, max_len);
}
let microsecond = microsecond.u32()?;
let nanosecond = nanosecond.i32()?;

let mut _ambiguous = ambiguous.cast(&DataType::String)?;
if _ambiguous.len() < max_len {
_ambiguous = _ambiguous.new_from_index(0, max_len);
}
let _ambiguous = _ambiguous.str()?;

let ca: Int64Chunked = year
.into_iter()
.zip(month)
.zip(day)
.zip(hour)
.zip(minute)
.zip(second)
.zip(microsecond)
.map(|((((((y, m), d), h), mnt), s), us)| {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) =
(y, m, d, h, mnt, s, us)
{
NaiveDate::from_ymd_opt(y, m, d)
.and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us))
.map(|ndt| match time_unit {
TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(),
TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(),
TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(),
})
} else {
None
}
})
.collect_trusted();

let ca = match time_zone {
#[cfg(feature = "timezones")]
Some(_) => {
let mut ca = ca.into_datetime(*time_unit, None);
ca = replace_time_zone(&ca, time_zone, _ambiguous, NonExistent::Raise)?;
ca
},
_ => {
assert!(
time_zone.is_none(),
"cannot make use of the `time_zone` argument without the 'timezones' feature enabled."
);
ca.into_datetime(*time_unit, None)
},
};
let ambiguous = _ambiguous.str()?;

let mut s = ca.into_column();
s.rename(col_name);
Ok(s)
let ca = DatetimeChunked::new_from_parts(
year, month, day, hour, minute, second, nanosecond, ambiguous, time_unit, time_zone,
col_name,
);
ca.map(|s| s.into_column())
}

pub(super) fn combine(s: &[Column], tu: TimeUnit) -> PolarsResult<Column> {
Expand Down
27 changes: 27 additions & 0 deletions crates/polars-python/src/expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,33 @@ impl PyExpr {
self.inner.clone().dt().round(every.inner).into()
}

// Python-facing wrapper for `dt().replace(...)`.
//
// Unwraps the inner expression of every argument and forwards them, in the
// same order, to the `replace` method of this expression's `dt()` namespace;
// the resulting expression is converted back into `Self`.
fn dt_replace(
    &self,
    year: Self,
    month: Self,
    day: Self,
    hour: Self,
    minute: Self,
    second: Self,
    microsecond: Self,
    ambiguous: Self,
) -> Self {
    let namespace = self.inner.clone().dt();
    let replaced = namespace.replace(
        year.inner,
        month.inner,
        day.inner,
        hour.inner,
        minute.inner,
        second.inner,
        microsecond.inner,
        ambiguous.inner,
    );
    replaced.into()
}

fn dt_combine(&self, time: Self, time_unit: Wrap<TimeUnit>) -> Self {
self.inner
.clone()
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ pub enum PyTemporalFunction {
BaseUtcOffset,
DSTOffset,
Round,
Replace,
ReplaceTimeZone,
Combine,
DatetimeFunction,
Expand Down Expand Up @@ -1024,6 +1025,7 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
#[cfg(feature = "timezones")]
TemporalFunction::DSTOffset => (PyTemporalFunction::DSTOffset,).into_py_any(py),
TemporalFunction::Round => (PyTemporalFunction::Round,).into_py_any(py),
TemporalFunction::Replace => (PyTemporalFunction::Replace).into_py_any(py),
#[cfg(feature = "timezones")]
TemporalFunction::ReplaceTimeZone(time_zone, non_existent) => (
PyTemporalFunction::ReplaceTimeZone,
Expand Down
28 changes: 27 additions & 1 deletion crates/polars-time/src/chunkedarray/date.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use arrow::temporal_conversions::{MILLISECONDS, SECONDS_IN_DAY};
use arrow::temporal_conversions::{EPOCH_DAYS_FROM_CE, MILLISECONDS, SECONDS_IN_DAY};
use polars_core::export::chrono::{Datelike, NaiveDate};
use polars_core::utils::CustomIterTools;

use super::*;

Expand Down Expand Up @@ -74,6 +76,30 @@ pub trait DateMethods: AsDate {
}

fn parse_from_str_slice(name: PlSmallStr, v: &[&str], fmt: &str) -> DateChunked;

/// Construct a date ChunkedArray from individual `year`, `month` and `day`
/// components.
///
/// The three inputs are zipped element-wise. An output element is null when
/// any of its components is null or when `NaiveDate::from_ymd_opt` rejects
/// the combination; otherwise it is the number of days since the Unix epoch
/// (`num_days_from_ce() - EPOCH_DAYS_FROM_CE`). The result is renamed to
/// `name` and always returned as `Ok`.
fn new_from_parts(
    year: &Int32Chunked,
    month: &Int8Chunked,
    day: &Int8Chunked,
    name: PlSmallStr,
) -> PolarsResult<DateChunked> {
    let components = year.into_iter().zip(month).zip(day);
    let mut days: Int32Chunked = components
        .map(|((y, m), d)| {
            // `?` short-circuits to `None` (a null) on a missing component.
            // Negative `i8` values wrap to very large `u32`s, which
            // `from_ymd_opt` rejects, so they also end up as null.
            let date = NaiveDate::from_ymd_opt(y?, m? as u32, d? as u32)?;
            Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
        })
        .collect_trusted();
    days.rename(name);
    Ok(days.into_date())
}
}

impl DateMethods for DateChunked {
Expand Down
Loading
Loading