Skip to content

Commit

Permalink
feat: Support both "iso" and "iso:strict" format options for `dt.to_s…
Browse files Browse the repository at this point in the history
…tring` (#19840)
  • Loading branch information
alexander-beedie authored Nov 20, 2024
1 parent 7c9e626 commit 9f1b40c
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 95 deletions.
53 changes: 27 additions & 26 deletions crates/polars-core/src/chunked_array/temporal/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,31 +53,32 @@ pub(crate) fn naive_datetime_to_date(v: NaiveDateTime) -> i32 {
}

pub fn get_strftime_format(fmt: &str, dtype: &DataType) -> String {
if fmt != "iso" {
return fmt.to_string();
if fmt != "iso" && fmt != "iso:strict" {
fmt.to_string()
} else {
let sep = if fmt == "iso" { " " } else { "T" };
#[allow(unreachable_code)]
match dtype {
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(tu, tz) => match (tu, tz.is_some()) {
(TimeUnit::Milliseconds, true) => format!("%F{}%T%.3f%:z", sep),
(TimeUnit::Milliseconds, false) => format!("%F{}%T%.3f", sep),
(TimeUnit::Microseconds, true) => format!("%F{}%T%.6f%:z", sep),
(TimeUnit::Microseconds, false) => format!("%F{}%T%.6f", sep),
(TimeUnit::Nanoseconds, true) => format!("%F{}%T%.9f%:z", sep),
(TimeUnit::Nanoseconds, false) => format!("%F{}%T%.9f", sep),
},
#[cfg(feature = "dtype-date")]
DataType::Date => "%F".to_string(),
#[cfg(feature = "dtype-time")]
DataType::Time => "%T%.f".to_string(),
_ => {
let err = format!(
"invalid call to `get_strftime_format`; fmt={:?}, dtype={}",
fmt, dtype
);
unimplemented!("{}", err)
},
}
}
#[allow(unreachable_code)]
let fmt: &str = match dtype {
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(tu, tz) => match (tu, tz.is_some()) {
(TimeUnit::Milliseconds, true) => "%F %T%.3f%:z",
(TimeUnit::Milliseconds, false) => "%F %T%.3f",
(TimeUnit::Microseconds, true) => "%F %T%.6f%:z",
(TimeUnit::Microseconds, false) => "%F %T%.6f",
(TimeUnit::Nanoseconds, true) => "%F %T%.9f%:z",
(TimeUnit::Nanoseconds, false) => "%F %T%.9f",
},
#[cfg(feature = "dtype-date")]
DataType::Date => "%F",
#[cfg(feature = "dtype-time")]
DataType::Time => "%T%.f",
_ => {
let err = format!(
"invalid call to `get_strftime_format`; fmt={:?}, dtype={}",
fmt, dtype
);
unimplemented!("{}", err)
},
};
fmt.to_string()
}
6 changes: 5 additions & 1 deletion crates/polars-core/src/chunked_array/temporal/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ impl DateChunked {
/// Convert from Date into String with the given format.
/// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
pub fn to_string(&self, format: &str) -> PolarsResult<StringChunked> {
let format = if format == "iso" { "%F" } else { format };
let format = if format == "iso" || format == "iso:strict" {
"%F"
} else {
format
};
let datefmt_f = |ndt: NaiveDate| ndt.format(format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = date32_to_date(val);
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/temporal/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ impl DurationChunked {
// the duration string functions below can reuse this string buffer
let mut s = String::with_capacity(32);
match format {
"iso" => {
"iso" | "iso:strict" => {
let out: StringChunked =
self.0
.apply_nonnull_values_generic(DataType::String, |v: i64| {
Expand Down
6 changes: 5 additions & 1 deletion crates/polars-core/src/chunked_array/temporal/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ impl TimeChunked {
pub fn to_string(&self, format: &str) -> StringChunked {
let mut ca: StringChunked = self.apply_kernel_cast(&|arr| {
let mut buf = String::new();
let format = if format == "iso" { "%T%.9f" } else { format };
let format = if format == "iso" || format == "iso:strict" {
"%T%.9f"
} else {
format
};
let mut mutarr = MutablePlString::with_capacity(arr.len());

for opt in arr.into_iter() {
Expand Down
97 changes: 62 additions & 35 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,19 +454,35 @@ def to_string(self, format: str | None = None) -> Expr:
"""
Convert a Date/Time/Datetime column into a String column with the given format.
.. versionchanged:: 1.15.0
Added support for the use of "iso:strict" as a format string.
.. versionchanged:: 1.14.0
Added support for the `Duration` dtype, and use of "iso" as a format string.
Parameters
----------
format
Format to use, refer to the `chrono strftime documentation
<https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
for specification. Example: `"%y-%m-%d"`.
* Format to use, refer to the `chrono strftime documentation
<https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
for specification. Example: `"%y-%m-%d"`.
* If no format is provided, the appropriate ISO format for the underlying
data type is used. This can be made explicit by passing `"iso"` or
`"iso:strict"` as the format string (see notes below for details).
Notes
-----
* Similar to `cast(pl.String)`, but this method allows you to customize
the formatting of the resulting string; if no format is provided, the
appropriate ISO format for the underlying data type is used.
* Datetime dtype expressions distinguish between "iso" and "iso:strict"
format strings. The difference is in the inclusion of a "T" separator
between the date and time components ("iso" results in ISO compliant
date and time components, separated with a space; "iso:strict" returns
the same components separated with a "T"). All other temporal types
return the same value for both format strings.
* Duration dtype expressions cannot be formatted with `strftime`. Instead,
only "iso" and "polars" are supported as format strings. The "iso" format
string results in ISO8601 duration string output, and "polars" results
Expand Down Expand Up @@ -494,7 +510,7 @@ def to_string(self, format: str | None = None) -> Expr:
... ],
... "td": [
... timedelta(days=-1, seconds=-42),
... timedelta(days=14, hours=-10, microseconds=1001),
... timedelta(days=14, hours=-10, microseconds=100),
... timedelta(seconds=0),
... ],
... }
Expand All @@ -503,28 +519,36 @@ def to_string(self, format: str | None = None) -> Expr:
Default format for temporal dtypes is ISO8601:
>>> import polars.selectors as cs
>>> df.select((cs.date() | cs.datetime()).dt.to_string().name.prefix("s_"))
shape: (3, 2)
┌────────────┬────────────────────────────┐
│ s_dt ┆ s_dtm │
│ --- ┆ --- │
│ str ┆ str │
╞════════════╪════════════════════════════╡
│ 1999-03-01 ┆ 1980-08-10 00:10:20.000000 │
│ 2020-05-03 ┆ 2010-10-20 08:25:35.000000 │
│ 2077-07-05 ┆ 2040-12-30 16:40:50.000000 │
└────────────┴────────────────────────────┘
>>> df.select((cs.time() | cs.duration()).dt.to_string().name.prefix("s_"))
>>> df.select(cs.temporal().dt.to_string().name.prefix("s_"))
shape: (3, 4)
┌────────────┬────────────────────────────┬─────────────────┬─────────────────┐
│ s_dt ┆ s_dtm ┆ s_tm ┆ s_td │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ str ┆ str │
╞════════════╪════════════════════════════╪═════════════════╪═════════════════╡
│ 1999-03-01 ┆ 1980-08-10 00:10:20.000000 ┆ 01:02:03.456789 ┆ -P1DT42S │
│ 2020-05-03 ┆ 2010-10-20 08:25:35.000000 ┆ 23:59:09.000101 ┆ P13DT14H0.0001S │
│ 2077-07-05 ┆ 2040-12-30 16:40:50.000000 ┆ 00:00:00.000100 ┆ PT0S │
└────────────┴────────────────────────────┴─────────────────┴─────────────────┘
For `Datetime` specifically you can choose between "iso" (where the date and
time components are ISO, separated by a space) and "iso:strict" (where these
components are separated by a "T"):
>>> df.select(
... pl.col("dtm").dt.to_string("iso").alias("dtm_iso"),
... pl.col("dtm").dt.to_string("iso:strict").alias("dtm_iso_strict"),
... )
shape: (3, 2)
┌─────────────────┬───────────────────┐
│ s_tm ┆ s_td │
│ --- ┆ --- │
│ str ┆ str │
╞═════════════════╪═══════════════════╡
│ 01:02:03.456789 ┆ -P1DT42S │
│ 23:59:09.000101 ┆ P13DT14H0.001001S │
│ 00:00:00.000100 ┆ PT0S │
└─────────────────┴───────────────────┘
┌────────────────────────────┬────────────────────────────┐
│ dtm_iso ┆ dtm_iso_strict │
│ --- ┆ --- │
│ str ┆ str │
╞════════════════════════════╪════════════════════════════╡
│ 1980-08-10 00:10:20.000000 ┆ 1980-08-10T00:10:20.000000 │
│ 2010-10-20 08:25:35.000000 ┆ 2010-10-20T08:25:35.000000 │
│ 2040-12-30 16:40:50.000000 ┆ 2040-12-30T16:40:50.000000 │
└────────────────────────────┴────────────────────────────┘
All temporal types (aside from `Duration`) support strftime formatting:
Expand All @@ -545,17 +569,20 @@ def to_string(self, format: str | None = None) -> Expr:
The Polars Duration string format (as seen in the frame repr) is also available:
>>> df.select(pl.col("td"), s_td=pl.col("td").dt.to_string("polars"))
>>> df.select(
... pl.col("td"),
... s_td=pl.col("td").dt.to_string("polars"),
... )
shape: (3, 2)
┌────────────────┬────────────────┐
│ td ┆ s_td │
│ --- ┆ --- │
│ duration[μs] ┆ str │
╞════════════════╪════════════════╡
│ -1d -42s ┆ -1d -42s │
│ 13d 14h 1001µs ┆ 13d 14h 1001µs │
│ 0µs ┆ 0µs │
└────────────────┴────────────────┘
┌───────────────┬───────────────┐
│ td ┆ s_td │
│ --- ┆ --- │
│ duration[μs] ┆ str │
╞═══════════════╪═══════════════╡
│ -1d -42s ┆ -1d -42s │
│ 13d 14h 100µs ┆ 13d 14h 100µs │
│ 0µs ┆ 0µs │
└───────────────┴───────────────┘
If you're interested in extracting the day or month names, you can use
the `'%A'` and `'%B'` strftime specifiers:
Expand Down
83 changes: 58 additions & 25 deletions py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,77 +207,110 @@ def mean(self) -> TemporalLiteral | None:
"""
return self._s.mean()

def to_string(self, format: str) -> Series:
def to_string(self, format: str | None = None) -> Series:
"""
Convert a Date/Time/Datetime column into a String column with the given format.
.. versionchanged:: 1.15.0
Added support for the use of "iso:strict" as a format string.
.. versionchanged:: 1.14.0
Added support for the `Duration` dtype, and use of "iso" as a format string.
Parameters
----------
format
Format to use, refer to the `chrono strftime documentation
<https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
for specification. Example: `"%y-%m-%d"`.
* Format to use, refer to the `chrono strftime documentation
<https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
for specification. Example: `"%y-%m-%d"`.
* If no format is provided, the appropriate ISO format for the underlying
data type is used. This can be made explicit by passing `"iso"` or
`"iso:strict"` as the format string (see notes below for details).
Notes
-----
* Similar to `cast(pl.String)`, but this method allows you to customize
the formatting of the resulting string; if no format is provided, the
appropriate ISO format for the underlying data type is used.
* Duration dtype Series cannot be formatted with `strftime`. Instead,
* Datetime dtype Series distinguish between "iso" and "iso:strict"
format strings. The difference is in the inclusion of a "T" separator
between the date and time components ("iso" results in ISO compliant
date and time components, separated with a space; "iso:strict" returns
the same components separated with a "T"). All other temporal types
return the same value for both format strings.
* Duration dtype Series cannot be formatted with `strftime`. Instead,
only "iso" and "polars" are supported as format strings. The "iso" format
string results in ISO8601 duration string output, and "polars" results
in the same form seen in the frame `repr`.
Examples
--------
>>> from datetime import date
>>> from datetime import datetime
>>> s = pl.Series(
... "datetime",
... [date(2020, 3, 1), date(2020, 4, 1), date(2020, 5, 1)],
... "dtm",
... [
... datetime(1999, 12, 31, 6, 12, 30, 800),
... datetime(2020, 7, 5, 10, 20, 45, 12345),
... datetime(2077, 10, 20, 18, 25, 10, 999999),
... ],
... )
Default for temporal dtypes (if not specifying a format string) is ISO8601:
>>> s.dt.to_string()
>>> s.dt.to_string() # or s.dt.to_string("iso")
shape: (3,)
Series: 'datetime' [str]
Series: 'dtm' [str]
[
"2020-03-01"
"2020-04-01"
"2020-05-01"
"1999-12-31 06:12:30.000800"
"2020-07-05 10:20:45.012345"
"2077-10-20 18:25:10.999999"
]
For `Datetime` specifically you can choose between "iso" (where the date and
time components are ISO, separated by a space) and "iso:strict" (where these
components are separated by a "T"):
>>> s.dt.to_string("iso:strict")
shape: (3,)
Series: 'dtm' [str]
[
"1999-12-31T06:12:30.000800"
"2020-07-05T10:20:45.012345"
"2077-10-20T18:25:10.999999"
]
The output can be customized by using a strftime-compatible format string:
>>> s.dt.to_string("%d/%m/%y")
shape: (3,)
Series: 'datetime' [str]
Series: 'dtm' [str]
[
"01/03/20"
"01/04/20"
"01/05/20"
"31/12/99"
"05/07/20"
"20/10/77"
]
If you're interested in using day or month names, you can use
the `'%A'` and/or `'%B'` format strings:
>>> s.dt.to_string("%A")
shape: (3,)
Series: 'datetime' [str]
Series: 'dtm' [str]
[
"Sunday"
"Wednesday"
"Friday"
"Friday"
"Sunday"
"Wednesday"
]
>>> s.dt.to_string("%B")
shape: (3,)
Series: 'datetime' [str]
Series: 'dtm' [str]
[
"March"
"April"
"May"
"December"
"July"
"October"
]
"""

Expand Down
Loading

0 comments on commit 9f1b40c

Please sign in to comment.