-
Notifications
You must be signed in to change notification settings - Fork 888
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix: date_part
to extract only the requested part (not the overall interval)
#7189
base: main
Are you sure you want to change the base?
Changes from all commits
9dd6a56
538a237
bd3da87
a075580
a3b3be7
2eb17d5
a913433
5240062
088ed68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -464,11 +464,15 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalDayTimeType> { | |
DatePart::Week => Ok(self.unary_opt(|d| Some(d.days / 7))), | ||
DatePart::Day => Ok(self.unary_opt(|d| Some(d.days))), | ||
DatePart::Hour => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 60 * 1_000)))), | ||
DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000)))), | ||
DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000))), | ||
DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds))), | ||
DatePart::Microsecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000))), | ||
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000_000))), | ||
DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000) % 60))), | ||
DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000 % 60))), | ||
DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds % (60 * 1_000)))), | ||
DatePart::Microsecond => { | ||
Ok(self.unary_opt(|d| (d.milliseconds % (60 * 1_000)).checked_mul(1_000))) | ||
} | ||
DatePart::Nanosecond => { | ||
Ok(self.unary_opt(|d| (d.milliseconds % (60 * 1_000)).checked_mul(1_000_000))) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. D SELECT datepart('m', interval '1m 61s 33ms 44us');
┌─────────────────────────────────────────────────────┐
│ datepart('m', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│ int64 │
├─────────────────────────────────────────────────────┤
│ 2 │
└─────────────────────────────────────────────────────┘
D SELECT datepart('s', interval '1m 61s 33ms 44us');
┌─────────────────────────────────────────────────────┐
│ datepart('s', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│ int64 │
├─────────────────────────────────────────────────────┤
│ 1 │
└─────────────────────────────────────────────────────┘
D SELECT datepart('ms', interval '1m 61s 33ms 44us');
┌──────────────────────────────────────────────────────┐
│ datepart('ms', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│ int64 │
├──────────────────────────────────────────────────────┤
│ 1033 │
└──────────────────────────────────────────────────────┘
D SELECT datepart('us', interval '1m 61s 33ms 44us');
┌──────────────────────────────────────────────────────┐
│ datepart('us', CAST('1m 61s 33ms 44us' AS INTERVAL)) │
│ int64 │
├──────────────────────────────────────────────────────┤
│ 1033044 │
│ (1.03 million) │
└──────────────────────────────────────────────────────┘ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a behavior change for arrow-rs. duckdb says 1SELECT datepart('s', interval '1m 61s 33ms 44us'); ┌─────────────────────────────────────────────────────┐ │ datepart('s', CAST('1m 61s 33ms 44us' AS INTERVAL)) │ │ int64 │ ├─────────────────────────────────────────────────────┤ │ 1 │ └─────────────────────────────────────────────────────┘ postgres says almost 1:postgres=# SELECT extract (seconds from interval '1m 61s 33ms 44us');
extract
----------
1.033044
(1 row) Arrow says > SELECT datepart('s', interval '1m 61s 33ms 44us');
+---------------------------------------------------------------------------------------------------------------------+
| date_part(Utf8("s"),IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 121033044000 }")) |
+---------------------------------------------------------------------------------------------------------------------+
| 121 |
+---------------------------------------------------------------------------------------------------------------------+
1 row(s) fetched.
Elapsed 0.004 seconds. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing for millisecond and microseconds |
||
|
||
DatePart::Quarter | ||
| DatePart::Year | ||
|
@@ -488,25 +492,31 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalMonthDayNanoType> { | |
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> { | ||
match part { | ||
DatePart::Year => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months / 12))), | ||
DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months))), | ||
DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months % 12))), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. D SELECT datepart('year', interval '12 month');
┌────────────────────────────────────────────────┐
│ datepart('year', CAST('12 month' AS INTERVAL)) │
│ int64 │
├────────────────────────────────────────────────┤
│ 1 │
└────────────────────────────────────────────────┘
D SELECT datepart('month', interval '12 month');
┌─────────────────────────────────────────────────┐
│ datepart('month', CAST('12 month' AS INTERVAL)) │
│ int64 │
├─────────────────────────────────────────────────┤
│ 0 │
└─────────────────────────────────────────────────┘ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Postgres agrees postgres=# SELECT extract (month from interval '12 month');
extract
---------
0
(1 row) Before this PR arrow also currently says 12. > SELECT datepart('month', interval '12 month');
+---------------------------------------------------------------------------------------------------------------+
| date_part(Utf8("month"),IntervalMonthDayNano("IntervalMonthDayNano { months: 12, days: 0, nanoseconds: 0 }")) |
+---------------------------------------------------------------------------------------------------------------+
| 12 |
+---------------------------------------------------------------------------------------------------------------+
1 row(s) fetched.
Elapsed 0.001 seconds. I think this change makes sense |
||
DatePart::Week => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days / 7))), | ||
DatePart::Day => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days))), | ||
DatePart::Hour => { | ||
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 60 * 1_000_000_000)).try_into().ok())) | ||
} | ||
DatePart::Minute => { | ||
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000)).try_into().ok())) | ||
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000) % 60).try_into().ok())) | ||
} | ||
DatePart::Second => { | ||
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000_000).try_into().ok())) | ||
Ok(self.unary_opt(|d| ((d.nanoseconds / 1_000_000_000) % 60).try_into().ok())) | ||
} | ||
DatePart::Millisecond => { | ||
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000).try_into().ok())) | ||
} | ||
DatePart::Microsecond => { | ||
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000).try_into().ok())) | ||
DatePart::Millisecond => Ok(self.unary_opt(|d| { | ||
(d.nanoseconds % (60 * 1_000_000_000) / 1_000_000) | ||
.try_into() | ||
.ok() | ||
})), | ||
DatePart::Microsecond => Ok(self.unary_opt(|d| { | ||
(d.nanoseconds % (60 * 1_000_000_000) / 1_000) | ||
.try_into() | ||
.ok() | ||
})), | ||
DatePart::Nanosecond => { | ||
Ok(self.unary_opt(|d| (d.nanoseconds % (60 * 1_000_000_000)).try_into().ok())) | ||
} | ||
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.nanoseconds.try_into().ok())), | ||
|
||
DatePart::Quarter | ||
| DatePart::WeekISO | ||
|
@@ -1778,9 +1788,14 @@ mod tests { | |
fn test_interval_day_time_array() { | ||
let input: IntervalDayTimeArray = vec![ | ||
IntervalDayTime::ZERO, | ||
IntervalDayTime::new(10, 42), | ||
IntervalDayTime::new(10, 1042), | ||
IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1), | ||
IntervalDayTime::new(10, 42), // 10d, 42ms | ||
IntervalDayTime::new(10, 1042), // 10d, 1s, 42ms | ||
IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1), // 10d, 24h, 1ms | ||
IntervalDayTime::new( | ||
6, | ||
(MILLISECONDS * 60 * 60 * 4 + MILLISECONDS * 60 * 22 + MILLISECONDS * 11 + 3) | ||
as i32, | ||
), // 6d, 4h, 22m, 11s, 3ms | ||
] | ||
.into(); | ||
|
||
|
@@ -1791,58 +1806,65 @@ mod tests { | |
assert_eq!(10, actual.value(1)); | ||
assert_eq!(10, actual.value(2)); | ||
assert_eq!(10, actual.value(3)); | ||
assert_eq!(6, actual.value(4)); | ||
|
||
let actual = date_part(&input, DatePart::Week).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(1, actual.value(1)); | ||
assert_eq!(1, actual.value(2)); | ||
assert_eq!(1, actual.value(3)); | ||
assert_eq!(0, actual.value(4)); | ||
|
||
// Days doesn't affect time. | ||
let actual = date_part(&input, DatePart::Nanosecond).unwrap(); | ||
let actual = date_part(&input, DatePart::Hour).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(42_000_000, actual.value(1)); | ||
assert_eq!(1_042_000_000, actual.value(2)); | ||
// Overflow returns zero. | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(24, actual.value(3)); | ||
assert_eq!(4, actual.value(4)); | ||
|
||
let actual = date_part(&input, DatePart::Minute).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(0, actual.value(3)); | ||
assert_eq!(22, actual.value(4)); | ||
|
||
let actual = date_part(&input, DatePart::Microsecond).unwrap(); | ||
let actual = date_part(&input, DatePart::Second).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(42_000, actual.value(1)); | ||
assert_eq!(1_042_000, actual.value(2)); | ||
// Overflow returns zero. | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(1, actual.value(2)); | ||
assert_eq!(0, actual.value(3)); | ||
assert_eq!(11, actual.value(4)); | ||
|
||
let actual = date_part(&input, DatePart::Millisecond).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(42, actual.value(1)); | ||
assert_eq!(1042, actual.value(2)); | ||
assert_eq!(MILLISECONDS_IN_DAY as i32 + 1, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Second).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(1, actual.value(2)); | ||
assert_eq!(24 * 60 * 60, actual.value(3)); | ||
assert_eq!(1, actual.value(3)); | ||
assert_eq!(11003, actual.value(4)); | ||
|
||
let actual = date_part(&input, DatePart::Minute).unwrap(); | ||
let actual = date_part(&input, DatePart::Microsecond).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(24 * 60, actual.value(3)); | ||
assert_eq!(42_000, actual.value(1)); | ||
assert_eq!(1_042_000, actual.value(2)); | ||
assert_eq!(1_000, actual.value(3)); | ||
assert_eq!(11_003_000, actual.value(4)); | ||
|
||
let actual = date_part(&input, DatePart::Hour).unwrap(); | ||
let actual = date_part(&input, DatePart::Nanosecond).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(24, actual.value(3)); | ||
assert_eq!(42_000_000, actual.value(1)); | ||
assert_eq!(1_042_000_000, actual.value(2)); | ||
assert_eq!(1_000_000, actual.value(3)); | ||
// Overflow returns zero. | ||
assert_eq!(0, actual.value(4)); | ||
|
||
// Month and year are not valid (since days in month varies). | ||
assert!(date_part(&input, DatePart::Month).is_err()); | ||
|
@@ -1855,8 +1877,18 @@ mod tests { | |
fn test_interval_month_day_nano_array() { | ||
let input: IntervalMonthDayNanoArray = vec![ | ||
IntervalMonthDayNano::ZERO, | ||
IntervalMonthDayNano::new(5, 10, 42), | ||
IntervalMonthDayNano::new(16, 35, MILLISECONDS_IN_DAY * 1_000_000 + 1), | ||
IntervalMonthDayNano::new(5, 10, 42), // 5m, 1w, 3d, 42ns | ||
IntervalMonthDayNano::new(16, 35, NANOSECONDS_IN_DAY + 1), // 1y, 4m, 5w, 24h, 1ns | ||
IntervalMonthDayNano::new( | ||
0, | ||
0, | ||
NANOSECONDS * 60 * 60 * 4 | ||
+ NANOSECONDS * 60 * 22 | ||
+ NANOSECONDS * 11 | ||
+ 1_000_000 * 33 | ||
+ 1_000 * 44 | ||
+ 5, | ||
), // 4hr, 22m, 11s, 33ms, 44us, 5ns | ||
] | ||
.into(); | ||
|
||
|
@@ -1866,64 +1898,73 @@ mod tests { | |
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(1, actual.value(2)); | ||
assert_eq!(0, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Month).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(5, actual.value(1)); | ||
assert_eq!(16, actual.value(2)); | ||
assert_eq!(4, actual.value(2)); | ||
assert_eq!(0, actual.value(3)); | ||
|
||
// Week and day follow from day, but are not affected by months or nanos. | ||
let actual = date_part(&input, DatePart::Week).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(1, actual.value(1)); | ||
assert_eq!(5, actual.value(2)); | ||
assert_eq!(0, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Day).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(10, actual.value(1)); | ||
assert_eq!(35, actual.value(2)); | ||
assert_eq!(0, actual.value(3)); | ||
|
||
// Times follow from nanos, but are not affected by months or days. | ||
let actual = date_part(&input, DatePart::Hour).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(24, actual.value(2)); | ||
assert_eq!(4, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Minute).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(24 * 60, actual.value(2)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(22, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Second).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(24 * 60 * 60, actual.value(2)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(11, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Millisecond).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
assert_eq!(24 * 60 * 60 * 1_000, actual.value(2)); | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(11_033, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Microsecond).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(0, actual.value(1)); | ||
// Overflow gives zero. | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(11_033_044, actual.value(3)); | ||
|
||
let actual = date_part(&input, DatePart::Nanosecond).unwrap(); | ||
let actual = actual.as_primitive::<Int32Type>(); | ||
assert_eq!(0, actual.value(0)); | ||
assert_eq!(42, actual.value(1)); | ||
// Overflow gives zero. | ||
assert_eq!(0, actual.value(2)); | ||
assert_eq!(1, actual.value(2)); | ||
// Overflow returns zero. | ||
assert_eq!(0, actual.value(3)); | ||
} | ||
|
||
#[test] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the record, the current arrow behavior is the same it seems: