Skip to content

Commit e59fb46

Browse files
boneanxs and facebook-github-bot
authored and committed
feat: Support timestamp and date types for Spark unix_timestamp function (facebookincubator#11128)
Summary: Adds timestamp and date support for Spark `unix_timestamp` and `to_unix_timestamp` functions. Spark's implementation: https://github.com/apache/spark/blob/v3.5.1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala#L1246-L1247. Pull Request resolved: facebookincubator#11128 Reviewed By: xiaoxmeng Differential Revision: D70260312 Pulled By: kevinwilfong fbshipit-source-id: 961c1b859f9cf2b5a9abe7bc0504c0ccb65f35dc
1 parent 0d8c05a commit e59fb46

File tree

5 files changed

+85
-1
lines changed

5 files changed

+85
-1
lines changed

velox/docs/functions/spark/datetime.rst

+26
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,11 @@ These functions support TIMESTAMP and DATE input types.
248248

249249
SELECT timestamp_millis(1230219000123); -- '2008-12-25 15:30:00.123'
250250

251+
.. spark:function:: to_unix_timestamp(date) -> integer
252+
:noindex:
253+
254+
Alias for ``unix_timestamp(date) -> integer``.
255+
251256
.. spark:function:: to_unix_timestamp(string) -> integer
252257
253258
Alias for ``unix_timestamp(string) -> integer``.
@@ -257,6 +262,11 @@ These functions support TIMESTAMP and DATE input types.
257262

258263
Alias for ``unix_timestamp(string, format) -> integer``.
259264

265+
.. spark:function:: to_unix_timestamp(timestamp) -> integer
266+
:noindex:
267+
268+
Alias for ``unix_timestamp(timestamp) -> integer``.
269+
260270
.. spark:function:: to_utc_timestamp(timestamp, string) -> timestamp
261271
262272
Returns the timestamp value from the given timezone to UTC timezone. ::
@@ -294,6 +304,14 @@ These functions support TIMESTAMP and DATE input types.
294304
295305
Returns the current UNIX timestamp in seconds.
296306

307+
.. spark:function:: unix_timestamp(date) -> integer
308+
309+
Converts the time represented by ``date`` at the configured session timezone to the GMT time, and extracts the seconds. ::
310+
311+
SELECT unix_timestamp(CAST('1970-01-01' AS DATE)); -- 0
312+
SELECT unix_timestamp(CAST('2024-10-01' AS DATE)); -- 1727740800
313+
SELECT unix_timestamp(CAST('-2025-02-18' AS DATE)); -- -126065894400
314+
297315
.. spark:function:: unix_timestamp(string) -> integer
298316
:noindex:
299317

@@ -311,6 +329,14 @@ These functions support TIMESTAMP and DATE input types.
311329
Returns null if ``string`` does not match ``format`` or if ``format``
312330
is invalid.
313331

332+
.. spark:function:: unix_timestamp(timestamp) -> integer
333+
334+
Returns the UNIX timestamp of the given ``timestamp`` in seconds. ::
335+
336+
SELECT unix_timestamp(CAST(0 AS TIMESTAMP)); -- 0
337+
SELECT unix_timestamp(CAST(1739933174 AS TIMESTAMP)); -- 1739933174
338+
SELECT unix_timestamp(CAST(-1739933174 AS TIMESTAMP)); -- -1739933174
339+
314340
.. function:: week_of_year(x) -> integer
315341

316342
Returns the `ISO-Week`_ of the year from x. The value ranges from ``1`` to ``53``.

velox/functions/sparksql/DateTimeFunctions.h

+19
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,13 @@ struct UnixTimestampParseWithFormatFunction
222222
this->setTimezone(config);
223223
}
224224

225+
// Initialization hook for the single-argument DATE overload.
// The input-type list and the DATE argument pointer are both unused (their
// names are commented out); the only work done is forwarding the query config
// to setTimezone(). The DATE call() overload reads sessionTimeZone_, which is
// presumably what setTimezone() populates -- confirm against its definition.
FOLLY_ALWAYS_INLINE void initialize(
226+
const std::vector<TypePtr>& /*inputTypes*/,
227+
const core::QueryConfig& config,
228+
const arg_type<Date>* /*input*/) {
229+
this->setTimezone(config);
230+
}
231+
225232
FOLLY_ALWAYS_INLINE bool call(
226233
int64_t& result,
227234
const arg_type<Varchar>& input,
@@ -252,6 +259,18 @@ struct UnixTimestampParseWithFormatFunction
252259
return true;
253260
}
254261

262+
// TIMESTAMP overload: writes input.getSeconds() into `result`.
// No time zone member is consulted here and nothing but the value returned by
// getSeconds() is used, so any sub-second part of `input` does not affect the
// result.
FOLLY_ALWAYS_INLINE void call(
263+
int64_t& result,
264+
const arg_type<Timestamp>& input) {
265+
result = input.getSeconds();
266+
}
267+
268+
// DATE overload: builds a Timestamp from the date, converts it to GMT using
// the session time zone, and writes the resulting seconds into `result`.
FOLLY_ALWAYS_INLINE void call(int64_t& result, const arg_type<Date>& input) {
269+
auto timestamp = Timestamp::fromDate(input);
270+
// toGMT() modifies `timestamp` in place (its return value is not used).
// NOTE(review): sessionTimeZone_ is dereferenced unconditionally -- this
// assumes it is never null when this overload runs; confirm against the
// member's declaration and setTimezone().
timestamp.toGMT(*this->sessionTimeZone_);
271+
result = timestamp.getSeconds();
272+
}
273+
255274
private:
256275
bool isConstFormat_{false};
257276
bool invalidFormat_{false};

velox/functions/sparksql/registration/RegisterDatetime.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ void registerDatetimeFunctions(const std::string& prefix) {
4040
int64_t,
4141
Varchar,
4242
Varchar>({prefix + "unix_timestamp", prefix + "to_unix_timestamp"});
43+
registerFunction<UnixTimestampParseWithFormatFunction, int64_t, Timestamp>(
44+
{prefix + "unix_timestamp", prefix + "to_unix_timestamp"});
45+
registerFunction<UnixTimestampParseWithFormatFunction, int64_t, Date>(
46+
{prefix + "unix_timestamp", prefix + "to_unix_timestamp"});
4347
registerFunction<FromUnixtimeFunction, Varchar, int64_t, Varchar>(
4448
{prefix + "from_unixtime"});
4549
registerFunction<MakeDateFunction, Date, int32_t, int32_t, int32_t>(

velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp

+35
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,41 @@ TEST_F(DateTimeFunctionsTest, unixTimestampCustomFormat) {
312312
unixTimestamp("2022-12-12 asd 07:45:31", "yyyy-MM-dd 'asd HH:mm:ss"));
313313
}
314314

315+
// Checks unix_timestamp() on TIMESTAMP input: every expected value equals the
// first constructor argument of the Timestamp that is passed in.
TEST_F(DateTimeFunctionsTest, unixTimestampTimestampInput) {
316+
// Helper: evaluates unix_timestamp(c0) on a single optional TIMESTAMP value.
const auto unixTimestamp = [&](std::optional<Timestamp> timestamp) {
317+
return evaluateOnce<int64_t>("unix_timestamp(c0)", timestamp);
318+
};
319+
EXPECT_EQ(0, unixTimestamp(Timestamp(0, 0)));
320+
// A non-zero second constructor argument (990) does not change the result.
EXPECT_EQ(1, unixTimestamp(Timestamp(1, 990)));
321+
EXPECT_EQ(61, unixTimestamp(Timestamp(61, 0)));
322+
// Negative (pre-epoch) values are passed through unchanged as well.
EXPECT_EQ(-1, unixTimestamp(Timestamp(-1, 0)));
323+
EXPECT_EQ(1739933174, unixTimestamp(Timestamp(1739933174, 0)));
324+
EXPECT_EQ(-1739933174, unixTimestamp(Timestamp(-1739933174, 0)));
325+
// Boundary values; kMax / kMin are defined outside this view.
EXPECT_EQ(kMax, unixTimestamp(Timestamp(kMax, 0)));
326+
EXPECT_EQ(kMin, unixTimestamp(Timestamp(kMin, 0)));
327+
}
328+
329+
// Checks unix_timestamp() on DATE input, first before any time zone is set in
// this test body, then with America/Los_Angeles, and finally with inputs that
// are expected to throw.
TEST_F(DateTimeFunctionsTest, unixTimestampDateInput) {
330+
// Helper: evaluates unix_timestamp(c0) where c0 is an optional int32_t typed
// as DATE().
const auto unixTimestamp = [&](std::optional<int32_t> date) {
331+
return evaluateOnce<int64_t>("unix_timestamp(c0)", {DATE()}, date);
332+
};
333+
// Before setQueryTimeZone() is called in this test, every expected value is
// an exact multiple of 86400 seconds.
EXPECT_EQ(0, unixTimestamp(parseDate("1970-01-01")));
334+
EXPECT_EQ(1727740800, unixTimestamp(parseDate("2024-10-01")));
335+
EXPECT_EQ(-126065894400, unixTimestamp(parseDate("-2025-02-18")));
336+
setQueryTimeZone("America/Los_Angeles");
337+
// 1727766000 - 1727740800 = 25200 s, i.e. 7 hours later than the value
// asserted above for the same date.
EXPECT_EQ(1727766000, unixTimestamp(parseDate("2024-10-01")));
338+
// -126065866022 - (-126065894400) = 28378 s (7h 52m 58s); presumably the
// zone's local-mean-time offset applied to very old dates -- confirm against
// the time zone database.
EXPECT_EQ(-126065866022, unixTimestamp(parseDate("-2025-02-18")));
339+
340+
// Test invalid inputs.
341+
// kMax / kMin are defined outside this view; both are expected to be rejected
// as outside the supported year range.
VELOX_ASSERT_THROW(
342+
unixTimestamp(kMax), "Timepoint is outside of supported year range");
343+
VELOX_ASSERT_THROW(
344+
unixTimestamp(kMin), "Timepoint is outside of supported year range");
345+
// With America/Los_Angeles set, a date after the cutoff named in the message
// (2037-11-01 09:00:00) is expected to fail the time zone conversion.
VELOX_ASSERT_THROW(
346+
unixTimestamp(parseDate("2045-12-31")),
347+
"Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00");
348+
}
349+
315350
// unix_timestamp and to_unix_timestamp are aliases.
316351
TEST_F(DateTimeFunctionsTest, toUnixTimestamp) {
317352
std::optional<StringView> dateStr = "1970-01-01 08:32:11"_sv;

velox/type/Timestamp.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ void Timestamp::toGMT(const tz::TimeZone& zone) {
6666
// Invalid argument means we hit a conversion not supported by
6767
// external/date. Need to throw a RuntimeError so that try() statements do
6868
// not suppress it.
69-
VELOX_FAIL(e.what());
69+
VELOX_FAIL_UNSUPPORTED_INPUT_UNCATCHABLE(e.what());
7070
}
7171
seconds_ = sysSeconds.count();
7272
}

0 commit comments

Comments
 (0)