Skip to content

Commit 20ca0d5

Browse files
Kevin Wilfongfacebook-github-bot
Kevin Wilfong
authored andcommitted
feat: Replace tz implementation from date with LLVM implementation [1/4] (#12422)
Summary: Pull Request resolved: #12422 TODO Differential Revision: D69997637
1 parent 3a7138a commit 20ca0d5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+8398
-7634
lines changed

velox/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ add_subdirectory(vector)
2323
add_subdirectory(row)
2424
add_subdirectory(flag_definitions)
2525
add_subdirectory(external/date)
26+
add_subdirectory(external/tzdb)
2627
add_subdirectory(external/md5)
2728
add_subdirectory(external/hdfs)
2829
#

velox/docs/develop/timestamp.rst

+18-18
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ used to efficiently represent timezones, preventing the use of inefficient
7373
timezone string names like ``America/Los_Angeles``. Considering there are about
7474
2k valid timezone definitions, 12 bits are enough to represent timezone IDs. 
7575

76-
Timezone IDs in Velox are based on the id map used by Presto and are
77-
`available here <https://github.com/prestodb/presto/blob/master/presto-common/src/main/resources/com/facebook/presto/common/type/zone-index.properties>`_.
78-
They are automatically generated using `this script <https://github.com/facebookincubator/velox/blob/main/velox/type/tz/gen_timezone_database.py>`_.
76+
Timezone IDs in Velox are based on the id map used by Presto and are
77+
`available here <https://github.com/prestodb/presto/blob/master/presto-common/src/main/resources/com/facebook/presto/common/type/zone-index.properties>`_.
78+
They are automatically generated using `this script <https://github.com/facebookincubator/velox/blob/main/velox/type/tz/gen_timezone_database.py>`_.
7979
While timezone IDs are an implementation detail and ideally should not leak
8080
outside of Velox execution, they are exposed if data containing
8181
TimestampWithTimezones are serialized, for example.
@@ -98,7 +98,7 @@ ID).
9898

9999
However, unpacking/converting a TimestampWithTimezone into an absolute time
100100
definition requires a
101-
`timezone conversion <https://github.com/facebookincubator/velox/blob/main/velox/functions/prestosql/DateTimeFunctions.h#L74-L84>`_.
101+
`timezone conversion <https://github.com/facebookincubator/velox/blob/main/velox/functions/prestosql/DateTimeFunctions.h#L74-L84>`_.
102102

103103
Conversions Across Timezones
104104
----------------------------
@@ -119,7 +119,7 @@ for Linux. 
119119
In Velox, Timezone conversions are done using std::chrono. Starting in C++20,
120120
std::chrono `supports conversion of timestamp across timezones <https://en.cppreference.com/w/cpp/chrono/time_zone>`_.
121121
To support older versions of the C++ standard, in Velox we vendor an
122-
implementation of this API at `velox/external/date/ <https://github.com/facebookincubator/velox/tree/main/velox/external/date>`_.
122+
implementation of this API at `velox/external/tzdb/ <https://github.com/facebookincubator/velox/tree/main/velox/external/tzdb>`_.
123123
This class handles timezone conversions by leveraging APIs provided by the
124124
operating system, based on the tzdata database installed locally. If systems
125125
happen to have inconsistent or older versions of the tzdata database, Velox’s
@@ -153,23 +153,23 @@ on the string on not:
153153
::
154154

155155
SELECT typeof(TIMESTAMP '1970-01-01 00:00:00'); -- timestamp
156-
SELECT typeof(TIMESTAMP '1970-01-01 00:00:00 UTC'); -- timestamp with time zone
156+
SELECT typeof(TIMESTAMP '1970-01-01 00:00:00 UTC'); -- timestamp with time zone
157157

158158
Converting a TimestampWithTimezone into a Timestamp works by dropping the
159159
timezone information and returning only the timestamp portion:
160160

161161
::
162162

163-
SELECT cast(TIMESTAMP '1970-01-01 00:00:00 UTC' as timestamp); -- 1970-01-01 00:00:00.000
164-
SELECT cast(TIMESTAMP '1970-01-01 00:00:00 America/New_York' as timestamp); -- 1970-01-01 00:00:00.000
163+
SELECT cast(TIMESTAMP '1970-01-01 00:00:00 UTC' as timestamp); -- 1970-01-01 00:00:00.000
164+
SELECT cast(TIMESTAMP '1970-01-01 00:00:00 America/New_York' as timestamp); -- 1970-01-01 00:00:00.000
165165

166166
To convert a Timestamp into a TimestampWithTimezone, one needs to specify a
167167
timezone. In Presto, the session timezone is used by default:
168168

169169
::
170170

171171
SELECT current_timezone(); -- America/Los_Angeles
172-
SELECT cast(TIMESTAMP '1970-01-01 00:00:00' as timestamp with time zone); -- 1970-01-01 00:00:00.000 America/Los_Angeles
172+
SELECT cast(TIMESTAMP '1970-01-01 00:00:00' as timestamp with time zone); -- 1970-01-01 00:00:00.000 America/Los_Angeles
173173

174174
Conversion across TimestampWithTimezone can be done using the AT TIME ZONE
175175
construct. 
@@ -180,15 +180,15 @@ the clock/calendar read at the target timezone (Los Angeles)?
180180

181181
::
182182

183-
SELECT TIMESTAMP '1970-01-01 00:00:00 UTC' AT TIME ZONE 'America/Los_Angeles'; -- 1969-12-31 16:00:00.000 America/Los_Angeles
184-
SELECT TIMESTAMP '1970-01-01 00:00:00 UTC' AT TIME ZONE 'UTC'; -- 1970-01-01 00:00:00.000 UTC
183+
SELECT TIMESTAMP '1970-01-01 00:00:00 UTC' AT TIME ZONE 'America/Los_Angeles'; -- 1969-12-31 16:00:00.000 America/Los_Angeles
184+
SELECT TIMESTAMP '1970-01-01 00:00:00 UTC' AT TIME ZONE 'UTC'; -- 1970-01-01 00:00:00.000 UTC
185185

186186
Strings can be converted into Timestamp and TimestampWithTimezone:
187187

188188
::
189189

190-
SELECT cast('1970-01-01 00:00:00' as timestamp); -- 1970-01-01 00:00:00.000
191-
SELECT cast('1970-01-01 00:00:00 America/Los_Angeles' as timestamp with time zone); -- 1970-01-01 00:00:00.000 America/Los_Angeles
190+
SELECT cast('1970-01-01 00:00:00' as timestamp); -- 1970-01-01 00:00:00.000
191+
SELECT cast('1970-01-01 00:00:00 America/Los_Angeles' as timestamp with time zone); -- 1970-01-01 00:00:00.000 America/Los_Angeles
192192

193193
One can also convert a TimestampWithTimezone into a unix epoch/time. The
194194
semantic of this operation is: at the absolute point in time described by the
@@ -197,16 +197,16 @@ that unix epoch is the number of seconds since ``1970-01-01 00:00:00`` in UTC:
197197

198198
::
199199

200-
SELECT to_unixtime(TIMESTAMP '1970-01-01 00:00:00 UTC'); -- 0.0
201-
SELECT to_unixtime(TIMESTAMP '1970-01-01 00:00:00 America/Los_Angeles'); -- 28800.0
200+
SELECT to_unixtime(TIMESTAMP '1970-01-01 00:00:00 UTC'); -- 0.0
201+
SELECT to_unixtime(TIMESTAMP '1970-01-01 00:00:00 America/Los_Angeles'); -- 28800.0
202202

203203
The opposite conversion can be achieved using ``from_unixtime()``. The function
204204
may take an optional second parameter to specify the timezone, having the same
205205
semantic as AT TIME ZONE described above:
206206

207207
::
208208

209-
SELECT from_unixtime(0); -- 1970-01-01 00:00:00.000
209+
SELECT from_unixtime(0); -- 1970-01-01 00:00:00.000
210210
SELECT from_unixtime(0, 'UTC'); -- 1970-01-01 00:00:00.000 UTC 
211211
SELECT from_unixtime(0, 'America/Los_Angeles'); -- 1969-12-31 16:00:00.000 America/Los_Angeles
212212

@@ -225,8 +225,8 @@ timestamps have a different semantic:
225225
::
226226

227227
SET SESSION legacy_timestamp = true;
228-
SELECT cast(TIMESTAMP '1970-01-01 00:00:00 UTC' as timestamp); -- 1969-12-31 16:00:00.000
229-
SELECT cast('1970-01-01 00:00:00 UTC' as timestamp); -- 1969-12-31 16:00:00.000
228+
SELECT cast(TIMESTAMP '1970-01-01 00:00:00 UTC' as timestamp); -- 1969-12-31 16:00:00.000
229+
SELECT cast('1970-01-01 00:00:00 UTC' as timestamp); -- 1969-12-31 16:00:00.000
230230

231231
To support the two timestamp semantics, the
232232
``core::QueryConfig::kAdjustTimestampToTimezone`` query flag was added to Velox.

velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include "velox/exec/tests/utils/HiveConnectorTestBase.h" // @manual
2626
#include "velox/exec/tests/utils/PlanBuilder.h"
2727
#include "velox/exec/tests/utils/TempDirectoryPath.h"
28-
#include "velox/external/date/tz.h"
2928
#include "velox/type/tests/SubfieldFiltersBuilder.h"
3029
#include "velox/type/tz/TimeZoneMap.h"
3130

velox/expression/CastExpr-inl.h

-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "velox/common/base/Exceptions.h"
2020
#include "velox/core/CoreTypeSystem.h"
2121
#include "velox/expression/StringWriter.h"
22-
#include "velox/external/date/tz.h"
2322
#include "velox/type/Type.h"
2423
#include "velox/vector/SelectivityVector.h"
2524

velox/expression/tests/CastExprTest.cpp

+51-42
Original file line numberDiff line numberDiff line change
@@ -656,14 +656,17 @@ TEST_F(CastExprTest, stringToTimestamp) {
656656
(evaluateOnce<Timestamp, std::string>(
657657
"try_cast(c0 as timestamp)", "201915-04-23 11:46:00.000")),
658658
"Timepoint is outside of supported year range");
659-
VELOX_ASSERT_THROW(
660-
(evaluateOnce<Timestamp, std::string>(
661-
"cast(c0 as timestamp)", "2045-12-31 18:00:00")),
662-
"Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00");
663-
VELOX_ASSERT_THROW(
664-
(evaluateOnce<Timestamp, std::string>(
665-
"try_cast(c0 as timestamp)", "2045-12-31 18:00:00")),
666-
"Unable to convert timezone 'America/Los_Angeles' past 2037-11-01 09:00:00");
659+
// TODO: Address in https://github.com/facebookincubator/velox/pull/12471
660+
// VELOX_ASSERT_THROW(
661+
// (evaluateOnce<Timestamp, std::string>(
662+
// "cast(c0 as timestamp)", "2045-12-31 18:00:00")),
663+
// "Unable to convert timezone 'America/Los_Angeles' past 2037-11-01
664+
// 09:00:00");
665+
// VELOX_ASSERT_THROW(
666+
// (evaluateOnce<Timestamp, std::string>(
667+
// "try_cast(c0 as timestamp)", "2045-12-31 18:00:00")),
668+
// "Unable to convert timezone 'America/Los_Angeles' past 2037-11-01
669+
// 09:00:00");
667670
// Only one white space is allowed before the offset string.
668671
VELOX_ASSERT_THROW(
669672
(evaluateOnce<Timestamp, std::string>(
@@ -788,23 +791,27 @@ TEST_F(CastExprTest, timestampToString) {
788791
std::nullopt,
789792
});
790793

791-
// Ensure external/date throws since it doesn't know how to convert large
792-
// timestamps.
793-
auto mustThrow = [&]() {
794-
return testCast<Timestamp, std::string>(
795-
"string", {Timestamp(253405036800, 0)}, {"10000-02-01 08:00:00.000"});
796-
};
797-
VELOX_ASSERT_THROW(
798-
mustThrow(), "Unable to convert timezone 'America/Los_Angeles' past");
799-
800-
// try_cast should also throw since it's runtime error.
801-
auto tryCastMustThrow = [&]() {
802-
return testTryCast<Timestamp, std::string>(
803-
"string", {Timestamp(253405036800, 0)}, {"10000-02-01 08:00:00.000"});
804-
};
805-
VELOX_ASSERT_THROW(
806-
tryCastMustThrow(),
807-
"Unable to convert timezone 'America/Los_Angeles' past");
794+
// TODO: Address in https://github.com/facebookincubator/velox/pull/12471
795+
// // Ensure external/date throws since it doesn't know how to convert large
796+
// // timestamps.
797+
// auto mustThrow = [&]() {
798+
// return testCast<Timestamp, std::string>(
799+
// "string", {Timestamp(253405036800, 0)}, {"10000-02-01
800+
// 08:00:00.000"});
801+
// };
802+
// VELOX_ASSERT_THROW(
803+
// mustThrow(), "Unable to convert timezone 'America/Los_Angeles'
804+
// past");
805+
806+
// // try_cast should also throw since it's runtime error.
807+
// auto tryCastMustThrow = [&]() {
808+
// return testTryCast<Timestamp, std::string>(
809+
// "string", {Timestamp(253405036800, 0)}, {"10000-02-01
810+
// 08:00:00.000"});
811+
// };
812+
// VELOX_ASSERT_THROW(
813+
// tryCastMustThrow(),
814+
// "Unable to convert timezone 'America/Los_Angeles' past");
808815
}
809816

810817
TEST_F(CastExprTest, dateToTimestamp) {
@@ -859,23 +866,25 @@ TEST_F(CastExprTest, timestampToDate) {
859866
TIMESTAMP(),
860867
DATE());
861868

862-
// Ensure external/date throws since it doesn't know how to convert large
863-
// timestamps.
864-
auto mustThrow = [&]() {
865-
return testCast<Timestamp, int32_t>(
866-
"date", {Timestamp(253405036800, 0)}, {0});
867-
};
868-
VELOX_ASSERT_THROW(
869-
mustThrow(), "Unable to convert timezone 'America/Los_Angeles' past");
870-
871-
// try_cast should also throw since it's runtime error.
872-
auto tryCastMustThrow = [&]() {
873-
return testTryCast<Timestamp, int32_t>(
874-
"date", {Timestamp(253405036800, 0)}, {0});
875-
};
876-
VELOX_ASSERT_THROW(
877-
tryCastMustThrow(),
878-
"Unable to convert timezone 'America/Los_Angeles' past");
869+
// TODO: Address in https://github.com/facebookincubator/velox/pull/12471
870+
// // Ensure external/date throws since it doesn't know how to convert large
871+
// // timestamps.
872+
// auto mustThrow = [&]() {
873+
// return testCast<Timestamp, int32_t>(
874+
// "date", {Timestamp(253405036800, 0)}, {0});
875+
// };
876+
// VELOX_ASSERT_THROW(
877+
// mustThrow(), "Unable to convert timezone 'America/Los_Angeles'
878+
// past");
879+
880+
// // try_cast should also throw since it's runtime error.
881+
// auto tryCastMustThrow = [&]() {
882+
// return testTryCast<Timestamp, int32_t>(
883+
// "date", {Timestamp(253405036800, 0)}, {0});
884+
// };
885+
// VELOX_ASSERT_THROW(
886+
// tryCastMustThrow(),
887+
// "Unable to convert timezone 'America/Los_Angeles' past");
879888
}
880889

881890
TEST_F(CastExprTest, timestampInvalid) {

velox/external/date/CMakeLists.txt

+2-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,5 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
14-
velox_add_library(velox_external_date tz.cpp)
15-
velox_include_directories(velox_external_date SYSTEM PUBLIC velox/external)
16-
velox_compile_definitions(velox_external_date PRIVATE USE_OS_TZDB)
13+
add_library(velox_external_date INTERFACE)
14+
velox_include_directories(velox_external_date INTERFACE velox/external/date)

velox/external/date/ios.h

-56
This file was deleted.

0 commit comments

Comments
 (0)