From 462166fff3b047266d7b767c7b1d4fbda885bdb2 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 21 Jan 2024 22:52:54 -0500 Subject: [PATCH] TIME type support (#238) --- pantab/src/pantab.cpp | 53 ++++++++++++++++++++++++++++++++++++---- pantab/tests/conftest.py | 3 +++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/pantab/src/pantab.cpp b/pantab/src/pantab.cpp index 3fcb522b..779cc5e9 100644 --- a/pantab/src/pantab.cpp +++ b/pantab/src/pantab.cpp @@ -54,6 +54,8 @@ static auto hyperTypeFromArrowSchema(struct ArrowSchema *schema, } else { return hyperapi::SqlType::timestamp(); } + case NANOARROW_TYPE_TIME64: + return hyperapi::SqlType::time(); default: throw std::invalid_argument("Unsupported Arrow type: " + std::to_string(schema_view.type)); @@ -363,6 +365,15 @@ static auto makeInsertHelper(std::shared_ptr inserter, } throw std::runtime_error( "This code block should not be hit - contact a developer"); + case NANOARROW_TYPE_TIME64: + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_MICRO: + return std::unique_ptr(new IntegralInsertHelper( + inserter, chunk, schema, error, column_position)); + default: + throw std::invalid_argument( + "Only microsecond-precision timestamp writes are implemented!"); + } default: throw std::invalid_argument("makeInsertHelper: Unsupported Arrow type: " + std::to_string(schema_view.type)); @@ -637,6 +648,24 @@ template class DatetimeReadHelper : public ReadHelper { } }; +class TimeReadHelper : public ReadHelper { + using ReadHelper::ReadHelper; + + auto Read(const hyperapi::Value &value) -> void override { + if (value.isNull()) { + if (ArrowArrayAppendNull(array_, 1)) { + throw std::runtime_error("ArrowAppendNull failed"); + } + return; + } + const auto time = value.get(); + const auto raw_value = time.getRaw(); + if (ArrowArrayAppendInt(array_, raw_value)) { + throw std::runtime_error("ArrowAppendInt failed"); + }; + } +}; + static auto makeReadHelper(const ArrowSchemaView *schema_view, struct ArrowArray *array) -> std::unique_ptr { @@ -665,6 +694,8 @@ static auto makeReadHelper(const ArrowSchemaView *schema_view, } else { return std::unique_ptr(new DatetimeReadHelper(array)); } + case NANOARROW_TYPE_TIME64: + return std::unique_ptr(new TimeReadHelper(array)); default: throw nb::type_error("unknownn arrow type provided"); } @@ -686,6 +717,7 @@ static auto arrowTypeFromHyper(const hyperapi::SqlType &sqltype) case hyperapi::TypeTag::Date : return NANOARROW_TYPE_DATE32; case hyperapi::TypeTag::Timestamp : case hyperapi::TypeTag:: TimestampTZ : return NANOARROW_TYPE_TIMESTAMP; + case hyperapi::TypeTag::Time : return NANOARROW_TYPE_TIME64; default : throw nb::type_error( ("Reader not implemented for type: " + sqltype.toString()).c_str()); } @@ -728,19 +760,30 @@ auto read_from_hyper_query(const std::string &path, const std::string &query) } const auto sqltype = column.getType(); - if (sqltype.getTag() == hyperapi::TypeTag::TimestampTZ) { + switch (sqltype.getTag()) { + case hyperapi::TypeTag::TimestampTZ: if (ArrowSchemaSetTypeDateTime(schema->children[i], NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, "UTC")) { - throw std::runtime_error("ArrowSchemaSetDateTime failed"); + throw std::runtime_error( + "ArrowSchemaSetDateTime failed for TimestampTZ type"); } - } else if (sqltype.getTag() == hyperapi::TypeTag::Timestamp) { + break; + case hyperapi::TypeTag::Timestamp: if (ArrowSchemaSetTypeDateTime(schema->children[i], NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, nullptr)) { - throw std::runtime_error("ArrowSchemaSetDateTime failed"); + throw std::runtime_error( + "ArrowSchemaSetDateTime failed for Timestamp type"); } - } else { + break; + case hyperapi::TypeTag::Time: + if (ArrowSchemaSetTypeDateTime(schema->children[i], NANOARROW_TYPE_TIME64, + NANOARROW_TIME_UNIT_MICRO, nullptr)) { + throw std::runtime_error("ArrowSchemaSetDateTime failed for Time type"); + } + break; + default: const enum ArrowType arrow_type = arrowTypeFromHyper(sqltype); if (ArrowSchemaSetType(schema->children[i], arrow_type)) { throw std::runtime_error("ArrowSchemaSetType failed"); diff --git a/pantab/tests/conftest.py b/pantab/tests/conftest.py index e2c9dc32..f5362a6f 100644 --- a/pantab/tests/conftest.py +++ b/pantab/tests/conftest.py @@ -144,6 +144,8 @@ def get_basic_dataframe(): # See pandas GH issue #56994 df["binary"] = pa.array([b"\xde\xad\xbe\xef", b"\xff\xee", None], type=pa.binary()) df["binary"] = df["binary"].astype("binary[pyarrow]") + df["time64us"] = pd.DataFrame({"col": pa.array([234, 42], type=pa.time64("us"))}) + df["time64us"] = df["time64us"].astype("time64[us][pyarrow]") return df @@ -184,6 +186,7 @@ def roundtripped(): "non-ascii": "large_string[pyarrow]", "string": "large_string[pyarrow]", "binary": "large_binary[pyarrow]", + "time64us": "time64[us][pyarrow]", } ) return df