diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index b98e6ea3fc..18eb773d6f 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -76,7 +76,6 @@ if(ADBC_BUILD_TESTS) driver-postgresql SOURCES postgres_type_test.cc - postgres_copy_reader_test.cc postgresql_test.cc EXTRA_LINK_LIBS adbc_driver_common @@ -91,6 +90,28 @@ if(ADBC_BUILD_TESTS) ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-postgresql-test) + + add_test_case(driver_postgresql_copy_test + PREFIX + adbc + EXTRA_LABELS + driver-postgresql + SOURCES + copy/postgres_copy_reader_test.cc + copy/postgres_copy_writer_test.cc + EXTRA_LINK_LIBS + adbc_driver_common + adbc_validation + nanoarrow + ${TEST_LINK_LIBS}) + target_compile_features(adbc-driver-postgresql-copy-test PRIVATE cxx_std_17) + target_include_directories(adbc-driver-postgresql-copy-test SYSTEM + PRIVATE ${REPOSITORY_ROOT} + ${REPOSITORY_ROOT}/c/ + ${LIBPQ_INCLUDE_DIRS} + ${REPOSITORY_ROOT}/c/vendor + ${REPOSITORY_ROOT}/c/driver) + adbc_configure_target(adbc-driver-postgresql-copy-test) endif() if(ADBC_BUILD_BENCHMARKS) diff --git a/c/driver/postgresql/copy/copy_common.h b/c/driver/postgresql/copy/copy_common.h new file mode 100644 index 0000000000..04205907da --- /dev/null +++ b/c/driver/postgresql/copy/copy_common.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +// Windows +#define NOMINMAX + +#include <cstdint> + +// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA +#if defined(_WIN32) && !defined(MSVC) && !defined(ENODATA) +#define ENODATA 120 +#endif + +namespace adbcpq { + +// Signature bytes of a binary COPY stream, "PGCOPY\n\377\r\n\0"; 0xFF is cast to fit int8_t +static int8_t kPgCopyBinarySignature[] = {0x50, 0x47, 0x43, 0x4F, + 0x50, 0x59, 0x0A, static_cast<int8_t>(0xFF), + 0x0D, 0x0A, 0x00}; + +// The maximum value in microseconds that can be converted into nanoseconds +// without overflow +constexpr int64_t kMaxSafeMicrosToNanos = 9223372036854775L; + +// The minimum value in microseconds that can be converted into nanoseconds +// without overflow +constexpr int64_t kMinSafeMicrosToNanos = -9223372036854775L; + +} // namespace adbcpq diff --git a/c/driver/postgresql/copy/postgres_copy_reader_test.cc b/c/driver/postgresql/copy/postgres_copy_reader_test.cc new file mode 100644 index 0000000000..55a61b27ed --- /dev/null +++ b/c/driver/postgresql/copy/postgres_copy_reader_test.cc @@ -0,0 +1,697 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "postgresql/copy/reader.h" +#include "postgres_copy_test_common.h" + +namespace adbcpq { + +class PostgresCopyStreamTester { + public: + ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(reader_.Init(root_type)); + NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error)); + NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error)); + return NANOARROW_OK; + } + + ArrowErrorCode ReadAll(ArrowBufferView* data, ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(reader_.ReadHeader(data, error)); + + int result; + do { + result = reader_.ReadRecord(data, error); + } while (result == NANOARROW_OK); + + return result; + } + + void GetSchema(ArrowSchema* out) { reader_.GetSchema(out); } + + ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error = nullptr) { + return reader_.GetArray(out, error); + } + + private: + PostgresCopyStreamReader reader_; +}; + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBoolean; + data.size_bytes = sizeof(kTestPgCopyBoolean); + + auto col_type = PostgresType(PostgresTypeId::kBool); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean, sizeof(kTestPgCopyBoolean)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + 
ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + const uint8_t* validity = + reinterpret_cast(array->children[0]->buffers[0]); + const uint8_t* data_buffer = + reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_TRUE(ArrowBitGet(data_buffer, 0)); + ASSERT_FALSE(ArrowBitGet(data_buffer, 1)); + ASSERT_FALSE(ArrowBitGet(data_buffer, 2)); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopySmallInt; + data.size_bytes = sizeof(kTestPgCopySmallInt); + + auto col_type = PostgresType(PostgresTypeId::kInt2); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopySmallInt, sizeof(kTestPgCopySmallInt)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, 
PostgresCopyReadInteger) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyInteger; + data.size_bytes = sizeof(kTestPgCopyInteger); + + auto col_type = PostgresType(PostgresTypeId::kInt4); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInteger, sizeof(kTestPgCopyInteger)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBigInt; + data.size_bytes = sizeof(kTestPgCopyBigInt); + + auto col_type = PostgresType(PostgresTypeId::kInt8); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBigInt, sizeof(kTestPgCopyBigInt)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + 
ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyReal; + data.size_bytes = sizeof(kTestPgCopyReal); + + auto col_type = PostgresType(PostgresTypeId::kFloat4); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyReal, sizeof(kTestPgCopyReal)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_FLOAT_EQ(data_buffer[0], -123.456); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_FLOAT_EQ(data_buffer[3], 123.456); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, 
PostgresCopyReadDoublePrecision) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyDoublePrecision; + data.size_bytes = sizeof(kTestPgCopyDoublePrecision); + + auto col_type = PostgresType(PostgresTypeId::kFloat8); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDoublePrecision, + sizeof(kTestPgCopyDoublePrecision)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_DOUBLE_EQ(data_buffer[0], -123.456); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_DOUBLE_EQ(data_buffer[3], 123.456); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadDate) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyDate; + data.size_bytes = sizeof(kTestPgCopyDate); + + auto col_type = PostgresType(PostgresTypeId::kDate); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDate, sizeof(kTestPgCopyDate)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), 
NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(data_buffer[0], -25567); + ASSERT_EQ(data_buffer[1], 47482); +} + + +// For full coverage, ensure that this contains NUMERIC examples that: +// - Have >= four zeroes to the left of the decimal point +// - Have >= four zeroes to the right of the decimal point +// - Include special values (nan, -inf, inf, NULL) +// - Have >= four trailing zeroes to the right of the decimal point +// - Have >= four leading zeroes before the first digit to the right of the decimal point +// - Is < 0 (negative) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (1000000), ('0.00001234'), +// ('1.0000'), (-123.456), (123.456), ('nan'), ('-inf'), ('inf'), (NULL)) AS drvd(col)) TO +// STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumeric[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xf0, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xd0, 0x00, 0x00, 0x20, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyNumeric; + data.size_bytes = sizeof(kTestPgCopyNumeric); + + auto col_type = PostgresType(PostgresTypeId::kNumeric); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyNumeric, sizeof(kTestPgCopyNumeric)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 9); + ASSERT_EQ(array->n_children, 1); + + nanoarrow::UniqueSchema schema; + tester.GetSchema(schema.get()); + + nanoarrow::UniqueArrayView array_view; + ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), + NANOARROW_OK); + ASSERT_EQ(array_view->children[0]->storage_type, NANOARROW_TYPE_STRING); + ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); + + auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_TRUE(ArrowBitGet(validity, 4)); + ASSERT_TRUE(ArrowBitGet(validity, 5)); + ASSERT_TRUE(ArrowBitGet(validity, 6)); + ASSERT_TRUE(ArrowBitGet(validity, 7)); + ASSERT_FALSE(ArrowBitGet(validity, 8)); + + struct ArrowStringView item; + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 0); + EXPECT_EQ(std::string(item.data, item.size_bytes), "1000000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 1); + EXPECT_EQ(std::string(item.data, item.size_bytes), "0.00001234"); + item = 
ArrowArrayViewGetStringUnsafe(array_view->children[0], 2); + EXPECT_EQ(std::string(item.data, item.size_bytes), "1.0000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 3); + EXPECT_EQ(std::string(item.data, item.size_bytes), "-123.456"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 4); + EXPECT_EQ(std::string(item.data, item.size_bytes), "123.456"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 5); + EXPECT_EQ(std::string(item.data, item.size_bytes), "nan"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 6); + EXPECT_EQ(std::string(item.data, item.size_bytes), "-inf"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 7); + EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); +} + +// Reads kTestPgCopyTimestamp; expected values are microseconds since the Unix epoch. +TEST(PostgresCopyUtilsTest, PostgresCopyReadTimestamp) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyTimestamp; + data.size_bytes = sizeof(kTestPgCopyTimestamp); + + auto col_type = PostgresType(PostgresTypeId::kTimestamp); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyTimestamp, sizeof(kTestPgCopyTimestamp)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast<const uint8_t*>(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast<const int64_t*>(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); // row 2 (last of 3) is the NULL row + + ASSERT_EQ(data_buffer[0], -2208943504000000); + ASSERT_EQ(data_buffer[1], 4102490096000000); +} + + +TEST(PostgresCopyUtilsTest, 
PostgresCopyReadInterval) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyInterval; + data.size_bytes = sizeof(kTestPgCopyInterval); + + auto col_type = PostgresType(PostgresTypeId::kInterval); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInterval, sizeof(kTestPgCopyInterval)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + nanoarrow::UniqueSchema schema; + tester.GetSchema(schema.get()); + + nanoarrow::UniqueArrayView array_view; + ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), + NANOARROW_OK); + ASSERT_EQ(array_view->children[0]->storage_type, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); + + auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + struct ArrowInterval interval; + ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 0, &interval); + ASSERT_EQ(interval.months, -1); + ASSERT_EQ(interval.days, -2); + ASSERT_EQ(interval.ns, -4000000000); + ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 1, &interval); + ASSERT_EQ(interval.months, 1); + ASSERT_EQ(interval.days, 2); + ASSERT_EQ(interval.ns, 4000000000); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyText; + data.size_bytes = sizeof(kTestPgCopyText); + + auto col_type = PostgresType(PostgresTypeId::kText); + 
PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyText, sizeof(kTestPgCopyText)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto offsets = reinterpret_cast(array->children[0]->buffers[1]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 3); + ASSERT_EQ(offsets[2], 7); + ASSERT_EQ(offsets[3], 7); + + ASSERT_EQ(std::string(data_buffer + 0, 3), "abc"); + ASSERT_EQ(std::string(data_buffer + 3, 4), "1234"); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadBinary) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBinary; + data.size_bytes = sizeof(kTestPgCopyBinary); + + auto col_type = PostgresType(PostgresTypeId::kBytea); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBinary, sizeof(kTestPgCopyBinary)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto offsets = reinterpret_cast(array->children[0]->buffers[1]); + auto 
data_buffer = reinterpret_cast(array->children[0]->buffers[2]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 0); + ASSERT_EQ(offsets[2], 2); + ASSERT_EQ(offsets[3], 6); + ASSERT_EQ(offsets[4], 8); + ASSERT_EQ(offsets[5], 8); + + ASSERT_EQ(data_buffer[0], 0x00); + ASSERT_EQ(data_buffer[1], 0x01); + ASSERT_EQ(data_buffer[2], 0x01); + ASSERT_EQ(data_buffer[3], 0x02); + ASSERT_EQ(data_buffer[4], 0x03); + ASSERT_EQ(data_buffer[5], 0x04); + ASSERT_EQ(data_buffer[6], 0xfe); + ASSERT_EQ(data_buffer[7], 0xff); +} + + +// COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, +// 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyIntegerArray[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0x85, 0x00, 0x00, 0x00, + 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, + 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyIntegerArray; + data.size_bytes = sizeof(kTestPgCopyIntegerArray); + + auto col_type = PostgresType(PostgresTypeId::kInt4).Array(); + PostgresType 
input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyIntegerArray, + sizeof(kTestPgCopyIntegerArray)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + ASSERT_EQ(array->children[0]->n_children, 1); + ASSERT_EQ(array->children[0]->children[0]->length, 5); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto offsets = reinterpret_cast(array->children[0]->buffers[1]); + auto data_buffer = + reinterpret_cast(array->children[0]->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 2); + ASSERT_EQ(offsets[2], 5); + ASSERT_EQ(offsets[3], 5); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 0); + ASSERT_EQ(data_buffer[3], 1); + ASSERT_EQ(data_buffer[4], 123); +} + +// CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); +// COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), +// ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyCustomRecord[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x00, 0x7b, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x7c, 0x8c, + 0x9f, 0xbe, 0x76, 0xc8, 0xb4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 
0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x75, 0x9a, 0xd9, + 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyCustomRecord; + data.size_bytes = sizeof(kTestPgCopyCustomRecord); + + auto col_type = PostgresType(PostgresTypeId::kRecord); + col_type.AppendChild("nested1", PostgresType(PostgresTypeId::kInt4)); + col_type.AppendChild("nested2", PostgresType(PostgresTypeId::kFloat8)); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyCustomRecord, + sizeof(kTestPgCopyCustomRecord)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + ASSERT_EQ(array->children[0]->n_children, 2); + ASSERT_EQ(array->children[0]->children[0]->length, 3); + ASSERT_EQ(array->children[0]->children[1]->length, 3); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer1 = + reinterpret_cast(array->children[0]->children[0]->buffers[1]); + auto data_buffer2 = + reinterpret_cast(array->children[0]->children[1]->buffers[1]); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(data_buffer1[0], 123); + ASSERT_EQ(data_buffer1[1], 12); + ASSERT_EQ(data_buffer1[2], 0); + + ASSERT_DOUBLE_EQ(data_buffer2[0], 456.789); + ASSERT_DOUBLE_EQ(data_buffer2[1], 345.678); + ASSERT_DOUBLE_EQ(data_buffer2[2], 0); +} + +} // namespace adbcpq diff --git 
a/c/driver/postgresql/copy/postgres_copy_test_common.h b/c/driver/postgresql/copy/postgres_copy_test_common.h new file mode 100644 index 0000000000..cdc256d77a --- /dev/null +++ b/c/driver/postgresql/copy/postgres_copy_test_common.h @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +namespace adbcpq { + +// COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS +// drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyBoolean[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + +// COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopySmallInt[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xff, 0xff, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyInteger[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, + 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + +// COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyBigInt[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, + 
0xff, 0xff, 0xff, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), +// (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyReal[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xc2, 0xf6, 0xe9, + 0x79, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xbf, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, + 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), +// (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyDoublePrecision[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xc0, 0x5e, 0xdd, + 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xbf, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x3f, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, + 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +static uint8_t kTestPgCopyDate[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, + 0x71, 0x54, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x8e, 0xad, 0x00, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff}; + +// COPY (SELECT CAST(col AS TIMESTAMP) FROM ( VALUES ('1900-01-01 12:34:56'), +// ('2100-01-01 12:34:56'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); +static uint8_t kTestPgCopyTimestamp[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x08, 0xff, 0xf4, 0xc9, 0xf9, 0x07, 0xe5, 0x9c, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0b, 0x36, 0x30, 0x2d, 0xa5, + 0xfc, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-1 months -2 days -4 seconds'), +// ('1 months 2 days 4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); +static uint8_t kTestPgCopyInterval[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), +// (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyText[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, + 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS BYTEA) AS "col" FROM ( VALUES (''), ('\x0001'), +// ('\x01020304'), ('\xFEFF'), (NULL)) AS drvd("col")) TO STDOUT +// WITH (FORMAT binary); +static uint8_t kTestPgCopyBinary[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, + 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + +} // namespace adbcpq diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc b/c/driver/postgresql/copy/postgres_copy_writer_test.cc new file mode 100644 index 0000000000..2a33b47776 --- /dev/null +++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc @@ -0,0 +1,630 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include + +#include +#include +#include + +#include "postgresql/copy/writer.h" +#include "validation/adbc_validation_util.h" +#include "postgres_copy_test_common.h" + +namespace adbcpq { + +class PostgresCopyStreamWriteTester { + public: + ArrowErrorCode Init(struct ArrowSchema* schema, struct ArrowArray* array, + struct ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(writer_.Init(schema)); + NANOARROW_RETURN_NOT_OK(writer_.InitFieldWriters(error)); + NANOARROW_RETURN_NOT_OK(writer_.SetArray(array)); + return NANOARROW_OK; + } + + ArrowErrorCode WriteAll(struct ArrowError* error) { + NANOARROW_RETURN_NOT_OK(writer_.WriteHeader(error)); + + int result; + do { + result = writer_.WriteRecord(error); + } while (result == NANOARROW_OK); + + return result; + } + + ArrowErrorCode WriteArray(struct ArrowArray* array, struct ArrowError* error) { + writer_.SetArray(array); + int result; + do { + result = writer_.WriteRecord(error); + } while (result == NANOARROW_OK); + + return result; + } + + const struct ArrowBuffer& WriteBuffer() const { return writer_.WriteBuffer(); } + + void Rewind() { writer_.Rewind(); } + + private: + PostgresCopyStreamWriter writer_; +}; + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteBoolean) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + adbc_validation::Handle buffer; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BOOL}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {true, false, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = 
sizeof(kTestPgCopyBoolean) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyBoolean[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt8) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT8}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt16) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT16}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + 
ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt32) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyInteger) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt64) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT64}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyBigInt) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyBigInt[i]); + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteReal) { + 
adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_FLOAT}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123.456, -1, 1, 123.456, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyReal) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyReal[i]) << " mismatch at index: " << i; + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteDoublePrecision) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DOUBLE}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123.456, -1, 1, 123.456, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyDoublePrecision) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyDoublePrecision[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteDate) { + adbc_validation::Handle schema; + adbc_validation::Handle 
array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DATE32}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-25567, 47482, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyDate) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyDate[i]); + } +} + +// This buffer is similar to the read variant above but removes special values +// nan, ±inf as they are not supported via the Arrow Decimal types +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), +// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) +// TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericWrite[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, + 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + 
struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 8; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal5; + + ArrowDecimalInit(&decimal1, size, 19, 8); + ArrowDecimalSetInt(&decimal1, -12345600000); + ArrowDecimalInit(&decimal2, size, 19, 8); + ArrowDecimalSetInt(&decimal2, 1234); + ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalSetInt(&decimal3, 100000000); + ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalSetInt(&decimal4, 12345600000); + ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalSetInt(&decimal5, 100000000000000); + + const std::vector> values = { + std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], + type, precision, scale), 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]) << " at position " << i; + } +} + +using TimestampTestParamType = std::tuple>>; + +class PostgresCopyWriteTimestampTest : public testing::TestWithParam< + TimestampTestParamType> { 
+}; + +TEST_P(PostgresCopyWriteTimestampTest, WritesProperBufferValues) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + + TimestampTestParamType parameters = GetParam(); + enum ArrowTimeUnit unit = std::get<0>(parameters); + const char* timezone = std::get<1>(parameters); + + const std::vector> values = std::get<2>(parameters); + + ArrowSchemaInit(&schema.value); + ArrowSchemaSetTypeStruct(&schema.value, 1); + ArrowSchemaSetTypeDateTime(schema->children[0], + NANOARROW_TYPE_TIMESTAMP, + unit, + timezone); + ArrowSchemaSetName(schema->children[0], "col"); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, + &array.value, + &na_error, + values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyTimestamp) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyTimestamp[i]); + } +} + +static const std::vector ts_values { + {NANOARROW_TIME_UNIT_SECOND, nullptr, + {-2208943504, 4102490096, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, nullptr, + {-2208943504000, 4102490096000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, nullptr, + {-2208943504000000, 4102490096000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, nullptr, + {-2208943504000000000, 4102490096000000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_SECOND, "UTC", + {-2208943504, 4102490096, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, "UTC", + {-2208943504000, 4102490096000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, "UTC", + {-2208943504000000, 4102490096000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, "UTC", + 
{-2208943504000000000, 4102490096000000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_SECOND, "America/New_York", + {-2208943504, 4102490096, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, "America/New_York", + {-2208943504000, 4102490096000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, "America/New_York", + {-2208943504000000, 4102490096000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, "America/New_York", + {-2208943504000000000, 4102490096000000000, std::nullopt}}, +}; + +INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteTimestamp, + PostgresCopyWriteTimestampTest, + testing::ValuesIn(ts_values)); + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInterval) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + const enum ArrowType type = NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO; + // values are days, months, ns + struct ArrowInterval neg_interval; + struct ArrowInterval pos_interval; + + ArrowIntervalInit(&neg_interval, type); + ArrowIntervalInit(&pos_interval, type); + + neg_interval.months = -1; + neg_interval.days = -2; + neg_interval.ns = -4000000000; + + pos_interval.months = 1; + pos_interval.days = 2; + pos_interval.ns = 4000000000; + + const std::vector> values = { + &neg_interval, &pos_interval, std::nullopt}; + + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), ADBC_STATUS_OK); + + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyInterval) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], 
kTestPgCopyInterval[i]); + } +} + +// Writing a DURATION from NANOARROW produces INTERVAL in postgres without day/month +// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-4 seconds'), +// ('4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); +static uint8_t kTestPgCopyDuration[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff}; +using DurationTestParamType = std::tuple>>; + +class PostgresCopyWriteDurationTest : public testing::TestWithParam< + DurationTestParamType> {}; + +TEST_P(PostgresCopyWriteDurationTest, WritesProperBufferValues) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + const enum ArrowType type = NANOARROW_TYPE_DURATION; + + DurationTestParamType parameters = GetParam(); + enum ArrowTimeUnit unit = std::get<0>(parameters); + const std::vector> values = std::get<1>(parameters); + + ArrowSchemaInit(&schema.value); + ArrowSchemaSetTypeStruct(&schema.value, 1); + ArrowSchemaSetTypeDateTime(schema->children[0], type, unit, nullptr); + ArrowSchemaSetName(schema->children[0], "col"); + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyDuration) - 2; + 
ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyDuration[i]); + } +} + +static const std::vector duration_params { + {NANOARROW_TIME_UNIT_SECOND, {-4, 4, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, {-4000, 4000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, {-4000000, 4000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, {-4000000000, 4000000000, std::nullopt}}, +}; + +INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteDuration, + PostgresCopyWriteDurationTest, + testing::ValuesIn(duration_params)); + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteString) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_STRING}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteLargeString) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ( + adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_LARGE_STRING}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, 
&array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteBinary) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BINARY}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch>( + &schema.value, &array.value, &na_error, + { + std::vector{}, + std::vector{std::byte{0x00}, std::byte{0x01}}, + std::vector{ + std::byte{0x01}, std::byte{0x02}, std::byte{0x03}, std::byte{0x04} + }, + std::vector{std::byte{0xfe}, std::byte{0xff}}, + std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyBinary) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyBinary[i]) << "failure at index " << i; + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteMultiBatch) { + // Regression test for https://github.com/apache/arrow-adbc/issues/1310 + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), + NANOARROW_OK); + 
ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + NANOARROW_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + size_t buf_size = sizeof(kTestPgCopyInteger) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); + } + + tester.Rewind(); + ASSERT_EQ(tester.WriteArray(&array.value, nullptr), ENODATA); + + buf = tester.WriteBuffer(); + // Ignore the header and footer + buf_size = sizeof(kTestPgCopyInteger) - 21; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i + 19]); + } +} + +} // namespace adbcpq diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/copy/reader.h similarity index 60% rename from c/driver/postgresql/postgres_copy_reader.h rename to c/driver/postgresql/copy/reader.h index 8a9192c329..8ba0568ad9 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/copy/reader.h @@ -17,14 +17,8 @@ #pragma once -// Windows -#define NOMINMAX - #include -#include #include -#include -#include #include #include #include @@ -32,49 +26,12 @@ #include -#include "common/utils.h" -#include "postgres_type.h" -#include "postgres_util.h" - -// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA -#if defined(_WIN32) && !defined(MSVC) && !defined(ENODATA) -#define ENODATA 120 -#endif +#include "copy_common.h" +#include "../postgres_type.h" +#include "../postgres_util.h" namespace adbcpq { -// "PGCOPY\n\377\r\n\0" -static int8_t kPgCopyBinarySignature[] = {0x50, 0x47, 0x43, 0x4F, - 0x50, 0x59, 0x0A, 
static_cast(0xFF), - 0x0D, 0x0A, 0x00}; - -// The maximum value in seconds that can be converted into microseconds -// without overflow -constexpr int64_t kMaxSafeSecondsToMicros = 9223372036854L; - -// The minimum value in seconds that can be converted into microseconds -// without overflow -constexpr int64_t kMinSafeSecondsToMicros = -9223372036854L; - -// The maximum value in milliseconds that can be converted into microseconds -// without overflow -constexpr int64_t kMaxSafeMillisToMicros = 9223372036854775L; - -// The minimum value in milliseconds that can be converted into microseconds -// without overflow -constexpr int64_t kMinSafeMillisToMicros = -9223372036854775L; - -// The maximum value in microseconds that can be converted into nanoseconds -// without overflow -constexpr int64_t kMaxSafeMicrosToNanos = 9223372036854775L; - -// The minimum value in microseconds that can be converted into nanoseconds -// without overflow -constexpr int64_t kMinSafeMicrosToNanos = -9223372036854775L; - -// 2000-01-01 00:00:00.000000 in microseconds -constexpr int64_t kPostgresTimestampEpoch = 946684800000000L; - // Read a value from the buffer without checking the buffer size. Advances // the cursor of data and reduces its size by sizeof(T). template @@ -125,41 +82,6 @@ ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { return NANOARROW_OK; } -// Write a value to a buffer without checking the buffer size. 
Advances -// the cursor of buffer and reduces it by sizeof(T) -template -inline void WriteUnsafe(ArrowBuffer* buffer, T in) { - const T value = SwapNetworkToHost(in); - ArrowBufferAppendUnsafe(buffer, &value, sizeof(T)); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int8_t in) { - ArrowBufferAppendUnsafe(buffer, &in, sizeof(int8_t)); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int16_t in) { - WriteUnsafe(buffer, in); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int32_t in) { - WriteUnsafe(buffer, in); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int64_t in) { - WriteUnsafe(buffer, in); -} - -template -ArrowErrorCode WriteChecked(ArrowBuffer* buffer, T in, ArrowError* error) { - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, sizeof(T))); - WriteUnsafe(buffer, in); - return NANOARROW_OK; -} - class PostgresCopyFieldReader { public: PostgresCopyFieldReader() : validity_(nullptr), offsets_(nullptr), data_(nullptr) { @@ -1101,588 +1023,4 @@ class PostgresCopyStreamReader { int64_t array_size_approx_bytes_; }; -class PostgresCopyFieldWriter { - public: - virtual ~PostgresCopyFieldWriter() {} - - void Init(struct ArrowArrayView* array_view) { array_view_ = array_view; }; - - virtual ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) { - return ENOTSUP; - } - - protected: - struct ArrowArrayView* array_view_; - std::vector> children_; -}; - -class PostgresCopyFieldTupleWriter : public PostgresCopyFieldWriter { - public: - void AppendChild(std::unique_ptr child) { - int64_t child_i = static_cast(children_.size()); - children_.push_back(std::move(child)); - children_[child_i]->Init(array_view_->children[child_i]); - } - - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - if (index >= array_view_->length) { - return ENODATA; - } - - const int16_t n_fields = children_.size(); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, n_fields, 
error)); - - for (int16_t i = 0; i < n_fields; i++) { - const int8_t is_null = ArrowArrayViewIsNull(array_view_->children[i], index); - if (is_null) { - constexpr int32_t field_size_bytes = -1; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - } else { - children_[i]->Write(buffer, index, error); - } - } - - return NANOARROW_OK; - } - - private: - std::vector> children_; -}; - -class PostgresCopyBooleanFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 1; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - const int8_t value = - static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -template -class PostgresCopyNetworkEndianFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(T); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - const T value = - static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)) - kOffset; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyFloatFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(uint32_t); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - uint32_t value; - float raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); - std::memcpy(&value, &raw_value, sizeof(uint32_t)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyDoubleFieldWriter : public PostgresCopyFieldWriter { 
- public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(uint64_t); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - uint64_t value; - double raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); - std::memcpy(&value, &raw_value, sizeof(uint64_t)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 16; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - struct ArrowInterval interval; - ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); - ArrowArrayViewGetIntervalUnsafe(array_view_, index, &interval); - const int64_t ms = interval.ns / 1000; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ms, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.days, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.months, error)); - - return ADBC_STATUS_OK; - } -}; - -// Inspiration for this taken from get_str_from_var in the pg source -// src/backend/utils/adt/numeric.c -template -class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { -public: - PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : - precision_{precision}, scale_{scale} {} - - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - struct ArrowDecimal decimal; - ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); - ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); - - const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? 
kNumericPos : kNumericNeg; - - // Number of decimal digits per Postgres digit - constexpr int kDecDigits = 4; - std::vector pg_digits; - int16_t weight = -(scale_ / kDecDigits); - int16_t dscale = scale_; - bool seen_decimal = scale_ == 0; - bool truncating_trailing_zeros = true; - - char decimal_string[max_decimal_digits_ + 1]; - int digits_remaining = DecimalToString(&decimal, decimal_string); - do { - const int start_pos = digits_remaining < kDecDigits ? - 0 : digits_remaining - kDecDigits; - const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; - char substr[kDecDigits + 1]; - std::memcpy(substr, decimal_string + start_pos, len); - substr[len] = '\0'; - int16_t val = static_cast(std::atoi(substr)); - - if (val == 0) { - if (!seen_decimal && truncating_trailing_zeros) { - dscale -= kDecDigits; - } - } else { - pg_digits.insert(pg_digits.begin(), val); - if (!seen_decimal && truncating_trailing_zeros) { - if (val % 1000 == 0) { - dscale -= 3; - } else if (val % 100 == 0) { - dscale -= 2; - } else if (val % 10 == 0) { - dscale -= 1; - } - } - truncating_trailing_zeros = false; - } - digits_remaining -= kDecDigits; - if (digits_remaining <= 0) { - break; - } - weight++; - - if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { - seen_decimal = true; - } - } while (true); - - int16_t ndigits = pg_digits.size(); - int32_t field_size_bytes = sizeof(ndigits) - + sizeof(weight) - + sizeof(sign) - + sizeof(dscale) - + ndigits * sizeof(int16_t); - - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, weight, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); - - const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); - for (auto pg_digit : 
pg_digits) { - WriteUnsafe(buffer, pg_digit); - } - - return ADBC_STATUS_OK; - } - -private: - // returns the length of the string - template - int DecimalToString(struct ArrowDecimal* decimal, char* out) { - constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; - uint8_t tmp[DEC_WIDTH / 8]; - ArrowDecimalGetBytes(decimal, tmp); - uint64_t buf[DEC_WIDTH / 64]; - std::memcpy(buf, tmp, sizeof(buf)); - const int16_t sign = ArrowDecimalSign(decimal) > 0 ? kNumericPos : kNumericNeg; - const bool is_negative = sign == kNumericNeg ? true : false; - if (is_negative) { - buf[0] = ~buf[0] + 1; - for (size_t i = 1; i < nwords; i++) { - buf[i] = ~buf[i]; - } - } - - // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 - char s[max_decimal_digits_ + 1]; - std::memset(s, '0', sizeof(s) - 1); - s[sizeof(s) - 1] = '\0'; - - for (size_t i = 0; i < DEC_WIDTH; i++) { - int carry; - - carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); - for (size_t j = nwords - 1; j > 0; j--) { - buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j-1] >= 0x7FFFFFFFFFFFFFFF); - } - buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); - - for (int j = sizeof(s) - 2; j>= 0; j--) { - s[j] += s[j] - '0' + carry; - carry = (s[j] > '9'); - if (carry) { - s[j] -= 10; - } - } - } - - char* p = s; - while ((p[0] == '0') && (p < &s[sizeof(s) - 2])) { - p++; - } - - const size_t ndigits = sizeof(s) - 1 - (p - s); - std::memcpy(out, p, ndigits); - out[ndigits] = '\0'; - - return ndigits; - } - - static constexpr uint16_t kNumericPos = 0x0000; - static constexpr uint16_t kNumericNeg = 0x4000; - static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; - static constexpr size_t max_decimal_digits_ = - (T == NANOARROW_TYPE_DECIMAL128) ? 
39 : 78; - const int32_t precision_; - const int32_t scale_; -}; - -template -class PostgresCopyDurationFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 16; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); - int64_t value; - - bool overflow_safe = true; - switch (TU) { - case NANOARROW_TIME_UNIT_SECOND: - if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && - raw_value >= kMinSafeSecondsToMicros)) { - value = raw_value * 1000000; - } - break; - case NANOARROW_TIME_UNIT_MILLI: - if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && - raw_value >= kMinSafeMillisToMicros)) { - value = raw_value * 1000; - } - break; - case NANOARROW_TIME_UNIT_MICRO: - value = raw_value; - break; - case NANOARROW_TIME_UNIT_NANO: - value = raw_value / 1000; - break; - } - - if (!overflow_safe) { - ArrowErrorSet( - error, "Row %" PRId64 " duration value %" PRId64 " with unit %d would overflow", - index, raw_value, TU); - return ADBC_STATUS_INVALID_ARGUMENT; - } - - // 2000-01-01 00:00:00.000000 in microseconds - constexpr uint32_t days = 0; - constexpr uint32_t months = 0; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, days, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, months, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyBinaryFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - struct ArrowBufferView buffer_view = ArrowArrayViewGetBytesUnsafe(array_view_, index); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, buffer_view.size_bytes, error)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); - - return 
ADBC_STATUS_OK; - } -}; - -class PostgresCopyBinaryDictFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - int64_t dict_index = ArrowArrayViewGetIntUnsafe(array_view_, index); - if (ArrowArrayViewIsNull(array_view_->dictionary, dict_index)) { - constexpr int32_t field_size_bytes = -1; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - } else { - struct ArrowBufferView buffer_view = - ArrowArrayViewGetBytesUnsafe(array_view_->dictionary, dict_index); - NANOARROW_RETURN_NOT_OK( - WriteChecked(buffer, buffer_view.size_bytes, error)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); - } - - return ADBC_STATUS_OK; - } -}; - -template -class PostgresCopyTimestampFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(int64_t); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); - int64_t value; - - bool overflow_safe = true; - switch (TU) { - case NANOARROW_TIME_UNIT_SECOND: - if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && - raw_value >= kMinSafeSecondsToMicros)) { - value = raw_value * 1000000; - } - break; - case NANOARROW_TIME_UNIT_MILLI: - if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && - raw_value >= kMinSafeMillisToMicros)) { - value = raw_value * 1000; - } - break; - case NANOARROW_TIME_UNIT_MICRO: - value = raw_value; - break; - case NANOARROW_TIME_UNIT_NANO: - value = raw_value / 1000; - break; - } - - if (!overflow_safe) { - ArrowErrorSet(error, - "[libpq] Row %" PRId64 " timestamp value %" PRId64 - " with unit %d would overflow", - index, raw_value, TU); - return ADBC_STATUS_INVALID_ARGUMENT; - } - - if (value < 
std::numeric_limits::min() + kPostgresTimestampEpoch) { - ArrowErrorSet(error, - "[libpq] Row %" PRId64 " timestamp value %" PRId64 - " with unit %d would underflow", - index, raw_value, TU); - return ADBC_STATUS_INVALID_ARGUMENT; - } - - const int64_t scaled = value - kPostgresTimestampEpoch; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, scaled, error)); - - return ADBC_STATUS_OK; - } -}; - -static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, - PostgresCopyFieldWriter** out, - ArrowError* error) { - struct ArrowSchemaView schema_view; - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); - - switch (schema_view.type) { - case NANOARROW_TYPE_BOOL: - *out = new PostgresCopyBooleanFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_INT8: - case NANOARROW_TYPE_INT16: - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_INT32: - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_INT64: - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DATE32: { - constexpr int32_t kPostgresDateEpoch = 10957; - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - } - case NANOARROW_TYPE_FLOAT: - *out = new PostgresCopyFloatFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DOUBLE: - *out = new PostgresCopyDoubleFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DECIMAL128: { - const auto precision = schema_view.decimal_precision; - const auto scale = schema_view.decimal_scale; - *out = new PostgresCopyNumericFieldWriter< - NANOARROW_TYPE_DECIMAL128>(precision, scale); - return NANOARROW_OK; - } - case NANOARROW_TYPE_DECIMAL256: { - const auto precision = schema_view.decimal_precision; - const auto scale = schema_view.decimal_scale; - *out = new PostgresCopyNumericFieldWriter< - NANOARROW_TYPE_DECIMAL256>(precision, scale); - return NANOARROW_OK; - } - 
case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_STRING: - *out = new PostgresCopyBinaryFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_TIMESTAMP: { - switch (schema_view.time_unit) { - case NANOARROW_TIME_UNIT_NANO: - *out = new PostgresCopyTimestampFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MILLI: - *out = new PostgresCopyTimestampFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MICRO: - *out = new PostgresCopyTimestampFieldWriter(); - break; - case NANOARROW_TIME_UNIT_SECOND: - *out = new PostgresCopyTimestampFieldWriter(); - break; - } - return NANOARROW_OK; - } - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: - *out = new PostgresCopyIntervalFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DURATION: { - switch (schema_view.time_unit) { - case NANOARROW_TIME_UNIT_SECOND: - *out = new PostgresCopyDurationFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MILLI: - *out = new PostgresCopyDurationFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MICRO: - *out = new PostgresCopyDurationFieldWriter(); - - break; - case NANOARROW_TIME_UNIT_NANO: - *out = new PostgresCopyDurationFieldWriter(); - break; - } - return NANOARROW_OK; - } - case NANOARROW_TYPE_DICTIONARY: { - struct ArrowSchemaView value_view; - NANOARROW_RETURN_NOT_OK( - ArrowSchemaViewInit(&value_view, schema->dictionary, error)); - switch (value_view.type) { - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - case NANOARROW_TYPE_LARGE_STRING: - *out = new PostgresCopyBinaryDictFieldWriter(); - return NANOARROW_OK; - default: - break; - } - } - default: - break; - } - - ArrowErrorSet(error, "COPY Writer not implemented for type %d", schema_view.type); - return EINVAL; -} - -class PostgresCopyStreamWriter { - public: - ArrowErrorCode Init(struct ArrowSchema* schema) { - schema_ = schema; - NANOARROW_RETURN_NOT_OK( - ArrowArrayViewInitFromSchema(&array_view_.value, schema, nullptr)); - 
root_writer_.Init(&array_view_.value); - ArrowBufferInit(&buffer_.value); - return NANOARROW_OK; - } - - ArrowErrorCode SetArray(struct ArrowArray* array) { - NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&array_view_.value, array, nullptr)); - return NANOARROW_OK; - } - - ArrowErrorCode WriteHeader(ArrowError* error) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&buffer_.value, kPgCopyBinarySignature, - sizeof(kPgCopyBinarySignature))); - - const uint32_t flag_fields = 0; - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(&buffer_.value, &flag_fields, sizeof(flag_fields))); - - const uint32_t extension_bytes = 0; - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(&buffer_.value, &extension_bytes, sizeof(extension_bytes))); - - return NANOARROW_OK; - } - - ArrowErrorCode WriteRecord(ArrowError* error) { - NANOARROW_RETURN_NOT_OK(root_writer_.Write(&buffer_.value, records_written_, error)); - records_written_++; - return NANOARROW_OK; - } - - ArrowErrorCode InitFieldWriters(ArrowError* error) { - if (schema_->release == nullptr) { - return EINVAL; - } - - for (int64_t i = 0; i < schema_->n_children; i++) { - PostgresCopyFieldWriter* child_writer = nullptr; - NANOARROW_RETURN_NOT_OK( - MakeCopyFieldWriter(schema_->children[i], &child_writer, error)); - root_writer_.AppendChild(std::unique_ptr(child_writer)); - } - - return NANOARROW_OK; - } - - const struct ArrowBuffer& WriteBuffer() const { return buffer_.value; } - - void Rewind() { - records_written_ = 0; - buffer_->size_bytes = 0; - } - - private: - PostgresCopyFieldTupleWriter root_writer_; - struct ArrowSchema* schema_; - Handle array_view_; - Handle buffer_; - int64_t records_written_ = 0; -}; - } // namespace adbcpq diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h new file mode 100644 index 0000000000..b04f370f77 --- /dev/null +++ b/c/driver/postgresql/copy/writer.h @@ -0,0 +1,673 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "copy_common.h" +#include "../postgres_util.h" + +namespace adbcpq { + +// The maximum value in seconds that can be converted into microseconds +// without overflow +constexpr int64_t kMaxSafeSecondsToMicros = 9223372036854L; + +// The minimum value in seconds that can be converted into microseconds +// without overflow +constexpr int64_t kMinSafeSecondsToMicros = -9223372036854L; + +// The maximum value in milliseconds that can be converted into microseconds +// without overflow +constexpr int64_t kMaxSafeMillisToMicros = 9223372036854775L; + +// The minimum value in milliseconds that can be converted into microseconds +// without overflow +constexpr int64_t kMinSafeMillisToMicros = -9223372036854775L; + + +// 2000-01-01 00:00:00.000000 in microseconds +constexpr int64_t kPostgresTimestampEpoch = 946684800000000L; + +// Write a value to a buffer without checking the buffer size. 
Advances +// the cursor of buffer and reduces it by sizeof(T) +template +inline void WriteUnsafe(ArrowBuffer* buffer, T in) { + const T value = SwapNetworkToHost(in); + ArrowBufferAppendUnsafe(buffer, &value, sizeof(T)); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int8_t in) { + ArrowBufferAppendUnsafe(buffer, &in, sizeof(int8_t)); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int16_t in) { + WriteUnsafe(buffer, in); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int32_t in) { + WriteUnsafe(buffer, in); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int64_t in) { + WriteUnsafe(buffer, in); +} + +template +ArrowErrorCode WriteChecked(ArrowBuffer* buffer, T in, ArrowError* error) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, sizeof(T))); + WriteUnsafe(buffer, in); + return NANOARROW_OK; +} + +class PostgresCopyFieldWriter { + public: + virtual ~PostgresCopyFieldWriter() {} + + void Init(struct ArrowArrayView* array_view) { array_view_ = array_view; }; + + virtual ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) { + return ENOTSUP; + } + + protected: + struct ArrowArrayView* array_view_; + std::vector> children_; +}; + +class PostgresCopyFieldTupleWriter : public PostgresCopyFieldWriter { + public: + void AppendChild(std::unique_ptr child) { + int64_t child_i = static_cast(children_.size()); + children_.push_back(std::move(child)); + children_[child_i]->Init(array_view_->children[child_i]); + } + + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + if (index >= array_view_->length) { + return ENODATA; + } + + const int16_t n_fields = children_.size(); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, n_fields, error)); + + for (int16_t i = 0; i < n_fields; i++) { + const int8_t is_null = ArrowArrayViewIsNull(array_view_->children[i], index); + if (is_null) { + constexpr int32_t field_size_bytes = -1; + 
NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + } else { + children_[i]->Write(buffer, index, error); + } + } + + return NANOARROW_OK; + } + + private: + std::vector> children_; +}; + +class PostgresCopyBooleanFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = 1; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + const int8_t value = + static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +template +class PostgresCopyNetworkEndianFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(T); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + const T value = + static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)) - kOffset; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyFloatFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(uint32_t); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + uint32_t value; + float raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); + std::memcpy(&value, &raw_value, sizeof(uint32_t)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyDoubleFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(uint64_t); + 
NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + uint64_t value; + double raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); + std::memcpy(&value, &raw_value, sizeof(uint64_t)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = 16; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + struct ArrowInterval interval; + ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + ArrowArrayViewGetIntervalUnsafe(array_view_, index, &interval); + const int64_t ms = interval.ns / 1000; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ms, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.days, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.months, error)); + + return ADBC_STATUS_OK; + } +}; + +// Inspiration for this taken from get_str_from_var in the pg source +// src/backend/utils/adt/numeric.c +template +class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { +public: + PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : + precision_{precision}, scale_{scale} {} + + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + struct ArrowDecimal decimal; + ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); + ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); + + const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? 
kNumericPos : kNumericNeg; + + // Number of decimal digits per Postgres digit + constexpr int kDecDigits = 4; + std::vector pg_digits; + int16_t weight = -(scale_ / kDecDigits); + int16_t dscale = scale_; + bool seen_decimal = scale_ == 0; + bool truncating_trailing_zeros = true; + + char decimal_string[max_decimal_digits_ + 1]; + int digits_remaining = DecimalToString(&decimal, decimal_string); + do { + const int start_pos = digits_remaining < kDecDigits ? + 0 : digits_remaining - kDecDigits; + const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; + char substr[kDecDigits + 1]; + std::memcpy(substr, decimal_string + start_pos, len); + substr[len] = '\0'; + int16_t val = static_cast(std::atoi(substr)); + + if (val == 0) { + if (!seen_decimal && truncating_trailing_zeros) { + dscale -= kDecDigits; + } + } else { + pg_digits.insert(pg_digits.begin(), val); + if (!seen_decimal && truncating_trailing_zeros) { + if (val % 1000 == 0) { + dscale -= 3; + } else if (val % 100 == 0) { + dscale -= 2; + } else if (val % 10 == 0) { + dscale -= 1; + } + } + truncating_trailing_zeros = false; + } + digits_remaining -= kDecDigits; + if (digits_remaining <= 0) { + break; + } + weight++; + + if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { + seen_decimal = true; + } + } while (true); + + int16_t ndigits = pg_digits.size(); + int32_t field_size_bytes = sizeof(ndigits) + + sizeof(weight) + + sizeof(sign) + + sizeof(dscale) + + ndigits * sizeof(int16_t); + + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, weight, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); + + const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); + for (auto pg_digit : 
pg_digits) { + WriteUnsafe(buffer, pg_digit); + } + + return ADBC_STATUS_OK; + } + +private: + // returns the length of the string + template + int DecimalToString(struct ArrowDecimal* decimal, char* out) { + constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; + uint8_t tmp[DEC_WIDTH / 8]; + ArrowDecimalGetBytes(decimal, tmp); + uint64_t buf[DEC_WIDTH / 64]; + std::memcpy(buf, tmp, sizeof(buf)); + const int16_t sign = ArrowDecimalSign(decimal) > 0 ? kNumericPos : kNumericNeg; + const bool is_negative = sign == kNumericNeg ? true : false; + if (is_negative) { + buf[0] = ~buf[0] + 1; + for (size_t i = 1; i < nwords; i++) { + buf[i] = ~buf[i]; + } + } + + // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 + char s[max_decimal_digits_ + 1]; + std::memset(s, '0', sizeof(s) - 1); + s[sizeof(s) - 1] = '\0'; + + for (size_t i = 0; i < DEC_WIDTH; i++) { + int carry; + + carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); + for (size_t j = nwords - 1; j > 0; j--) { + buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j-1] >= 0x7FFFFFFFFFFFFFFF); + } + buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); + + for (int j = sizeof(s) - 2; j>= 0; j--) { + s[j] += s[j] - '0' + carry; + carry = (s[j] > '9'); + if (carry) { + s[j] -= 10; + } + } + } + + char* p = s; + while ((p[0] == '0') && (p < &s[sizeof(s) - 2])) { + p++; + } + + const size_t ndigits = sizeof(s) - 1 - (p - s); + std::memcpy(out, p, ndigits); + out[ndigits] = '\0'; + + return ndigits; + } + + static constexpr uint16_t kNumericPos = 0x0000; + static constexpr uint16_t kNumericNeg = 0x4000; + static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; + static constexpr size_t max_decimal_digits_ = + (T == NANOARROW_TYPE_DECIMAL128) ? 
39 : 78; + const int32_t precision_; + const int32_t scale_; +}; + +template +class PostgresCopyDurationFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = 16; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); + int64_t value; + + bool overflow_safe = true; + switch (TU) { + case NANOARROW_TIME_UNIT_SECOND: + if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && + raw_value >= kMinSafeSecondsToMicros)) { + value = raw_value * 1000000; + } + break; + case NANOARROW_TIME_UNIT_MILLI: + if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && + raw_value >= kMinSafeMillisToMicros)) { + value = raw_value * 1000; + } + break; + case NANOARROW_TIME_UNIT_MICRO: + value = raw_value; + break; + case NANOARROW_TIME_UNIT_NANO: + value = raw_value / 1000; + break; + } + + if (!overflow_safe) { + ArrowErrorSet( + error, "Row %" PRId64 " duration value %" PRId64 " with unit %d would overflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + // 2000-01-01 00:00:00.000000 in microseconds + constexpr uint32_t days = 0; + constexpr uint32_t months = 0; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, days, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, months, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyBinaryFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + struct ArrowBufferView buffer_view = ArrowArrayViewGetBytesUnsafe(array_view_, index); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, buffer_view.size_bytes, error)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); + + return 
ADBC_STATUS_OK; + } +}; + +class PostgresCopyBinaryDictFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + int64_t dict_index = ArrowArrayViewGetIntUnsafe(array_view_, index); + if (ArrowArrayViewIsNull(array_view_->dictionary, dict_index)) { + constexpr int32_t field_size_bytes = -1; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + } else { + struct ArrowBufferView buffer_view = + ArrowArrayViewGetBytesUnsafe(array_view_->dictionary, dict_index); + NANOARROW_RETURN_NOT_OK( + WriteChecked(buffer, buffer_view.size_bytes, error)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); + } + + return ADBC_STATUS_OK; + } +}; + +template +class PostgresCopyTimestampFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(int64_t); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); + int64_t value; + + bool overflow_safe = true; + switch (TU) { + case NANOARROW_TIME_UNIT_SECOND: + if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && + raw_value >= kMinSafeSecondsToMicros)) { + value = raw_value * 1000000; + } + break; + case NANOARROW_TIME_UNIT_MILLI: + if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && + raw_value >= kMinSafeMillisToMicros)) { + value = raw_value * 1000; + } + break; + case NANOARROW_TIME_UNIT_MICRO: + value = raw_value; + break; + case NANOARROW_TIME_UNIT_NANO: + value = raw_value / 1000; + break; + } + + if (!overflow_safe) { + ArrowErrorSet(error, + "[libpq] Row %" PRId64 " timestamp value %" PRId64 + " with unit %d would overflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + if (value < 
std::numeric_limits::min() + kPostgresTimestampEpoch) { + ArrowErrorSet(error, + "[libpq] Row %" PRId64 " timestamp value %" PRId64 + " with unit %d would underflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + const int64_t scaled = value - kPostgresTimestampEpoch; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, scaled, error)); + + return ADBC_STATUS_OK; + } +}; + +static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, + PostgresCopyFieldWriter** out, + ArrowError* error) { + struct ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + *out = new PostgresCopyBooleanFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_INT16: + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_INT32: + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_INT64: + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DATE32: { + constexpr int32_t kPostgresDateEpoch = 10957; + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + } + case NANOARROW_TYPE_FLOAT: + *out = new PostgresCopyFloatFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DOUBLE: + *out = new PostgresCopyDoubleFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DECIMAL128: { + const auto precision = schema_view.decimal_precision; + const auto scale = schema_view.decimal_scale; + *out = new PostgresCopyNumericFieldWriter< + NANOARROW_TYPE_DECIMAL128>(precision, scale); + return NANOARROW_OK; + } + case NANOARROW_TYPE_DECIMAL256: { + const auto precision = schema_view.decimal_precision; + const auto scale = schema_view.decimal_scale; + *out = new PostgresCopyNumericFieldWriter< + NANOARROW_TYPE_DECIMAL256>(precision, scale); + return NANOARROW_OK; + } + 
case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + *out = new PostgresCopyBinaryFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_TIMESTAMP: { + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_NANO: + *out = new PostgresCopyTimestampFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MILLI: + *out = new PostgresCopyTimestampFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MICRO: + *out = new PostgresCopyTimestampFieldWriter(); + break; + case NANOARROW_TIME_UNIT_SECOND: + *out = new PostgresCopyTimestampFieldWriter(); + break; + } + return NANOARROW_OK; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + *out = new PostgresCopyIntervalFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DURATION: { + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + *out = new PostgresCopyDurationFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MILLI: + *out = new PostgresCopyDurationFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MICRO: + *out = new PostgresCopyDurationFieldWriter(); + + break; + case NANOARROW_TIME_UNIT_NANO: + *out = new PostgresCopyDurationFieldWriter(); + break; + } + return NANOARROW_OK; + } + case NANOARROW_TYPE_DICTIONARY: { + struct ArrowSchemaView value_view; + NANOARROW_RETURN_NOT_OK( + ArrowSchemaViewInit(&value_view, schema->dictionary, error)); + switch (value_view.type) { + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_LARGE_STRING: + *out = new PostgresCopyBinaryDictFieldWriter(); + return NANOARROW_OK; + default: + break; + } + } + default: + break; + } + + ArrowErrorSet(error, "COPY Writer not implemented for type %d", schema_view.type); + return EINVAL; +} + +class PostgresCopyStreamWriter { + public: + ArrowErrorCode Init(struct ArrowSchema* schema) { + schema_ = schema; + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewInitFromSchema(&array_view_.value, schema, nullptr)); + 
root_writer_.Init(&array_view_.value); + ArrowBufferInit(&buffer_.value); + return NANOARROW_OK; + } + + ArrowErrorCode SetArray(struct ArrowArray* array) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&array_view_.value, array, nullptr)); + return NANOARROW_OK; + } + + ArrowErrorCode WriteHeader(ArrowError* error) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&buffer_.value, kPgCopyBinarySignature, + sizeof(kPgCopyBinarySignature))); + + const uint32_t flag_fields = 0; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(&buffer_.value, &flag_fields, sizeof(flag_fields))); + + const uint32_t extension_bytes = 0; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(&buffer_.value, &extension_bytes, sizeof(extension_bytes))); + + return NANOARROW_OK; + } + + ArrowErrorCode WriteRecord(ArrowError* error) { + NANOARROW_RETURN_NOT_OK(root_writer_.Write(&buffer_.value, records_written_, error)); + records_written_++; + return NANOARROW_OK; + } + + ArrowErrorCode InitFieldWriters(ArrowError* error) { + if (schema_->release == nullptr) { + return EINVAL; + } + + for (int64_t i = 0; i < schema_->n_children; i++) { + PostgresCopyFieldWriter* child_writer = nullptr; + NANOARROW_RETURN_NOT_OK( + MakeCopyFieldWriter(schema_->children[i], &child_writer, error)); + root_writer_.AppendChild(std::unique_ptr(child_writer)); + } + + return NANOARROW_OK; + } + + const struct ArrowBuffer& WriteBuffer() const { return buffer_.value; } + + void Rewind() { + records_written_ = 0; + buffer_->size_bytes = 0; + } + + private: + PostgresCopyFieldTupleWriter root_writer_; + struct ArrowSchema* schema_; + Handle array_view_; + Handle buffer_; + int64_t records_written_ = 0; +}; + + } // namespace adbcpq diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc deleted file mode 100644 index 201aa223a2..0000000000 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ /dev/null @@ -1,1379 +0,0 @@ -// Licensed to the Apache Software Foundation 
(ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include - -#include -#include -#include - -#include "postgres_copy_reader.h" -#include "validation/adbc_validation_util.h" - -namespace adbcpq { - -class PostgresCopyStreamTester { - public: - ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(reader_.Init(root_type)); - NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error)); - NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error)); - return NANOARROW_OK; - } - - ArrowErrorCode ReadAll(ArrowBufferView* data, ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(reader_.ReadHeader(data, error)); - - int result; - do { - result = reader_.ReadRecord(data, error); - } while (result == NANOARROW_OK); - - return result; - } - - void GetSchema(ArrowSchema* out) { reader_.GetSchema(out); } - - ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error = nullptr) { - return reader_.GetArray(out, error); - } - - private: - PostgresCopyStreamReader reader_; -}; - -class PostgresCopyStreamWriteTester { - public: - ArrowErrorCode Init(struct ArrowSchema* schema, struct ArrowArray* array, - struct ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(writer_.Init(schema)); - 
NANOARROW_RETURN_NOT_OK(writer_.InitFieldWriters(error)); - NANOARROW_RETURN_NOT_OK(writer_.SetArray(array)); - return NANOARROW_OK; - } - - ArrowErrorCode WriteAll(struct ArrowError* error) { - NANOARROW_RETURN_NOT_OK(writer_.WriteHeader(error)); - - int result; - do { - result = writer_.WriteRecord(error); - } while (result == NANOARROW_OK); - - return result; - } - - ArrowErrorCode WriteArray(struct ArrowArray* array, struct ArrowError* error) { - writer_.SetArray(array); - int result; - do { - result = writer_.WriteRecord(error); - } while (result == NANOARROW_OK); - - return result; - } - - const struct ArrowBuffer& WriteBuffer() const { return writer_.WriteBuffer(); } - - void Rewind() { writer_.Rewind(); } - - private: - PostgresCopyStreamWriter writer_; -}; - -// COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS -// drvd("col")) TO STDOUT; -static uint8_t kTestPgCopyBoolean[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBoolean; - data.size_bytes = sizeof(kTestPgCopyBoolean); - - auto col_type = PostgresType(PostgresTypeId::kBool); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean, sizeof(kTestPgCopyBoolean)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - const uint8_t* validity = - 
reinterpret_cast(array->children[0]->buffers[0]); - const uint8_t* data_buffer = - reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_TRUE(ArrowBitGet(data_buffer, 0)); - ASSERT_FALSE(ArrowBitGet(data_buffer, 1)); - ASSERT_FALSE(ArrowBitGet(data_buffer, 2)); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteBoolean) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - adbc_validation::Handle buffer; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BOOL}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {true, false, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyBoolean) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyBoolean[i]); - } -} - -// COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopySmallInt[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xff, 0xff, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - 
-TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopySmallInt; - data.size_bytes = sizeof(kTestPgCopySmallInt); - - auto col_type = PostgresType(PostgresTypeId::kInt2); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopySmallInt, sizeof(kTestPgCopySmallInt)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt8) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT8}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via 
PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); - } -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt16) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT16}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); - } -} - -// COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyInteger[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, - 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadInteger) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyInteger; - data.size_bytes = sizeof(kTestPgCopyInteger); - - auto 
col_type = PostgresType(PostgresTypeId::kInt4); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInteger, sizeof(kTestPgCopyInteger)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt32) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyInteger) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < 
buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); - } -} - -// COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyBigInt[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBigInt; - data.size_bytes = sizeof(kTestPgCopyBigInt); - - auto col_type = PostgresType(PostgresTypeId::kInt8); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBigInt, sizeof(kTestPgCopyBigInt)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - 
ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt64) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT64}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyBigInt) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyBigInt[i]); - } -} - -// COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), -// (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyReal[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xc2, 0xf6, 0xe9, - 0x79, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xbf, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, - 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyReal; - data.size_bytes = sizeof(kTestPgCopyReal); - - auto col_type = PostgresType(PostgresTypeId::kFloat4); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - 
ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyReal, sizeof(kTestPgCopyReal)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_FLOAT_EQ(data_buffer[0], -123.456); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_FLOAT_EQ(data_buffer[3], 123.456); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteReal) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_FLOAT}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123.456, -1, 1, 123.456, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyReal) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyReal[i]) << " mismatch at index: " << i; - } -} - -// COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" 
FROM ( VALUES (-123.456), (-1), -// (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyDoublePrecision[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xc0, 0x5e, 0xdd, - 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xbf, 0xf0, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x3f, 0xf0, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, - 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadDoublePrecision) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyDoublePrecision; - data.size_bytes = sizeof(kTestPgCopyDoublePrecision); - - auto col_type = PostgresType(PostgresTypeId::kFloat8); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDoublePrecision, - sizeof(kTestPgCopyDoublePrecision)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_DOUBLE_EQ(data_buffer[0], -123.456); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - 
ASSERT_DOUBLE_EQ(data_buffer[3], 123.456); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteDoublePrecision) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DOUBLE}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123.456, -1, 1, 123.456, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyDoublePrecision) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyDoublePrecision[i]); - } -} - -static uint8_t kTestPgCopyDate[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, - 0x71, 0x54, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x8e, 0xad, 0x00, 0x01, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadDate) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyDate; - data.size_bytes = sizeof(kTestPgCopyDate); - - auto col_type = PostgresType(PostgresTypeId::kDate); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDate, sizeof(kTestPgCopyDate)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - 
ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(data_buffer[0], -25567); - ASSERT_EQ(data_buffer[1], 47482); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteDate) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DATE32}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-25567, 47482, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyDate) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyDate[i]); - } -} - - -// For full coverage, ensure that this contains NUMERIC examples that: -// - Have >= four zeroes to the left of the decimal point -// - Have >= four zeroes to the right of the decimal point -// - Include special values (nan, -inf, inf, NULL) -// - Have >= four trailing zeroes to the right of the decimal point -// - Have >= four leading zeroes before the first digit to the right of the decimal point -// - Is < 0 (negative) -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (1000000), ('0.00001234'), -// ('1.0000'), 
(-123.456), (123.456), ('nan'), ('-inf'), ('inf'), (NULL)) AS drvd(col)) TO -// STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyNumeric[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, - 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0xf0, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0xd0, 0x00, 0x00, 0x20, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyNumeric; - data.size_bytes = sizeof(kTestPgCopyNumeric); - - auto col_type = PostgresType(PostgresTypeId::kNumeric); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyNumeric, sizeof(kTestPgCopyNumeric)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 9); - ASSERT_EQ(array->n_children, 1); - - nanoarrow::UniqueSchema schema; - tester.GetSchema(schema.get()); - - nanoarrow::UniqueArrayView array_view; - ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), 
schema.get(), nullptr), - NANOARROW_OK); - ASSERT_EQ(array_view->children[0]->storage_type, NANOARROW_TYPE_STRING); - ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); - - auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_TRUE(ArrowBitGet(validity, 4)); - ASSERT_TRUE(ArrowBitGet(validity, 5)); - ASSERT_TRUE(ArrowBitGet(validity, 6)); - ASSERT_TRUE(ArrowBitGet(validity, 7)); - ASSERT_FALSE(ArrowBitGet(validity, 8)); - - struct ArrowStringView item; - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 0); - EXPECT_EQ(std::string(item.data, item.size_bytes), "1000000"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 1); - EXPECT_EQ(std::string(item.data, item.size_bytes), "0.00001234"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 2); - EXPECT_EQ(std::string(item.data, item.size_bytes), "1.0000"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 3); - EXPECT_EQ(std::string(item.data, item.size_bytes), "-123.456"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 4); - EXPECT_EQ(std::string(item.data, item.size_bytes), "123.456"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 5); - EXPECT_EQ(std::string(item.data, item.size_bytes), "nan"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 6); - EXPECT_EQ(std::string(item.data, item.size_bytes), "-inf"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 7); - EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); -} - -// This buffer is similar to the read variant above but removes special values -// nan, ±inf as they are not supported via the Arrow Decimal types -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), -// ('0.00001234'), 
(1.0000), (123.456), (1000000)) AS drvd(col)) -// TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyNumericWrite[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, - 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; - constexpr int32_t size = 128; - constexpr int32_t precision = 38; - constexpr int32_t scale = 8; - - struct ArrowDecimal decimal1; - struct ArrowDecimal decimal2; - struct ArrowDecimal decimal3; - struct ArrowDecimal decimal4; - struct ArrowDecimal decimal5; - - ArrowDecimalInit(&decimal1, size, 19, 8); - ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, size, 19, 8); - ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 19, 8); - ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, size, 19, 8); - ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, size, 19, 8); - ArrowDecimalSetInt(&decimal5, 100000000000000); - - const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; - - ArrowSchemaInit(&schema.value); - ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); - 
ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], - type, precision, scale), 0); - ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, - &na_error, values), ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]) << " at position " << i; - } -} - -// COPY (SELECT CAST(col AS TIMESTAMP) FROM ( VALUES ('1900-01-01 12:34:56'), -// ('2100-01-01 12:34:56'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); -static uint8_t kTestPgCopyTimestamp[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, - 0x00, 0x08, 0xff, 0xf4, 0xc9, 0xf9, 0x07, 0xe5, 0x9c, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0b, 0x36, 0x30, 0x2d, 0xa5, - 0xfc, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadTimestamp) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyTimestamp; - data.size_bytes = sizeof(kTestPgCopyTimestamp); - - auto col_type = PostgresType(PostgresTypeId::kTimestamp); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyTimestamp, sizeof(kTestPgCopyTimestamp)); - ASSERT_EQ(data.size_bytes, 0); - - 
nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 3)); - - ASSERT_EQ(data_buffer[0], -2208943504000000); - ASSERT_EQ(data_buffer[1], 4102490096000000); -} - -using TimestampTestParamType = std::tuple>>; - -class PostgresCopyWriteTimestampTest : public testing::TestWithParam< - TimestampTestParamType> { -}; - -TEST_P(PostgresCopyWriteTimestampTest, WritesProperBufferValues) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - - TimestampTestParamType parameters = GetParam(); - enum ArrowTimeUnit unit = std::get<0>(parameters); - const char* timezone = std::get<1>(parameters); - - const std::vector> values = std::get<2>(parameters); - - ArrowSchemaInit(&schema.value); - ArrowSchemaSetTypeStruct(&schema.value, 1); - ArrowSchemaSetTypeDateTime(schema->children[0], - NANOARROW_TYPE_TIMESTAMP, - unit, - timezone); - ArrowSchemaSetName(schema->children[0], "col"); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, - &array.value, - &na_error, - values), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyTimestamp) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyTimestamp[i]); 
- } -} - -static const std::vector ts_values { - {NANOARROW_TIME_UNIT_SECOND, nullptr, - {-2208943504, 4102490096, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, nullptr, - {-2208943504000, 4102490096000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, nullptr, - {-2208943504000000, 4102490096000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, nullptr, - {-2208943504000000000, 4102490096000000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_SECOND, "UTC", - {-2208943504, 4102490096, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, "UTC", - {-2208943504000, 4102490096000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, "UTC", - {-2208943504000000, 4102490096000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, "UTC", - {-2208943504000000000, 4102490096000000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_SECOND, "America/New_York", - {-2208943504, 4102490096, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, "America/New_York", - {-2208943504000, 4102490096000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, "America/New_York", - {-2208943504000000, 4102490096000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, "America/New_York", - {-2208943504000000000, 4102490096000000000, std::nullopt}}, -}; - -INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteTimestamp, - PostgresCopyWriteTimestampTest, - testing::ValuesIn(ts_values)); - -// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-1 months -2 days -4 seconds'), -// ('1 months 2 days 4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); -static uint8_t kTestPgCopyInterval[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, - 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff}; - 
-TEST(PostgresCopyUtilsTest, PostgresCopyReadInterval) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyInterval; - data.size_bytes = sizeof(kTestPgCopyInterval); - - auto col_type = PostgresType(PostgresTypeId::kInterval); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInterval, sizeof(kTestPgCopyInterval)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - nanoarrow::UniqueSchema schema; - tester.GetSchema(schema.get()); - - nanoarrow::UniqueArrayView array_view; - ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), - NANOARROW_OK); - ASSERT_EQ(array_view->children[0]->storage_type, - NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); - ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); - - auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - struct ArrowInterval interval; - ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); - ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 0, &interval); - ASSERT_EQ(interval.months, -1); - ASSERT_EQ(interval.days, -2); - ASSERT_EQ(interval.ns, -4000000000); - ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 1, &interval); - ASSERT_EQ(interval.months, 1); - ASSERT_EQ(interval.days, 2); - ASSERT_EQ(interval.ns, 4000000000); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInterval) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - const enum ArrowType type = 
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO; - // values are days, months, ns - struct ArrowInterval neg_interval; - struct ArrowInterval pos_interval; - - ArrowIntervalInit(&neg_interval, type); - ArrowIntervalInit(&pos_interval, type); - - neg_interval.months = -1; - neg_interval.days = -2; - neg_interval.ns = -4000000000; - - pos_interval.months = 1; - pos_interval.days = 2; - pos_interval.ns = 4000000000; - - const std::vector> values = { - &neg_interval, &pos_interval, std::nullopt}; - - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), ADBC_STATUS_OK); - - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyInterval) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInterval[i]); - } -} - -// Writing a DURATION from NANOARROW produces INTERVAL in postgres without day/month -// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-4 seconds'), -// ('4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); -static uint8_t kTestPgCopyDuration[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff}; -using DurationTestParamType = std::tuple>>; - -class 
PostgresCopyWriteDurationTest : public testing::TestWithParam< - DurationTestParamType> {}; - -TEST_P(PostgresCopyWriteDurationTest, WritesProperBufferValues) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - const enum ArrowType type = NANOARROW_TYPE_DURATION; - - DurationTestParamType parameters = GetParam(); - enum ArrowTimeUnit unit = std::get<0>(parameters); - const std::vector> values = std::get<1>(parameters); - - ArrowSchemaInit(&schema.value); - ArrowSchemaSetTypeStruct(&schema.value, 1); - ArrowSchemaSetTypeDateTime(schema->children[0], type, unit, nullptr); - ArrowSchemaSetName(schema->children[0], "col"); - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyDuration) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyDuration[i]); - } -} - -static const std::vector duration_params { - {NANOARROW_TIME_UNIT_SECOND, {-4, 4, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, {-4000, 4000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, {-4000000, 4000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, {-4000000000, 4000000000, std::nullopt}}, -}; - -INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteDuration, - PostgresCopyWriteDurationTest, - testing::ValuesIn(duration_params)); - -// COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), -// (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyText[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 
0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, - 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyText; - data.size_bytes = sizeof(kTestPgCopyText); - - auto col_type = PostgresType(PostgresTypeId::kText); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyText, sizeof(kTestPgCopyText)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto offsets = reinterpret_cast(array->children[0]->buffers[1]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 3); - ASSERT_EQ(offsets[2], 7); - ASSERT_EQ(offsets[3], 7); - - ASSERT_EQ(std::string(data_buffer + 0, 3), "abc"); - ASSERT_EQ(std::string(data_buffer + 3, 4), "1234"); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteString) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_STRING}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), - ADBC_STATUS_OK); - - 
PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); - } -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteLargeString) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ( - adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_LARGE_STRING}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); - } -} - -// COPY (SELECT CAST("col" AS BYTEA) AS "col" FROM ( VALUES (''), ('\x0001'), -// ('\x01020304'), ('\xFEFF'), (NULL)) AS drvd("col")) TO STDOUT -// WITH (FORMAT binary); -static uint8_t kTestPgCopyBinary[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, - 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, 
- 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBinary) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBinary; - data.size_bytes = sizeof(kTestPgCopyBinary); - - auto col_type = PostgresType(PostgresTypeId::kBytea); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBinary, sizeof(kTestPgCopyBinary)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto offsets = reinterpret_cast(array->children[0]->buffers[1]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 0); - ASSERT_EQ(offsets[2], 2); - ASSERT_EQ(offsets[3], 6); - ASSERT_EQ(offsets[4], 8); - ASSERT_EQ(offsets[5], 8); - - ASSERT_EQ(data_buffer[0], 0x00); - ASSERT_EQ(data_buffer[1], 0x01); - ASSERT_EQ(data_buffer[2], 0x01); - ASSERT_EQ(data_buffer[3], 0x02); - ASSERT_EQ(data_buffer[4], 0x03); - ASSERT_EQ(data_buffer[5], 0x04); - ASSERT_EQ(data_buffer[6], 0xfe); - ASSERT_EQ(data_buffer[7], 0xff); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteBinary) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BINARY}}), - ADBC_STATUS_OK); - 
ASSERT_EQ(adbc_validation::MakeBatch>( - &schema.value, &array.value, &na_error, - { - std::vector{}, - std::vector{std::byte{0x00}, std::byte{0x01}}, - std::vector{ - std::byte{0x01}, std::byte{0x02}, std::byte{0x03}, std::byte{0x04} - }, - std::vector{std::byte{0xfe}, std::byte{0xff}}, - std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyBinary) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyBinary[i]) << "failure at index " << i; - } -} - - -// COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, -// 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyIntegerArray[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0x85, 0x00, 0x00, 0x00, - 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, - 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyIntegerArray; - data.size_bytes = 
sizeof(kTestPgCopyIntegerArray); - - auto col_type = PostgresType(PostgresTypeId::kInt4).Array(); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyIntegerArray, - sizeof(kTestPgCopyIntegerArray)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - ASSERT_EQ(array->children[0]->n_children, 1); - ASSERT_EQ(array->children[0]->children[0]->length, 5); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto offsets = reinterpret_cast(array->children[0]->buffers[1]); - auto data_buffer = - reinterpret_cast(array->children[0]->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 2); - ASSERT_EQ(offsets[2], 5); - ASSERT_EQ(offsets[3], 5); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 0); - ASSERT_EQ(data_buffer[3], 1); - ASSERT_EQ(data_buffer[4], 123); -} - -// CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); -// COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), -// ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyCustomRecord[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, - 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x00, 0x7b, 0x00, 0x00, 0x02, 0xbd, 0x00, 
0x00, 0x00, 0x08, 0x40, 0x7c, 0x8c, - 0x9f, 0xbe, 0x76, 0xc8, 0xb4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x75, 0x9a, 0xd9, - 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyCustomRecord; - data.size_bytes = sizeof(kTestPgCopyCustomRecord); - - auto col_type = PostgresType(PostgresTypeId::kRecord); - col_type.AppendChild("nested1", PostgresType(PostgresTypeId::kInt4)); - col_type.AppendChild("nested2", PostgresType(PostgresTypeId::kFloat8)); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyCustomRecord, - sizeof(kTestPgCopyCustomRecord)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - ASSERT_EQ(array->children[0]->n_children, 2); - ASSERT_EQ(array->children[0]->children[0]->length, 3); - ASSERT_EQ(array->children[0]->children[1]->length, 3); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer1 = - reinterpret_cast(array->children[0]->children[0]->buffers[1]); - auto data_buffer2 = - reinterpret_cast(array->children[0]->children[1]->buffers[1]); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(data_buffer1[0], 123); - ASSERT_EQ(data_buffer1[1], 12); - ASSERT_EQ(data_buffer1[2], 0); - - ASSERT_DOUBLE_EQ(data_buffer2[0], 456.789); - ASSERT_DOUBLE_EQ(data_buffer2[1], 345.678); - 
ASSERT_DOUBLE_EQ(data_buffer2[2], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteMultiBatch) { - // Regression test for https://github.com/apache/arrow-adbc/issues/1310 - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), - NANOARROW_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - NANOARROW_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - size_t buf_size = sizeof(kTestPgCopyInteger) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); - } - - tester.Rewind(); - ASSERT_EQ(tester.WriteArray(&array.value, nullptr), ENODATA); - - buf = tester.WriteBuffer(); - // Ignore the header and footer - buf_size = sizeof(kTestPgCopyInteger) - 21; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i + 19]); - } -} - -} // namespace adbcpq diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 68fd45a944..601716b163 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -39,7 +39,7 @@ #include "common/utils.h" #include "connection.h" #include "error.h" -#include "postgres_copy_reader.h" +#include "copy/writer.h" #include "postgres_type.h" #include "postgres_util.h" #include "result_helper.h" diff --git a/c/driver/postgresql/statement.h b/c/driver/postgresql/statement.h index c822390d8c..d469ca112a 100644 --- a/c/driver/postgresql/statement.h +++ 
b/c/driver/postgresql/statement.h @@ -27,7 +27,7 @@ #include #include "common/utils.h" -#include "postgres_copy_reader.h" +#include "copy/reader.h" #include "postgres_type.h" #define ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES \ diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index 9bcc414988..d69a14bbe4 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -21,7 +21,9 @@ files_to_vendor <- c( "../../adbc.h", "../../c/driver/postgresql/postgres_util.h", "../../c/driver/postgresql/postgres_type.h", - "../../c/driver/postgresql/postgres_copy_reader.h", + "../../c/driver/postgresql/copy/copy_common.h", + "../../c/driver/postgresql/copy/reader.h", + "../../c/driver/postgresql/copy/writer.h", "../../c/driver/postgresql/statement.h", "../../c/driver/postgresql/statement.cc", "../../c/driver/postgresql/connection.h", @@ -56,6 +58,10 @@ if (all(file.exists(files_to_vendor))) { ) ) + if (!dir.exists("src/copy")) { + dir.create("src/copy") + } + if (all(file.copy(files_to_vendor, "src"))) { file.rename( c( @@ -64,7 +70,10 @@ if (all(file.exists(files_to_vendor))) { "src/nanoarrow.hpp", "src/options.h", "src/utils.c", - "src/utils.h" + "src/utils.h", + "src/copy_common.h", + "src/reader.h", + "src/writer.h" ), c( "src/nanoarrow/nanoarrow.c", @@ -72,7 +81,10 @@ if (all(file.exists(files_to_vendor))) { "src/nanoarrow/nanoarrow.hpp", "src/common/options.h", "src/common/utils.c", - "src/common/utils.h" + "src/common/utils.h", + "src/copy/copy_common.h", + "src/copy/reader.h", + "src/copy/writer.h" ) ) cat("All files successfully copied to src/\n") diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore index 44d84da682..8a47096b86 100644 --- a/r/adbcpostgresql/src/.gitignore +++ b/r/adbcpostgresql/src/.gitignore @@ -29,7 +29,6 @@ postgresql.cc statement.h statement.cc postgres_type.h -postgres_copy_reader.h postgres_util.h result_helper.h result_helper.cc diff --git a/r/adbcpostgresql/src/copy/.gitignore 
b/r/adbcpostgresql/src/copy/.gitignore new file mode 100644 index 0000000000..53201cb854 --- /dev/null +++ b/r/adbcpostgresql/src/copy/.gitignore @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +*.h