From 2fde0a21f8c82f511cc618253d7b731a561b93b1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 4 Jan 2024 17:07:38 -0500 Subject: [PATCH 1/9] Split source files --- c/driver/postgresql/copy/copy_common.h | 45 + c/driver/postgresql/copy/reader.h | 1026 ++++++++++ c/driver/postgresql/copy/writer.h | 672 +++++++ c/driver/postgresql/postgres_copy_reader.h | 1663 ----------------- .../postgresql/postgres_copy_reader_test.cc | 3 +- c/driver/postgresql/statement.cc | 2 +- c/driver/postgresql/statement.h | 2 +- 7 files changed, 1747 insertions(+), 1666 deletions(-) create mode 100644 c/driver/postgresql/copy/copy_common.h create mode 100644 c/driver/postgresql/copy/reader.h create mode 100644 c/driver/postgresql/copy/writer.h diff --git a/c/driver/postgresql/copy/copy_common.h b/c/driver/postgresql/copy/copy_common.h new file mode 100644 index 0000000000..04205907da --- /dev/null +++ b/c/driver/postgresql/copy/copy_common.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +// Windows +#define NOMINMAX + +#include + +// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA +#if defined(_WIN32) && !defined(MSVC) && !defined(ENODATA) +#define ENODATA 120 +#endif + +namespace adbcpq { + +// "PGCOPY\n\377\r\n\0" +static int8_t kPgCopyBinarySignature[] = {0x50, 0x47, 0x43, 0x4F, + 0x50, 0x59, 0x0A, static_cast(0xFF), + 0x0D, 0x0A, 0x00}; + +// The maximum value in microseconds that can be converted into nanoseconds +// without overflow +constexpr int64_t kMaxSafeMicrosToNanos = 9223372036854775L; + +// The minimum value in microseconds that can be converted into nanoseconds +// without overflow +constexpr int64_t kMinSafeMicrosToNanos = -9223372036854775L; + +} // namespace adbcpq diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h new file mode 100644 index 0000000000..8ba0568ad9 --- /dev/null +++ b/c/driver/postgresql/copy/reader.h @@ -0,0 +1,1026 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "copy_common.h" +#include "../postgres_type.h" +#include "../postgres_util.h" + +namespace adbcpq { + +// Read a value from the buffer without checking the buffer size. Advances +// the cursor of data and reduces its size by sizeof(T). +template +inline T ReadUnsafe(ArrowBufferView* data) { + T out; + memcpy(&out, data->data.data, sizeof(T)); + out = SwapNetworkToHost(out); + data->data.as_uint8 += sizeof(T); + data->size_bytes -= sizeof(T); + return out; +} + +// Define some explicit specializations for types that don't have a SwapNetworkToHost +// overload. +template <> +inline int8_t ReadUnsafe(ArrowBufferView* data) { + int8_t out = data->data.as_int8[0]; + data->data.as_uint8 += sizeof(int8_t); + data->size_bytes -= sizeof(int8_t); + return out; +} + +template <> +inline int16_t ReadUnsafe(ArrowBufferView* data) { + return static_cast(ReadUnsafe(data)); +} + +template <> +inline int32_t ReadUnsafe(ArrowBufferView* data) { + return static_cast(ReadUnsafe(data)); +} + +template <> +inline int64_t ReadUnsafe(ArrowBufferView* data) { + return static_cast(ReadUnsafe(data)); +} + +template +ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { // bounds-checked ReadUnsafe(): sets error and returns EINVAL if fewer than sizeof(T) bytes remain + if (data->size_bytes < static_cast(sizeof(T))) { + ArrowErrorSet(error, "Unexpected end of input (expected %d bytes but found %ld)", + static_cast(sizeof(T)), + static_cast(data->size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + *out = ReadUnsafe(data); + return NANOARROW_OK; +} + +class PostgresCopyFieldReader { // base class of all field readers; the default Read() returns ENOTSUP + public: + PostgresCopyFieldReader() : validity_(nullptr), offsets_(nullptr), data_(nullptr) { + memset(&schema_view_, 0, sizeof(ArrowSchemaView)); + } + + virtual ~PostgresCopyFieldReader() {} + + void Init(const PostgresType& pg_type) { pg_type_ = pg_type; } + + const PostgresType& InputType() const { return pg_type_; } + + virtual ArrowErrorCode InitSchema(ArrowSchema* 
schema) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); + return NANOARROW_OK; + } + + virtual ArrowErrorCode InitArray(ArrowArray* array) { + // Cache some buffer pointers + validity_ = ArrowArrayValidityBitmap(array); + for (int32_t i = 0; i < 3; i++) { + switch (schema_view_.layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + if (schema_view_.layout.element_size_bits[i] == 32) { // only 32-bit offset buffers are cached in offsets_ + offsets_ = ArrowArrayBuffer(array, i); + } + break; + case NANOARROW_BUFFER_TYPE_DATA: + data_ = ArrowArrayBuffer(array, i); + break; + default: + break; + } + } + + return NANOARROW_OK; + } + + virtual ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, + ArrowArray* array, ArrowError* error) { + return ENOTSUP; // the base class reads nothing; subclasses override Read() + } + + virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { + return NANOARROW_OK; + } + + protected: + PostgresType pg_type_; + ArrowSchemaView schema_view_; + ArrowBitmap* validity_; + ArrowBuffer* offsets_; + ArrowBuffer* data_; + std::vector> children_; + + ArrowErrorCode AppendValid(ArrowArray* array) { + if (validity_->buffer.data != nullptr) { // a validity bit is appended only once the bitmap has storage + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(validity_, true, 1)); + } + + array->length++; + return NANOARROW_OK; + } +}; + +// Reader for a Postgres boolean (one byte -> bitmap) +class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { + public: + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + if (field_size_bytes <= 0) { // no payload bytes: append a null + return ArrowArrayAppendNull(array, 1); + } + + if (field_size_bytes != 1) { + ArrowErrorSet(error, "Expected field with one byte but found field with %d bytes", + static_cast(field_size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + int64_t bytes_required = _ArrowBytesForBits(array->length + 1); + if (bytes_required > data_->size_bytes) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(data_, 0, bytes_required - 
data_->size_bytes)); + } + + if (ReadUnsafe(data)) { + ArrowBitSet(data_->data, array->length); + } else { + ArrowBitClear(data_->data, array->length); + } + + return AppendValid(array); + } +}; + +// Reader for Pg->Arrow conversions whose representations are identical minus +// the bswap from network endian. This includes all integral and float types. +template +class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { + public: + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + if (field_size_bytes <= 0) { // no payload bytes: append a null + return ArrowArrayAppendNull(array, 1); + } + + if (field_size_bytes != static_cast(sizeof(T))) { + ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", + static_cast(sizeof(T)), + static_cast(field_size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + T value = kOffset + ReadUnsafe(data); // NOTE(review): kOffset is not declared in the visible text (presumably a template parameter) -- confirm + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value, sizeof(T))); + return AppendValid(array); + } +}; + +// Reader for Intervals: a 16-byte field read as int64 microseconds, then days, then months +class PostgresCopyIntervalFieldReader : public PostgresCopyFieldReader { + public: + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + if (field_size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + if (field_size_bytes != 16) { + ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", + 16, + static_cast(field_size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + // postgres stores time as usec, arrow stores as ns + const int64_t time_usec = ReadUnsafe(data); + int64_t time; + + if (time_usec > kMaxSafeMicrosToNanos || time_usec < kMinSafeMicrosToNanos) { + ArrowErrorSet(error, + "[libpq] Interval with time value %" PRId64 + " usec would overflow when converting to nanoseconds", + time_usec); + return EINVAL; + } + + time = time_usec * 1000; // cannot overflow: range-checked just above + + const int32_t days = ReadUnsafe(data); + const 
int32_t months = ReadUnsafe(data); + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &months, sizeof(int32_t))); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &days, sizeof(int32_t))); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &time, sizeof(int64_t))); + return AppendValid(array); + } +}; + +// Converts COPY resulting from the Postgres NUMERIC type into a string. +// Rewritten based on the Postgres implementation of NUMERIC cast to string in +// src/backend/utils/adt/numeric.c : get_str_from_var() (Note that in the initial source, +// DEC_DIGITS is always 4 and DBASE is always 10000). +// +// Briefly, the Postgres representation of "numeric" is an array of int16_t ("digits") +// from most significant to least significant. Each "digit" is a value between 0000 and +// 9999. There are weight + 1 digits before the decimal point and dscale digits after the +// decimal point. Both of those values can be zero or negative. A "sign" component +// encodes the positive or negativeness of the value and is also used to encode special +// values (inf, -inf, and nan). 
+class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader { + public: + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + // -1 for NULL + if (field_size_bytes < 0) { + return ArrowArrayAppendNull(array, 1); + } + + // Read the input + if (data->size_bytes < static_cast(4 * sizeof(int16_t))) { + ArrowErrorSet(error, + "Expected at least %d bytes of field data for numeric copy data but " + "only %d bytes of input remain", + static_cast(4 * sizeof(int16_t)), + static_cast(data->size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + int16_t ndigits = ReadUnsafe(data); + int16_t weight = ReadUnsafe(data); + uint16_t sign = ReadUnsafe(data); + uint16_t dscale = ReadUnsafe(data); + + if (data->size_bytes < static_cast(ndigits * sizeof(int16_t))) { + ArrowErrorSet(error, + "Expected at least %d bytes of field data for numeric digits copy " + "data but only %d bytes of input remain", + static_cast(ndigits * sizeof(int16_t)), + static_cast(data->size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + digits_.clear(); + for (int16_t i = 0; i < ndigits; i++) { + digits_.push_back(ReadUnsafe(data)); + } + + // Handle special values + std::string special_value; + switch (sign) { + case kNumericNAN: + special_value = std::string("nan"); + break; + case kNumericPinf: + special_value = std::string("inf"); + break; + case kNumericNinf: + special_value = std::string("-inf"); + break; + case kNumericPos: + case kNumericNeg: + special_value = std::string(""); + break; + default: + ArrowErrorSet(error, + "Unexpected value for sign read from Postgres numeric field: %d", + static_cast(sign)); + return EINVAL; + } + + if (!special_value.empty()) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_, special_value.data(), special_value.size())); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes)); + return AppendValid(array); + } + + // Calculate string space 
requirement + int64_t max_chars_required = std::max(1, (weight + 1) * kDecDigits); + max_chars_required += dscale + kDecDigits + 2; + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(data_, max_chars_required)); + char* out0 = reinterpret_cast(data_->data + data_->size_bytes); + char* out = out0; + + // Build output string in-place, starting with the negative sign + if (sign == kNumericNeg) { + *out++ = '-'; + } + + // ...then digits before the decimal point + int d; + int d1; + int16_t dig; + + if (weight < 0) { + d = weight + 1; + *out++ = '0'; + } else { + for (d = 0; d <= weight; d++) { + if (d < ndigits) { + dig = digits_[d]; + } else { + dig = 0; + } + + // To strip leading zeroes + int append = (d > 0); + + for (const auto pow10 : {1000, 100, 10, 1}) { + d1 = dig / pow10; + dig -= d1 * pow10; + append |= (d1 > 0); + if (append) { + *out++ = d1 + '0'; + } + } + } + } + + // ...then the decimal point + digits after it. This may write more digits + // than specified by dscale so we need to keep track of how many we want to + // keep here. 
+ int64_t actual_chars_required = out - out0; + + if (dscale > 0) { + *out++ = '.'; + actual_chars_required += dscale + 1; + + for (int i = 0; i < dscale; i++, d++, i += kDecDigits) { + if (d >= 0 && d < ndigits) { + dig = digits_[d]; + } else { + dig = 0; + } + + for (const auto pow10 : {1000, 100, 10, 1}) { + d1 = dig / pow10; + dig -= d1 * pow10; + *out++ = d1 + '0'; + } + } + } + + // Update data buffer size and add offsets + data_->size_bytes += actual_chars_required; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes)); + return AppendValid(array); + } + + private: + std::vector digits_; + + // Number of decimal digits per Postgres digit + static const int kDecDigits = 4; + // The "base" of the Postgres representation (i.e., each "digit" is 0 to 9999) + static const int kNBase = 10000; + // Valid values for the sign component + static const uint16_t kNumericPos = 0x0000; + static const uint16_t kNumericNeg = 0x4000; + static const uint16_t kNumericNAN = 0xC000; + static const uint16_t kNumericPinf = 0xD000; + static const uint16_t kNumericNinf = 0xF000; +}; + +// Reader for Pg->Arrow conversions whose Arrow representation is simply the +// bytes of the field representation. This can be used with binary and string +// Arrow types and any Postgres type. 
+class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { + public: + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + // -1 for NULL (0 would be empty string) + if (field_size_bytes < 0) { + return ArrowArrayAppendNull(array, 1); + } + + if (field_size_bytes > data->size_bytes) { + ArrowErrorSet(error, "Expected %d bytes of field data but got %d bytes of input", + static_cast(field_size_bytes), + static_cast(data->size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes)); + data->data.as_uint8 += field_size_bytes; + data->size_bytes -= field_size_bytes; + + int32_t* offsets = reinterpret_cast(offsets_->data); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(offsets_, offsets[array->length] + field_size_bytes)); + + return AppendValid(array); + } +}; + +class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { + public: + void InitChild(std::unique_ptr child) { + child_ = std::move(child); + child_->Init(pg_type_.child(0)); + } + + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); + NANOARROW_RETURN_NOT_OK(child_->InitSchema(schema->children[0])); + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); + NANOARROW_RETURN_NOT_OK(child_->InitArray(array->children[0])); + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + if (field_size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + // Keep the cursor where we start to parse the array so we can check + // the number of bytes read against the field size when finished + const uint8_t* data0 = data->data.as_uint8; + + int32_t n_dim; + 
NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_dim, error)); + int32_t flags; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); + uint32_t element_type_oid; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &element_type_oid, error)); + + // We could validate the OID here, but this is a poor fit for all cases + // (e.g. testing) since the OID can be specific to each database + + if (n_dim < 0) { + ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", + static_cast(n_dim)); // NOLINT(runtime/int) + return EINVAL; + } + + // This is apparently allowed + if (n_dim == 0) { + NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); + return NANOARROW_OK; + } + + int64_t n_items = 1; + for (int32_t i = 0; i < n_dim; i++) { + int32_t dim_size; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &dim_size, error)); + n_items *= dim_size; + + int32_t lower_bound; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &lower_bound, error)); + if (lower_bound != 1) { + ArrowErrorSet(error, "Array value with lower bound != 1 is not supported"); + return EINVAL; + } + } + + for (int64_t i = 0; i < n_items; i++) { + int32_t child_field_size_bytes; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); + NANOARROW_RETURN_NOT_OK( + child_->Read(data, child_field_size_bytes, array->children[0], error)); + } + + int64_t bytes_read = data->data.as_uint8 - data0; + if (bytes_read != field_size_bytes) { + ArrowErrorSet(error, "Expected to read %d bytes from array field but read %d bytes", + static_cast(field_size_bytes), + static_cast(bytes_read)); // NOLINT(runtime/int) + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); + return NANOARROW_OK; + } + + private: + std::unique_ptr child_; +}; + +class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { + public: + void AppendChild(std::unique_ptr child) { + int64_t child_i = static_cast(children_.size()); + children_.push_back(std::move(child)); + 
children_[child_i]->Init(pg_type_.child(child_i)); + } + + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); + for (int64_t i = 0; i < schema->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + if (field_size_bytes < 0) { + return ArrowArrayAppendNull(array, 1); + } + + // Keep the cursor where we start to parse the field so we can check + // the number of bytes read against the field size when finished + const uint8_t* data0 = data->data.as_uint8; + + int32_t n_fields; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); + if (n_fields != array->n_children) { + ArrowErrorSet(error, "Expected nested record type to have %ld fields but got %d", + static_cast(array->n_children), // NOLINT(runtime/int) + static_cast(n_fields)); // NOLINT(runtime/int) + return EINVAL; + } + + for (int32_t i = 0; i < n_fields; i++) { + uint32_t child_oid; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_oid, error)); + + int32_t child_field_size_bytes; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); + int result = + children_[i]->Read(data, child_field_size_bytes, array->children[i], error); + + // On overflow, pretend all previous children for this struct were never + // appended to. This leaves array in a valid state in the specific case + // where EOVERFLOW was returned so that a higher level caller can attempt + // to try again after creating a new array. 
+ if (result == EOVERFLOW) { + for (int16_t j = 0; j < i; j++) { + array->children[j]->length--; + } + } + + if (result != NANOARROW_OK) { + return result; + } + } + + // field size == -1 means don't check (e.g., for a top-level row tuple) + int64_t bytes_read = data->data.as_uint8 - data0; + if (field_size_bytes != -1 && bytes_read != field_size_bytes) { + ArrowErrorSet(error, + "Expected to read %d bytes from record field but read %d bytes", + static_cast(field_size_bytes), + static_cast(bytes_read)); // NOLINT(runtime/int) + return EINVAL; + } + + array->length++; + return NANOARROW_OK; + } + + private: + std::vector> children_; +}; + +// Subtely different from a Record field item: field count is an int16_t +// instead of an int32_t and each field is not prefixed by its OID. +class PostgresCopyFieldTupleReader : public PostgresCopyFieldReader { + public: + void AppendChild(std::unique_ptr child) { + int64_t child_i = static_cast(children_.size()); + children_.push_back(std::move(child)); + children_[child_i]->Init(pg_type_.child(child_i)); + } + + ArrowErrorCode InitSchema(ArrowSchema* schema) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); + for (int64_t i = 0; i < schema->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode InitArray(ArrowArray* array) override { + NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); + } + + return NANOARROW_OK; + } + + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + int16_t n_fields; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); + if (n_fields == -1) { + return ENODATA; + } else if (n_fields != array->n_children) { + ArrowErrorSet(error, + "Expected -1 for end-of-stream or 
number of fields in output array " + "(%ld) but got %d", + static_cast(array->n_children), // NOLINT(runtime/int) + static_cast(n_fields)); // NOLINT(runtime/int) + return EINVAL; + } + + for (int16_t i = 0; i < n_fields; i++) { + int32_t child_field_size_bytes; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); + int result = + children_[i]->Read(data, child_field_size_bytes, array->children[i], error); + + // On overflow, pretend all previous children for this struct were never + // appended to. This leaves array in a valid state in the specific case + // where EOVERFLOW was returned so that a higher level caller can attempt + // to try again after creating a new array. + if (result == EOVERFLOW) { + for (int16_t j = 0; j < i; j++) { + array->children[j]->length--; + } + } + + if (result != NANOARROW_OK) { + return result; + } + } + + array->length++; + return NANOARROW_OK; + } + + private: + std::vector> children_; +}; + +// Factory for a PostgresCopyFieldReader that instantiates the proper subclass +// and gives a nice error for Postgres type -> Arrow type conversions that aren't +// supported. 
+static inline ArrowErrorCode ErrorCantConvert(ArrowError* error, + const PostgresType& pg_type, + const ArrowSchemaView& schema_view) { + ArrowErrorSet(error, "Can't convert Postgres type '%s' to Arrow type '%s'", + pg_type.typname().c_str(), + ArrowTypeString(schema_view.type)); // NOLINT(runtime/int) + return EINVAL; +} + +static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, + ArrowSchema* schema, + PostgresCopyFieldReader** out, + ArrowError* error) { + ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + switch (pg_type.type_id()) { + case PostgresTypeId::kBool: + *out = new PostgresCopyBooleanFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_INT16: + switch (pg_type.type_id()) { + case PostgresTypeId::kInt2: + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_INT32: + switch (pg_type.type_id()) { + case PostgresTypeId::kInt4: + case PostgresTypeId::kOid: + case PostgresTypeId::kRegproc: + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_INT64: + switch (pg_type.type_id()) { + case PostgresTypeId::kInt8: + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_FLOAT: + switch (pg_type.type_id()) { + case PostgresTypeId::kFloat4: + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_DOUBLE: + switch (pg_type.type_id()) { + case PostgresTypeId::kFloat8: + *out = new 
PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_STRING: + switch (pg_type.type_id()) { + case PostgresTypeId::kChar: + case PostgresTypeId::kVarchar: + case PostgresTypeId::kText: + case PostgresTypeId::kBpchar: + case PostgresTypeId::kName: + *out = new PostgresCopyBinaryFieldReader(); + return NANOARROW_OK; + case PostgresTypeId::kNumeric: + *out = new PostgresCopyNumericFieldReader(); + return NANOARROW_OK; + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_BINARY: + // No need to check pg_type here: we can return the bytes of any + // Postgres type as binary. + *out = new PostgresCopyBinaryFieldReader(); + return NANOARROW_OK; + + case NANOARROW_TYPE_LIST: + switch (pg_type.type_id()) { + case PostgresTypeId::kArray: { + if (pg_type.n_children() != 1) { + ArrowErrorSet( + error, "Expected Postgres array type to have one child but found %ld", + static_cast(pg_type.n_children())); // NOLINT(runtime/int) + return EINVAL; + } + + auto array_reader = std::unique_ptr( + new PostgresCopyArrayFieldReader()); + array_reader->Init(pg_type); + + PostgresCopyFieldReader* child_reader; + NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( + pg_type.child(0), schema->children[0], &child_reader, error)); + array_reader->InitChild(std::unique_ptr(child_reader)); + + *out = array_reader.release(); + return NANOARROW_OK; + } + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_STRUCT: + switch (pg_type.type_id()) { + case PostgresTypeId::kRecord: { + if (pg_type.n_children() != schema->n_children) { + ArrowErrorSet(error, + "Can't convert Postgres record type with %ld chlidren to Arrow " + "struct type with %ld children", + static_cast(pg_type.n_children()), // NOLINT(runtime/int) + static_cast(schema->n_children)); // NOLINT(runtime/int) + return EINVAL; + } + + auto record_reader = 
std::unique_ptr( + new PostgresCopyRecordFieldReader()); + record_reader->Init(pg_type); + + for (int64_t i = 0; i < pg_type.n_children(); i++) { + PostgresCopyFieldReader* child_reader; + NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( + pg_type.child(i), schema->children[i], &child_reader, error)); + record_reader->AppendChild( + std::unique_ptr(child_reader)); + } + + *out = record_reader.release(); + return NANOARROW_OK; + } + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + case NANOARROW_TYPE_DATE32: { + // 2000-01-01 + constexpr int32_t kPostgresDateEpoch = 10957; + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + } + + case NANOARROW_TYPE_TIME64: { + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + } + + case NANOARROW_TYPE_TIMESTAMP: + switch (pg_type.type_id()) { + case PostgresTypeId::kTimestamp: + case PostgresTypeId::kTimestamptz: { + // 2000-01-01 00:00:00.000000 in microseconds + constexpr int64_t kPostgresTimestampEpoch = 946684800000000; + *out = new PostgresCopyNetworkEndianFieldReader(); + return NANOARROW_OK; + } + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + switch (pg_type.type_id()) { + case PostgresTypeId::kInterval: { + *out = new PostgresCopyIntervalFieldReader(); + return NANOARROW_OK; + } + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + + default: + return ErrorCantConvert(error, pg_type, schema_view); + } +} + +class PostgresCopyStreamReader { + public: + ArrowErrorCode Init(PostgresType pg_type) { + if (pg_type.type_id() != PostgresTypeId::kRecord) { + return EINVAL; + } + + pg_type_ = std::move(pg_type); + root_reader_.Init(pg_type_); + array_size_approx_bytes_ = 0; + return NANOARROW_OK; + } + + int64_t array_size_approx_bytes() const { return array_size_approx_bytes_; } + + ArrowErrorCode SetOutputSchema(ArrowSchema* schema, ArrowError* error) { + if 
(std::string(schema_->format) != "+s") { + ArrowErrorSet( + error, + "Expected output schema of type struct but got output schema with format '%s'", + schema_->format); // NOLINT(runtime/int) + return EINVAL; + } + + if (schema_->n_children != root_reader_.InputType().n_children()) { + ArrowErrorSet(error, + "Expected output schema with %ld columns to match Postgres input but " + "got schema with %ld columns", + static_cast( // NOLINT(runtime/int) + root_reader_.InputType().n_children()), + static_cast(schema->n_children)); // NOLINT(runtime/int) + return EINVAL; + } + + schema_.reset(schema); + return NANOARROW_OK; + } + + ArrowErrorCode InferOutputSchema(ArrowError* error) { + schema_.reset(); + ArrowSchemaInit(schema_.get()); + NANOARROW_RETURN_NOT_OK(root_reader_.InputType().SetSchema(schema_.get())); + return NANOARROW_OK; + } + + ArrowErrorCode InitFieldReaders(ArrowError* error) { + if (schema_->release == nullptr) { + return EINVAL; + } + + const PostgresType& root_type = root_reader_.InputType(); + + for (int64_t i = 0; i < root_type.n_children(); i++) { + const PostgresType& child_type = root_type.child(i); + PostgresCopyFieldReader* child_reader; + NANOARROW_RETURN_NOT_OK( + MakeCopyFieldReader(child_type, schema_->children[i], &child_reader, error)); + root_reader_.AppendChild(std::unique_ptr(child_reader)); + } + + NANOARROW_RETURN_NOT_OK(root_reader_.InitSchema(schema_.get())); + return NANOARROW_OK; + } + + ArrowErrorCode ReadHeader(ArrowBufferView* data, ArrowError* error) { + if (data->size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { + ArrowErrorSet( + error, + "Expected PGCOPY signature of %ld bytes at beginning of stream but " + "found %ld bytes of input", + static_cast(sizeof(kPgCopyBinarySignature)), // NOLINT(runtime/int) + static_cast(data->size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + if (memcmp(data->data.data, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature)) != + 0) { + ArrowErrorSet(error, "Invalid PGCOPY 
signature at beginning of stream"); + return EINVAL; + } + + data->data.as_uint8 += sizeof(kPgCopyBinarySignature); + data->size_bytes -= sizeof(kPgCopyBinarySignature); + + uint32_t flags; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); + uint32_t extension_length; + NANOARROW_RETURN_NOT_OK(ReadChecked(data, &extension_length, error)); + + if (data->size_bytes < static_cast(extension_length)) { + ArrowErrorSet(error, + "Expected %ld bytes of extension metadata at start of stream but " + "found %ld bytes of input", + static_cast(extension_length), // NOLINT(runtime/int) + static_cast(data->size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + data->data.as_uint8 += extension_length; + data->size_bytes -= extension_length; + return NANOARROW_OK; + } + + ArrowErrorCode ReadRecord(ArrowBufferView* data, ArrowError* error) { + if (array_->release == nullptr) { + NANOARROW_RETURN_NOT_OK( + ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array_.get())); + NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); + array_size_approx_bytes_ = 0; + } + + const uint8_t* start = data->data.as_uint8; + NANOARROW_RETURN_NOT_OK(root_reader_.Read(data, -1, array_.get(), error)); + array_size_approx_bytes_ += (data->data.as_uint8 - start); + return NANOARROW_OK; + } + + ArrowErrorCode GetSchema(ArrowSchema* out) { + return ArrowSchemaDeepCopy(schema_.get(), out); + } + + ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error) { + if (array_->release == nullptr) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuildingDefault(array_.get(), error)); + ArrowArrayMove(array_.get(), out); + return NANOARROW_OK; + } + + const PostgresType& pg_type() const { return pg_type_; } + + private: + PostgresType pg_type_; + PostgresCopyFieldTupleReader root_reader_; + nanoarrow::UniqueSchema schema_; + nanoarrow::UniqueArray array_; + int64_t array_size_approx_bytes_; +}; + +} // 
namespace adbcpq diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h new file mode 100644 index 0000000000..e2ea32c0e2 --- /dev/null +++ b/c/driver/postgresql/copy/writer.h @@ -0,0 +1,672 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "copy_common.h" +#include "../postgres_util.h" + +namespace adbcpq { + +// The maximum value in seconds that can be converted into microseconds +// without overflow +constexpr int64_t kMaxSafeSecondsToMicros = 9223372036854L; + +// The minimum value in seconds that can be converted into microseconds +// without overflow +constexpr int64_t kMinSafeSecondsToMicros = -9223372036854L; + +// The maximum value in milliseconds that can be converted into microseconds +// without overflow +constexpr int64_t kMaxSafeMillisToMicros = 9223372036854775L; + +// The minimum value in milliseconds that can be converted into microseconds +// without overflow +constexpr int64_t kMinSafeMillisToMicros = -9223372036854775L; + + +// 2000-01-01 00:00:00.000000 in microseconds +constexpr int64_t kPostgresTimestampEpoch = 946684800000000L; + +// Write a value to a buffer without checking the buffer size. Advances +// the cursor of buffer and reduces it by sizeof(T) +template +inline void WriteUnsafe(ArrowBuffer* buffer, T in) { + const T value = SwapNetworkToHost(in); + ArrowBufferAppendUnsafe(buffer, &value, sizeof(T)); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int8_t in) { + ArrowBufferAppendUnsafe(buffer, &in, sizeof(int8_t)); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int16_t in) { + WriteUnsafe(buffer, in); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int32_t in) { + WriteUnsafe(buffer, in); +} + +template <> +inline void WriteUnsafe(ArrowBuffer* buffer, int64_t in) { + WriteUnsafe(buffer, in); +} + +template +ArrowErrorCode WriteChecked(ArrowBuffer* buffer, T in, ArrowError* error) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, sizeof(T))); + WriteUnsafe(buffer, in); + return NANOARROW_OK; +} + +class PostgresCopyFieldWriter { + public: + virtual ~PostgresCopyFieldWriter() {} + + void Init(struct ArrowArrayView* array_view) 
{ array_view_ = array_view; }; + + virtual ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) { + return ENOTSUP; + } + + protected: + struct ArrowArrayView* array_view_; + std::vector> children_; +}; + +class PostgresCopyFieldTupleWriter : public PostgresCopyFieldWriter { + public: + void AppendChild(std::unique_ptr child) { + int64_t child_i = static_cast(children_.size()); + children_.push_back(std::move(child)); + children_[child_i]->Init(array_view_->children[child_i]); + } + + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + if (index >= array_view_->length) { + return ENODATA; + } + + const int16_t n_fields = children_.size(); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, n_fields, error)); + + for (int16_t i = 0; i < n_fields; i++) { + const int8_t is_null = ArrowArrayViewIsNull(array_view_->children[i], index); + if (is_null) { + constexpr int32_t field_size_bytes = -1; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + } else { + children_[i]->Write(buffer, index, error); + } + } + + return NANOARROW_OK; + } + + private: + std::vector> children_; +}; + +class PostgresCopyBooleanFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = 1; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + const int8_t value = + static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +template +class PostgresCopyNetworkEndianFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(T); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + const T value = + 
static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)) - kOffset; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyFloatFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(uint32_t); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + uint32_t value; + float raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); + std::memcpy(&value, &raw_value, sizeof(uint32_t)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyDoubleFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(uint64_t); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + uint64_t value; + double raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); + std::memcpy(&value, &raw_value, sizeof(uint64_t)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = 16; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + struct ArrowInterval interval; + ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + ArrowArrayViewGetIntervalUnsafe(array_view_, index, &interval); + const int64_t ms = interval.ns / 1000; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ms, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.days, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.months, error)); + + return ADBC_STATUS_OK; + 
} +}; + +// Inspiration for this taken from get_str_from_var in the pg source +// src/backend/utils/adt/numeric.c +template +class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { +public: + PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : + precision_{precision}, scale_{scale} {} + + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + struct ArrowDecimal decimal; + ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); + ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); + + const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; + + // Number of decimal digits per Postgres digit + constexpr int kDecDigits = 4; + std::vector pg_digits; + int16_t weight = -(scale_ / kDecDigits); + int16_t dscale = scale_; + bool seen_decimal = scale_ == 0; + bool truncating_trailing_zeros = true; + + char decimal_string[max_decimal_digits_ + 1]; + int digits_remaining = DecimalToString(&decimal, decimal_string); + do { + const int start_pos = digits_remaining < kDecDigits ? + 0 : digits_remaining - kDecDigits; + const size_t len = digits_remaining < 4 ? 
digits_remaining : kDecDigits; + char substr[kDecDigits + 1]; + std::memcpy(substr, decimal_string + start_pos, len); + substr[len] = '\0'; + int16_t val = static_cast(std::atoi(substr)); + + if (val == 0) { + if (!seen_decimal && truncating_trailing_zeros) { + dscale -= kDecDigits; + } + } else { + pg_digits.insert(pg_digits.begin(), val); + if (!seen_decimal && truncating_trailing_zeros) { + if (val % 1000 == 0) { + dscale -= 3; + } else if (val % 100 == 0) { + dscale -= 2; + } else if (val % 10 == 0) { + dscale -= 1; + } + } + truncating_trailing_zeros = false; + } + digits_remaining -= kDecDigits; + if (digits_remaining <= 0) { + break; + } + weight++; + + if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { + seen_decimal = true; + } + } while (true); + + int16_t ndigits = pg_digits.size(); + int32_t field_size_bytes = sizeof(ndigits) + + sizeof(weight) + + sizeof(sign) + + sizeof(dscale) + + ndigits * sizeof(int16_t); + + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, weight, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); + + const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); + for (auto pg_digit : pg_digits) { + WriteUnsafe(buffer, pg_digit); + } + + return ADBC_STATUS_OK; + } + +private: + // returns the length of the string + template + int DecimalToString(struct ArrowDecimal* decimal, char* out) { + constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; + uint8_t tmp[DEC_WIDTH / 8]; + ArrowDecimalGetBytes(decimal, tmp); + uint64_t buf[DEC_WIDTH / 64]; + std::memcpy(buf, tmp, sizeof(buf)); + const int16_t sign = ArrowDecimalSign(decimal) > 0 ? kNumericPos : kNumericNeg; + const bool is_negative = sign == kNumericNeg ? 
true : false; + if (is_negative) { + buf[0] = ~buf[0] + 1; + for (size_t i = 1; i < nwords; i++) { + buf[i] = ~buf[i]; + } + } + + // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 + char s[max_decimal_digits_ + 1]; + std::memset(s, '0', sizeof(s) - 1); + s[sizeof(s) - 1] = '\0'; + + for (size_t i = 0; i < DEC_WIDTH; i++) { + int carry; + + carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); + for (size_t j = nwords - 1; j > 0; j--) { + buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j-1] >= 0x7FFFFFFFFFFFFFFF); + } + buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); + + for (int j = sizeof(s) - 2; j>= 0; j--) { + s[j] += s[j] - '0' + carry; + carry = (s[j] > '9'); + if (carry) { + s[j] -= 10; + } + } + } + + char* p = s; + while ((p[0] == '0') && (p < &s[sizeof(s) - 2])) { + p++; + } + + const size_t ndigits = sizeof(s) - 1 - (p - s); + std::memcpy(out, p, ndigits); + out[ndigits] = '\0'; + + return ndigits; + } + + static constexpr uint16_t kNumericPos = 0x0000; + static constexpr uint16_t kNumericNeg = 0x4000; + static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; + static constexpr size_t max_decimal_digits_ = + (T == NANOARROW_TYPE_DECIMAL128) ? 
39 : 78; + const int32_t precision_; + const int32_t scale_; +}; + +template +class PostgresCopyDurationFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = 16; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); + int64_t value; + + bool overflow_safe = true; + switch (TU) { + case NANOARROW_TIME_UNIT_SECOND: + if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && + raw_value >= kMinSafeSecondsToMicros)) { + value = raw_value * 1000000; + } + break; + case NANOARROW_TIME_UNIT_MILLI: + if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && + raw_value >= kMinSafeMillisToMicros)) { + value = raw_value * 1000; + } + break; + case NANOARROW_TIME_UNIT_MICRO: + value = raw_value; + break; + case NANOARROW_TIME_UNIT_NANO: + value = raw_value / 1000; + break; + } + + if (!overflow_safe) { + ArrowErrorSet( + error, "Row %" PRId64 " duration value %" PRId64 " with unit %d would overflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + // 2000-01-01 00:00:00.000000 in microseconds + constexpr uint32_t days = 0; + constexpr uint32_t months = 0; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, days, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, months, error)); + + return ADBC_STATUS_OK; + } +}; + +class PostgresCopyBinaryFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + struct ArrowBufferView buffer_view = ArrowArrayViewGetBytesUnsafe(array_view_, index); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, buffer_view.size_bytes, error)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); + + return 
ADBC_STATUS_OK; + } +}; + +class PostgresCopyBinaryDictFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + int64_t dict_index = ArrowArrayViewGetIntUnsafe(array_view_, index); + if (ArrowArrayViewIsNull(array_view_->dictionary, dict_index)) { + constexpr int32_t field_size_bytes = -1; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + } else { + struct ArrowBufferView buffer_view = + ArrowArrayViewGetBytesUnsafe(array_view_->dictionary, dict_index); + NANOARROW_RETURN_NOT_OK( + WriteChecked(buffer, buffer_view.size_bytes, error)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); + } + + return ADBC_STATUS_OK; + } +}; + +template +class PostgresCopyTimestampFieldWriter : public PostgresCopyFieldWriter { + public: + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + constexpr int32_t field_size_bytes = sizeof(int64_t); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); + int64_t value; + + bool overflow_safe = true; + switch (TU) { + case NANOARROW_TIME_UNIT_SECOND: + if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && + raw_value >= kMinSafeSecondsToMicros)) { + value = raw_value * 1000000; + } + break; + case NANOARROW_TIME_UNIT_MILLI: + if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && + raw_value >= kMinSafeMillisToMicros)) { + value = raw_value * 1000; + } + break; + case NANOARROW_TIME_UNIT_MICRO: + value = raw_value; + break; + case NANOARROW_TIME_UNIT_NANO: + value = raw_value / 1000; + break; + } + + if (!overflow_safe) { + ArrowErrorSet(error, + "[libpq] Row %" PRId64 " timestamp value %" PRId64 + " with unit %d would overflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + if (value < 
std::numeric_limits::min() + kPostgresTimestampEpoch) { + ArrowErrorSet(error, + "[libpq] Row %" PRId64 " timestamp value %" PRId64 + " with unit %d would underflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + const int64_t scaled = value - kPostgresTimestampEpoch; + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, scaled, error)); + + return ADBC_STATUS_OK; + } +}; + +static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, + PostgresCopyFieldWriter** out, + ArrowError* error) { + struct ArrowSchemaView schema_view; + NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); + + switch (schema_view.type) { + case NANOARROW_TYPE_BOOL: + *out = new PostgresCopyBooleanFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_INT16: + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_INT32: + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_INT64: + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DATE32: { + constexpr int32_t kPostgresDateEpoch = 10957; + *out = new PostgresCopyNetworkEndianFieldWriter(); + return NANOARROW_OK; + } + case NANOARROW_TYPE_FLOAT: + *out = new PostgresCopyFloatFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DOUBLE: + *out = new PostgresCopyDoubleFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DECIMAL128: { + const auto precision = schema_view.decimal_precision; + const auto scale = schema_view.decimal_scale; + *out = new PostgresCopyNumericFieldWriter< + NANOARROW_TYPE_DECIMAL128>(precision, scale); + return NANOARROW_OK; + } + case NANOARROW_TYPE_DECIMAL256: { + const auto precision = schema_view.decimal_precision; + const auto scale = schema_view.decimal_scale; + *out = new PostgresCopyNumericFieldWriter< + NANOARROW_TYPE_DECIMAL256>(precision, scale); + return NANOARROW_OK; + } + 
case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + *out = new PostgresCopyBinaryFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_TIMESTAMP: { + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_NANO: + *out = new PostgresCopyTimestampFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MILLI: + *out = new PostgresCopyTimestampFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MICRO: + *out = new PostgresCopyTimestampFieldWriter(); + break; + case NANOARROW_TIME_UNIT_SECOND: + *out = new PostgresCopyTimestampFieldWriter(); + break; + } + return NANOARROW_OK; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + *out = new PostgresCopyIntervalFieldWriter(); + return NANOARROW_OK; + case NANOARROW_TYPE_DURATION: { + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + *out = new PostgresCopyDurationFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MILLI: + *out = new PostgresCopyDurationFieldWriter(); + break; + case NANOARROW_TIME_UNIT_MICRO: + *out = new PostgresCopyDurationFieldWriter(); + + break; + case NANOARROW_TIME_UNIT_NANO: + *out = new PostgresCopyDurationFieldWriter(); + break; + } + return NANOARROW_OK; + } + case NANOARROW_TYPE_DICTIONARY: { + struct ArrowSchemaView value_view; + NANOARROW_RETURN_NOT_OK( + ArrowSchemaViewInit(&value_view, schema->dictionary, error)); + switch (value_view.type) { + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_LARGE_STRING: + *out = new PostgresCopyBinaryDictFieldWriter(); + return NANOARROW_OK; + default: + break; + } + } + default: + break; + } + + ArrowErrorSet(error, "COPY Writer not implemented for type %d", schema_view.type); + return EINVAL; +} + +class PostgresCopyStreamWriter { + public: + ArrowErrorCode Init(struct ArrowSchema* schema) { + schema_ = schema; + NANOARROW_RETURN_NOT_OK( + ArrowArrayViewInitFromSchema(&array_view_.value, schema, nullptr)); + 
root_writer_.Init(&array_view_.value); + ArrowBufferInit(&buffer_.value); + return NANOARROW_OK; + } + + ArrowErrorCode SetArray(struct ArrowArray* array) { + NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&array_view_.value, array, nullptr)); + return NANOARROW_OK; + } + + ArrowErrorCode WriteHeader(ArrowError* error) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&buffer_.value, kPgCopyBinarySignature, + sizeof(kPgCopyBinarySignature))); + + const uint32_t flag_fields = 0; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(&buffer_.value, &flag_fields, sizeof(flag_fields))); + + const uint32_t extension_bytes = 0; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(&buffer_.value, &extension_bytes, sizeof(extension_bytes))); + + return NANOARROW_OK; + } + + ArrowErrorCode WriteRecord(ArrowError* error) { + NANOARROW_RETURN_NOT_OK(root_writer_.Write(&buffer_.value, records_written_, error)); + records_written_++; + return NANOARROW_OK; + } + + ArrowErrorCode InitFieldWriters(ArrowError* error) { + if (schema_->release == nullptr) { + return EINVAL; + } + + for (int64_t i = 0; i < schema_->n_children; i++) { + PostgresCopyFieldWriter* child_writer = nullptr; + NANOARROW_RETURN_NOT_OK( + MakeCopyFieldWriter(schema_->children[i], &child_writer, error)); + root_writer_.AppendChild(std::unique_ptr(child_writer)); + } + + return NANOARROW_OK; + } + + const struct ArrowBuffer& WriteBuffer() const { return buffer_.value; } + + void Rewind() { + records_written_ = 0; + buffer_->size_bytes = 0; + } + + private: + PostgresCopyFieldTupleWriter root_writer_; + struct ArrowSchema* schema_; + Handle array_view_; + Handle buffer_; + int64_t records_written_ = 0; +}; + + } // namespace adbcpq diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h index 8a9192c329..669512fd1d 100644 --- a/c/driver/postgresql/postgres_copy_reader.h +++ b/c/driver/postgresql/postgres_copy_reader.h @@ -21,1668 +21,5 @@ #define NOMINMAX #include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include - -#include "common/utils.h" #include "postgres_type.h" -#include "postgres_util.h" - -// R 3.6 / Windows builds on a very old toolchain that does not define ENODATA -#if defined(_WIN32) && !defined(MSVC) && !defined(ENODATA) -#define ENODATA 120 -#endif - -namespace adbcpq { - -// "PGCOPY\n\377\r\n\0" -static int8_t kPgCopyBinarySignature[] = {0x50, 0x47, 0x43, 0x4F, - 0x50, 0x59, 0x0A, static_cast(0xFF), - 0x0D, 0x0A, 0x00}; - -// The maximum value in seconds that can be converted into microseconds -// without overflow -constexpr int64_t kMaxSafeSecondsToMicros = 9223372036854L; - -// The minimum value in seconds that can be converted into microseconds -// without overflow -constexpr int64_t kMinSafeSecondsToMicros = -9223372036854L; - -// The maximum value in milliseconds that can be converted into microseconds -// without overflow -constexpr int64_t kMaxSafeMillisToMicros = 9223372036854775L; - -// The minimum value in milliseconds that can be converted into microseconds -// without overflow -constexpr int64_t kMinSafeMillisToMicros = -9223372036854775L; - -// The maximum value in microseconds that can be converted into nanoseconds -// without overflow -constexpr int64_t kMaxSafeMicrosToNanos = 9223372036854775L; - -// The minimum value in microseconds that can be converted into nanoseconds -// without overflow -constexpr int64_t kMinSafeMicrosToNanos = -9223372036854775L; - -// 2000-01-01 00:00:00.000000 in microseconds -constexpr int64_t kPostgresTimestampEpoch = 946684800000000L; - -// Read a value from the buffer without checking the buffer size. Advances -// the cursor of data and reduces its size by sizeof(T). 
-template -inline T ReadUnsafe(ArrowBufferView* data) { - T out; - memcpy(&out, data->data.data, sizeof(T)); - out = SwapNetworkToHost(out); - data->data.as_uint8 += sizeof(T); - data->size_bytes -= sizeof(T); - return out; -} - -// Define some explicit specializations for types that don't have a SwapNetworkToHost -// overload. -template <> -inline int8_t ReadUnsafe(ArrowBufferView* data) { - int8_t out = data->data.as_int8[0]; - data->data.as_uint8 += sizeof(int8_t); - data->size_bytes -= sizeof(int8_t); - return out; -} - -template <> -inline int16_t ReadUnsafe(ArrowBufferView* data) { - return static_cast(ReadUnsafe(data)); -} - -template <> -inline int32_t ReadUnsafe(ArrowBufferView* data) { - return static_cast(ReadUnsafe(data)); -} - -template <> -inline int64_t ReadUnsafe(ArrowBufferView* data) { - return static_cast(ReadUnsafe(data)); -} - -template -ArrowErrorCode ReadChecked(ArrowBufferView* data, T* out, ArrowError* error) { - if (data->size_bytes < static_cast(sizeof(T))) { - ArrowErrorSet(error, "Unexpected end of input (expected %d bytes but found %ld)", - static_cast(sizeof(T)), - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - *out = ReadUnsafe(data); - return NANOARROW_OK; -} - -// Write a value to a buffer without checking the buffer size. 
Advances -// the cursor of buffer and reduces it by sizeof(T) -template -inline void WriteUnsafe(ArrowBuffer* buffer, T in) { - const T value = SwapNetworkToHost(in); - ArrowBufferAppendUnsafe(buffer, &value, sizeof(T)); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int8_t in) { - ArrowBufferAppendUnsafe(buffer, &in, sizeof(int8_t)); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int16_t in) { - WriteUnsafe(buffer, in); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int32_t in) { - WriteUnsafe(buffer, in); -} - -template <> -inline void WriteUnsafe(ArrowBuffer* buffer, int64_t in) { - WriteUnsafe(buffer, in); -} - -template -ArrowErrorCode WriteChecked(ArrowBuffer* buffer, T in, ArrowError* error) { - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, sizeof(T))); - WriteUnsafe(buffer, in); - return NANOARROW_OK; -} - -class PostgresCopyFieldReader { - public: - PostgresCopyFieldReader() : validity_(nullptr), offsets_(nullptr), data_(nullptr) { - memset(&schema_view_, 0, sizeof(ArrowSchemaView)); - } - - virtual ~PostgresCopyFieldReader() {} - - void Init(const PostgresType& pg_type) { pg_type_ = pg_type; } - - const PostgresType& InputType() const { return pg_type_; } - - virtual ArrowErrorCode InitSchema(ArrowSchema* schema) { - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view_, schema, nullptr)); - return NANOARROW_OK; - } - - virtual ArrowErrorCode InitArray(ArrowArray* array) { - // Cache some buffer pointers - validity_ = ArrowArrayValidityBitmap(array); - for (int32_t i = 0; i < 3; i++) { - switch (schema_view_.layout.buffer_type[i]) { - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - if (schema_view_.layout.element_size_bits[i] == 32) { - offsets_ = ArrowArrayBuffer(array, i); - } - break; - case NANOARROW_BUFFER_TYPE_DATA: - data_ = ArrowArrayBuffer(array, i); - break; - default: - break; - } - } - - return NANOARROW_OK; - } - - virtual ArrowErrorCode Read(ArrowBufferView* data, int32_t 
field_size_bytes, - ArrowArray* array, ArrowError* error) { - return ENOTSUP; - } - - virtual ArrowErrorCode FinishArray(ArrowArray* array, ArrowError* error) { - return NANOARROW_OK; - } - - protected: - PostgresType pg_type_; - ArrowSchemaView schema_view_; - ArrowBitmap* validity_; - ArrowBuffer* offsets_; - ArrowBuffer* data_; - std::vector> children_; - - ArrowErrorCode AppendValid(ArrowArray* array) { - if (validity_->buffer.data != nullptr) { - NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(validity_, true, 1)); - } - - array->length++; - return NANOARROW_OK; - } -}; - -// Reader for a Postgres boolean (one byte -> bitmap) -class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes != 1) { - ArrowErrorSet(error, "Expected field with one byte but found field with %d bytes", - static_cast(field_size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - int64_t bytes_required = _ArrowBytesForBits(array->length + 1); - if (bytes_required > data_->size_bytes) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(data_, 0, bytes_required - data_->size_bytes)); - } - - if (ReadUnsafe(data)) { - ArrowBitSet(data_->data, array->length); - } else { - ArrowBitClear(data_->data, array->length); - } - - return AppendValid(array); - } -}; - -// Reader for Pg->Arrow conversions whose representations are identical minus -// the bswap from network endian. This includes all integral and float types. 
-template -class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes != static_cast(sizeof(T))) { - ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", - static_cast(sizeof(T)), - static_cast(field_size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - T value = kOffset + ReadUnsafe(data); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value, sizeof(T))); - return AppendValid(array); - } -}; - -// Reader for Intervals -class PostgresCopyIntervalFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes != 16) { - ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", - 16, - static_cast(field_size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - // postgres stores time as usec, arrow stores as ns - const int64_t time_usec = ReadUnsafe(data); - int64_t time; - - if (time_usec > kMaxSafeMicrosToNanos || time_usec < kMinSafeMicrosToNanos) { - ArrowErrorSet(error, - "[libpq] Interval with time value %" PRId64 - " usec would overflow when converting to nanoseconds", - time_usec); - return EINVAL; - } - - time = time_usec * 1000; - - const int32_t days = ReadUnsafe(data); - const int32_t months = ReadUnsafe(data); - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &months, sizeof(int32_t))); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &days, sizeof(int32_t))); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &time, sizeof(int64_t))); - return AppendValid(array); - } -}; - -// // Converts COPY resulting from the 
Postgres NUMERIC type into a string. -// Rewritten based on the Postgres implementation of NUMERIC cast to string in -// src/backend/utils/adt/numeric.c : get_str_from_var() (Note that in the initial source, -// DEC_DIGITS is always 4 and DBASE is always 10000). -// -// Briefly, the Postgres representation of "numeric" is an array of int16_t ("digits") -// from most significant to least significant. Each "digit" is a value between 0000 and -// 9999. There are weight + 1 digits before the decimal point and dscale digits after the -// decimal point. Both of those values can be zero or negative. A "sign" component -// encodes the positive or negativeness of the value and is also used to encode special -// values (inf, -inf, and nan). -class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - // -1 for NULL - if (field_size_bytes < 0) { - return ArrowArrayAppendNull(array, 1); - } - - // Read the input - if (data->size_bytes < static_cast(4 * sizeof(int16_t))) { - ArrowErrorSet(error, - "Expected at least %d bytes of field data for numeric copy data but " - "only %d bytes of input remain", - static_cast(4 * sizeof(int16_t)), - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - int16_t ndigits = ReadUnsafe(data); - int16_t weight = ReadUnsafe(data); - uint16_t sign = ReadUnsafe(data); - uint16_t dscale = ReadUnsafe(data); - - if (data->size_bytes < static_cast(ndigits * sizeof(int16_t))) { - ArrowErrorSet(error, - "Expected at least %d bytes of field data for numeric digits copy " - "data but only %d bytes of input remain", - static_cast(ndigits * sizeof(int16_t)), - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - digits_.clear(); - for (int16_t i = 0; i < ndigits; i++) { - digits_.push_back(ReadUnsafe(data)); - } - - // Handle special values - std::string 
special_value; - switch (sign) { - case kNumericNAN: - special_value = std::string("nan"); - break; - case kNumericPinf: - special_value = std::string("inf"); - break; - case kNumericNinf: - special_value = std::string("-inf"); - break; - case kNumericPos: - case kNumericNeg: - special_value = std::string(""); - break; - default: - ArrowErrorSet(error, - "Unexpected value for sign read from Postgres numeric field: %d", - static_cast(sign)); - return EINVAL; - } - - if (!special_value.empty()) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_, special_value.data(), special_value.size())); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes)); - return AppendValid(array); - } - - // Calculate string space requirement - int64_t max_chars_required = std::max(1, (weight + 1) * kDecDigits); - max_chars_required += dscale + kDecDigits + 2; - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(data_, max_chars_required)); - char* out0 = reinterpret_cast(data_->data + data_->size_bytes); - char* out = out0; - - // Build output string in-place, starting with the negative sign - if (sign == kNumericNeg) { - *out++ = '-'; - } - - // ...then digits before the decimal point - int d; - int d1; - int16_t dig; - - if (weight < 0) { - d = weight + 1; - *out++ = '0'; - } else { - for (d = 0; d <= weight; d++) { - if (d < ndigits) { - dig = digits_[d]; - } else { - dig = 0; - } - - // To strip leading zeroes - int append = (d > 0); - - for (const auto pow10 : {1000, 100, 10, 1}) { - d1 = dig / pow10; - dig -= d1 * pow10; - append |= (d1 > 0); - if (append) { - *out++ = d1 + '0'; - } - } - } - } - - // ...then the decimal point + digits after it. This may write more digits - // than specified by dscale so we need to keep track of how many we want to - // keep here. 
- int64_t actual_chars_required = out - out0; - - if (dscale > 0) { - *out++ = '.'; - actual_chars_required += dscale + 1; - - for (int i = 0; i < dscale; i++, d++, i += kDecDigits) { - if (d >= 0 && d < ndigits) { - dig = digits_[d]; - } else { - dig = 0; - } - - for (const auto pow10 : {1000, 100, 10, 1}) { - d1 = dig / pow10; - dig -= d1 * pow10; - *out++ = d1 + '0'; - } - } - } - - // Update data buffer size and add offsets - data_->size_bytes += actual_chars_required; - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes)); - return AppendValid(array); - } - - private: - std::vector digits_; - - // Number of decimal digits per Postgres digit - static const int kDecDigits = 4; - // The "base" of the Postgres representation (i.e., each "digit" is 0 to 9999) - static const int kNBase = 10000; - // Valid values for the sign component - static const uint16_t kNumericPos = 0x0000; - static const uint16_t kNumericNeg = 0x4000; - static const uint16_t kNumericNAN = 0xC000; - static const uint16_t kNumericPinf = 0xD000; - static const uint16_t kNumericNinf = 0xF000; -}; - -// Reader for Pg->Arrow conversions whose Arrow representation is simply the -// bytes of the field representation. This can be used with binary and string -// Arrow types and any Postgres type. 
-class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader { - public: - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - // -1 for NULL (0 would be empty string) - if (field_size_bytes < 0) { - return ArrowArrayAppendNull(array, 1); - } - - if (field_size_bytes > data->size_bytes) { - ArrowErrorSet(error, "Expected %d bytes of field data but got %d bytes of input", - static_cast(field_size_bytes), - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes)); - data->data.as_uint8 += field_size_bytes; - data->size_bytes -= field_size_bytes; - - int32_t* offsets = reinterpret_cast(offsets_->data); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(offsets_, offsets[array->length] + field_size_bytes)); - - return AppendValid(array); - } -}; - -class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { - public: - void InitChild(std::unique_ptr child) { - child_ = std::move(child); - child_->Init(pg_type_.child(0)); - } - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); - NANOARROW_RETURN_NOT_OK(child_->InitSchema(schema->children[0])); - return NANOARROW_OK; - } - - ArrowErrorCode InitArray(ArrowArray* array) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); - NANOARROW_RETURN_NOT_OK(child_->InitArray(array->children[0])); - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes <= 0) { - return ArrowArrayAppendNull(array, 1); - } - - // Keep the cursor where we start to parse the array so we can check - // the number of bytes read against the field size when finished - const uint8_t* data0 = data->data.as_uint8; - - int32_t n_dim; - 
NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_dim, error)); - int32_t flags; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); - uint32_t element_type_oid; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &element_type_oid, error)); - - // We could validate the OID here, but this is a poor fit for all cases - // (e.g. testing) since the OID can be specific to each database - - if (n_dim < 0) { - ArrowErrorSet(error, "Expected array n_dim > 0 but got %d", - static_cast(n_dim)); // NOLINT(runtime/int) - return EINVAL; - } - - // This is apparently allowed - if (n_dim == 0) { - NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); - return NANOARROW_OK; - } - - int64_t n_items = 1; - for (int32_t i = 0; i < n_dim; i++) { - int32_t dim_size; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &dim_size, error)); - n_items *= dim_size; - - int32_t lower_bound; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &lower_bound, error)); - if (lower_bound != 1) { - ArrowErrorSet(error, "Array value with lower bound != 1 is not supported"); - return EINVAL; - } - } - - for (int64_t i = 0; i < n_items; i++) { - int32_t child_field_size_bytes; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); - NANOARROW_RETURN_NOT_OK( - child_->Read(data, child_field_size_bytes, array->children[0], error)); - } - - int64_t bytes_read = data->data.as_uint8 - data0; - if (bytes_read != field_size_bytes) { - ArrowErrorSet(error, "Expected to read %d bytes from array field but read %d bytes", - static_cast(field_size_bytes), - static_cast(bytes_read)); // NOLINT(runtime/int) - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array)); - return NANOARROW_OK; - } - - private: - std::unique_ptr child_; -}; - -class PostgresCopyRecordFieldReader : public PostgresCopyFieldReader { - public: - void AppendChild(std::unique_ptr child) { - int64_t child_i = static_cast(children_.size()); - children_.push_back(std::move(child)); - 
children_[child_i]->Init(pg_type_.child(child_i)); - } - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); - for (int64_t i = 0; i < schema->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode InitArray(ArrowArray* array) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - if (field_size_bytes < 0) { - return ArrowArrayAppendNull(array, 1); - } - - // Keep the cursor where we start to parse the field so we can check - // the number of bytes read against the field size when finished - const uint8_t* data0 = data->data.as_uint8; - - int32_t n_fields; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); - if (n_fields != array->n_children) { - ArrowErrorSet(error, "Expected nested record type to have %ld fields but got %d", - static_cast(array->n_children), // NOLINT(runtime/int) - static_cast(n_fields)); // NOLINT(runtime/int) - return EINVAL; - } - - for (int32_t i = 0; i < n_fields; i++) { - uint32_t child_oid; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_oid, error)); - - int32_t child_field_size_bytes; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); - int result = - children_[i]->Read(data, child_field_size_bytes, array->children[i], error); - - // On overflow, pretend all previous children for this struct were never - // appended to. This leaves array in a valid state in the specific case - // where EOVERFLOW was returned so that a higher level caller can attempt - // to try again after creating a new array. 
- if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; j++) { - array->children[j]->length--; - } - } - - if (result != NANOARROW_OK) { - return result; - } - } - - // field size == -1 means don't check (e.g., for a top-level row tuple) - int64_t bytes_read = data->data.as_uint8 - data0; - if (field_size_bytes != -1 && bytes_read != field_size_bytes) { - ArrowErrorSet(error, - "Expected to read %d bytes from record field but read %d bytes", - static_cast(field_size_bytes), - static_cast(bytes_read)); // NOLINT(runtime/int) - return EINVAL; - } - - array->length++; - return NANOARROW_OK; - } - - private: - std::vector> children_; -}; - -// Subtely different from a Record field item: field count is an int16_t -// instead of an int32_t and each field is not prefixed by its OID. -class PostgresCopyFieldTupleReader : public PostgresCopyFieldReader { - public: - void AppendChild(std::unique_ptr child) { - int64_t child_i = static_cast(children_.size()); - children_.push_back(std::move(child)); - children_[child_i]->Init(pg_type_.child(child_i)); - } - - ArrowErrorCode InitSchema(ArrowSchema* schema) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitSchema(schema)); - for (int64_t i = 0; i < schema->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitSchema(schema->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode InitArray(ArrowArray* array) override { - NANOARROW_RETURN_NOT_OK(PostgresCopyFieldReader::InitArray(array)); - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(children_[i]->InitArray(array->children[i])); - } - - return NANOARROW_OK; - } - - ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, - ArrowError* error) override { - int16_t n_fields; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &n_fields, error)); - if (n_fields == -1) { - return ENODATA; - } else if (n_fields != array->n_children) { - ArrowErrorSet(error, - "Expected -1 for end-of-stream or 
number of fields in output array " - "(%ld) but got %d", - static_cast(array->n_children), // NOLINT(runtime/int) - static_cast(n_fields)); // NOLINT(runtime/int) - return EINVAL; - } - - for (int16_t i = 0; i < n_fields; i++) { - int32_t child_field_size_bytes; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &child_field_size_bytes, error)); - int result = - children_[i]->Read(data, child_field_size_bytes, array->children[i], error); - - // On overflow, pretend all previous children for this struct were never - // appended to. This leaves array in a valid state in the specific case - // where EOVERFLOW was returned so that a higher level caller can attempt - // to try again after creating a new array. - if (result == EOVERFLOW) { - for (int16_t j = 0; j < i; j++) { - array->children[j]->length--; - } - } - - if (result != NANOARROW_OK) { - return result; - } - } - - array->length++; - return NANOARROW_OK; - } - - private: - std::vector> children_; -}; - -// Factory for a PostgresCopyFieldReader that instantiates the proper subclass -// and gives a nice error for Postgres type -> Arrow type conversions that aren't -// supported. 
-static inline ArrowErrorCode ErrorCantConvert(ArrowError* error, - const PostgresType& pg_type, - const ArrowSchemaView& schema_view) { - ArrowErrorSet(error, "Can't convert Postgres type '%s' to Arrow type '%s'", - pg_type.typname().c_str(), - ArrowTypeString(schema_view.type)); // NOLINT(runtime/int) - return EINVAL; -} - -static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type, - ArrowSchema* schema, - PostgresCopyFieldReader** out, - ArrowError* error) { - ArrowSchemaView schema_view; - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); - - switch (schema_view.type) { - case NANOARROW_TYPE_BOOL: - switch (pg_type.type_id()) { - case PostgresTypeId::kBool: - *out = new PostgresCopyBooleanFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_INT16: - switch (pg_type.type_id()) { - case PostgresTypeId::kInt2: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_INT32: - switch (pg_type.type_id()) { - case PostgresTypeId::kInt4: - case PostgresTypeId::kOid: - case PostgresTypeId::kRegproc: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_INT64: - switch (pg_type.type_id()) { - case PostgresTypeId::kInt8: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_FLOAT: - switch (pg_type.type_id()) { - case PostgresTypeId::kFloat4: - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_DOUBLE: - switch (pg_type.type_id()) { - case PostgresTypeId::kFloat8: - *out = new 
PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_STRING: - switch (pg_type.type_id()) { - case PostgresTypeId::kChar: - case PostgresTypeId::kVarchar: - case PostgresTypeId::kText: - case PostgresTypeId::kBpchar: - case PostgresTypeId::kName: - *out = new PostgresCopyBinaryFieldReader(); - return NANOARROW_OK; - case PostgresTypeId::kNumeric: - *out = new PostgresCopyNumericFieldReader(); - return NANOARROW_OK; - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_BINARY: - // No need to check pg_type here: we can return the bytes of any - // Postgres type as binary. - *out = new PostgresCopyBinaryFieldReader(); - return NANOARROW_OK; - - case NANOARROW_TYPE_LIST: - switch (pg_type.type_id()) { - case PostgresTypeId::kArray: { - if (pg_type.n_children() != 1) { - ArrowErrorSet( - error, "Expected Postgres array type to have one child but found %ld", - static_cast(pg_type.n_children())); // NOLINT(runtime/int) - return EINVAL; - } - - auto array_reader = std::unique_ptr( - new PostgresCopyArrayFieldReader()); - array_reader->Init(pg_type); - - PostgresCopyFieldReader* child_reader; - NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( - pg_type.child(0), schema->children[0], &child_reader, error)); - array_reader->InitChild(std::unique_ptr(child_reader)); - - *out = array_reader.release(); - return NANOARROW_OK; - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_STRUCT: - switch (pg_type.type_id()) { - case PostgresTypeId::kRecord: { - if (pg_type.n_children() != schema->n_children) { - ArrowErrorSet(error, - "Can't convert Postgres record type with %ld chlidren to Arrow " - "struct type with %ld children", - static_cast(pg_type.n_children()), // NOLINT(runtime/int) - static_cast(schema->n_children)); // NOLINT(runtime/int) - return EINVAL; - } - - auto record_reader = 
std::unique_ptr( - new PostgresCopyRecordFieldReader()); - record_reader->Init(pg_type); - - for (int64_t i = 0; i < pg_type.n_children(); i++) { - PostgresCopyFieldReader* child_reader; - NANOARROW_RETURN_NOT_OK(MakeCopyFieldReader( - pg_type.child(i), schema->children[i], &child_reader, error)); - record_reader->AppendChild( - std::unique_ptr(child_reader)); - } - - *out = record_reader.release(); - return NANOARROW_OK; - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - case NANOARROW_TYPE_DATE32: { - // 2000-01-01 - constexpr int32_t kPostgresDateEpoch = 10957; - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - } - - case NANOARROW_TYPE_TIME64: { - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - } - - case NANOARROW_TYPE_TIMESTAMP: - switch (pg_type.type_id()) { - case PostgresTypeId::kTimestamp: - case PostgresTypeId::kTimestamptz: { - // 2000-01-01 00:00:00.000000 in microseconds - constexpr int64_t kPostgresTimestampEpoch = 946684800000000; - *out = new PostgresCopyNetworkEndianFieldReader(); - return NANOARROW_OK; - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: - switch (pg_type.type_id()) { - case PostgresTypeId::kInterval: { - *out = new PostgresCopyIntervalFieldReader(); - return NANOARROW_OK; - } - default: - return ErrorCantConvert(error, pg_type, schema_view); - } - - default: - return ErrorCantConvert(error, pg_type, schema_view); - } -} - -class PostgresCopyStreamReader { - public: - ArrowErrorCode Init(PostgresType pg_type) { - if (pg_type.type_id() != PostgresTypeId::kRecord) { - return EINVAL; - } - - pg_type_ = std::move(pg_type); - root_reader_.Init(pg_type_); - array_size_approx_bytes_ = 0; - return NANOARROW_OK; - } - - int64_t array_size_approx_bytes() const { return array_size_approx_bytes_; } - - ArrowErrorCode SetOutputSchema(ArrowSchema* schema, ArrowError* error) { - if 
(std::string(schema_->format) != "+s") { - ArrowErrorSet( - error, - "Expected output schema of type struct but got output schema with format '%s'", - schema_->format); // NOLINT(runtime/int) - return EINVAL; - } - - if (schema_->n_children != root_reader_.InputType().n_children()) { - ArrowErrorSet(error, - "Expected output schema with %ld columns to match Postgres input but " - "got schema with %ld columns", - static_cast( // NOLINT(runtime/int) - root_reader_.InputType().n_children()), - static_cast(schema->n_children)); // NOLINT(runtime/int) - return EINVAL; - } - - schema_.reset(schema); - return NANOARROW_OK; - } - - ArrowErrorCode InferOutputSchema(ArrowError* error) { - schema_.reset(); - ArrowSchemaInit(schema_.get()); - NANOARROW_RETURN_NOT_OK(root_reader_.InputType().SetSchema(schema_.get())); - return NANOARROW_OK; - } - - ArrowErrorCode InitFieldReaders(ArrowError* error) { - if (schema_->release == nullptr) { - return EINVAL; - } - - const PostgresType& root_type = root_reader_.InputType(); - - for (int64_t i = 0; i < root_type.n_children(); i++) { - const PostgresType& child_type = root_type.child(i); - PostgresCopyFieldReader* child_reader; - NANOARROW_RETURN_NOT_OK( - MakeCopyFieldReader(child_type, schema_->children[i], &child_reader, error)); - root_reader_.AppendChild(std::unique_ptr(child_reader)); - } - - NANOARROW_RETURN_NOT_OK(root_reader_.InitSchema(schema_.get())); - return NANOARROW_OK; - } - - ArrowErrorCode ReadHeader(ArrowBufferView* data, ArrowError* error) { - if (data->size_bytes < static_cast(sizeof(kPgCopyBinarySignature))) { - ArrowErrorSet( - error, - "Expected PGCOPY signature of %ld bytes at beginning of stream but " - "found %ld bytes of input", - static_cast(sizeof(kPgCopyBinarySignature)), // NOLINT(runtime/int) - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - if (memcmp(data->data.data, kPgCopyBinarySignature, sizeof(kPgCopyBinarySignature)) != - 0) { - ArrowErrorSet(error, "Invalid PGCOPY 
signature at beginning of stream"); - return EINVAL; - } - - data->data.as_uint8 += sizeof(kPgCopyBinarySignature); - data->size_bytes -= sizeof(kPgCopyBinarySignature); - - uint32_t flags; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &flags, error)); - uint32_t extension_length; - NANOARROW_RETURN_NOT_OK(ReadChecked(data, &extension_length, error)); - - if (data->size_bytes < static_cast(extension_length)) { - ArrowErrorSet(error, - "Expected %ld bytes of extension metadata at start of stream but " - "found %ld bytes of input", - static_cast(extension_length), // NOLINT(runtime/int) - static_cast(data->size_bytes)); // NOLINT(runtime/int) - return EINVAL; - } - - data->data.as_uint8 += extension_length; - data->size_bytes -= extension_length; - return NANOARROW_OK; - } - - ArrowErrorCode ReadRecord(ArrowBufferView* data, ArrowError* error) { - if (array_->release == nullptr) { - NANOARROW_RETURN_NOT_OK( - ArrowArrayInitFromSchema(array_.get(), schema_.get(), error)); - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array_.get())); - NANOARROW_RETURN_NOT_OK(root_reader_.InitArray(array_.get())); - array_size_approx_bytes_ = 0; - } - - const uint8_t* start = data->data.as_uint8; - NANOARROW_RETURN_NOT_OK(root_reader_.Read(data, -1, array_.get(), error)); - array_size_approx_bytes_ += (data->data.as_uint8 - start); - return NANOARROW_OK; - } - - ArrowErrorCode GetSchema(ArrowSchema* out) { - return ArrowSchemaDeepCopy(schema_.get(), out); - } - - ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error) { - if (array_->release == nullptr) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuildingDefault(array_.get(), error)); - ArrowArrayMove(array_.get(), out); - return NANOARROW_OK; - } - - const PostgresType& pg_type() const { return pg_type_; } - - private: - PostgresType pg_type_; - PostgresCopyFieldTupleReader root_reader_; - nanoarrow::UniqueSchema schema_; - nanoarrow::UniqueArray array_; - int64_t array_size_approx_bytes_; -}; - -class 
PostgresCopyFieldWriter { - public: - virtual ~PostgresCopyFieldWriter() {} - - void Init(struct ArrowArrayView* array_view) { array_view_ = array_view; }; - - virtual ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) { - return ENOTSUP; - } - - protected: - struct ArrowArrayView* array_view_; - std::vector> children_; -}; - -class PostgresCopyFieldTupleWriter : public PostgresCopyFieldWriter { - public: - void AppendChild(std::unique_ptr child) { - int64_t child_i = static_cast(children_.size()); - children_.push_back(std::move(child)); - children_[child_i]->Init(array_view_->children[child_i]); - } - - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - if (index >= array_view_->length) { - return ENODATA; - } - - const int16_t n_fields = children_.size(); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, n_fields, error)); - - for (int16_t i = 0; i < n_fields; i++) { - const int8_t is_null = ArrowArrayViewIsNull(array_view_->children[i], index); - if (is_null) { - constexpr int32_t field_size_bytes = -1; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - } else { - children_[i]->Write(buffer, index, error); - } - } - - return NANOARROW_OK; - } - - private: - std::vector> children_; -}; - -class PostgresCopyBooleanFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 1; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - const int8_t value = - static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -template -class PostgresCopyNetworkEndianFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 
sizeof(T); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - const T value = - static_cast(ArrowArrayViewGetIntUnsafe(array_view_, index)) - kOffset; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyFloatFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(uint32_t); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - uint32_t value; - float raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); - std::memcpy(&value, &raw_value, sizeof(uint32_t)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyDoubleFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(uint64_t); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - uint64_t value; - double raw_value = ArrowArrayViewGetDoubleUnsafe(array_view_, index); - std::memcpy(&value, &raw_value, sizeof(uint64_t)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyIntervalFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 16; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - struct ArrowInterval interval; - ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); - ArrowArrayViewGetIntervalUnsafe(array_view_, index, &interval); - const int64_t ms = interval.ns / 1000; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ms, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.days, 
error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, interval.months, error)); - - return ADBC_STATUS_OK; - } -}; - -// Inspiration for this taken from get_str_from_var in the pg source -// src/backend/utils/adt/numeric.c -template -class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { -public: - PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : - precision_{precision}, scale_{scale} {} - - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - struct ArrowDecimal decimal; - ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); - ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal); - - const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; - - // Number of decimal digits per Postgres digit - constexpr int kDecDigits = 4; - std::vector pg_digits; - int16_t weight = -(scale_ / kDecDigits); - int16_t dscale = scale_; - bool seen_decimal = scale_ == 0; - bool truncating_trailing_zeros = true; - - char decimal_string[max_decimal_digits_ + 1]; - int digits_remaining = DecimalToString(&decimal, decimal_string); - do { - const int start_pos = digits_remaining < kDecDigits ? - 0 : digits_remaining - kDecDigits; - const size_t len = digits_remaining < 4 ? 
digits_remaining : kDecDigits; - char substr[kDecDigits + 1]; - std::memcpy(substr, decimal_string + start_pos, len); - substr[len] = '\0'; - int16_t val = static_cast(std::atoi(substr)); - - if (val == 0) { - if (!seen_decimal && truncating_trailing_zeros) { - dscale -= kDecDigits; - } - } else { - pg_digits.insert(pg_digits.begin(), val); - if (!seen_decimal && truncating_trailing_zeros) { - if (val % 1000 == 0) { - dscale -= 3; - } else if (val % 100 == 0) { - dscale -= 2; - } else if (val % 10 == 0) { - dscale -= 1; - } - } - truncating_trailing_zeros = false; - } - digits_remaining -= kDecDigits; - if (digits_remaining <= 0) { - break; - } - weight++; - - if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { - seen_decimal = true; - } - } while (true); - - int16_t ndigits = pg_digits.size(); - int32_t field_size_bytes = sizeof(ndigits) - + sizeof(weight) - + sizeof(sign) - + sizeof(dscale) - + ndigits * sizeof(int16_t); - - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, weight, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); - - const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); - for (auto pg_digit : pg_digits) { - WriteUnsafe(buffer, pg_digit); - } - - return ADBC_STATUS_OK; - } - -private: - // returns the length of the string - template - int DecimalToString(struct ArrowDecimal* decimal, char* out) { - constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; - uint8_t tmp[DEC_WIDTH / 8]; - ArrowDecimalGetBytes(decimal, tmp); - uint64_t buf[DEC_WIDTH / 64]; - std::memcpy(buf, tmp, sizeof(buf)); - const int16_t sign = ArrowDecimalSign(decimal) > 0 ? kNumericPos : kNumericNeg; - const bool is_negative = sign == kNumericNeg ? 
true : false; - if (is_negative) { - buf[0] = ~buf[0] + 1; - for (size_t i = 1; i < nwords; i++) { - buf[i] = ~buf[i]; - } - } - - // Basic approach adopted from https://stackoverflow.com/a/8023862/621736 - char s[max_decimal_digits_ + 1]; - std::memset(s, '0', sizeof(s) - 1); - s[sizeof(s) - 1] = '\0'; - - for (size_t i = 0; i < DEC_WIDTH; i++) { - int carry; - - carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); - for (size_t j = nwords - 1; j > 0; j--) { - buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j-1] >= 0x7FFFFFFFFFFFFFFF); - } - buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); - - for (int j = sizeof(s) - 2; j>= 0; j--) { - s[j] += s[j] - '0' + carry; - carry = (s[j] > '9'); - if (carry) { - s[j] -= 10; - } - } - } - - char* p = s; - while ((p[0] == '0') && (p < &s[sizeof(s) - 2])) { - p++; - } - - const size_t ndigits = sizeof(s) - 1 - (p - s); - std::memcpy(out, p, ndigits); - out[ndigits] = '\0'; - - return ndigits; - } - - static constexpr uint16_t kNumericPos = 0x0000; - static constexpr uint16_t kNumericNeg = 0x4000; - static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; - static constexpr size_t max_decimal_digits_ = - (T == NANOARROW_TYPE_DECIMAL128) ? 
39 : 78; - const int32_t precision_; - const int32_t scale_; -}; - -template -class PostgresCopyDurationFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = 16; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); - int64_t value; - - bool overflow_safe = true; - switch (TU) { - case NANOARROW_TIME_UNIT_SECOND: - if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && - raw_value >= kMinSafeSecondsToMicros)) { - value = raw_value * 1000000; - } - break; - case NANOARROW_TIME_UNIT_MILLI: - if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && - raw_value >= kMinSafeMillisToMicros)) { - value = raw_value * 1000; - } - break; - case NANOARROW_TIME_UNIT_MICRO: - value = raw_value; - break; - case NANOARROW_TIME_UNIT_NANO: - value = raw_value / 1000; - break; - } - - if (!overflow_safe) { - ArrowErrorSet( - error, "Row %" PRId64 " duration value %" PRId64 " with unit %d would overflow", - index, raw_value, TU); - return ADBC_STATUS_INVALID_ARGUMENT; - } - - // 2000-01-01 00:00:00.000000 in microseconds - constexpr uint32_t days = 0; - constexpr uint32_t months = 0; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, value, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, days, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, months, error)); - - return ADBC_STATUS_OK; - } -}; - -class PostgresCopyBinaryFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - struct ArrowBufferView buffer_view = ArrowArrayViewGetBytesUnsafe(array_view_, index); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, buffer_view.size_bytes, error)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); - - return 
ADBC_STATUS_OK; - } -}; - -class PostgresCopyBinaryDictFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - int64_t dict_index = ArrowArrayViewGetIntUnsafe(array_view_, index); - if (ArrowArrayViewIsNull(array_view_->dictionary, dict_index)) { - constexpr int32_t field_size_bytes = -1; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - } else { - struct ArrowBufferView buffer_view = - ArrowArrayViewGetBytesUnsafe(array_view_->dictionary, dict_index); - NANOARROW_RETURN_NOT_OK( - WriteChecked(buffer, buffer_view.size_bytes, error)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(buffer, buffer_view.data.as_uint8, buffer_view.size_bytes)); - } - - return ADBC_STATUS_OK; - } -}; - -template -class PostgresCopyTimestampFieldWriter : public PostgresCopyFieldWriter { - public: - ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { - constexpr int32_t field_size_bytes = sizeof(int64_t); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); - - int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); - int64_t value; - - bool overflow_safe = true; - switch (TU) { - case NANOARROW_TIME_UNIT_SECOND: - if ((overflow_safe = raw_value <= kMaxSafeSecondsToMicros && - raw_value >= kMinSafeSecondsToMicros)) { - value = raw_value * 1000000; - } - break; - case NANOARROW_TIME_UNIT_MILLI: - if ((overflow_safe = raw_value <= kMaxSafeMillisToMicros && - raw_value >= kMinSafeMillisToMicros)) { - value = raw_value * 1000; - } - break; - case NANOARROW_TIME_UNIT_MICRO: - value = raw_value; - break; - case NANOARROW_TIME_UNIT_NANO: - value = raw_value / 1000; - break; - } - - if (!overflow_safe) { - ArrowErrorSet(error, - "[libpq] Row %" PRId64 " timestamp value %" PRId64 - " with unit %d would overflow", - index, raw_value, TU); - return ADBC_STATUS_INVALID_ARGUMENT; - } - - if (value < 
std::numeric_limits::min() + kPostgresTimestampEpoch) { - ArrowErrorSet(error, - "[libpq] Row %" PRId64 " timestamp value %" PRId64 - " with unit %d would underflow", - index, raw_value, TU); - return ADBC_STATUS_INVALID_ARGUMENT; - } - - const int64_t scaled = value - kPostgresTimestampEpoch; - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, scaled, error)); - - return ADBC_STATUS_OK; - } -}; - -static inline ArrowErrorCode MakeCopyFieldWriter(struct ArrowSchema* schema, - PostgresCopyFieldWriter** out, - ArrowError* error) { - struct ArrowSchemaView schema_view; - NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); - - switch (schema_view.type) { - case NANOARROW_TYPE_BOOL: - *out = new PostgresCopyBooleanFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_INT8: - case NANOARROW_TYPE_INT16: - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_INT32: - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_INT64: - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DATE32: { - constexpr int32_t kPostgresDateEpoch = 10957; - *out = new PostgresCopyNetworkEndianFieldWriter(); - return NANOARROW_OK; - } - case NANOARROW_TYPE_FLOAT: - *out = new PostgresCopyFloatFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DOUBLE: - *out = new PostgresCopyDoubleFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DECIMAL128: { - const auto precision = schema_view.decimal_precision; - const auto scale = schema_view.decimal_scale; - *out = new PostgresCopyNumericFieldWriter< - NANOARROW_TYPE_DECIMAL128>(precision, scale); - return NANOARROW_OK; - } - case NANOARROW_TYPE_DECIMAL256: { - const auto precision = schema_view.decimal_precision; - const auto scale = schema_view.decimal_scale; - *out = new PostgresCopyNumericFieldWriter< - NANOARROW_TYPE_DECIMAL256>(precision, scale); - return NANOARROW_OK; - } - 
case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_STRING: - *out = new PostgresCopyBinaryFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_TIMESTAMP: { - switch (schema_view.time_unit) { - case NANOARROW_TIME_UNIT_NANO: - *out = new PostgresCopyTimestampFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MILLI: - *out = new PostgresCopyTimestampFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MICRO: - *out = new PostgresCopyTimestampFieldWriter(); - break; - case NANOARROW_TIME_UNIT_SECOND: - *out = new PostgresCopyTimestampFieldWriter(); - break; - } - return NANOARROW_OK; - } - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: - *out = new PostgresCopyIntervalFieldWriter(); - return NANOARROW_OK; - case NANOARROW_TYPE_DURATION: { - switch (schema_view.time_unit) { - case NANOARROW_TIME_UNIT_SECOND: - *out = new PostgresCopyDurationFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MILLI: - *out = new PostgresCopyDurationFieldWriter(); - break; - case NANOARROW_TIME_UNIT_MICRO: - *out = new PostgresCopyDurationFieldWriter(); - - break; - case NANOARROW_TIME_UNIT_NANO: - *out = new PostgresCopyDurationFieldWriter(); - break; - } - return NANOARROW_OK; - } - case NANOARROW_TYPE_DICTIONARY: { - struct ArrowSchemaView value_view; - NANOARROW_RETURN_NOT_OK( - ArrowSchemaViewInit(&value_view, schema->dictionary, error)); - switch (value_view.type) { - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - case NANOARROW_TYPE_LARGE_STRING: - *out = new PostgresCopyBinaryDictFieldWriter(); - return NANOARROW_OK; - default: - break; - } - } - default: - break; - } - - ArrowErrorSet(error, "COPY Writer not implemented for type %d", schema_view.type); - return EINVAL; -} - -class PostgresCopyStreamWriter { - public: - ArrowErrorCode Init(struct ArrowSchema* schema) { - schema_ = schema; - NANOARROW_RETURN_NOT_OK( - ArrowArrayViewInitFromSchema(&array_view_.value, schema, nullptr)); - 
root_writer_.Init(&array_view_.value); - ArrowBufferInit(&buffer_.value); - return NANOARROW_OK; - } - - ArrowErrorCode SetArray(struct ArrowArray* array) { - NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArray(&array_view_.value, array, nullptr)); - return NANOARROW_OK; - } - - ArrowErrorCode WriteHeader(ArrowError* error) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&buffer_.value, kPgCopyBinarySignature, - sizeof(kPgCopyBinarySignature))); - - const uint32_t flag_fields = 0; - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(&buffer_.value, &flag_fields, sizeof(flag_fields))); - - const uint32_t extension_bytes = 0; - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(&buffer_.value, &extension_bytes, sizeof(extension_bytes))); - - return NANOARROW_OK; - } - - ArrowErrorCode WriteRecord(ArrowError* error) { - NANOARROW_RETURN_NOT_OK(root_writer_.Write(&buffer_.value, records_written_, error)); - records_written_++; - return NANOARROW_OK; - } - - ArrowErrorCode InitFieldWriters(ArrowError* error) { - if (schema_->release == nullptr) { - return EINVAL; - } - - for (int64_t i = 0; i < schema_->n_children; i++) { - PostgresCopyFieldWriter* child_writer = nullptr; - NANOARROW_RETURN_NOT_OK( - MakeCopyFieldWriter(schema_->children[i], &child_writer, error)); - root_writer_.AppendChild(std::unique_ptr(child_writer)); - } - - return NANOARROW_OK; - } - - const struct ArrowBuffer& WriteBuffer() const { return buffer_.value; } - - void Rewind() { - records_written_ = 0; - buffer_->size_bytes = 0; - } - - private: - PostgresCopyFieldTupleWriter root_writer_; - struct ArrowSchema* schema_; - Handle array_view_; - Handle buffer_; - int64_t records_written_ = 0; -}; - -} // namespace adbcpq diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc index 201aa223a2..628afa28b2 100644 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/postgres_copy_reader_test.cc @@ -22,7 +22,8 @@ #include #include -#include 
"postgres_copy_reader.h" +#include "copy/reader.h" +#include "copy/writer.h" #include "validation/adbc_validation_util.h" namespace adbcpq { diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 68fd45a944..601716b163 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -39,7 +39,7 @@ #include "common/utils.h" #include "connection.h" #include "error.h" -#include "postgres_copy_reader.h" +#include "copy/writer.h" #include "postgres_type.h" #include "postgres_util.h" #include "result_helper.h" diff --git a/c/driver/postgresql/statement.h b/c/driver/postgresql/statement.h index c822390d8c..d469ca112a 100644 --- a/c/driver/postgresql/statement.h +++ b/c/driver/postgresql/statement.h @@ -27,7 +27,7 @@ #include #include "common/utils.h" -#include "postgres_copy_reader.h" +#include "copy/reader.h" #include "postgres_type.h" #define ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES \ From d165d21236fb9bc9dffa8e22181158962b1213a7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 4 Jan 2024 17:48:37 -0500 Subject: [PATCH 2/9] split test files --- c/driver/postgresql/CMakeLists.txt | 2 +- c/driver/postgresql/copy/CMakeLists.txt | 46 + .../copy/postgres_copy_reader_test.cc | 697 +++++++++ .../copy/postgres_copy_test_common.h | 124 ++ .../copy/postgres_copy_writer_test.cc | 630 ++++++++ c/driver/postgresql/copy/reader.h | 4 +- c/driver/postgresql/copy/writer.h | 3 +- c/driver/postgresql/postgres_copy_reader.h | 25 - .../postgresql/postgres_copy_reader_test.cc | 1380 ----------------- 9 files changed, 1502 insertions(+), 1409 deletions(-) create mode 100644 c/driver/postgresql/copy/CMakeLists.txt create mode 100644 c/driver/postgresql/copy/postgres_copy_reader_test.cc create mode 100644 c/driver/postgresql/copy/postgres_copy_test_common.h create mode 100644 c/driver/postgresql/copy/postgres_copy_writer_test.cc delete mode 100644 c/driver/postgresql/postgres_copy_reader.h delete mode 100644 
c/driver/postgresql/postgres_copy_reader_test.cc diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index b98e6ea3fc..166b4f5a6f 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -76,7 +76,6 @@ if(ADBC_BUILD_TESTS) driver-postgresql SOURCES postgres_type_test.cc - postgres_copy_reader_test.cc postgresql_test.cc EXTRA_LINK_LIBS adbc_driver_common @@ -92,6 +91,7 @@ if(ADBC_BUILD_TESTS) ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-postgresql-test) endif() +add_subdirectory(copy) if(ADBC_BUILD_BENCHMARKS) find_package(benchmark REQUIRED) diff --git a/c/driver/postgresql/copy/CMakeLists.txt b/c/driver/postgresql/copy/CMakeLists.txt new file mode 100644 index 0000000000..c79f7c0cf4 --- /dev/null +++ b/c/driver/postgresql/copy/CMakeLists.txt @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +if(ADBC_TEST_LINKAGE STREQUAL "shared") + set(TEST_LINK_LIBS adbc_driver_postgresql_shared) +else() + set(TEST_LINK_LIBS adbc_driver_postgresql_static) +endif() + +if(ADBC_BUILD_TESTS) + add_test_case(driver_postgresql_copy_test + PREFIX + adbc + EXTRA_LABELS + driver-postgresql + SOURCES + postgres_copy_reader_test.cc + postgres_copy_writer_test.cc + EXTRA_LINK_LIBS + adbc_driver_common + adbc_validation + nanoarrow + ${TEST_LINK_LIBS}) + target_compile_features(adbc-driver-postgresql-copy-test PRIVATE cxx_std_17) + target_include_directories(adbc-driver-postgresql-copy-test SYSTEM + PRIVATE ${REPOSITORY_ROOT} + ${REPOSITORY_ROOT}/c/ + ${LIBPQ_INCLUDE_DIRS} + ${REPOSITORY_ROOT}/c/vendor + ${REPOSITORY_ROOT}/c/driver) + adbc_configure_target(adbc-driver-postgresql-copy-test) +endif() diff --git a/c/driver/postgresql/copy/postgres_copy_reader_test.cc b/c/driver/postgresql/copy/postgres_copy_reader_test.cc new file mode 100644 index 0000000000..55a61b27ed --- /dev/null +++ b/c/driver/postgresql/copy/postgres_copy_reader_test.cc @@ -0,0 +1,697 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include + +#include "postgresql/copy/reader.h" +#include "postgres_copy_test_common.h" + +namespace adbcpq { + +class PostgresCopyStreamTester { + public: + ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(reader_.Init(root_type)); + NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error)); + NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error)); + return NANOARROW_OK; + } + + ArrowErrorCode ReadAll(ArrowBufferView* data, ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(reader_.ReadHeader(data, error)); + + int result; + do { + result = reader_.ReadRecord(data, error); + } while (result == NANOARROW_OK); + + return result; + } + + void GetSchema(ArrowSchema* out) { reader_.GetSchema(out); } + + ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error = nullptr) { + return reader_.GetArray(out, error); + } + + private: + PostgresCopyStreamReader reader_; +}; + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBoolean; + data.size_bytes = sizeof(kTestPgCopyBoolean); + + auto col_type = PostgresType(PostgresTypeId::kBool); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean, sizeof(kTestPgCopyBoolean)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + const uint8_t* validity = + reinterpret_cast(array->children[0]->buffers[0]); + const uint8_t* data_buffer = + reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + 
ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_TRUE(ArrowBitGet(data_buffer, 0)); + ASSERT_FALSE(ArrowBitGet(data_buffer, 1)); + ASSERT_FALSE(ArrowBitGet(data_buffer, 2)); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopySmallInt; + data.size_bytes = sizeof(kTestPgCopySmallInt); + + auto col_type = PostgresType(PostgresTypeId::kInt2); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopySmallInt, sizeof(kTestPgCopySmallInt)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadInteger) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyInteger; + data.size_bytes = sizeof(kTestPgCopyInteger); + + auto col_type = PostgresType(PostgresTypeId::kInt4); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), 
ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInteger, sizeof(kTestPgCopyInteger)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBigInt; + data.size_bytes = sizeof(kTestPgCopyBigInt); + + auto col_type = PostgresType(PostgresTypeId::kInt8); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBigInt, sizeof(kTestPgCopyBigInt)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + 
ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_EQ(data_buffer[3], 123); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyReal; + data.size_bytes = sizeof(kTestPgCopyReal); + + auto col_type = PostgresType(PostgresTypeId::kFloat4); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyReal, sizeof(kTestPgCopyReal)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_FLOAT_EQ(data_buffer[0], -123.456); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_FLOAT_EQ(data_buffer[3], 123.456); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadDoublePrecision) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyDoublePrecision; + data.size_bytes = sizeof(kTestPgCopyDoublePrecision); + + auto col_type = PostgresType(PostgresTypeId::kFloat8); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + 
ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDoublePrecision, + sizeof(kTestPgCopyDoublePrecision)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_DOUBLE_EQ(data_buffer[0], -123.456); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 1); + ASSERT_DOUBLE_EQ(data_buffer[3], 123.456); + ASSERT_EQ(data_buffer[4], 0); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadDate) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyDate; + data.size_bytes = sizeof(kTestPgCopyDate); + + auto col_type = PostgresType(PostgresTypeId::kDate); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDate, sizeof(kTestPgCopyDate)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + 
ASSERT_EQ(data_buffer[0], -25567); + ASSERT_EQ(data_buffer[1], 47482); +} + + +// For full coverage, ensure that this contains NUMERIC examples that: +// - Have >= four zeroes to the left of the decimal point +// - Have >= four zeroes to the right of the decimal point +// - Include special values (nan, -inf, inf, NULL) +// - Have >= four trailing zeroes to the right of the decimal point +// - Have >= four leading zeroes before the first digit to the right of the decimal point +// - Is < 0 (negative) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (1000000), ('0.00001234'), +// ('1.0000'), (-123.456), (123.456), ('nan'), ('-inf'), ('inf'), (NULL)) AS drvd(col)) TO +// STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumeric[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xf0, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xd0, 0x00, 0x00, 0x20, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyNumeric; + data.size_bytes = sizeof(kTestPgCopyNumeric); + + auto col_type = PostgresType(PostgresTypeId::kNumeric); + PostgresType input_type(PostgresTypeId::kRecord); + 
input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyNumeric, sizeof(kTestPgCopyNumeric)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 9); + ASSERT_EQ(array->n_children, 1); + + nanoarrow::UniqueSchema schema; + tester.GetSchema(schema.get()); + + nanoarrow::UniqueArrayView array_view; + ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), + NANOARROW_OK); + ASSERT_EQ(array_view->children[0]->storage_type, NANOARROW_TYPE_STRING); + ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); + + auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_TRUE(ArrowBitGet(validity, 4)); + ASSERT_TRUE(ArrowBitGet(validity, 5)); + ASSERT_TRUE(ArrowBitGet(validity, 6)); + ASSERT_TRUE(ArrowBitGet(validity, 7)); + ASSERT_FALSE(ArrowBitGet(validity, 8)); + + struct ArrowStringView item; + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 0); + EXPECT_EQ(std::string(item.data, item.size_bytes), "1000000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 1); + EXPECT_EQ(std::string(item.data, item.size_bytes), "0.00001234"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 2); + EXPECT_EQ(std::string(item.data, item.size_bytes), "1.0000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 3); + EXPECT_EQ(std::string(item.data, item.size_bytes), "-123.456"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 4); + EXPECT_EQ(std::string(item.data, item.size_bytes), "123.456"); + item = 
ArrowArrayViewGetStringUnsafe(array_view->children[0], 5); + EXPECT_EQ(std::string(item.data, item.size_bytes), "nan"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 6); + EXPECT_EQ(std::string(item.data, item.size_bytes), "-inf"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 7); + EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadTimestamp) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyTimestamp; + data.size_bytes = sizeof(kTestPgCopyTimestamp); + + auto col_type = PostgresType(PostgresTypeId::kTimestamp); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyTimestamp, sizeof(kTestPgCopyTimestamp)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 3)); + + ASSERT_EQ(data_buffer[0], -2208943504000000); + ASSERT_EQ(data_buffer[1], 4102490096000000); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadInterval) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyInterval; + data.size_bytes = sizeof(kTestPgCopyInterval); + + auto col_type = PostgresType(PostgresTypeId::kInterval); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + 
ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInterval, sizeof(kTestPgCopyInterval)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + nanoarrow::UniqueSchema schema; + tester.GetSchema(schema.get()); + + nanoarrow::UniqueArrayView array_view; + ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), + NANOARROW_OK); + ASSERT_EQ(array_view->children[0]->storage_type, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); + + auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + struct ArrowInterval interval; + ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); + ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 0, &interval); + ASSERT_EQ(interval.months, -1); + ASSERT_EQ(interval.days, -2); + ASSERT_EQ(interval.ns, -4000000000); + ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 1, &interval); + ASSERT_EQ(interval.months, 1); + ASSERT_EQ(interval.days, 2); + ASSERT_EQ(interval.ns, 4000000000); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyText; + data.size_bytes = sizeof(kTestPgCopyText); + + auto col_type = PostgresType(PostgresTypeId::kText); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyText, sizeof(kTestPgCopyText)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + 
ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto offsets = reinterpret_cast(array->children[0]->buffers[1]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 3); + ASSERT_EQ(offsets[2], 7); + ASSERT_EQ(offsets[3], 7); + + ASSERT_EQ(std::string(data_buffer + 0, 3), "abc"); + ASSERT_EQ(std::string(data_buffer + 3, 4), "1234"); +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyReadBinary) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyBinary; + data.size_bytes = sizeof(kTestPgCopyBinary); + + auto col_type = PostgresType(PostgresTypeId::kBytea); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBinary, sizeof(kTestPgCopyBinary)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 5); + ASSERT_EQ(array->n_children, 1); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto offsets = reinterpret_cast(array->children[0]->buffers[1]); + auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_FALSE(ArrowBitGet(validity, 4)); + + ASSERT_EQ(offsets[0], 0); + 
ASSERT_EQ(offsets[1], 0); + ASSERT_EQ(offsets[2], 2); + ASSERT_EQ(offsets[3], 6); + ASSERT_EQ(offsets[4], 8); + ASSERT_EQ(offsets[5], 8); + + ASSERT_EQ(data_buffer[0], 0x00); + ASSERT_EQ(data_buffer[1], 0x01); + ASSERT_EQ(data_buffer[2], 0x01); + ASSERT_EQ(data_buffer[3], 0x02); + ASSERT_EQ(data_buffer[4], 0x03); + ASSERT_EQ(data_buffer[5], 0x04); + ASSERT_EQ(data_buffer[6], 0xfe); + ASSERT_EQ(data_buffer[7], 0xff); +} + + +// COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, +// 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyIntegerArray[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0x85, 0x00, 0x00, 0x00, + 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, + 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyIntegerArray; + data.size_bytes = sizeof(kTestPgCopyIntegerArray); + + auto col_type = PostgresType(PostgresTypeId::kInt4).Array(); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyIntegerArray, + sizeof(kTestPgCopyIntegerArray)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray 
array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + ASSERT_EQ(array->children[0]->n_children, 1); + ASSERT_EQ(array->children[0]->children[0]->length, 5); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto offsets = reinterpret_cast(array->children[0]->buffers[1]); + auto data_buffer = + reinterpret_cast(array->children[0]->children[0]->buffers[1]); + ASSERT_NE(validity, nullptr); + ASSERT_NE(data_buffer, nullptr); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 2); + ASSERT_EQ(offsets[2], 5); + ASSERT_EQ(offsets[3], 5); + + ASSERT_EQ(data_buffer[0], -123); + ASSERT_EQ(data_buffer[1], -1); + ASSERT_EQ(data_buffer[2], 0); + ASSERT_EQ(data_buffer[3], 1); + ASSERT_EQ(data_buffer[4], 123); +} + +// CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); +// COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), +// ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyCustomRecord[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x00, 0x7b, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x7c, 0x8c, + 0x9f, 0xbe, 0x76, 0xc8, 0xb4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x75, 0x9a, 0xd9, + 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyCustomRecord; 
+ data.size_bytes = sizeof(kTestPgCopyCustomRecord); + + auto col_type = PostgresType(PostgresTypeId::kRecord); + col_type.AppendChild("nested1", PostgresType(PostgresTypeId::kInt4)); + col_type.AppendChild("nested2", PostgresType(PostgresTypeId::kFloat8)); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyCustomRecord, + sizeof(kTestPgCopyCustomRecord)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 3); + ASSERT_EQ(array->n_children, 1); + ASSERT_EQ(array->children[0]->n_children, 2); + ASSERT_EQ(array->children[0]->children[0]->length, 3); + ASSERT_EQ(array->children[0]->children[1]->length, 3); + + auto validity = reinterpret_cast(array->children[0]->buffers[0]); + auto data_buffer1 = + reinterpret_cast(array->children[0]->children[0]->buffers[1]); + auto data_buffer2 = + reinterpret_cast(array->children[0]->children[1]->buffers[1]); + + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_FALSE(ArrowBitGet(validity, 2)); + + ASSERT_EQ(data_buffer1[0], 123); + ASSERT_EQ(data_buffer1[1], 12); + ASSERT_EQ(data_buffer1[2], 0); + + ASSERT_DOUBLE_EQ(data_buffer2[0], 456.789); + ASSERT_DOUBLE_EQ(data_buffer2[1], 345.678); + ASSERT_DOUBLE_EQ(data_buffer2[2], 0); +} + +} // namespace adbcpq diff --git a/c/driver/postgresql/copy/postgres_copy_test_common.h b/c/driver/postgresql/copy/postgres_copy_test_common.h new file mode 100644 index 0000000000..cdc256d77a --- /dev/null +++ b/c/driver/postgresql/copy/postgres_copy_test_common.h @@ -0,0 +1,124 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +namespace adbcpq { + +// COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS +// drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyBoolean[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + +// COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopySmallInt[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xff, 0xff, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyInteger[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, + 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + +// COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), +// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyBigInt[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), +// (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyReal[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xc2, 0xf6, 0xe9, + 0x79, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xbf, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, + 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), +// (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyDoublePrecision[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 
0x00, 0x00, 0x00, 0x08, 0xc0, 0x5e, 0xdd, + 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xbf, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x3f, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, + 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// Binary COPY payload for a single DATE column with three rows: two 4-byte +// big-endian day counts, -36524 and 36525 (relative to the PostgreSQL date +// epoch 2000-01-01, i.e. 1900-01-01 and 2100-01-01), followed by a NULL +// (field length -1) and the 0xff 0xff trailer. +// NOTE(review): presumably produced by a COPY (SELECT CAST("col" AS DATE) ...) +// TO STDOUT WITH (FORMAT binary) query like the sibling fixtures -- confirm. +static uint8_t kTestPgCopyDate[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, + 0x71, 0x54, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x8e, 0xad, 0x00, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST(col AS TIMESTAMP) FROM ( VALUES ('1900-01-01 12:34:56'), +// ('2100-01-01 12:34:56'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); +static uint8_t kTestPgCopyTimestamp[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x08, 0xff, 0xf4, 0xc9, 0xf9, 0x07, 0xe5, 0x9c, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0b, 0x36, 0x30, 0x2d, 0xa5, + 0xfc, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-1 months -2 days -4 seconds'), +// ('1 months 2 days 4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); +static uint8_t kTestPgCopyInterval[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), 
('1234'), +// (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyText[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, + 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +// COPY (SELECT CAST("col" AS BYTEA) AS "col" FROM ( VALUES (''), ('\x0001'), +// ('\x01020304'), ('\xFEFF'), (NULL)) AS drvd("col")) TO STDOUT +// WITH (FORMAT binary); +static uint8_t kTestPgCopyBinary[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, + 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + +} // namespace adbcpq diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc b/c/driver/postgresql/copy/postgres_copy_writer_test.cc new file mode 100644 index 0000000000..2a33b47776 --- /dev/null +++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc @@ -0,0 +1,630 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include +#include + +#include "postgresql/copy/writer.h" +#include "validation/adbc_validation_util.h" +#include "postgres_copy_test_common.h" + +namespace adbcpq { + +class PostgresCopyStreamWriteTester { + public: + ArrowErrorCode Init(struct ArrowSchema* schema, struct ArrowArray* array, + struct ArrowError* error = nullptr) { + NANOARROW_RETURN_NOT_OK(writer_.Init(schema)); + NANOARROW_RETURN_NOT_OK(writer_.InitFieldWriters(error)); + NANOARROW_RETURN_NOT_OK(writer_.SetArray(array)); + return NANOARROW_OK; + } + + ArrowErrorCode WriteAll(struct ArrowError* error) { + NANOARROW_RETURN_NOT_OK(writer_.WriteHeader(error)); + + int result; + do { + result = writer_.WriteRecord(error); + } while (result == NANOARROW_OK); + + return result; + } + + ArrowErrorCode WriteArray(struct ArrowArray* array, struct ArrowError* error) { + writer_.SetArray(array); + int result; + do { + result = writer_.WriteRecord(error); + } while (result == NANOARROW_OK); + + return result; + } + + const struct ArrowBuffer& WriteBuffer() const { return writer_.WriteBuffer(); } + + void Rewind() { writer_.Rewind(); } + + private: + PostgresCopyStreamWriter writer_; +}; + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteBoolean) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + adbc_validation::Handle buffer; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BOOL}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {true, false, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be 
transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyBoolean) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyBoolean[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt8) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT8}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt16) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT16}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = 
sizeof(kTestPgCopySmallInt) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt32) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyInteger) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt64) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT64}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyBigInt) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + 
ASSERT_EQ(buf.data[i], kTestPgCopyBigInt[i]); + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteReal) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_FLOAT}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123.456, -1, 1, 123.456, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyReal) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyReal[i]) << " mismatch at index: " << i; + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteDoublePrecision) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DOUBLE}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123.456, -1, 1, 123.456, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyDoublePrecision) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyDoublePrecision[i]); + } +} + 
+TEST(PostgresCopyUtilsTest, PostgresCopyWriteDate) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DATE32}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-25567, 47482, std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyDate) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyDate[i]); + } +} + +// This buffer is similar to the read variant above but removes special values +// nan, ±inf as they are not supported via the Arrow Decimal types +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), +// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) +// TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericWrite[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, + 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; + 
+TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 8; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal5; + + ArrowDecimalInit(&decimal1, size, 19, 8); + ArrowDecimalSetInt(&decimal1, -12345600000); + ArrowDecimalInit(&decimal2, size, 19, 8); + ArrowDecimalSetInt(&decimal2, 1234); + ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalSetInt(&decimal3, 100000000); + ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalSetInt(&decimal4, 12345600000); + ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalSetInt(&decimal5, 100000000000000); + + const std::vector> values = { + std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], + type, precision, scale), 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]) << " at position " << i; + } +} + +using 
TimestampTestParamType = std::tuple>>; + +class PostgresCopyWriteTimestampTest : public testing::TestWithParam< + TimestampTestParamType> { +}; + +TEST_P(PostgresCopyWriteTimestampTest, WritesProperBufferValues) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + + TimestampTestParamType parameters = GetParam(); + enum ArrowTimeUnit unit = std::get<0>(parameters); + const char* timezone = std::get<1>(parameters); + + const std::vector> values = std::get<2>(parameters); + + ArrowSchemaInit(&schema.value); + ArrowSchemaSetTypeStruct(&schema.value, 1); + ArrowSchemaSetTypeDateTime(schema->children[0], + NANOARROW_TYPE_TIMESTAMP, + unit, + timezone); + ArrowSchemaSetName(schema->children[0], "col"); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, + &array.value, + &na_error, + values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyTimestamp) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyTimestamp[i]); + } +} + +static const std::vector ts_values { + {NANOARROW_TIME_UNIT_SECOND, nullptr, + {-2208943504, 4102490096, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, nullptr, + {-2208943504000, 4102490096000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, nullptr, + {-2208943504000000, 4102490096000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, nullptr, + {-2208943504000000000, 4102490096000000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_SECOND, "UTC", + {-2208943504, 4102490096, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, "UTC", + {-2208943504000, 4102490096000, std::nullopt}}, + 
{NANOARROW_TIME_UNIT_MICRO, "UTC", + {-2208943504000000, 4102490096000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, "UTC", + {-2208943504000000000, 4102490096000000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_SECOND, "America/New_York", + {-2208943504, 4102490096, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, "America/New_York", + {-2208943504000, 4102490096000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, "America/New_York", + {-2208943504000000, 4102490096000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, "America/New_York", + {-2208943504000000000, 4102490096000000000, std::nullopt}}, +}; + +INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteTimestamp, + PostgresCopyWriteTimestampTest, + testing::ValuesIn(ts_values)); + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteInterval) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + const enum ArrowType type = NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO; + // each interval below is populated as months, days, ns + struct ArrowInterval neg_interval; + struct ArrowInterval pos_interval; + + ArrowIntervalInit(&neg_interval, type); + ArrowIntervalInit(&pos_interval, type); + + neg_interval.months = -1; + neg_interval.days = -2; + neg_interval.ns = -4000000000; + + pos_interval.months = 1; + pos_interval.days = 2; + pos_interval.ns = 4000000000; + + const std::vector> values = { + &neg_interval, &pos_interval, std::nullopt}; + + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), ADBC_STATUS_OK); + + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = 
sizeof(kTestPgCopyInterval) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInterval[i]); + } +} + +// Writing a DURATION from NANOARROW produces INTERVAL in postgres without day/month +// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-4 seconds'), +// ('4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); +static uint8_t kTestPgCopyDuration[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff}; +using DurationTestParamType = std::tuple>>; + +class PostgresCopyWriteDurationTest : public testing::TestWithParam< + DurationTestParamType> {}; + +TEST_P(PostgresCopyWriteDurationTest, WritesProperBufferValues) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + const enum ArrowType type = NANOARROW_TYPE_DURATION; + + DurationTestParamType parameters = GetParam(); + enum ArrowTimeUnit unit = std::get<0>(parameters); + const std::vector> values = std::get<1>(parameters); + + ArrowSchemaInit(&schema.value); + ArrowSchemaSetTypeStruct(&schema.value, 1); + ArrowSchemaSetTypeDateTime(schema->children[0], type, unit, nullptr); + ArrowSchemaSetName(schema->children[0], "col"); + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via 
PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyDuration) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyDuration[i]); + } +} + +static const std::vector duration_params { + {NANOARROW_TIME_UNIT_SECOND, {-4, 4, std::nullopt}}, + {NANOARROW_TIME_UNIT_MILLI, {-4000, 4000, std::nullopt}}, + {NANOARROW_TIME_UNIT_MICRO, {-4000000, 4000000, std::nullopt}}, + {NANOARROW_TIME_UNIT_NANO, {-4000000000, 4000000000, std::nullopt}}, +}; + +INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteDuration, + PostgresCopyWriteDurationTest, + testing::ValuesIn(duration_params)); + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteString) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_STRING}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteLargeString) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ( + adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_LARGE_STRING}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch( + &schema.value, &array.value, &na_error, {"abc", 
"1234", std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); + } +} + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteBinary) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BINARY}}), + ADBC_STATUS_OK); + ASSERT_EQ(adbc_validation::MakeBatch>( + &schema.value, &array.value, &na_error, + { + std::vector{}, + std::vector{std::byte{0x00}, std::byte{0x01}}, + std::vector{ + std::byte{0x01}, std::byte{0x02}, std::byte{0x03}, std::byte{0x04} + }, + std::vector{std::byte{0xfe}, std::byte{0xff}}, + std::nullopt}), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + constexpr size_t buf_size = sizeof(kTestPgCopyBinary) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyBinary[i]) << "failure at index " << i; + } +} + + +TEST(PostgresCopyUtilsTest, PostgresCopyWriteMultiBatch) { + // Regression test for https://github.com/apache/arrow-adbc/issues/1310 + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + 
ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), + NANOARROW_OK); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, + {-123, -1, 1, 123, std::nullopt}), + NANOARROW_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + struct ArrowBuffer buf = tester.WriteBuffer(); + // The last 2 bytes of a message can be transmitted via PQputCopyData + // so no need to test those bytes from the Writer + size_t buf_size = sizeof(kTestPgCopyInteger) - 2; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); + } + + tester.Rewind(); + ASSERT_EQ(tester.WriteArray(&array.value, nullptr), ENODATA); + + buf = tester.WriteBuffer(); + // Ignore the header and footer + buf_size = sizeof(kTestPgCopyInteger) - 21; + ASSERT_EQ(buf.size_bytes, buf_size); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i + 19]); + } +} + +} // namespace adbcpq diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h index 8ba0568ad9..cf63c13682 100644 --- a/c/driver/postgresql/copy/reader.h +++ b/c/driver/postgresql/copy/reader.h @@ -27,8 +27,8 @@ #include #include "copy_common.h" -#include "../postgres_type.h" -#include "../postgres_util.h" +#include "postgresql/postgres_type.h" +#include "postgresql/postgres_util.h" namespace adbcpq { diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index e2ea32c0e2..4e8a882ac8 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -26,7 +27,7 @@ #include #include "copy_common.h" -#include "../postgres_util.h" +#include "postgresql/postgres_util.h" namespace adbcpq { diff --git a/c/driver/postgresql/postgres_copy_reader.h 
b/c/driver/postgresql/postgres_copy_reader.h deleted file mode 100644 index 669512fd1d..0000000000 --- a/c/driver/postgresql/postgres_copy_reader.h +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -// Windows -#define NOMINMAX - -#include - -#include "postgres_type.h" diff --git a/c/driver/postgresql/postgres_copy_reader_test.cc b/c/driver/postgresql/postgres_copy_reader_test.cc deleted file mode 100644 index 628afa28b2..0000000000 --- a/c/driver/postgresql/postgres_copy_reader_test.cc +++ /dev/null @@ -1,1380 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include - -#include -#include -#include - -#include "copy/reader.h" -#include "copy/writer.h" -#include "validation/adbc_validation_util.h" - -namespace adbcpq { - -class PostgresCopyStreamTester { - public: - ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(reader_.Init(root_type)); - NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error)); - NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error)); - return NANOARROW_OK; - } - - ArrowErrorCode ReadAll(ArrowBufferView* data, ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(reader_.ReadHeader(data, error)); - - int result; - do { - result = reader_.ReadRecord(data, error); - } while (result == NANOARROW_OK); - - return result; - } - - void GetSchema(ArrowSchema* out) { reader_.GetSchema(out); } - - ArrowErrorCode GetArray(ArrowArray* out, ArrowError* error = nullptr) { - return reader_.GetArray(out, error); - } - - private: - PostgresCopyStreamReader reader_; -}; - -class PostgresCopyStreamWriteTester { - public: - ArrowErrorCode Init(struct ArrowSchema* schema, struct ArrowArray* array, - struct ArrowError* error = nullptr) { - NANOARROW_RETURN_NOT_OK(writer_.Init(schema)); - NANOARROW_RETURN_NOT_OK(writer_.InitFieldWriters(error)); - NANOARROW_RETURN_NOT_OK(writer_.SetArray(array)); - return NANOARROW_OK; - } - - ArrowErrorCode WriteAll(struct ArrowError* error) { - NANOARROW_RETURN_NOT_OK(writer_.WriteHeader(error)); - - int result; - do { - result = writer_.WriteRecord(error); - } while (result == NANOARROW_OK); 
- - return result; - } - - ArrowErrorCode WriteArray(struct ArrowArray* array, struct ArrowError* error) { - writer_.SetArray(array); - int result; - do { - result = writer_.WriteRecord(error); - } while (result == NANOARROW_OK); - - return result; - } - - const struct ArrowBuffer& WriteBuffer() const { return writer_.WriteBuffer(); } - - void Rewind() { writer_.Rewind(); } - - private: - PostgresCopyStreamWriter writer_; -}; - -// COPY (SELECT CAST("col" AS BOOLEAN) AS "col" FROM ( VALUES (TRUE), (FALSE), (NULL)) AS -// drvd("col")) TO STDOUT; -static uint8_t kTestPgCopyBoolean[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBoolean) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBoolean; - data.size_bytes = sizeof(kTestPgCopyBoolean); - - auto col_type = PostgresType(PostgresTypeId::kBool); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBoolean, sizeof(kTestPgCopyBoolean)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - const uint8_t* validity = - reinterpret_cast(array->children[0]->buffers[0]); - const uint8_t* data_buffer = - reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_TRUE(ArrowBitGet(data_buffer, 0)); - 
ASSERT_FALSE(ArrowBitGet(data_buffer, 1)); - ASSERT_FALSE(ArrowBitGet(data_buffer, 2)); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteBoolean) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - adbc_validation::Handle buffer; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BOOL}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {true, false, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyBoolean) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyBoolean[i]); - } -} - -// COPY (SELECT CAST("col" AS SMALLINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopySmallInt[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xff, 0xff, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadSmallInt) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopySmallInt; - data.size_bytes = sizeof(kTestPgCopySmallInt); - - auto col_type = PostgresType(PostgresTypeId::kInt2); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - 
ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopySmallInt, sizeof(kTestPgCopySmallInt)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt8) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT8}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); - } -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt16) { - adbc_validation::Handle schema; - 
adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT16}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopySmallInt) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopySmallInt[i]); - } -} - -// COPY (SELECT CAST("col" AS INTEGER) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyInteger[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, - 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadInteger) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyInteger; - data.size_bytes = sizeof(kTestPgCopyInteger); - - auto col_type = PostgresType(PostgresTypeId::kInt4); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInteger, sizeof(kTestPgCopyInteger)); - 
ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt32) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyInteger) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); - } -} - -// COPY (SELECT CAST("col" AS BIGINT) AS "col" FROM ( VALUES (-123), (-1), (1), (123), -// (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyBigInt[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x85, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBigInt) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBigInt; - data.size_bytes = sizeof(kTestPgCopyBigInt); - - auto col_type = PostgresType(PostgresTypeId::kInt8); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBigInt, sizeof(kTestPgCopyBigInt)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_EQ(data_buffer[3], 123); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInt64) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT64}}), - ADBC_STATUS_OK); - 
ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyBigInt) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyBigInt[i]); - } -} - -// COPY (SELECT CAST("col" AS REAL) AS "col" FROM ( VALUES (-123.456), (-1), (1), -// (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyReal[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xc2, 0xf6, 0xe9, - 0x79, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xbf, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x04, 0x3f, 0x80, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x42, - 0xf6, 0xe9, 0x79, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadReal) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyReal; - data.size_bytes = sizeof(kTestPgCopyReal); - - auto col_type = PostgresType(PostgresTypeId::kFloat4); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyReal, sizeof(kTestPgCopyReal)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - 
auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_FLOAT_EQ(data_buffer[0], -123.456); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_FLOAT_EQ(data_buffer[3], 123.456); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteReal) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_FLOAT}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123.456, -1, 1, 123.456, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyReal) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyReal[i]) << " mismatch at index: " << i; - } -} - -// COPY (SELECT CAST("col" AS DOUBLE PRECISION) AS "col" FROM ( VALUES (-123.456), (-1), -// (1), (123.456), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyDoublePrecision[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0xc0, 0x5e, 0xdd, - 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 
0x01, 0x00, 0x00, 0x00, 0x08, 0xbf, 0xf0, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x3f, 0xf0, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x40, 0x5e, 0xdd, - 0x2f, 0x1a, 0x9f, 0xbe, 0x77, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadDoublePrecision) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyDoublePrecision; - data.size_bytes = sizeof(kTestPgCopyDoublePrecision); - - auto col_type = PostgresType(PostgresTypeId::kFloat8); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDoublePrecision, - sizeof(kTestPgCopyDoublePrecision)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_DOUBLE_EQ(data_buffer[0], -123.456); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 1); - ASSERT_DOUBLE_EQ(data_buffer[3], 123.456); - ASSERT_EQ(data_buffer[4], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteDoublePrecision) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DOUBLE}}), - ADBC_STATUS_OK); - 
ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123.456, -1, 1, 123.456, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyDoublePrecision) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyDoublePrecision[i]); - } -} - -static uint8_t kTestPgCopyDate[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, - 0x71, 0x54, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x8e, 0xad, 0x00, 0x01, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadDate) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyDate; - data.size_bytes = sizeof(kTestPgCopyDate); - - auto col_type = PostgresType(PostgresTypeId::kDate); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyDate, sizeof(kTestPgCopyDate)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - 
ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(data_buffer[0], -25567); - ASSERT_EQ(data_buffer[1], 47482); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteDate) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_DATE32}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-25567, 47482, std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyDate) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyDate[i]); - } -} - - -// For full coverage, ensure that this contains NUMERIC examples that: -// - Have >= four zeroes to the left of the decimal point -// - Have >= four zeroes to the right of the decimal point -// - Include special values (nan, -inf, inf, NULL) -// - Have >= four trailing zeroes to the right of the decimal point -// - Have >= four leading zeroes before the first digit to the right of the decimal point -// - Is < 0 (negative) -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (1000000), ('0.00001234'), -// ('1.0000'), (-123.456), (123.456), ('nan'), ('-inf'), ('inf'), (NULL)) AS drvd(col)) TO -// STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyNumeric[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x01, 
0x00, 0x00, 0x00, 0x0a, 0x00, - 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0xf0, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x00, 0xd0, 0x00, 0x00, 0x20, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyNumeric; - data.size_bytes = sizeof(kTestPgCopyNumeric); - - auto col_type = PostgresType(PostgresTypeId::kNumeric); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyNumeric, sizeof(kTestPgCopyNumeric)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 9); - ASSERT_EQ(array->n_children, 1); - - nanoarrow::UniqueSchema schema; - tester.GetSchema(schema.get()); - - nanoarrow::UniqueArrayView array_view; - ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), - NANOARROW_OK); - ASSERT_EQ(array_view->children[0]->storage_type, NANOARROW_TYPE_STRING); - ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); - - auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - 
ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_TRUE(ArrowBitGet(validity, 4)); - ASSERT_TRUE(ArrowBitGet(validity, 5)); - ASSERT_TRUE(ArrowBitGet(validity, 6)); - ASSERT_TRUE(ArrowBitGet(validity, 7)); - ASSERT_FALSE(ArrowBitGet(validity, 8)); - - struct ArrowStringView item; - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 0); - EXPECT_EQ(std::string(item.data, item.size_bytes), "1000000"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 1); - EXPECT_EQ(std::string(item.data, item.size_bytes), "0.00001234"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 2); - EXPECT_EQ(std::string(item.data, item.size_bytes), "1.0000"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 3); - EXPECT_EQ(std::string(item.data, item.size_bytes), "-123.456"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 4); - EXPECT_EQ(std::string(item.data, item.size_bytes), "123.456"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 5); - EXPECT_EQ(std::string(item.data, item.size_bytes), "nan"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 6); - EXPECT_EQ(std::string(item.data, item.size_bytes), "-inf"); - item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 7); - EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); -} - -// This buffer is similar to the read variant above but removes special values -// nan, ±inf as they are not supported via the Arrow Decimal types -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), -// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) -// TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyNumericWrite[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 
0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, - 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; - constexpr int32_t size = 128; - constexpr int32_t precision = 38; - constexpr int32_t scale = 8; - - struct ArrowDecimal decimal1; - struct ArrowDecimal decimal2; - struct ArrowDecimal decimal3; - struct ArrowDecimal decimal4; - struct ArrowDecimal decimal5; - - ArrowDecimalInit(&decimal1, size, 19, 8); - ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, size, 19, 8); - ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 19, 8); - ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, size, 19, 8); - ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, size, 19, 8); - ArrowDecimalSetInt(&decimal5, 100000000000000); - - const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; - - ArrowSchemaInit(&schema.value); - ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); - ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], - type, precision, scale), 0); - ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, - &na_error, values), ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), 
ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]) << " at position " << i; - } -} - -// COPY (SELECT CAST(col AS TIMESTAMP) FROM ( VALUES ('1900-01-01 12:34:56'), -// ('2100-01-01 12:34:56'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); -static uint8_t kTestPgCopyTimestamp[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, - 0x00, 0x08, 0xff, 0xf4, 0xc9, 0xf9, 0x07, 0xe5, 0x9c, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0b, 0x36, 0x30, 0x2d, 0xa5, - 0xfc, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadTimestamp) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyTimestamp; - data.size_bytes = sizeof(kTestPgCopyTimestamp); - - auto col_type = PostgresType(PostgresTypeId::kTimestamp); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyTimestamp, sizeof(kTestPgCopyTimestamp)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - 
ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 3)); - - ASSERT_EQ(data_buffer[0], -2208943504000000); - ASSERT_EQ(data_buffer[1], 4102490096000000); -} - -using TimestampTestParamType = std::tuple>>; - -class PostgresCopyWriteTimestampTest : public testing::TestWithParam< - TimestampTestParamType> { -}; - -TEST_P(PostgresCopyWriteTimestampTest, WritesProperBufferValues) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - - TimestampTestParamType parameters = GetParam(); - enum ArrowTimeUnit unit = std::get<0>(parameters); - const char* timezone = std::get<1>(parameters); - - const std::vector> values = std::get<2>(parameters); - - ArrowSchemaInit(&schema.value); - ArrowSchemaSetTypeStruct(&schema.value, 1); - ArrowSchemaSetTypeDateTime(schema->children[0], - NANOARROW_TYPE_TIMESTAMP, - unit, - timezone); - ArrowSchemaSetName(schema->children[0], "col"); - ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, - &array.value, - &na_error, - values), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyTimestamp) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyTimestamp[i]); - } -} - -static const std::vector ts_values { - {NANOARROW_TIME_UNIT_SECOND, nullptr, - {-2208943504, 4102490096, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, nullptr, - {-2208943504000, 4102490096000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, nullptr, - {-2208943504000000, 4102490096000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, nullptr, - {-2208943504000000000, 4102490096000000000, 
std::nullopt}}, - {NANOARROW_TIME_UNIT_SECOND, "UTC", - {-2208943504, 4102490096, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, "UTC", - {-2208943504000, 4102490096000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, "UTC", - {-2208943504000000, 4102490096000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, "UTC", - {-2208943504000000000, 4102490096000000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_SECOND, "America/New_York", - {-2208943504, 4102490096, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, "America/New_York", - {-2208943504000, 4102490096000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, "America/New_York", - {-2208943504000000, 4102490096000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, "America/New_York", - {-2208943504000000000, 4102490096000000000, std::nullopt}}, -}; - -INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteTimestamp, - PostgresCopyWriteTimestampTest, - testing::ValuesIn(ts_values)); - -// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-1 months -2 days -4 seconds'), -// ('1 months 2 days 4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); -static uint8_t kTestPgCopyInterval[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, - 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadInterval) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyInterval; - data.size_bytes = sizeof(kTestPgCopyInterval); - - auto col_type = PostgresType(PostgresTypeId::kInterval); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - 
ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInterval, sizeof(kTestPgCopyInterval)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - nanoarrow::UniqueSchema schema; - tester.GetSchema(schema.get()); - - nanoarrow::UniqueArrayView array_view; - ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), - NANOARROW_OK); - ASSERT_EQ(array_view->children[0]->storage_type, - NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); - ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); - - auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - struct ArrowInterval interval; - ArrowIntervalInit(&interval, NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); - ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 0, &interval); - ASSERT_EQ(interval.months, -1); - ASSERT_EQ(interval.days, -2); - ASSERT_EQ(interval.ns, -4000000000); - ArrowArrayViewGetIntervalUnsafe(array_view->children[0], 1, &interval); - ASSERT_EQ(interval.months, 1); - ASSERT_EQ(interval.days, 2); - ASSERT_EQ(interval.ns, 4000000000); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteInterval) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - const enum ArrowType type = NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO; - // values are days, months, ns - struct ArrowInterval neg_interval; - struct ArrowInterval pos_interval; - - ArrowIntervalInit(&neg_interval, type); - ArrowIntervalInit(&pos_interval, type); - - neg_interval.months = -1; - neg_interval.days = -2; - neg_interval.ns = -4000000000; - - pos_interval.months = 1; - pos_interval.days = 2; - pos_interval.ns = 4000000000; - - const std::vector> 
values = { - &neg_interval, &pos_interval, std::nullopt}; - - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", type}}), ADBC_STATUS_OK); - - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyInterval) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInterval[i]); - } -} - -// Writing a DURATION from NANOARROW produces INTERVAL in postgres without day/month -// COPY (SELECT CAST(col AS INTERVAL) FROM ( VALUES ('-4 seconds'), -// ('4 seconds'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); -static uint8_t kTestPgCopyDuration[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xc2, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0x09, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff}; -using DurationTestParamType = std::tuple>>; - -class PostgresCopyWriteDurationTest : public testing::TestWithParam< - DurationTestParamType> {}; - -TEST_P(PostgresCopyWriteDurationTest, WritesProperBufferValues) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - const enum ArrowType type = NANOARROW_TYPE_DURATION; - - DurationTestParamType parameters = GetParam(); - enum ArrowTimeUnit unit = std::get<0>(parameters); - const 
std::vector> values = std::get<1>(parameters); - - ArrowSchemaInit(&schema.value); - ArrowSchemaSetTypeStruct(&schema.value, 1); - ArrowSchemaSetTypeDateTime(schema->children[0], type, unit, nullptr); - ArrowSchemaSetName(schema->children[0], "col"); - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, values), ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyDuration) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyDuration[i]); - } -} - -static const std::vector duration_params { - {NANOARROW_TIME_UNIT_SECOND, {-4, 4, std::nullopt}}, - {NANOARROW_TIME_UNIT_MILLI, {-4000, 4000, std::nullopt}}, - {NANOARROW_TIME_UNIT_MICRO, {-4000000, 4000000, std::nullopt}}, - {NANOARROW_TIME_UNIT_NANO, {-4000000000, 4000000000, std::nullopt}}, -}; - -INSTANTIATE_TEST_SUITE_P(PostgresCopyWriteDuration, - PostgresCopyWriteDurationTest, - testing::ValuesIn(duration_params)); - -// COPY (SELECT CAST("col" AS TEXT) AS "col" FROM ( VALUES ('abc'), ('1234'), -// (NULL::text)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyText[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x61, 0x62, 0x63, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x31, 0x32, - 0x33, 0x34, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadText) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyText; - data.size_bytes = sizeof(kTestPgCopyText); - - auto col_type = 
PostgresType(PostgresTypeId::kText); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyText, sizeof(kTestPgCopyText)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto offsets = reinterpret_cast(array->children[0]->buffers[1]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 3); - ASSERT_EQ(offsets[2], 7); - ASSERT_EQ(offsets[3], 7); - - ASSERT_EQ(std::string(data_buffer + 0, 3), "abc"); - ASSERT_EQ(std::string(data_buffer + 3, 4), "1234"); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteString) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_STRING}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for 
(size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); - } -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteLargeString) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ( - adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_LARGE_STRING}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch( - &schema.value, &array.value, &na_error, {"abc", "1234", std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyText) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyText[i]); - } -} - -// COPY (SELECT CAST("col" AS BYTEA) AS "col" FROM ( VALUES (''), ('\x0001'), -// ('\x01020304'), ('\xFEFF'), (NULL)) AS drvd("col")) TO STDOUT -// WITH (FORMAT binary); -static uint8_t kTestPgCopyBinary[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, - 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, - 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadBinary) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyBinary; - data.size_bytes = sizeof(kTestPgCopyBinary); - - auto col_type = PostgresType(PostgresTypeId::kBytea); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - 
ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBinary, sizeof(kTestPgCopyBinary)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 5); - ASSERT_EQ(array->n_children, 1); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto offsets = reinterpret_cast(array->children[0]->buffers[1]); - auto data_buffer = reinterpret_cast(array->children[0]->buffers[2]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_TRUE(ArrowBitGet(validity, 2)); - ASSERT_TRUE(ArrowBitGet(validity, 3)); - ASSERT_FALSE(ArrowBitGet(validity, 4)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 0); - ASSERT_EQ(offsets[2], 2); - ASSERT_EQ(offsets[3], 6); - ASSERT_EQ(offsets[4], 8); - ASSERT_EQ(offsets[5], 8); - - ASSERT_EQ(data_buffer[0], 0x00); - ASSERT_EQ(data_buffer[1], 0x01); - ASSERT_EQ(data_buffer[2], 0x01); - ASSERT_EQ(data_buffer[3], 0x02); - ASSERT_EQ(data_buffer[4], 0x03); - ASSERT_EQ(data_buffer[5], 0x04); - ASSERT_EQ(data_buffer[6], 0xfe); - ASSERT_EQ(data_buffer[7], 0xff); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteBinary) { - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_BINARY}}), - ADBC_STATUS_OK); - ASSERT_EQ(adbc_validation::MakeBatch>( - &schema.value, &array.value, &na_error, - { - std::vector{}, - std::vector{std::byte{0x00}, std::byte{0x01}}, - std::vector{ - std::byte{0x01}, std::byte{0x02}, std::byte{0x03}, std::byte{0x04} - }, - std::vector{std::byte{0xfe}, std::byte{0xff}}, - std::nullopt}), - ADBC_STATUS_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), 
NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - const struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - constexpr size_t buf_size = sizeof(kTestPgCopyBinary) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyBinary[i]) << "failure at index " << i; - } -} - - -// COPY (SELECT CAST("col" AS INTEGER ARRAY) AS "col" FROM ( VALUES ('{-123, -1}'), ('{0, -// 1, 123}'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyIntegerArray[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0x85, 0x00, 0x00, 0x00, - 0x04, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x7b, 0x00, - 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { - ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyIntegerArray; - data.size_bytes = sizeof(kTestPgCopyIntegerArray); - - auto col_type = PostgresType(PostgresTypeId::kInt4).Array(); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyIntegerArray, - sizeof(kTestPgCopyIntegerArray)); - 
ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - ASSERT_EQ(array->children[0]->n_children, 1); - ASSERT_EQ(array->children[0]->children[0]->length, 5); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto offsets = reinterpret_cast(array->children[0]->buffers[1]); - auto data_buffer = - reinterpret_cast(array->children[0]->children[0]->buffers[1]); - ASSERT_NE(validity, nullptr); - ASSERT_NE(data_buffer, nullptr); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(offsets[0], 0); - ASSERT_EQ(offsets[1], 2); - ASSERT_EQ(offsets[2], 5); - ASSERT_EQ(offsets[3], 5); - - ASSERT_EQ(data_buffer[0], -123); - ASSERT_EQ(data_buffer[1], -1); - ASSERT_EQ(data_buffer[2], 0); - ASSERT_EQ(data_buffer[3], 1); - ASSERT_EQ(data_buffer[4], 123); -} - -// CREATE TYPE custom_record AS (nested1 integer, nested2 double precision); -// COPY (SELECT CAST("col" AS custom_record) AS "col" FROM ( VALUES ('(123, 456.789)'), -// ('(12, 345.678)'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT binary); -static uint8_t kTestPgCopyCustomRecord[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, - 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x00, 0x7b, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x7c, 0x8c, - 0x9f, 0xbe, 0x76, 0xc8, 0xb4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x00, 0x02, 0xbd, 0x00, 0x00, 0x00, 0x08, 0x40, 0x75, 0x9a, 0xd9, - 0x16, 0x87, 0x2b, 0x02, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { - 
ArrowBufferView data; - data.data.as_uint8 = kTestPgCopyCustomRecord; - data.size_bytes = sizeof(kTestPgCopyCustomRecord); - - auto col_type = PostgresType(PostgresTypeId::kRecord); - col_type.AppendChild("nested1", PostgresType(PostgresTypeId::kInt4)); - col_type.AppendChild("nested2", PostgresType(PostgresTypeId::kFloat8)); - PostgresType input_type(PostgresTypeId::kRecord); - input_type.AppendChild("col", col_type); - - PostgresCopyStreamTester tester; - ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); - ASSERT_EQ(tester.ReadAll(&data), ENODATA); - ASSERT_EQ(data.data.as_uint8 - kTestPgCopyCustomRecord, - sizeof(kTestPgCopyCustomRecord)); - ASSERT_EQ(data.size_bytes, 0); - - nanoarrow::UniqueArray array; - ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); - ASSERT_EQ(array->length, 3); - ASSERT_EQ(array->n_children, 1); - ASSERT_EQ(array->children[0]->n_children, 2); - ASSERT_EQ(array->children[0]->children[0]->length, 3); - ASSERT_EQ(array->children[0]->children[1]->length, 3); - - auto validity = reinterpret_cast(array->children[0]->buffers[0]); - auto data_buffer1 = - reinterpret_cast(array->children[0]->children[0]->buffers[1]); - auto data_buffer2 = - reinterpret_cast(array->children[0]->children[1]->buffers[1]); - - ASSERT_TRUE(ArrowBitGet(validity, 0)); - ASSERT_TRUE(ArrowBitGet(validity, 1)); - ASSERT_FALSE(ArrowBitGet(validity, 2)); - - ASSERT_EQ(data_buffer1[0], 123); - ASSERT_EQ(data_buffer1[1], 12); - ASSERT_EQ(data_buffer1[2], 0); - - ASSERT_DOUBLE_EQ(data_buffer2[0], 456.789); - ASSERT_DOUBLE_EQ(data_buffer2[1], 345.678); - ASSERT_DOUBLE_EQ(data_buffer2[2], 0); -} - -TEST(PostgresCopyUtilsTest, PostgresCopyWriteMultiBatch) { - // Regression test for https://github.com/apache/arrow-adbc/issues/1310 - adbc_validation::Handle schema; - adbc_validation::Handle array; - struct ArrowError na_error; - ASSERT_EQ(adbc_validation::MakeSchema(&schema.value, {{"col", NANOARROW_TYPE_INT32}}), - NANOARROW_OK); - 
ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, &na_error, - {-123, -1, 1, 123, std::nullopt}), - NANOARROW_OK); - - PostgresCopyStreamWriteTester tester; - ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); - ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); - - struct ArrowBuffer buf = tester.WriteBuffer(); - // The last 2 bytes of a message can be transmitted via PQputCopyData - // so no need to test those bytes from the Writer - size_t buf_size = sizeof(kTestPgCopyInteger) - 2; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i]); - } - - tester.Rewind(); - ASSERT_EQ(tester.WriteArray(&array.value, nullptr), ENODATA); - - buf = tester.WriteBuffer(); - // Ignore the header and footer - buf_size = sizeof(kTestPgCopyInteger) - 21; - ASSERT_EQ(buf.size_bytes, buf_size); - for (size_t i = 0; i < buf_size; i++) { - ASSERT_EQ(buf.data[i], kTestPgCopyInteger[i + 19]); - } -} - -} // namespace adbcpq From 8fff4406011b2f7e6b44da6973d3d43b1e77854b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 4 Jan 2024 17:58:52 -0500 Subject: [PATCH 3/9] R fixups --- r/adbcpostgresql/bootstrap.R | 3 ++- r/adbcpostgresql/src/.gitignore | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index 9bcc414988..b8e8f8d139 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -21,7 +21,8 @@ files_to_vendor <- c( "../../adbc.h", "../../c/driver/postgresql/postgres_util.h", "../../c/driver/postgresql/postgres_type.h", - "../../c/driver/postgresql/postgres_copy_reader.h", + "../../c/driver/postgresql/copy/reader.h", + "../../c/driver/postgresql/copy/writer.h", "../../c/driver/postgresql/statement.h", "../../c/driver/postgresql/statement.cc", "../../c/driver/postgresql/connection.h", diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore index 44d84da682..8207d8f627 
100644 --- a/r/adbcpostgresql/src/.gitignore +++ b/r/adbcpostgresql/src/.gitignore @@ -29,7 +29,8 @@ postgresql.cc statement.h statement.cc postgres_type.h -postgres_copy_reader.h +copy/reader.h +copy/writer.h postgres_util.h result_helper.h result_helper.cc From 7352a82dfd01d40512a762972066a96c7adb888a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 4 Jan 2024 18:43:31 -0500 Subject: [PATCH 4/9] fix R --- r/adbcpostgresql/bootstrap.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index b8e8f8d139..9676d5403f 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -66,6 +66,8 @@ if (all(file.exists(files_to_vendor))) { "src/options.h", "src/utils.c", "src/utils.h" + "src/reader.h", + "src/writer.h" ), c( "src/nanoarrow/nanoarrow.c", @@ -73,7 +75,9 @@ if (all(file.exists(files_to_vendor))) { "src/nanoarrow/nanoarrow.hpp", "src/common/options.h", "src/common/utils.c", - "src/common/utils.h" + "src/common/utils.h", + "src/copy/reader.h", + "src/copy/writer.h" ) ) cat("All files successfully copied to src/\n") From 0ff39c924bbe6e612137538dcb455d095e6600a2 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 4 Jan 2024 18:53:02 -0500 Subject: [PATCH 5/9] syntax fix --- r/adbcpostgresql/bootstrap.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index 9676d5403f..617d88d3ee 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -65,7 +65,7 @@ if (all(file.exists(files_to_vendor))) { "src/nanoarrow.hpp", "src/options.h", "src/utils.c", - "src/utils.h" + "src/utils.h", "src/reader.h", "src/writer.h" ), From f5596610ff596480b4086fb9512beee6dfe24c50 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 5 Jan 2024 16:45:46 -0500 Subject: [PATCH 6/9] move CMake back to parent directory --- c/driver/postgresql/CMakeLists.txt | 23 ++++++++++++- 
c/driver/postgresql/copy/CMakeLists.txt | 46 ------------------------- 2 files changed, 22 insertions(+), 47 deletions(-) delete mode 100644 c/driver/postgresql/copy/CMakeLists.txt diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index 166b4f5a6f..18eb773d6f 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -90,8 +90,29 @@ if(ADBC_BUILD_TESTS) ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-postgresql-test) + + add_test_case(driver_postgresql_copy_test + PREFIX + adbc + EXTRA_LABELS + driver-postgresql + SOURCES + copy/postgres_copy_reader_test.cc + copy/postgres_copy_writer_test.cc + EXTRA_LINK_LIBS + adbc_driver_common + adbc_validation + nanoarrow + ${TEST_LINK_LIBS}) + target_compile_features(adbc-driver-postgresql-copy-test PRIVATE cxx_std_17) + target_include_directories(adbc-driver-postgresql-copy-test SYSTEM + PRIVATE ${REPOSITORY_ROOT} + ${REPOSITORY_ROOT}/c/ + ${LIBPQ_INCLUDE_DIRS} + ${REPOSITORY_ROOT}/c/vendor + ${REPOSITORY_ROOT}/c/driver) + adbc_configure_target(adbc-driver-postgresql-copy-test) endif() -add_subdirectory(copy) if(ADBC_BUILD_BENCHMARKS) find_package(benchmark REQUIRED) diff --git a/c/driver/postgresql/copy/CMakeLists.txt b/c/driver/postgresql/copy/CMakeLists.txt deleted file mode 100644 index c79f7c0cf4..0000000000 --- a/c/driver/postgresql/copy/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(ADBC_TEST_LINKAGE STREQUAL "shared") - set(TEST_LINK_LIBS adbc_driver_postgresql_shared) -else() - set(TEST_LINK_LIBS adbc_driver_postgresql_static) -endif() - -if(ADBC_BUILD_TESTS) - add_test_case(driver_postgresql_copy_test - PREFIX - adbc - EXTRA_LABELS - driver-postgresql - SOURCES - postgres_copy_reader_test.cc - postgres_copy_writer_test.cc - EXTRA_LINK_LIBS - adbc_driver_common - adbc_validation - nanoarrow - ${TEST_LINK_LIBS}) - target_compile_features(adbc-driver-postgresql-copy-test PRIVATE cxx_std_17) - target_include_directories(adbc-driver-postgresql-copy-test SYSTEM - PRIVATE ${REPOSITORY_ROOT} - ${REPOSITORY_ROOT}/c/ - ${LIBPQ_INCLUDE_DIRS} - ${REPOSITORY_ROOT}/c/vendor - ${REPOSITORY_ROOT}/c/driver) - adbc_configure_target(adbc-driver-postgresql-copy-test) -endif() From 1c5907e4fba5916d2bbff7175ec590c0e5573cfc Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 5 Jan 2024 16:47:58 -0500 Subject: [PATCH 7/9] more R fixes? 
--- c/driver/postgresql/copy/reader.h | 4 ++-- c/driver/postgresql/copy/writer.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h index cf63c13682..8ba0568ad9 100644 --- a/c/driver/postgresql/copy/reader.h +++ b/c/driver/postgresql/copy/reader.h @@ -27,8 +27,8 @@ #include #include "copy_common.h" -#include "postgresql/postgres_type.h" -#include "postgresql/postgres_util.h" +#include "../postgres_type.h" +#include "../postgres_util.h" namespace adbcpq { diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index 4e8a882ac8..b04f370f77 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -27,7 +27,7 @@ #include #include "copy_common.h" -#include "postgresql/postgres_util.h" +#include "../postgres_util.h" namespace adbcpq { From e7c62f1680e81dde3687d65750ba55e137ab51a3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 6 Jan 2024 11:40:37 -0500 Subject: [PATCH 8/9] create copy dir --- r/adbcpostgresql/bootstrap.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index 617d88d3ee..29e6f15343 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -57,6 +57,10 @@ if (all(file.exists(files_to_vendor))) { ) ) + if (!dir.exists("src/copy")) { + dir.create("src/copy") + } + if (all(file.copy(files_to_vendor, "src"))) { file.rename( c( From 1d1f2928018c998a3500da6bb1c1223063d01174 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 9 Jan 2024 22:16:53 -0400 Subject: [PATCH 9/9] maybe fix vendoring --- r/adbcpostgresql/bootstrap.R | 3 +++ r/adbcpostgresql/src/.gitignore | 2 -- r/adbcpostgresql/src/copy/.gitignore | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 r/adbcpostgresql/src/copy/.gitignore diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R index 
29e6f15343..d69a14bbe4 100644 --- a/r/adbcpostgresql/bootstrap.R +++ b/r/adbcpostgresql/bootstrap.R @@ -21,6 +21,7 @@ files_to_vendor <- c( "../../adbc.h", "../../c/driver/postgresql/postgres_util.h", "../../c/driver/postgresql/postgres_type.h", + "../../c/driver/postgresql/copy/copy_common.h", "../../c/driver/postgresql/copy/reader.h", "../../c/driver/postgresql/copy/writer.h", "../../c/driver/postgresql/statement.h", @@ -70,6 +71,7 @@ if (all(file.exists(files_to_vendor))) { "src/options.h", "src/utils.c", "src/utils.h", + "src/copy_common.h", "src/reader.h", "src/writer.h" ), @@ -80,6 +82,7 @@ if (all(file.exists(files_to_vendor))) { "src/common/options.h", "src/common/utils.c", "src/common/utils.h", + "src/copy/copy_common.h", "src/copy/reader.h", "src/copy/writer.h" ) diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore index 8207d8f627..8a47096b86 100644 --- a/r/adbcpostgresql/src/.gitignore +++ b/r/adbcpostgresql/src/.gitignore @@ -29,8 +29,6 @@ postgresql.cc statement.h statement.cc postgres_type.h -copy/reader.h -copy/writer.h postgres_util.h result_helper.h result_helper.cc diff --git a/r/adbcpostgresql/src/copy/.gitignore b/r/adbcpostgresql/src/copy/.gitignore new file mode 100644 index 0000000000..53201cb854 --- /dev/null +++ b/r/adbcpostgresql/src/copy/.gitignore @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +*.h