diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c index ab3e337175..0af57027a5 100644 --- a/c/vendor/nanoarrow/nanoarrow.c +++ b/c/vendor/nanoarrow/nanoarrow.c @@ -28,7 +28,7 @@ const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; } int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; } -int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) { +ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) { if (error == NULL) { return NANOARROW_OK; } @@ -49,14 +49,6 @@ int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) { } } -const char* ArrowErrorMessage(struct ArrowError* error) { - if (error == NULL) { - return ""; - } else { - return error->message; - } -} - void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY; layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL; @@ -200,11 +192,15 @@ void ArrowFree(void* ptr) { free(ptr); } static uint8_t* ArrowBufferAllocatorMallocReallocate( struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, int64_t new_size) { + NANOARROW_UNUSED(allocator); + NANOARROW_UNUSED(old_size); return (uint8_t*)ArrowRealloc(ptr, new_size); } static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) { + NANOARROW_UNUSED(allocator); + NANOARROW_UNUSED(size); ArrowFree(ptr); } @@ -218,6 +214,10 @@ struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) { static uint8_t* ArrowBufferAllocatorNeverReallocate( struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size, int64_t new_size) { + NANOARROW_UNUSED(allocator); + NANOARROW_UNUSED(ptr); + NANOARROW_UNUSED(old_size); + NANOARROW_UNUSED(new_size); return NULL; } @@ -231,6 +231,205 @@ struct ArrowBufferAllocator ArrowBufferDeallocator( allocator.private_data = private_data; return allocator; } + +static const int kInt32DecimalDigits = 
9; + +static const uint64_t kUInt32PowersOfTen[] = { + 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, + 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL}; + +// Adapted from Arrow C++ to use 32-bit words for better C portability +// https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L524-L544 +static void ShiftAndAdd(struct ArrowStringView value, uint32_t* out, int64_t out_size) { + // We use strtoll for parsing, which needs input that is null-terminated + char chunk_string[16]; + + for (int64_t posn = 0; posn < value.size_bytes;) { + int64_t remaining = value.size_bytes - posn; + + int64_t group_size; + if (remaining > kInt32DecimalDigits) { + group_size = kInt32DecimalDigits; + } else { + group_size = remaining; + } + + const uint64_t multiple = kUInt32PowersOfTen[group_size]; + + memcpy(chunk_string, value.data + posn, group_size); + chunk_string[group_size] = '\0'; + uint32_t chunk = (uint32_t)strtoll(chunk_string, NULL, 10); + + for (int64_t i = 0; i < out_size; i++) { + uint64_t tmp = out[i]; + tmp *= multiple; + tmp += chunk; + out[i] = (uint32_t)(tmp & 0xFFFFFFFFULL); + chunk = (uint32_t)(tmp >> 32); + } + posn += group_size; + } +} + +ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, + struct ArrowStringView value) { + // Check for sign + int is_negative = value.data[0] == '-'; + int has_sign = is_negative || value.data[0] == '+'; + value.data += has_sign; + value.size_bytes -= has_sign; + + // Check all characters are digits that are not the negative sign + for (int64_t i = 0; i < value.size_bytes; i++) { + char c = value.data[i]; + if (c < '0' || c > '9') { + return EINVAL; + } + } + + // Skip over leading 0s + int64_t n_leading_zeroes = 0; + for (int64_t i = 0; i < value.size_bytes; i++) { + if (value.data[i] == '0') { + n_leading_zeroes++; + } else { + break; + } + } + + value.data += n_leading_zeroes; + value.size_bytes -= n_leading_zeroes; + + // Use 32-bit words for 
portability + uint32_t words32[8]; + int n_words32 = decimal->n_words * 2; + NANOARROW_DCHECK(n_words32 <= 8); + memset(words32, 0, sizeof(words32)); + + ShiftAndAdd(value, words32, n_words32); + + if (decimal->low_word_index == 0) { + memcpy(decimal->words, words32, sizeof(uint32_t) * n_words32); + } else { + uint64_t lo; + uint64_t hi; + + for (int i = 0; i < decimal->n_words; i++) { + lo = (uint64_t)words32[i * 2]; + hi = (uint64_t)words32[i * 2 + 1] << 32; + decimal->words[decimal->n_words - i - 1] = lo | hi; + } + } + + if (is_negative) { + ArrowDecimalNegate(decimal); + } + + return NANOARROW_OK; +} + +// Adapted from Arrow C++ for C +// https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365 +ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, + struct ArrowBuffer* buffer) { + int is_negative = ArrowDecimalSign(decimal) < 0; + + uint64_t words_little_endian[4]; + if (decimal->low_word_index == 0) { + memcpy(words_little_endian, decimal->words, decimal->n_words * sizeof(uint64_t)); + } else { + for (int i = 0; i < decimal->n_words; i++) { + words_little_endian[i] = decimal->words[decimal->n_words - i - 1]; + } + } + + // We've already made a copy, so negate that if needed + if (is_negative) { + uint64_t carry = 1; + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = words_little_endian[i]; + elem = ~elem + carry; + carry &= (elem == 0); + words_little_endian[i] = elem; + } + } + + // Find the most significant word that is non-zero + int most_significant_elem_idx = -1; + for (int i = decimal->n_words - 1; i >= 0; i--) { + if (words_little_endian[i] != 0) { + most_significant_elem_idx = i; + break; + } + } + + // If they are all zero, the output is just '0' + if (most_significant_elem_idx == -1) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(buffer, '0')); + return NANOARROW_OK; + } + + // Define segments such that each segment represents 9 digits with the + 
// least significant group of 9 digits first. For example, if the input represents + // 9876543210123456789, then segments will be [123456789, 876543210, 9]. + // We handle at most a signed 256 bit integer, whose maximum value occupies 77 + // characters. Thus, we need at most 9 segments. + const uint32_t k1e9 = 1000000000U; + int num_segments = 0; + uint32_t segments[9]; + memset(segments, 0, sizeof(segments)); + uint64_t* most_significant_elem = words_little_endian + most_significant_elem_idx; + + do { + // Compute remainder = words_little_endian % 1e9 and words_little_endian = + // words_little_endian / 1e9. + uint32_t remainder = 0; + uint64_t* elem = most_significant_elem; + + do { + // Compute dividend = (remainder << 32) | *elem (a virtual 96-bit integer); + // *elem = dividend / 1e9; + // remainder = dividend % 1e9. + uint32_t hi = (uint32_t)(*elem >> 32); + uint32_t lo = (uint32_t)(*elem & 0xFFFFFFFFULL); + uint64_t dividend_hi = ((uint64_t)(remainder) << 32) | hi; + uint64_t quotient_hi = dividend_hi / k1e9; + remainder = (uint32_t)(dividend_hi % k1e9); + uint64_t dividend_lo = ((uint64_t)(remainder) << 32) | lo; + uint64_t quotient_lo = dividend_lo / k1e9; + remainder = (uint32_t)(dividend_lo % k1e9); + + *elem = (quotient_hi << 32) | quotient_lo; + } while (elem-- != words_little_endian); + + segments[num_segments++] = remainder; + } while (*most_significant_elem != 0 || most_significant_elem-- != words_little_endian); + + // We know our output has no more than 9 digits per segment, plus a negative sign, + // plus any further digits between our output of 9 digits plus enough + // extra characters to ensure that snprintf() with n = 21 (maximum length of %lu + // including a the null terminator) is bounded properly. 
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, num_segments * 9 + 1 + 21 - 9)); + if (is_negative) { + buffer->data[buffer->size_bytes++] = '-'; + } + + // The most significant segment should have no leading zeroes + int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%lu", + (unsigned long)segments[num_segments - 1]); + buffer->size_bytes += n_chars; + + // Subsequent output needs to be left-padded with zeroes such that each segment + // takes up exactly 9 digits. + for (int i = num_segments - 2; i >= 0; i--) { + int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%09lu", + (unsigned long)segments[i]); + buffer->size_bytes += n_chars; + NANOARROW_DCHECK(buffer->size_bytes <= buffer->capacity_bytes); + } + + return NANOARROW_OK; +} // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -255,7 +454,7 @@ struct ArrowBufferAllocator ArrowBufferDeallocator( #include "nanoarrow.h" -static void ArrowSchemaRelease(struct ArrowSchema* schema) { +static void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { if (schema->format != NULL) ArrowFree((void*)schema->format); if (schema->name != NULL) ArrowFree((void*)schema->name); if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); @@ -267,7 +466,7 @@ static void ArrowSchemaRelease(struct ArrowSchema* schema) { for (int64_t i = 0; i < schema->n_children; i++) { if (schema->children[i] != NULL) { if (schema->children[i]->release != NULL) { - schema->children[i]->release(schema->children[i]); + ArrowSchemaRelease(schema->children[i]); } ArrowFree(schema->children[i]); @@ -282,7 +481,7 @@ static void ArrowSchemaRelease(struct ArrowSchema* schema) { // release() callback. 
if (schema->dictionary != NULL) { if (schema->dictionary->release != NULL) { - schema->dictionary->release(schema->dictionary); + ArrowSchemaRelease(schema->dictionary); } ArrowFree(schema->dictionary); @@ -404,7 +603,7 @@ void ArrowSchemaInit(struct ArrowSchema* schema) { schema->children = NULL; schema->dictionary = NULL; schema->private_data = NULL; - schema->release = &ArrowSchemaRelease; + schema->release = &ArrowSchemaReleaseInternal; } ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type) { @@ -440,7 +639,7 @@ ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowTyp int result = ArrowSchemaSetType(schema, type); if (result != NANOARROW_OK) { - schema->release(schema); + ArrowSchemaRelease(schema); return result; } @@ -529,10 +728,33 @@ ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum Arrow int n_chars; switch (type) { case NANOARROW_TYPE_TIME32: + if (timezone != NULL) { + return EINVAL; + } + + switch (time_unit) { + case NANOARROW_TIME_UNIT_MICRO: + case NANOARROW_TIME_UNIT_NANO: + return EINVAL; + default: + break; + } + + n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); + break; case NANOARROW_TYPE_TIME64: if (timezone != NULL) { return EINVAL; } + + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + case NANOARROW_TIME_UNIT_MILLI: + return EINVAL; + default: + break; + } + n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); break; case NANOARROW_TYPE_TIMESTAMP: @@ -716,13 +938,13 @@ ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) { return NANOARROW_OK; } -ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, +ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, struct ArrowSchema* schema_out) { ArrowSchemaInit(schema_out); int result = ArrowSchemaSetFormat(schema_out, schema->format); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); 
return result; } @@ -730,26 +952,26 @@ ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, result = ArrowSchemaSetName(schema_out, schema->name); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); return result; } result = ArrowSchemaSetMetadata(schema_out, schema->metadata); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); return result; } result = ArrowSchemaAllocateChildren(schema_out, schema->n_children); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); return result; } for (int64_t i = 0; i < schema->n_children; i++) { result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); return result; } } @@ -757,13 +979,13 @@ ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, if (schema->dictionary != NULL) { result = ArrowSchemaAllocateDictionary(schema_out); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); return result; } result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary); if (result != NANOARROW_OK) { - schema_out->release(schema_out); + ArrowSchemaRelease(schema_out); return result; } } @@ -845,8 +1067,7 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, // decimal case 'd': if (format[1] != ':' || format[2] == '\0') { - ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'", - format + 3); + ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'"); return EINVAL; } @@ -1191,13 +1412,15 @@ static ArrowErrorCode ArrowSchemaViewValidateNChildren( for (int64_t i = 0; i < schema_view->schema->n_children; i++) { child = schema_view->schema->children[i]; if (child == NULL) { - ArrowErrorSet(error, "Expected valid schema at schema->children[%d] 
but found NULL", - i); + ArrowErrorSet(error, + "Expected valid schema at schema->children[%ld] but found NULL", + (long)i); return EINVAL; } else if (child->release == NULL) { ArrowErrorSet( error, - "Expected valid schema at schema->children[%d] but found a released schema", i); + "Expected valid schema at schema->children[%ld] but found a released schema", + (long)i); return EINVAL; } } @@ -1336,7 +1559,8 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie } ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, - struct ArrowSchema* schema, struct ArrowError* error) { + const struct ArrowSchema* schema, + struct ArrowError* error) { if (schema == NULL) { ArrowErrorSet(error, "Expected non-NULL schema"); return EINVAL; @@ -1364,8 +1588,7 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, } const char* format_end_out; - ArrowErrorCode result = - ArrowSchemaViewParse(schema_view, format, &format_end_out, error); + int result = ArrowSchemaViewParse(schema_view, format, &format_end_out, error); if (result != NANOARROW_OK) { if (error != NULL) { @@ -1387,16 +1610,31 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, schema_view->type = NANOARROW_TYPE_DICTIONARY; } - result = ArrowSchemaViewValidate(schema_view, schema_view->storage_type, error); - if (result != NANOARROW_OK) { - return result; - } + NANOARROW_RETURN_NOT_OK( + ArrowSchemaViewValidate(schema_view, schema_view->storage_type, error)); if (schema_view->storage_type != schema_view->type) { - result = ArrowSchemaViewValidate(schema_view, schema_view->type, error); - if (result != NANOARROW_OK) { - return result; - } + NANOARROW_RETURN_NOT_OK( + ArrowSchemaViewValidate(schema_view, schema_view->type, error)); + } + + int64_t unknown_flags = schema->flags & ~NANOARROW_FLAG_ALL_SUPPORTED; + if (unknown_flags != 0) { + ArrowErrorSet(error, "Unknown ArrowSchema flag"); + return EINVAL; + } + + if (schema->flags & 
ARROW_FLAG_DICTIONARY_ORDERED && + schema_view->type != NANOARROW_TYPE_DICTIONARY) { + ArrowErrorSet(error, + "ARROW_FLAG_DICTIONARY_ORDERED is only relevant for dictionaries"); + return EINVAL; + } + + if (schema->flags & ARROW_FLAG_MAP_KEYS_SORTED && + schema_view->type != NANOARROW_TYPE_MAP) { + ArrowErrorSet(error, "ARROW_FLAG_MAP_KEYS_SORTED is only relevant for a map type"); + return EINVAL; } ArrowLayoutInit(&schema_view->layout, schema_view->storage_type); @@ -1408,10 +1646,12 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, schema_view->extension_name = ArrowCharView(NULL); schema_view->extension_metadata = ArrowCharView(NULL); - ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"), - &schema_view->extension_name); - ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:metadata"), - &schema_view->extension_metadata); + NANOARROW_RETURN_NOT_OK(ArrowMetadataGetValue(schema->metadata, + ArrowCharView("ARROW:extension:name"), + &schema_view->extension_name)); + NANOARROW_RETURN_NOT_OK(ArrowMetadataGetValue(schema->metadata, + ArrowCharView("ARROW:extension:metadata"), + &schema_view->extension_metadata)); return NANOARROW_OK; } @@ -1444,7 +1684,7 @@ static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_vi } } -// Helper for bookeeping to emulate sprintf()-like behaviour spread +// Helper for bookkeeping to emulate sprintf()-like behaviour spread // among multiple sprintf calls. 
static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, int64_t* n_remaining, int64_t* n_chars) { @@ -1462,7 +1702,7 @@ static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last, } } -int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n, +int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, char recursive) { if (schema == NULL) { return snprintf(out, n, "[invalid: pointer is null]"); @@ -1599,7 +1839,9 @@ int64_t ArrowMetadataSizeOf(const char* metadata) { struct ArrowMetadataReader reader; struct ArrowStringView key; struct ArrowStringView value; - ArrowMetadataReaderInit(&reader, metadata); + if (ArrowMetadataReaderInit(&reader, metadata) != NANOARROW_OK) { + return 0; + } int64_t size = sizeof(int32_t); while (ArrowMetadataReaderRead(&reader, &key, &value) == NANOARROW_OK) { @@ -1615,7 +1857,7 @@ static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata, struct ArrowMetadataReader reader; struct ArrowStringView existing_key; struct ArrowStringView existing_value; - ArrowMetadataReaderInit(&reader, metadata); + NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, metadata)); while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) == NANOARROW_OK) { @@ -1642,7 +1884,10 @@ ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringVie char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) { struct ArrowStringView value = ArrowCharView(NULL); - ArrowMetadataGetValue(metadata, key, &value); + if (ArrowMetadataGetValue(metadata, key, &value) != NANOARROW_OK) { + return 0; + } + return value.data != NULL; } @@ -1779,7 +2024,7 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, #include "nanoarrow.h" -static void ArrowArrayRelease(struct ArrowArray* array) { +static void ArrowArrayReleaseInternal(struct ArrowArray* array) { // Release buffers held by this array struct 
ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; @@ -1797,7 +2042,7 @@ static void ArrowArrayRelease(struct ArrowArray* array) { for (int64_t i = 0; i < array->n_children; i++) { if (array->children[i] != NULL) { if (array->children[i]->release != NULL) { - array->children[i]->release(array->children[i]); + ArrowArrayRelease(array->children[i]); } ArrowFree(array->children[i]); @@ -1812,7 +2057,7 @@ static void ArrowArrayRelease(struct ArrowArray* array) { // release() callback. if (array->dictionary != NULL) { if (array->dictionary->release != NULL) { - array->dictionary->release(array->dictionary); + ArrowArrayRelease(array->dictionary); } ArrowFree(array->dictionary); @@ -1890,7 +2135,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, array->buffers = NULL; array->children = NULL; array->dictionary = NULL; - array->release = &ArrowArrayRelease; + array->release = &ArrowArrayReleaseInternal; array->private_data = NULL; struct ArrowArrayPrivateData* private_data = @@ -1912,7 +2157,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, int result = ArrowArraySetStorageType(array, storage_type); if (result != NANOARROW_OK) { - array->release(array); + ArrowArrayRelease(array); return result; } @@ -1924,7 +2169,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, } ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, - struct ArrowArrayView* array_view, + const struct ArrowArrayView* array_view, struct ArrowError* error) { NANOARROW_RETURN_NOT_OK_WITH_ERROR( ArrowArrayInitFromType(array, array_view->storage_type), error); @@ -1937,7 +2182,7 @@ ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, if (array_view->n_children > 0) { result = ArrowArrayAllocateChildren(array, array_view->n_children); if (result != NANOARROW_OK) { - array->release(array); + ArrowArrayRelease(array); return result; } @@ -1945,7 +2190,7 @@ ArrowErrorCode 
ArrowArrayInitFromArrayView(struct ArrowArray* array, result = ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error); if (result != NANOARROW_OK) { - array->release(array); + ArrowArrayRelease(array); return result; } } @@ -1954,14 +2199,14 @@ ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, if (array_view->dictionary != NULL) { result = ArrowArrayAllocateDictionary(array); if (result != NANOARROW_OK) { - array->release(array); + ArrowArrayRelease(array); return result; } result = ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error); if (result != NANOARROW_OK) { - array->release(array); + ArrowArrayRelease(array); return result; } } @@ -1970,7 +2215,7 @@ ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, } ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, - struct ArrowSchema* schema, + const struct ArrowSchema* schema, struct ArrowError* error) { struct ArrowArrayView array_view; NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error)); @@ -2173,7 +2418,7 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) { case NANOARROW_TYPE_LARGE_BINARY: case NANOARROW_TYPE_LARGE_STRING: if (ArrowArrayBuffer(array, 2)->data == NULL) { - ArrowBufferAppendUInt8(ArrowArrayBuffer(array, 2), 0); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(ArrowArrayBuffer(array, 2), 0)); } break; default: @@ -2195,7 +2440,7 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; - for (int64_t i = 0; i < 3; i++) { + for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data; } @@ -2293,7 +2538,7 @@ ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_vie } ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, - 
struct ArrowSchema* schema, + const struct ArrowSchema* schema, struct ArrowError* error) { struct ArrowSchemaView schema_view; int result = ArrowSchemaViewInit(&schema_view, schema, error); @@ -2343,8 +2588,8 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, } memset(array_view->union_type_id_map, -1, 256); - int8_t n_type_ids = _ArrowParseUnionTypeIds(schema_view.union_type_ids, - array_view->union_type_id_map + 128); + int32_t n_type_ids = _ArrowParseUnionTypeIds(schema_view.union_type_ids, + array_view->union_type_id_map + 128); for (int8_t child_index = 0; child_index < n_type_ids; child_index++) { int8_t type_id = array_view->union_type_id_map[128 + child_index]; array_view->union_type_id_map[type_id] = child_index; @@ -2379,7 +2624,7 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) { } void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) { - for (int i = 0; i < 3; i++) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; switch (array_view->layout.buffer_type[i]) { @@ -2427,28 +2672,15 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) // This version recursively extracts information from the array and stores it // in the array view, performing any checks that require the original array. 
static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view, - struct ArrowArray* array, + const struct ArrowArray* array, struct ArrowError* error) { - // Check length and offset - if (array->offset < 0) { - ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of %ld", - (long)array->offset); - return EINVAL; - } - - if (array->length < 0) { - ArrowErrorSet(error, "Expected array length >= 0 but found array length of %ld", - (long)array->length); - return EINVAL; - } - array_view->array = array; array_view->offset = array->offset; array_view->length = array->length; array_view->null_count = array->null_count; int64_t buffers_required = 0; - for (int i = 0; i < 3; i++) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { break; } @@ -2507,6 +2739,18 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view, static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, struct ArrowError* error) { + if (array_view->length < 0) { + ArrowErrorSet(error, "Expected length >= 0 but found length %ld", + (long)array_view->length); + return EINVAL; + } + + if (array_view->offset < 0) { + ArrowErrorSet(error, "Expected offset >= 0 but found offset %ld", + (long)array_view->offset); + return EINVAL; + } + // Calculate buffer sizes that do not require buffer access. If marked as // unknown, assign the buffer size; otherwise, validate it. 
int64_t offset_plus_length = array_view->offset + array_view->length; @@ -2767,7 +3011,7 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, } ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, - struct ArrowArray* array, + const struct ArrowArray* array, struct ArrowError* error) { // Extract information from the array into the array view NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error)); @@ -2780,7 +3024,7 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, } ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, - struct ArrowArray* array, + const struct ArrowArray* array, struct ArrowError* error) { // Extract information from the array into the array view NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error)); @@ -2799,10 +3043,8 @@ static int ArrowAssertIncreasingInt32(struct ArrowBufferView view, } for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) { - int32_t diff = view.data.as_int32[i] - view.data.as_int32[i - 1]; - if (diff < 0) { - ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld", - (long)i, (long)diff); + if (view.data.as_int32[i] < view.data.as_int32[i - 1]) { + ArrowErrorSet(error, "[%ld] Expected element size >= 0", (long)i); return EINVAL; } } @@ -2817,10 +3059,8 @@ static int ArrowAssertIncreasingInt64(struct ArrowBufferView view, } for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) { - int64_t diff = view.data.as_int64[i] - view.data.as_int64[i - 1]; - if (diff < 0) { - ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld", - (long)i, (long)diff); + if (view.data.as_int64[i] < view.data.as_int64[i - 1]) { + ArrowErrorSet(error, "[%ld] Expected element size >= 0", (long)i); return EINVAL; } } @@ -2865,7 +3105,7 @@ static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* 
values, static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, struct ArrowError* error) { - for (int i = 0; i < 3; i++) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { switch (array_view->layout.buffer_type[i]) { case NANOARROW_BUFFER_TYPE_DATA_OFFSET: if (array_view->layout.element_size_bits[i] == 32) { @@ -2914,7 +3154,7 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, error, "[%ld] Expected union offset for child id %d to be between 0 and %ld but " "found offset value %ld", - (long)i, (int)child_id, (long)child_length, offset); + (long)i, (int)child_id, (long)child_length, (long)offset); return EINVAL; } } @@ -2927,8 +3167,8 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, // Dictionary valiation not implemented if (array_view->dictionary != NULL) { - ArrowErrorSet(error, "Validation for dictionary-encoded arrays is not implemented"); - return ENOTSUP; + NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->dictionary, error)); + // TODO: validate the indices } return NANOARROW_OK; @@ -3011,6 +3251,7 @@ static int ArrowBasicArrayStreamGetNext(struct ArrowArrayStream* array_stream, static const char* ArrowBasicArrayStreamGetLastError( struct ArrowArrayStream* array_stream) { + NANOARROW_UNUSED(array_stream); return NULL; } @@ -3023,12 +3264,12 @@ static void ArrowBasicArrayStreamRelease(struct ArrowArrayStream* array_stream) (struct BasicArrayStreamPrivate*)array_stream->private_data; if (private_data->schema.release != NULL) { - private_data->schema.release(&private_data->schema); + ArrowSchemaRelease(&private_data->schema); } for (int64_t i = 0; i < private_data->n_arrays; i++) { if (private_data->arrays[i].release != NULL) { - private_data->arrays[i].release(&private_data->arrays[i]); + ArrowArrayRelease(&private_data->arrays[i]); } } @@ -3042,8 +3283,9 @@ static void ArrowBasicArrayStreamRelease(struct ArrowArrayStream* array_stream) ArrowErrorCode 
ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, struct ArrowSchema* schema, int64_t n_arrays) { - struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)ArrowMalloc( - sizeof(struct BasicArrayStreamPrivate)); + struct BasicArrayStreamPrivate* private_data = + (struct BasicArrayStreamPrivate*)ArrowMalloc( + sizeof(struct BasicArrayStreamPrivate)); if (private_data == NULL) { return ENOMEM; } @@ -3082,7 +3324,7 @@ void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_ ArrowArrayMove(array, &private_data->arrays[i]); } -ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream* array_stream, +ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, struct ArrowError* error) { struct BasicArrayStreamPrivate* private_data = (struct BasicArrayStreamPrivate*)array_stream->private_data; diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h index 01317473d8..012216cd51 100644 --- a/c/vendor/nanoarrow/nanoarrow.h +++ b/c/vendor/nanoarrow/nanoarrow.h @@ -19,9 +19,9 @@ #define NANOARROW_BUILD_ID_H_INCLUDED #define NANOARROW_VERSION_MAJOR 0 -#define NANOARROW_VERSION_MINOR 3 +#define NANOARROW_VERSION_MINOR 4 #define NANOARROW_VERSION_PATCH 0 -#define NANOARROW_VERSION "0.3.0-SNAPSHOT" +#define NANOARROW_VERSION "0.4.0" #define NANOARROW_VERSION_INT \ (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ @@ -162,25 +162,6 @@ struct ArrowArrayStream { #endif // ARROW_C_STREAM_INTERFACE #endif // ARROW_FLAG_DICTIONARY_ORDERED -/// \brief Move the contents of src into dst and set src->release to NULL -static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) { - memcpy(dst, src, sizeof(struct ArrowSchema)); - src->release = NULL; -} - -/// \brief Move the contents of src into dst and set src->release to NULL -static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) { - memcpy(dst, 
src, sizeof(struct ArrowArray)); - src->release = NULL; -} - -/// \brief Move the contents of src into dst and set src->release to NULL -static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, - struct ArrowArrayStream* dst) { - memcpy(dst, src, sizeof(struct ArrowArrayStream)); - src->release = NULL; -} - /// @} // Utility macros @@ -220,6 +201,34 @@ static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, } while (0) #endif +#if defined(NANOARROW_DEBUG) +// For checking ArrowErrorSet() calls for valid printf format strings/arguments +// If using mingw's c99-compliant printf, we need a different format-checking attribute +#if defined(__USE_MINGW_ANSI_STDIO) && defined(__MINGW_PRINTF_FORMAT) +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE \ + __attribute__((format(__MINGW_PRINTF_FORMAT, 2, 3))) +#elif defined(__GNUC__) +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE __attribute__((format(printf, 2, 3))) +#else +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE +#endif + +// For checking calls to functions that return ArrowErrorCode +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define NANOARROW_CHECK_RETURN_ATTRIBUTE __attribute__((warn_unused_result)) +#elif defined(_MSC_VER) && (_MSC_VER >= 1700) +#define NANOARROW_CHECK_RETURN_ATTRIBUTE _Check_return_ +#else +#define NANOARROW_CHECK_RETURN_ATTRIBUTE +#endif + +#else +#define NANOARROW_CHECK_RETURN_ATTRIBUTE +#define NANOARROW_CHECK_PRINTF_ATTRIBUTE +#endif + +#define NANOARROW_UNUSED(x) (void)(x) + /// \brief Return code for success. 
/// \ingroup nanoarrow-errors #define NANOARROW_OK 0 @@ -228,6 +237,64 @@ static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, /// \ingroup nanoarrow-errors typedef int ArrowErrorCode; +#if defined(NANOARROW_DEBUG) +#define ArrowErrorCode NANOARROW_CHECK_RETURN_ATTRIBUTE ArrowErrorCode +#endif + +/// \brief Flags supported by ArrowSchemaViewInit() +/// \ingroup nanoarrow-schema-view +#define NANOARROW_FLAG_ALL_SUPPORTED \ + (ARROW_FLAG_DICTIONARY_ORDERED | ARROW_FLAG_NULLABLE | ARROW_FLAG_MAP_KEYS_SORTED) + +/// \brief Error type containing a UTF-8 encoded message. +/// \ingroup nanoarrow-errors +struct ArrowError { + /// \brief A character buffer with space for an error message. + char message[1024]; +}; + +/// \brief Ensure an ArrowError is null-terminated by zeroing the first character. +/// \ingroup nanoarrow-errors +/// +/// If error is NULL, this function does nothing. +static inline void ArrowErrorInit(struct ArrowError* error) { + if (error != NULL) { + error->message[0] = '\0'; + } +} + +/// \brief Get the contents of an error +/// \ingroup nanoarrow-errors +/// +/// If error is NULL, returns "", or returns the contents of the error message +/// otherwise. +static inline const char* ArrowErrorMessage(struct ArrowError* error) { + if (error == NULL) { + return ""; + } else { + return error->message; + } +} + +/// \brief Set the contents of an error from an existing null-terminated string +/// \ingroup nanoarrow-errors +/// +/// If error is NULL, this function does nothing. 
+static inline void ArrowErrorSetString(struct ArrowError* error, const char* src) { + if (error == NULL) { + return; + } + + int64_t src_len = strlen(src); + if (src_len >= ((int64_t)sizeof(error->message))) { + memcpy(error->message, src, sizeof(error->message) - 1); + error->message[sizeof(error->message) - 1] = '\0'; + } else { + memcpy(error->message, src, src_len); + error->message[src_len] = '\0'; + } +} + /// \brief Check the result of an expression and return it if not NANOARROW_OK /// \ingroup nanoarrow-errors #define NANOARROW_RETURN_NOT_OK(EXPR) \ @@ -245,11 +312,11 @@ typedef int ArrowErrorCode; _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR) #if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) -#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \ - do { \ - fprintf(stderr, "%s failed with errno %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \ - __FILE__, (int)__LINE__); \ - abort(); \ +#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \ + do { \ + fprintf(stderr, "%s failed with code %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \ + __FILE__, (int)__LINE__); \ + abort(); \ } while (0) #endif @@ -264,16 +331,105 @@ typedef int ArrowErrorCode; /// \ingroup nanoarrow-errors /// /// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true), -/// print a message to stderr and abort. If nanoarrow was bulit in release mode, +/// print a message to stderr and abort. If nanoarrow was built in release mode, /// this statement has no effect. You can customize fatal error behaviour /// be defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h /// This macro is provided as a convenience for users and is not used internally. 
#define NANOARROW_ASSERT_OK(EXPR) \ _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR) + +#define _NANOARROW_DCHECK_IMPL(EXPR, EXPR_STR) \ + do { \ + if (!(EXPR)) NANOARROW_PRINT_AND_DIE(-1, EXPR_STR); \ + } while (0) + +#define NANOARROW_DCHECK(EXPR) _NANOARROW_DCHECK_IMPL(EXPR, #EXPR) #else #define NANOARROW_ASSERT_OK(EXPR) EXPR +#define NANOARROW_DCHECK(EXPR) #endif +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); + + memcpy(dst, src, sizeof(struct ArrowSchema)); + src->release = NULL; +} + +static inline void ArrowSchemaRelease(struct ArrowSchema* schema) { + NANOARROW_DCHECK(schema != NULL); + schema->release(schema); + NANOARROW_DCHECK(schema->release == NULL); +} + +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); + + memcpy(dst, src, sizeof(struct ArrowArray)); + src->release = NULL; +} + +static inline void ArrowArrayRelease(struct ArrowArray* array) { + NANOARROW_DCHECK(array != NULL); + array->release(array); + NANOARROW_DCHECK(array->release == NULL); +} + +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); + + memcpy(dst, src, sizeof(struct ArrowArrayStream)); + src->release = NULL; +} + +static inline const char* ArrowArrayStreamGetLastError( + struct ArrowArrayStream* array_stream) { + NANOARROW_DCHECK(array_stream != NULL); + + const char* value = array_stream->get_last_error(array_stream); + if (value == NULL) { + return ""; + } else { + return value; + } +} + +static inline ArrowErrorCode ArrowArrayStreamGetSchema( + struct ArrowArrayStream* array_stream, struct ArrowSchema* out, + struct ArrowError* error) { + NANOARROW_DCHECK(array_stream != NULL); + + int result = 
array_stream->get_schema(array_stream, out); + if (result != NANOARROW_OK && error != NULL) { + ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); + } + + return result; +} + +static inline ArrowErrorCode ArrowArrayStreamGetNext( + struct ArrowArrayStream* array_stream, struct ArrowArray* out, + struct ArrowError* error) { + NANOARROW_DCHECK(array_stream != NULL); + + int result = array_stream->get_next(array_stream, out); + if (result != NANOARROW_OK && error != NULL) { + ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); + } + + return result; +} + +static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream) { + NANOARROW_DCHECK(array_stream != NULL); + array_stream->release(array_stream); + NANOARROW_DCHECK(array_stream->release == NULL); +} + static char _ArrowIsLittleEndian(void) { uint32_t check = 1; char first_byte; @@ -481,6 +637,14 @@ enum ArrowBufferType { NANOARROW_BUFFER_TYPE_DATA }; +/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout +/// \ingroup nanoarrow-array-view +/// +/// All currently supported types have 3 buffers or fewer; however, future types +/// may involve a variable number of buffers (e.g., string view). These buffers +/// will be represented by separate members of the ArrowArrayView or ArrowLayout. +#define NANOARROW_MAX_FIXED_BUFFERS 3 + /// \brief An non-owning view of a string /// \ingroup nanoarrow-utils struct ArrowStringView { @@ -593,13 +757,13 @@ struct ArrowBitmap { /// the length and offset of the array. 
struct ArrowLayout { /// \brief The function of each buffer - enum ArrowBufferType buffer_type[3]; + enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS]; /// \brief The data type of each buffer - enum ArrowType buffer_data_type[3]; + enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS]; /// \brief The size of an element each buffer or 0 if this size is variable or unknown - int64_t element_size_bits[3]; + int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS]; /// \brief The number of elements in the child array per element in this array for a /// fixed-size list @@ -618,7 +782,7 @@ struct ArrowLayout { struct ArrowArrayView { /// \brief The underlying ArrowArray or NULL if it has not been set or /// if the buffers in this ArrowArrayView are not backed by an ArrowArray. - struct ArrowArray* array; + const struct ArrowArray* array; /// \brief The number of elements from the physical start of the buffers. int64_t offset; @@ -641,7 +805,7 @@ struct ArrowArrayView { struct ArrowLayout layout; /// \brief This Array's buffers as ArrowBufferView objects - struct ArrowBufferView buffer_views[3]; + struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS]; /// \brief The number of children of this view int64_t n_children; @@ -669,12 +833,12 @@ struct ArrowArrayPrivateData { struct ArrowBitmap bitmap; // Holder for additional buffers as required - struct ArrowBuffer buffers[2]; + struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1]; // The array of pointers to buffers. 
This must be updated after a sequence // of appends to synchronize its values with the actual buffer addresses // (which may have ben reallocated uring that time) - const void* buffer_data[3]; + const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS]; // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown enum ArrowType storage_type; @@ -760,19 +924,20 @@ static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwid /// This does not check if the decimal's precision sufficiently small to fit /// within the signed 64-bit integer range (A precision less than or equal /// to 18 is sufficiently small). -static inline int64_t ArrowDecimalGetIntUnsafe(struct ArrowDecimal* decimal) { +static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) { return (int64_t)decimal->words[decimal->low_word_index]; } /// \brief Copy the bytes of this decimal into a sufficiently large buffer /// \ingroup nanoarrow-utils -static inline void ArrowDecimalGetBytes(struct ArrowDecimal* decimal, uint8_t* out) { +static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal, + uint8_t* out) { memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); } /// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise /// \ingroup nanoarrow-utils -static inline int64_t ArrowDecimalSign(struct ArrowDecimal* decimal) { +static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) { return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); } @@ -788,6 +953,28 @@ static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t valu decimal->words[decimal->low_word_index] = value; } +/// \brief Negate the value of this decimal in place +/// \ingroup nanoarrow-utils +static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { + uint64_t carry = 1; + + if (decimal->low_word_index == 0) { + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = 
decimal->words[i]; + elem = ~elem + carry; + carry &= (elem == 0); + decimal->words[i] = elem; + } + } else { + for (int i = decimal->low_word_index; i >= 0; i--) { + uint64_t elem = decimal->words[i]; + elem = ~elem + carry; + carry &= (elem == 0); + decimal->words[i] = elem; + } + } +} + /// \brief Copy bytes from a buffer into this decimal /// \ingroup nanoarrow-utils static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, @@ -840,7 +1027,6 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, #define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) #define ArrowNanoarrowVersionInt \ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) -#define ArrowErrorMessage NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorMessage) #define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc) #define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc) #define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree) @@ -850,6 +1036,9 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) #define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet) #define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit) +#define ArrowDecimalSetDigits NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits) +#define ArrowDecimalAppendDigitsToBuffer \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendDigitsToBuffer) #define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit) #define ArrowSchemaInitFromType \ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) @@ -986,6 +1175,60 @@ struct ArrowBufferAllocator ArrowBufferDeallocator( /// @} +/// \brief Move the contents of an src ArrowSchema into dst and set src->release to NULL +/// \ingroup nanoarrow-arrow-cdata +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct 
ArrowSchema* dst); + +/// \brief Call the release callback of an ArrowSchema +/// \ingroup nanoarrow-arrow-cdata +static inline void ArrowSchemaRelease(struct ArrowSchema* schema); + +/// \brief Move the contents of an src ArrowArray into dst and set src->release to NULL +/// \ingroup nanoarrow-arrow-cdata +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst); + +/// \brief Call the release callback of an ArrowArray +static inline void ArrowArrayRelease(struct ArrowArray* array); + +/// \brief Move the contents of an src ArrowArrayStream into dst and set src->release to +/// NULL \ingroup nanoarrow-arrow-cdata +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst); + +/// \brief Call the get_schema callback of an ArrowArrayStream +/// \ingroup nanoarrow-arrow-cdata +/// +/// Unlike the get_schema callback, this wrapper checks the return code +/// and propagates the error reported by get_last_error into error. This +/// makes it significantly less verbose to iterate over array streams +/// using NANOARROW_RETURN_NOT_OK()-style error handling. +static inline ArrowErrorCode ArrowArrayStreamGetSchema( + struct ArrowArrayStream* array_stream, struct ArrowSchema* out, + struct ArrowError* error); + +/// \brief Call the get_next callback of an ArrowArrayStream +/// \ingroup nanoarrow-arrow-cdata +/// +/// Unlike the get_next callback, this wrapper checks the return code +/// and propagates the error reported by get_last_error into error. This +/// makes it significantly less verbose to iterate over array streams +/// using NANOARROW_RETURN_NOT_OK()-style error handling.
+static inline ArrowErrorCode ArrowArrayStreamGetNext( + struct ArrowArrayStream* array_stream, struct ArrowArray* out, + struct ArrowError* error); + +/// \brief Call the get_last_error callback of an ArrowArrayStream +/// \ingroup nanoarrow-arrow-cdata +/// +/// Unlike the get_last_error callback, this function never returns NULL (i.e., its +/// result is safe to use in printf-style error formatters). Null values from the +/// original callback are reported as "". +static inline const char* ArrowArrayStreamGetLastError( + struct ArrowArrayStream* array_stream); + +/// \brief Call the release callback of an ArrowArrayStream +static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream); + /// \defgroup nanoarrow-errors Error handling /// /// Functions generally return an errno-compatible error code; functions that @@ -1005,31 +1248,11 @@ struct ArrowBufferAllocator ArrowBufferDeallocator( /// /// @{ -/// \brief Error type containing a UTF-8 encoded message. -struct ArrowError { - /// \brief A character buffer with space for an error message. - char message[1024]; -}; - -/// \brief Ensure an ArrowError is null-terminated by zeroing the first character. -/// -/// If error is NULL, this function does nothing. -static inline void ArrowErrorInit(struct ArrowError* error) { - if (error) { - error->message[0] = '\0'; - } -} - /// \brief Set the contents of an error using printf syntax. /// /// If error is NULL, this function does nothing and returns NANOARROW_OK. -ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...); - -/// \brief Get the contents of an error -/// -/// If error is NULL, returns "", or returns the contents of the error message -/// otherwise.
-const char* ArrowErrorMessage(struct ArrowError* error); +NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, + const char* fmt, ...); /// @} @@ -1049,6 +1272,14 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type); /// \brief Create a string view from a null-terminated string static inline struct ArrowStringView ArrowCharView(const char* value); +/// \brief Sets the integer value of an ArrowDecimal from a string +ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, + struct ArrowStringView value); + +/// \brief Get the integer value of an ArrowDecimal as string +ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, + struct ArrowBuffer* buffer); + /// @} /// \defgroup nanoarrow-schema Creating schemas @@ -1078,7 +1309,7 @@ ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowTyp /// and returns the number of characters required for the output if /// n were sufficiently large. If recursive is non-zero, the result will /// also include children. -int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n, +int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, char recursive); /// \brief Set the format field of a schema from an ArrowType @@ -1140,7 +1371,7 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowTyp /// \brief Make a (recursive) copy of a schema /// /// Allocates and copies fields of schema into schema_out. -ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, +ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, struct ArrowSchema* schema_out); /// \brief Copy format into schema->format @@ -1252,10 +1483,10 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, /// Contains more readily extractable values than a raw ArrowSchema. 
/// Clients can stack or statically allocate this structure but are /// encouraged to use the provided getters to ensure forward -/// compatiblity. +/// compatibility. struct ArrowSchemaView { /// \brief A pointer to the schema represented by this view - struct ArrowSchema* schema; + const struct ArrowSchema* schema; /// \brief The data type represented by the schema /// @@ -1338,7 +1569,8 @@ struct ArrowSchemaView { /// \brief Initialize an ArrowSchemaView ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, - struct ArrowSchema* schema, struct ArrowError* error); + const struct ArrowSchema* schema, + struct ArrowError* error); /// @} @@ -1482,6 +1714,14 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l /// \brief Count true values in a bitmap static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); +/// \brief Extract int8 boolean values from a range in a bitmap +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out); + +/// \brief Extract int32 boolean values from a range in a bitmap +static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, + int64_t length, int32_t* out); + /// \brief Initialize an ArrowBitmap /// /// Initialize the builder's buffer, empty its cache, and reset the size to zero @@ -1559,7 +1799,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, - struct ArrowSchema* schema, + const struct ArrowSchema* schema, struct ArrowError* error); /// \brief Initialize the contents of an ArrowArray from an ArrowArrayView @@ -1567,7 +1807,7 @@ ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. 
ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, - struct ArrowArrayView* array_view, + const struct ArrowArrayView* array_view, struct ArrowError* error); /// \brief Allocate the array->children array @@ -1657,18 +1897,21 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, /// \brief Append a string of bytes to an array /// /// Returns NANOARROW_OK if value can be exactly represented by -/// the underlying storage type or EINVAL otherwise (e.g., -/// the underlying array is not a binary, string, large binary, large string, -/// or fixed-size binary array, or value is the wrong size for a fixed-size -/// binary array). +/// the underlying storage type, EOVERFLOW if appending value would overflow +/// the offset type (e.g., if the data buffer would be larger than 2 GB for a +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// binary, string, large binary, large string, or fixed-size binary array, or value is +/// the wrong size for a fixed-size binary array). static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, struct ArrowBufferView value); /// \brief Append a string value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by -/// the underlying storage type or EINVAL otherwise (e.g., -/// the underlying array is not a string or large string array). +/// the underlying storage type, EOVERFLOW if appending value would overflow +/// the offset type (e.g., if the data buffer would be larger than 2 GB for a +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// string or large string array). 
static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, struct ArrowStringView value); @@ -1677,19 +1920,20 @@ static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise. static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, - struct ArrowInterval* value); + const struct ArrowInterval* value); /// \brief Append a decimal value to an array /// /// Returns NANOARROW_OK if array is a decimal array with the appropriate /// bitwidth or EINVAL otherwise. static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, - struct ArrowDecimal* value); + const struct ArrowDecimal* value); /// \brief Finish a nested array element /// /// Appends a non-null element to the array based on the first child's current -/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL +/// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW +/// if the child of a list or map array would exceed INT_MAX elements, or EINVAL /// if the underlying storage type is not a struct, list, large list, or fixed-size /// list, or if there was an attempt to add a struct or fixed-size list element where the /// length of the child array(s) did not match the expected length. @@ -1725,7 +1969,7 @@ ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, /// (i.e. NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU /// buffer data access is not possible or more validation (i.e., /// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or -/// corruptable source. +/// corruptible source. 
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, enum ArrowValidationLevel validation_level, struct ArrowError* error); @@ -1751,7 +1995,7 @@ static inline void ArrowArrayViewMove(struct ArrowArrayView* src, /// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, - struct ArrowSchema* schema, + const struct ArrowSchema* schema, struct ArrowError* error); /// \brief Allocate the array_view->children array @@ -1768,12 +2012,13 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length); /// \brief Set buffer sizes and data pointers from an ArrowArray ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, - struct ArrowArray* array, struct ArrowError* error); + const struct ArrowArray* array, + struct ArrowError* error); /// \brief Set buffer sizes and data pointers from an ArrowArray except for those /// that require dereferencing buffer content. 
ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, - struct ArrowArray* array, + const struct ArrowArray* array, struct ArrowError* error); /// \brief Performs checks on the content of an ArrowArrayView @@ -1792,59 +2037,60 @@ ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, void ArrowArrayViewReset(struct ArrowArrayView* array_view); /// \brief Check for a null element in an ArrowArrayView -static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i); +static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, + int64_t i); /// \brief Get the type id of a union array element -static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view, +static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, int64_t i); /// \brief Get the child index of a union array element -static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view, - int64_t i); +static inline int8_t ArrowArrayViewUnionChildIndex( + const struct ArrowArrayView* array_view, int64_t i); /// \brief Get the index to use into the relevant union child array -static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, - int64_t i); +static inline int64_t ArrowArrayViewUnionChildOffset( + const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an integer /// /// This function does not check for null values, that values are actually integers, or /// that values are within a valid range for an int64. 
-static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, +static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an unsigned integer /// /// This function does not check for null values, that values are actually integers, or /// that values are within a valid range for a uint64. -static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view, - int64_t i); +static inline uint64_t ArrowArrayViewGetUIntUnsafe( + const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as a double /// /// This function does not check for null values, or /// that values are within a valid range for a double. -static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view, - int64_t i); +static inline double ArrowArrayViewGetDoubleUnsafe( + const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowStringView /// /// This function does not check for null values. static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( - struct ArrowArrayView* array_view, int64_t i); + const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowBufferView /// /// This function does not check for null values. static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( - struct ArrowArrayView* array_view, int64_t i); + const struct ArrowArrayView* array_view, int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowDecimal /// /// This function does not check for null values. The out parameter must /// be initialized with ArrowDecimalInit() with the proper parameters for this /// type before calling this for the first time. 
-static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view, +static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, int64_t i, struct ArrowDecimal* out); /// @} @@ -1881,11 +2127,17 @@ void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_ /// array_stream must have been initialized with ArrowBasicArrayStreamInit(). /// This function uses ArrowArrayStreamInitFromSchema() and ArrowArrayStreamSetArray() /// to validate the contents of the arrays. -ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream* array_stream, +ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, struct ArrowError* error); /// @} +// Undefine ArrowErrorCode, which may have been defined to annotate functions that return +// it to warn for an unused result. +#if defined(ArrowErrorCode) +#undef ArrowErrorCode +#endif + // Inline function definitions @@ -2119,20 +2371,124 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) { return (bits >> 3) + ((bits & 7) != 0); } +static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; +} + +static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) { + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; +} + static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { - *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | - values[5] << 5 | values[6] << 6 | values[7] << 7); + *out = (uint8_t)(values[0] | 
((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); } static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { - *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | - values[5] << 5 | values[6] << 6 | values[7] << 7); + *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); } static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { return (bits[i >> 3] >> (i & 0x07)) & 1; } +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out) { + if (length == 0) { + return; + } + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + return; + } + + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt8(bits[i], out); + out += 8; + } + + // last byte + const int bits_remaining = (int)(i_end % 8 == 0 ? 
8 : i_end % 8); + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } +} + +static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, + int64_t length, int32_t* out) { + if (length == 0) { + return; + } + + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; + + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; + + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + return; + } + + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } + + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt32(bits[i], out); + out += 8; + } + + // last byte + const int bits_remaining = (int)(i_end % 8 == 0 ? 
8 : i_end % 8); + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } +} + static inline void ArrowBitSet(uint8_t* bits, int64_t i) { bits[i / 8] |= _ArrowkBitmask[i % 8]; } @@ -2348,7 +2704,7 @@ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, if ((out_i_cursor % 8) != 0) { int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; for (int i = 0; i < n_partial_bits; i++) { - ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values[i]); } out_cursor++; @@ -2371,7 +2727,7 @@ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, // Zero out the last byte *out_cursor = 0x00; for (int i = 0; i < n_remaining; i++) { - ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values_cursor[i]); } out_cursor++; } @@ -2445,15 +2801,17 @@ static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int // is made. 
static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array, int8_t type_id) { + NANOARROW_UNUSED(array); return type_id; } static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array, int8_t child_index) { + NANOARROW_UNUSED(array); return child_index; } -static inline int8_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { +static inline int32_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { if (*type_ids == '\0') { return 0; } @@ -2505,7 +2863,7 @@ static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str, int64_t n_children) { int8_t type_ids[128]; - int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); + int32_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, n_children); } @@ -2532,7 +2890,7 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) } // Initialize any data offset buffer with a single zero - for (int i = 0; i < 3; i++) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && private_data->layout.element_size_bits[i] == 64) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); @@ -2555,7 +2913,7 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) } static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { - for (int64_t i = 0; i < 3; i++) { + for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1)); } @@ -2670,7 +3028,7 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a struct ArrowBuffer* buffer; int64_t size_bytes; - for (int i = 0; i < 3; 
i++) { + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { buffer = ArrowArrayBuffer(array, i); size_bytes = private_data->layout.element_size_bits[i] / 8; @@ -2861,8 +3219,8 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: offset = ((int32_t*)offset_buffer->data)[array->length]; - if ((offset + value.size_bytes) > INT32_MAX) { - return EINVAL; + if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) { + return EOVERFLOW; } offset += (int32_t)value.size_bytes; @@ -2922,7 +3280,7 @@ static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, } static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, - struct ArrowInterval* value) { + const struct ArrowInterval* value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; @@ -2960,12 +3318,16 @@ static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, return EINVAL; } + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + array->length++; return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, - struct ArrowDecimal* value) { + const struct ArrowDecimal* value) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); @@ -3010,7 +3372,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { case NANOARROW_TYPE_MAP: child_length = array->children[0]->length; if (child_length > INT32_MAX) { - return EINVAL; + return EOVERFLOW; } NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); @@ -3059,7 +3421,7 @@ static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* arr switch 
(private_data->storage_type) { case NANOARROW_TYPE_DENSE_UNION: - // Apppend the target child length to the union offsets buffer + // Append the target child length to the union offsets buffer _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); @@ -3097,7 +3459,8 @@ static inline void ArrowArrayViewMove(struct ArrowArrayView* src, ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); } -static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) { +static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, + int64_t i) { const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; i += array_view->offset; switch (array_view->storage_type) { @@ -3112,7 +3475,7 @@ static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int } } -static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view, +static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_DENSE_UNION: @@ -3123,8 +3486,8 @@ static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view } } -static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view, - int64_t i) { +static inline int8_t ArrowArrayViewUnionChildIndex( + const struct ArrowArrayView* array_view, int64_t i) { int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); if (array_view->union_type_id_map == NULL) { return type_id; @@ -3133,8 +3496,8 @@ static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_ } } -static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, - int64_t i) { +static inline int64_t ArrowArrayViewUnionChildOffset( + const struct ArrowArrayView* array_view, int64_t i) { 
switch (array_view->storage_type) { case NANOARROW_TYPE_DENSE_UNION: return array_view->buffer_views[1].data.as_int32[i]; @@ -3145,8 +3508,8 @@ static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* arra } } -static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array_view, - int64_t i) { +static inline int64_t ArrowArrayViewListChildOffset( + const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_LIST: return array_view->buffer_views[1].data.as_int32[i]; @@ -3157,15 +3520,16 @@ static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array } } -static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, +static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, int64_t i) { - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; i += array_view->offset; switch (array_view->storage_type) { case NANOARROW_TYPE_INT64: return data_view->data.as_int64[i]; case NANOARROW_TYPE_UINT64: return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INTERVAL_MONTHS: case NANOARROW_TYPE_INT32: return data_view->data.as_int32[i]; case NANOARROW_TYPE_UINT32: @@ -3189,15 +3553,16 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_vi } } -static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view, - int64_t i) { +static inline uint64_t ArrowArrayViewGetUIntUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { i += array_view->offset; - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; switch (array_view->storage_type) { case NANOARROW_TYPE_INT64: return data_view->data.as_int64[i]; case NANOARROW_TYPE_UINT64: return data_view->data.as_uint64[i]; + case 
NANOARROW_TYPE_INTERVAL_MONTHS: case NANOARROW_TYPE_INT32: return data_view->data.as_int32[i]; case NANOARROW_TYPE_UINT32: @@ -3221,10 +3586,10 @@ static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_ } } -static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view, - int64_t i) { +static inline double ArrowArrayViewGetDoubleUnsafe( + const struct ArrowArrayView* array_view, int64_t i) { i += array_view->offset; - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; switch (array_view->storage_type) { case NANOARROW_TYPE_INT64: return (double)data_view->data.as_int64[i]; @@ -3254,9 +3619,9 @@ static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_ } static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( - struct ArrowArrayView* array_view, int64_t i) { + const struct ArrowArrayView* array_view, int64_t i) { i += array_view->offset; - struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; const char* data_view = array_view->buffer_views[2].data.as_char; struct ArrowStringView view; @@ -3287,9 +3652,9 @@ static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( } static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( - struct ArrowArrayView* array_view, int64_t i) { + const struct ArrowArrayView* array_view, int64_t i) { i += array_view->offset; - struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; struct ArrowBufferView view; @@ -3320,8 +3685,8 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( return view; } -static inline void ArrowArrayViewGetIntervalUnsafe(struct ArrowArrayView* array_view, - 
int64_t i, struct ArrowInterval* out) { +static inline void ArrowArrayViewGetIntervalUnsafe( + const struct ArrowArrayView* array_view, int64_t i, struct ArrowInterval* out) { const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; switch (array_view->storage_type) { case NANOARROW_TYPE_INTERVAL_MONTHS: { @@ -3347,7 +3712,7 @@ static inline void ArrowArrayViewGetIntervalUnsafe(struct ArrowArrayView* array_ } } -static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view, +static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, int64_t i, struct ArrowDecimal* out) { i += array_view->offset; const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; diff --git a/c/vendor/nanoarrow/nanoarrow.hpp b/c/vendor/nanoarrow/nanoarrow.hpp index da54a57311..8d5b841e28 100644 --- a/c/vendor/nanoarrow/nanoarrow.hpp +++ b/c/vendor/nanoarrow/nanoarrow.hpp @@ -16,8 +16,8 @@ // under the License. #include -#include #include +#include #include "nanoarrow.h" @@ -88,70 +88,108 @@ namespace internal { /// /// @{ -static inline void init_pointer(struct ArrowSchema* data) { data->release = nullptr; } +template +static inline void init_pointer(T* data); + +template +static inline void move_pointer(T* src, T* dst); + +template +static inline void release_pointer(T* data); + +template <> +inline void init_pointer(struct ArrowSchema* data) { + data->release = nullptr; +} -static inline void move_pointer(struct ArrowSchema* src, struct ArrowSchema* dst) { +template <> +inline void move_pointer(struct ArrowSchema* src, struct ArrowSchema* dst) { ArrowSchemaMove(src, dst); } -static inline void release_pointer(struct ArrowSchema* data) { +template <> +inline void release_pointer(struct ArrowSchema* data) { if (data->release != nullptr) { data->release(data); } } -static inline void init_pointer(struct ArrowArray* data) { data->release = nullptr; } +template <> +inline void init_pointer(struct ArrowArray* data) { + 
data->release = nullptr; +} -static inline void move_pointer(struct ArrowArray* src, struct ArrowArray* dst) { +template <> +inline void move_pointer(struct ArrowArray* src, struct ArrowArray* dst) { ArrowArrayMove(src, dst); } -static inline void release_pointer(struct ArrowArray* data) { +template <> +inline void release_pointer(struct ArrowArray* data) { if (data->release != nullptr) { data->release(data); } } -static inline void init_pointer(struct ArrowArrayStream* data) { +template <> +inline void init_pointer(struct ArrowArrayStream* data) { data->release = nullptr; } -static inline void move_pointer(struct ArrowArrayStream* src, - struct ArrowArrayStream* dst) { +template <> +inline void move_pointer(struct ArrowArrayStream* src, struct ArrowArrayStream* dst) { ArrowArrayStreamMove(src, dst); } -static inline void release_pointer(ArrowArrayStream* data) { +template <> +inline void release_pointer(ArrowArrayStream* data) { if (data->release != nullptr) { data->release(data); } } -static inline void init_pointer(struct ArrowBuffer* data) { ArrowBufferInit(data); } +template <> +inline void init_pointer(struct ArrowBuffer* data) { + ArrowBufferInit(data); +} -static inline void move_pointer(struct ArrowBuffer* src, struct ArrowBuffer* dst) { +template <> +inline void move_pointer(struct ArrowBuffer* src, struct ArrowBuffer* dst) { ArrowBufferMove(src, dst); } -static inline void release_pointer(struct ArrowBuffer* data) { ArrowBufferReset(data); } +template <> +inline void release_pointer(struct ArrowBuffer* data) { + ArrowBufferReset(data); +} -static inline void init_pointer(struct ArrowBitmap* data) { ArrowBitmapInit(data); } +template <> +inline void init_pointer(struct ArrowBitmap* data) { + ArrowBitmapInit(data); +} -static inline void move_pointer(struct ArrowBitmap* src, struct ArrowBitmap* dst) { +template <> +inline void move_pointer(struct ArrowBitmap* src, struct ArrowBitmap* dst) { ArrowBitmapMove(src, dst); } -static inline void 
release_pointer(struct ArrowBitmap* data) { ArrowBitmapReset(data); } +template <> +inline void release_pointer(struct ArrowBitmap* data) { + ArrowBitmapReset(data); +} -static inline void init_pointer(struct ArrowArrayView* data) { +template <> +inline void init_pointer(struct ArrowArrayView* data) { ArrowArrayViewInitFromType(data, NANOARROW_TYPE_UNINITIALIZED); } -static inline void move_pointer(struct ArrowArrayView* src, struct ArrowArrayView* dst) { +template <> +inline void move_pointer(struct ArrowArrayView* src, struct ArrowArrayView* dst) { ArrowArrayViewMove(src, dst); } -static inline void release_pointer(struct ArrowArrayView* data) { +template <> +inline void release_pointer(struct ArrowArrayView* data) { ArrowArrayViewReset(data); } @@ -168,15 +206,21 @@ class Unique { /// \brief Move and take ownership of data wrapped by rhs Unique(Unique&& rhs) : Unique(rhs.get()) {} + Unique& operator=(Unique&& rhs) { + reset(rhs.get()); + return *this; + } // These objects are not copyable - Unique(Unique& rhs) = delete; + Unique(const Unique& rhs) = delete; /// \brief Get a pointer to the data owned by this object T* get() noexcept { return &data_; } + const T* get() const noexcept { return &data_; } /// \brief Use the pointer operator to access fields of this object - T* operator->() { return &data_; } + T* operator->() noexcept { return &data_; } + const T* operator->() const noexcept { return &data_; } /// \brief Call data's release callback if valid void reset() { release_pointer(&data_); } @@ -231,28 +275,124 @@ using UniqueArrayView = internal::Unique; /// \defgroup nanoarrow_hpp-array-stream ArrayStream helpers /// -/// These classes provide simple struct ArrowArrayStream implementations that +/// These classes provide simple ArrowArrayStream implementations that /// can be extended to help simplify the process of creating a valid /// ArrowArrayStream implementation or used as-is for testing. 
/// /// @{ +/// @brief Export an ArrowArrayStream from a standard C++ class +/// @tparam T A class with methods `int GetSchema(ArrowSchema*)`, `int +/// GetNext(ArrowArray*)`, and `const char* GetLastError()` +/// +/// This class allows a standard C++ class to be exported to a generic ArrowArrayStream +/// consumer by mapping C callback invocations to method calls on an instance of the +/// object whose lifecycle is owned by the ArrowArrayStream. See VectorArrayStream for +/// minimal useful example of this pattern. +/// +/// The methods must be accessible to the ArrayStreamFactory, either as public methods or +/// by declaring ArrayStreamFactory a friend. Implementors are encouraged (but +/// not required) to implement a ToArrayStream(ArrowArrayStream*) that creates a new +/// instance owned by the ArrowArrayStream and moves the relevant data to that instance. +/// +/// An example implementation might be: +/// +/// \code +/// class StreamImpl { +/// public: +/// // Public methods (e.g., constructor) used from C++ to initialize relevant data +/// +/// // Idiomatic exporter to move data + lifecycle responsibility to an instance +/// // managed by the ArrowArrayStream callbacks +/// void ToArrayStream(struct ArrowArrayStream* out) { +/// ArrayStreamFactory::InitArrayStream(new StreamImpl(...), out); +/// } +/// +/// private: +/// // Make relevant methods available to the ArrayStreamFactory +/// friend class ArrayStreamFactory; +/// +/// // Method implementations (called from C, not normally interacted with from C++) +/// int GetSchema(struct ArrowSchema* schema) { return ENOTSUP; } +/// int GetNext(struct ArrowArray* array) { return ENOTSUP; } +/// const char* GetLastError() { nullptr; } +/// }; +/// \endcode +/// +/// An example usage might be: +/// +/// \code +/// // Call constructor and/or public methods to initialize relevant data +/// StreamImpl impl; +/// +/// // Export to ArrowArrayStream after data are finalized +/// UniqueArrayStream stream; +/// 
impl.ToArrayStream(stream.get()); +/// \endcode +template +class ArrayStreamFactory { + public: + /// \brief Take ownership of instance and populate callbacks of out + static void InitArrayStream(T* instance, struct ArrowArrayStream* out) { + out->get_schema = &get_schema_wrapper; + out->get_next = &get_next_wrapper; + out->get_last_error = &get_last_error_wrapper; + out->release = &release_wrapper; + out->private_data = instance; + } + + private: + static int get_schema_wrapper(struct ArrowArrayStream* stream, + struct ArrowSchema* schema) { + return reinterpret_cast(stream->private_data)->GetSchema(schema); + } + + static int get_next_wrapper(struct ArrowArrayStream* stream, struct ArrowArray* array) { + return reinterpret_cast(stream->private_data)->GetNext(array); + } + + static const char* get_last_error_wrapper(struct ArrowArrayStream* stream) { + return reinterpret_cast(stream->private_data)->GetLastError(); + } + + static void release_wrapper(struct ArrowArrayStream* stream) { + delete reinterpret_cast(stream->private_data); + stream->release = nullptr; + stream->private_data = nullptr; + } +}; + /// \brief An empty array stream /// -/// This class can be constructed from an enum ArrowType or -/// struct ArrowSchema and implements a default get_next() method that -/// always marks the output ArrowArray as released. This class can -/// be extended with an implementation of get_next() for a custom -/// source. +/// This class can be constructed from an struct ArrowSchema and implements a default +/// get_next() method that always marks the output ArrowArray as released. +/// +/// DEPRECATED (0.4.0): Early versions of nanoarrow allowed subclasses to override +/// get_schema(), get_next(), and get_last_error(). This functionality will be removed +/// in a future release: use the pattern documented in ArrayStreamFactory to create +/// custom ArrowArrayStream implementations. 
class EmptyArrayStream { public: + /// \brief Create an EmptyArrayStream from an ArrowSchema + /// + /// Takes ownership of schema. + EmptyArrayStream(struct ArrowSchema* schema) : schema_(schema) { + ArrowErrorInit(&error_); + } + + /// \brief Export to ArrowArrayStream + void ToArrayStream(struct ArrowArrayStream* out) { + EmptyArrayStream* impl = new EmptyArrayStream(schema_.get()); + ArrayStreamFactory::InitArrayStream(impl, out); + } + /// \brief Create an empty UniqueArrayStream from a struct ArrowSchema /// - /// This object takes ownership of the schema and marks the source schema - /// as released. + /// DEPRECATED (0.4.0): Use the constructor + ToArrayStream() to export an + /// EmptyArrayStream to an ArrowArrayStream consumer. static UniqueArrayStream MakeUnique(struct ArrowSchema* schema) { UniqueArrayStream stream; - (new EmptyArrayStream(schema))->MakeStream(stream.get()); + EmptyArrayStream(schema).ToArrayStream(stream.get()); return stream; } @@ -262,17 +402,7 @@ class EmptyArrayStream { UniqueSchema schema_; struct ArrowError error_; - EmptyArrayStream(struct ArrowSchema* schema) : schema_(schema) { - error_.message[0] = '\0'; - } - - void MakeStream(struct ArrowArrayStream* stream) { - stream->get_schema = &get_schema_wrapper; - stream->get_next = &get_next_wrapper; - stream->get_last_error = &get_last_error_wrapper; - stream->release = &release_wrapper; - stream->private_data = this; - } + void MakeStream(struct ArrowArrayStream* stream) { ToArrayStream(stream); } virtual int get_schema(struct ArrowSchema* schema) { return ArrowSchemaDeepCopy(schema_.get(), schema); @@ -286,54 +416,72 @@ class EmptyArrayStream { virtual const char* get_last_error() { return error_.message; } private: - static int get_schema_wrapper(struct ArrowArrayStream* stream, - struct ArrowSchema* schema) { - return reinterpret_cast(stream->private_data)->get_schema(schema); - } + friend class ArrayStreamFactory; - static int get_next_wrapper(struct ArrowArrayStream* stream, 
struct ArrowArray* array) { - return reinterpret_cast(stream->private_data)->get_next(array); - } + int GetSchema(struct ArrowSchema* schema) { return get_schema(schema); } - static const char* get_last_error_wrapper(struct ArrowArrayStream* stream) { - return reinterpret_cast(stream->private_data)->get_last_error(); - } + int GetNext(struct ArrowArray* array) { return get_next(array); } - static void release_wrapper(struct ArrowArrayStream* stream) { - delete reinterpret_cast(stream->private_data); - stream->release = nullptr; - stream->private_data = nullptr; - } + const char* GetLastError() { return get_last_error(); } }; -/// \brief Implementation of an ArrowArrayStream backed by a vector of ArrowArray objects -class VectorArrayStream : public EmptyArrayStream { +/// \brief Implementation of an ArrowArrayStream backed by a vector of UniqueArray objects +class VectorArrayStream { public: + /// \brief Create a VectorArrayStream from an ArrowSchema + vector of UniqueArray + /// + /// Takes ownership of schema and moves arrays if possible. + VectorArrayStream(struct ArrowSchema* schema, std::vector arrays) + : offset_(0), schema_(schema), arrays_(std::move(arrays)) {} + + /// \brief Create a one-shot VectorArrayStream from an ArrowSchema + ArrowArray + /// + /// Takes ownership of schema and array. + VectorArrayStream(struct ArrowSchema* schema, struct ArrowArray* array) + : offset_(0), schema_(schema) { + arrays_.emplace_back(array); + } + + /// \brief Export to ArrowArrayStream + void ToArrayStream(struct ArrowArrayStream* out) { + VectorArrayStream* impl = new VectorArrayStream(schema_.get(), std::move(arrays_)); + ArrayStreamFactory::InitArrayStream(impl, out); + } + /// \brief Create a UniqueArrowArrayStream from an existing array /// - /// Takes ownership of the schema and the array. + /// DEPRECATED (0.4.0): Use the constructors + ToArrayStream() to export a + /// VectorArrayStream to an ArrowArrayStream consumer. 
static UniqueArrayStream MakeUnique(struct ArrowSchema* schema, struct ArrowArray* array) { - std::vector arrays; - arrays.emplace_back(array); - return MakeUnique(schema, std::move(arrays)); + UniqueArrayStream stream; + VectorArrayStream(schema, array).ToArrayStream(stream.get()); + return stream; } /// \brief Create a UniqueArrowArrayStream from existing arrays /// - /// This object takes ownership of the schema and arrays. + /// DEPRECATED (0.4.0): Use the constructor + ToArrayStream() to export a + /// VectorArrayStream to an ArrowArrayStream consumer. static UniqueArrayStream MakeUnique(struct ArrowSchema* schema, std::vector arrays) { UniqueArrayStream stream; - (new VectorArrayStream(schema, std::move(arrays)))->MakeStream(stream.get()); + VectorArrayStream(schema, std::move(arrays)).ToArrayStream(stream.get()); return stream; } - protected: - VectorArrayStream(struct ArrowSchema* schema, std::vector arrays) - : EmptyArrayStream(schema), arrays_(std::move(arrays)), offset_(0) {} + private: + int64_t offset_; + UniqueSchema schema_; + std::vector arrays_; + + friend class ArrayStreamFactory; - int get_next(struct ArrowArray* array) { + int GetSchema(struct ArrowSchema* schema) { + return ArrowSchemaDeepCopy(schema_.get(), schema); + } + + int GetNext(struct ArrowArray* array) { if (offset_ < static_cast(arrays_.size())) { arrays_[offset_++].move(array); } else { @@ -343,9 +491,7 @@ class VectorArrayStream : public EmptyArrayStream { return NANOARROW_OK; } - private: - std::vector arrays_; - int64_t offset_; + const char* GetLastError() { return ""; } }; /// @} diff --git a/c/vendor/vendor_nanoarrow.sh b/c/vendor/vendor_nanoarrow.sh index 45aa64fe13..c44d48e8db 100755 --- a/c/vendor/vendor_nanoarrow.sh +++ b/c/vendor/vendor_nanoarrow.sh @@ -20,7 +20,8 @@ main() { local -r repo_url="https://github.com/apache/arrow-nanoarrow" - local -r commit_sha=$(git ls-remote "$repo_url" HEAD | awk '{print $2}') + # Check releases page: 
https://github.com/apache/arrow-nanoarrow/releases/ + local -r commit_sha=3f83f4c48959f7a51053074672b7a330888385b1 echo "Fetching $commit_sha from $repo_url" SCRATCH=$(mktemp -d)