From 015ddbdf01c478dd22e511338a78ec54201125a1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 3 Feb 2025 23:58:25 +0000 Subject: [PATCH 01/37] First attempt --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/interop.hpp | 46 ++++- cpp/src/interop/arrow_column.cpp | 225 ++++++++++++++++++++++++ cpp/tests/CMakeLists.txt | 1 + cpp/tests/interop/arrow_column_test.cpp | 202 +++++++++++++++++++++ 5 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 cpp/src/interop/arrow_column.cpp create mode 100644 cpp/tests/interop/arrow_column_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0282282b5f3..d6b66a19224 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -463,6 +463,7 @@ add_library( src/hash/xxhash_64.cu src/interop/dlpack.cpp src/interop/arrow_utilities.cpp + src/interop/arrow_column.cpp src/interop/to_arrow_device.cu src/interop/to_arrow_host.cu src/interop/from_arrow_device.cu diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 810f0377597..93f4603d33a 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #pragma once +#include "nanoarrow/nanoarrow_device.h" #include #include #include @@ -25,6 +26,8 @@ #include #include +#include + #include struct DLManagedTensor; @@ -118,6 +121,47 @@ struct column_metadata { column_metadata() = default; }; +struct arrow_column_container; + +class arrow_column { + public: + arrow_column(ArrowSchema const* schema, + ArrowDeviceArray* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + arrow_column(ArrowSchema const* schema, + ArrowArray* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + arrow_column(cudf::column&& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + void to_arrow(ArrowDeviceArray* output, + ArrowDeviceType device_type = ARROW_DEVICE_CUDA, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + private: + // Using a shared_ptr allows re-export via to_arrow + std::shared_ptr container; +}; + +// class arrow_table { +// public: +// arrow_table(ArrowSchema const* schema,ArrowDeviceArray* input, +// rmm::cuda_stream_view stream = cudf::get_default_stream(), +// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +// arrow_table(ArrowSchema const* schema,ArrowArray* input, +// rmm::cuda_stream_view stream = cudf::get_default_stream(), +// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +// arrow_table(ArrowSchema const* schema,cudf::table&& input, +// rmm::cuda_stream_view stream = cudf::get_default_stream(), +// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +// private: +// // Using a shared_ptr allows re-export via to_arrow +// std::shared_ptr container; +// }; + /** * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter * diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp new file mode 100644 index 00000000000..6787eac8747 --- /dev/null +++ b/cpp/src/interop/arrow_column.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// #include "arrow_utilities.hpp" +// +// #include +// #include +// #include +#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// +// #include +// #include +// #include +// +#include +#include +#include + +#include +#include +#include +#include + +/* + * Notes on ownership + * + * If you start with a cudf object, it has sole ownership and we have complete control of its provenance. We can convert it to an ArrowDeviceArray that has sole ownership of each piece of data. We can also (if desired) decompose a cudf::table down into columns and create a separate ArrowDeviceArray for each column. In this case, we would also be able to export and maintain the lifetimes of each column separately. The nanoarrow array creation routines will produce an ArrowArray for each column that has its own deleter that deletes each buffer, so we could give each buffer ownership of the corresponding cudf data buffer and then we wouldn't really need any private data attribute. That would be more work than what I was initially proposing to do, which is to just have a single ArrowDeviceArray that owns all the data for the whole table, but not much different. It would also mean a bit more work during the conversion since we wouldn't simply be tying lifetimes to an underlying unique_ptr to a cudf type (wrapped through a shared pointer to a containing structure), which is a quick and dirty way to do it. + * + * If we start with an ArrowDeviceArray from another source, though, we have no guarantees about who owns what within the array. Assuming that it's a struct array representing a table, each child array could own its own data via its buffers, or all of the data could be collectively owned by the private data with each buffer having no ownership on its own. In that case, you would always need to keep the whole private data alive in order to keep any individual column alive. + * + * I think the best long-term option is to decompose as much as possible so that it is in principle possible to minimize the amount of + */ +namespace cudf { + +struct arrow_column_container_deleter { + void operator()(std::pair data) { + data.first.array.release(&data.first.array); + // The owned_columns_t + } +}; + +// Class to manage lifetime semantics and allow re-export. +struct arrow_column_container { + // An ArrowDeviceArray used to view the data. This can just be a pointer into the one owned by the + // owner. + ArrowDeviceArray* arr; + + // Also need a member that holds column views (and one for mutable?) + cudf::column_view view; + + // Declare the union + std::variant, + ArrowDeviceArray, + std::shared_ptr> + owner; + + // Question: When the input data was host data, we could presumably release + // immediately. Do we care? If so, how should we implement that? + ~arrow_column_container() { + } +}; + +arrow_column::arrow_column( + ArrowSchema const* schema, + ArrowDeviceArray* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + //switch (input->device_type) { + // case ARROW_DEVICE_CUDA: + // case ARROW_DEVICE_CUDA_HOST: + // case ARROW_DEVICE_CUDA_MANAGED: { + // // In this case, we have an ArrowDeviceArray with CUDA data as the + // // owner. When converting we may generate some owning cudf::column + // // objects for the Arrow data types that we support in a way that is + // // not zero-copy (e.g. bytes to bits). We need to own both the new + // // columns and the original ArrowDeviceArray, and we may as well cache + // // the column_view since we're generating it up front. + // auto view = from_arrow_device_column(schema, input, stream, mr); + // auto deleter = view.get_deleter(); + // // TODO: We shouldn't have to reach into the deleter to get the owned + // // columns, but we can clean that up by refactoring the internals of + // // the existing from_arrow* implementations later. + // std::pair owner{}; + // ArrowDeviceArrayMove(input, &owner.first); + // owner.second = std::move(deleter.owned_mem_); + // container->arr = &owner.first; + // container->owner = std::move(owner); + // // This is copy-constructing + // container->view = *view.get(); + // // We take ownership of the provided array so that we can share control + // // of the lifetime of the source data. + // // We rely on the deleter of the unique_column_view_t to delete the + // // column_view, and since we've already moved the owned_mem_ out from + // // underneath the deleter that memory will stay alive for us. + // } + // case ARROW_DEVICE_CPU: { + // auto col = from_arrow_host_column(schema, input, stream, mr); + // container->owner = std::shared_ptr(col.release()); + // } + // default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); + // } +} +arrow_column::arrow_column( + ArrowSchema const* schema, + ArrowArray* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + //// The copy initialization of .array means that the release callback will be + //// called on that object, so we don't need to call it on the `input` in this + //// function. + //ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + //// TODO: Merge with the ARROW_DEVICE_CPU case above with a helper function. + //auto col = from_arrow_host_column(schema, &arr, stream, mr); + //container->owner = std::shared_ptr(col.release()); +} +arrow_column::arrow_column( + cudf::column&& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + // The output ArrowDeviceArray here will own all the data, so we don't need to save a column + auto output = cudf::to_arrow_device(std::move(input)); + ArrowDeviceArrayMove(output.get(), &std::get(container->owner)); + container->arr = &std::get(container->owner); +} + +arrow_column::to_arrow(ArrowDeviceArray* output, + ArrowDeviceType device_type, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { + switch (ArrowDeviceType) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + auto out = cudf::to_arrow_device(container->view, output, device_type, stream, mr); + // The existing to_arrow_device functions come in two flavors. One accepts a cudf::column and hands over ownership of all data. The other takes a column_view and the resulting ArrowDeviceArray only owns data if there were columns that needed to be translated on the + } + case ARROW_DEVICE_CPU: { + auto out = cudf::to_arrow_host(container->view, output, stream, mr); + } +} + +// arrow_table::arrow_table(ArrowSchema const* schema,ArrowDeviceArray* input, +// rmm::cuda_stream_view stream = cudf::get_default_stream(), +// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { +// ArrowArrayMove(input, container->arr); } +// arrow_table::arrow_table(ArrowArray* input) { +// ArrowDeviceArray arr{ +// .array = *input, +// .device_id = -1, +// .device_type = ARROW_DEVICE_CPU}; +// ArrowArrayMove(&arr, container->arr); +// } +// arrow_table::arrow_table(cudf::table &&input) { +// auto output = cudf::to_arrow_device(input, container->arr); +// ArrowArrayMove(output.get(), container->arr); +// } + +// cudf::column_view view(); +// cudf::mutable_column_view mutable_view(); + +// Create Array whose private_data contains a shared_ptr to this->container +// The output should be consumer-allocated, see +// https://arrow.apache.org/docs/format/CDataInterface.html#member-allocation +// Note: May need stream/mr depending on where we put what logic. +// void to_arrow(ArrowDeviceArray* output); + +// class arrow_table { +// public: +// arrow_table(std::vector < std::shared_ptr columns) : columns{columns} {} +// cudf::table_view view(); +// cudf::mutable_table_view mutable_view(); +// // Create Array whose private_data contains shared_ptrs to all the underlying +// // arrow_array_containers +// void to_arrow(ArrowDeviceArray* output); +// +// private: +// // Would allow arrow_columns being in multiple arrow_tables +// std::vector < std::shared_ptr columns; +// }; + +//// ArrowArrayStream and ArrowArray overloads (they can be overloads now instead +//// of separate functions) are trivial wrappers around this function. Also need versions +//// of all three that return an arrow_column instead of an arrow_table. +// std::unique_ptr from_arrow(ArrowSchema const* schema, +// ArrowDeviceArray* input, +// rmm::cuda_stream_view stream, +// rmm::mr::device_memory_resource mr); +// +//// Produce an ArrowDeviceArray and then create an arrow_column around it. +// std::unique_ptr to_arrow( +// // Question: Do we really need a column_view overload? If we're going this +// // route, I think it's OK to always require a transfer of ownership to the +// // arrow_table, but there is potentially some small overhead there. +// std::unique_ptr input, +// rmm::cuda_stream_view stream = cudf::get_default_stream(), +// rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cfc6a0dc425..153c7286a07 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -283,6 +283,7 @@ ConfigureTest( # * interop tests ------------------------------------------------------------------------- ConfigureTest( INTEROP_TEST + interop/arrow_column_test.cpp interop/to_arrow_device_test.cpp interop/to_arrow_test.cpp interop/to_arrow_host_test.cpp diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp new file mode 100644 index 00000000000..2cf6a8884dc --- /dev/null +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2020-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + #include + +#include +#include +#include + +#include +#include + + +#include "nanoarrow/common/inline_types.h" +#include "nanoarrow/nanoarrow.h" +#include "nanoarrow/nanoarrow.hpp" +#include "nanoarrow/nanoarrow_device.h" +#include "tests/interop/nanoarrow_utils.hpp" + +// #include +// #include +// #include +// #include +// +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// +// #include +// +// #include +// +// std::unique_ptr get_cudf_table() +//{ +// std::vector> columns; +// columns.emplace_back(cudf::test::fixed_width_column_wrapper( +// {1, 2, 5, 2, 7}, {true, false, true, true, true}) +// .release()); +// columns.emplace_back(cudf::test::fixed_width_column_wrapper({1, 2, 3, 4, +// 5}).release()); columns.emplace_back(cudf::test::strings_column_wrapper({"fff", "aaa", "", +// "fff", "ccc"}, +// {true, true, true, false, true}) +// .release()); +// auto col4 = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, +// {true, false, true, true, true}); +// columns.emplace_back(cudf::dictionary::encode(col4)); +// columns.emplace_back(cudf::test::fixed_width_column_wrapper( +// {true, false, true, false, true}, {true, false, true, true, false}) +// .release()); +// columns.emplace_back(cudf::test::strings_column_wrapper( +// { +// "", +// "abc", +// "def", +// "1", +// "2", +// }, +// {0, 1, 1, 1, 1}) +// .release()); +// // columns.emplace_back(cudf::test::lists_column_wrapper({{1, 2}, {3, 4}, {}, {6}, {7, 8, +// // 9}}).release()); +// return std::make_unique(std::move(columns)); +// } +// +// std::shared_ptr get_arrow_large_string_array( +// std::vector const& data, std::vector const& mask = {}) +//{ +// std::shared_ptr large_string_array; +// arrow::LargeStringBuilder large_string_builder; +// +// CUDF_EXPECTS(large_string_builder.AppendValues(data, mask.data()).ok(), +// "Failed to append values to string builder"); +// CUDF_EXPECTS(large_string_builder.Finish(&large_string_array).ok(), +// "Failed to create arrow string array"); +// +// return large_string_array; +// } +// +// struct FromArrowTest : public cudf::test::BaseFixture {}; +// +// std::optional> export_table(std::shared_ptr +// arrow_table) +//{ +// ArrowSchema schema; +// if (!arrow::ExportSchema(*arrow_table->schema(), &schema).ok()) { return std::nullopt; } +// auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); +// ArrowArray arr; +// if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { return std::nullopt; } +// auto ret = cudf::from_arrow(&schema, &arr); +// arr.release(&arr); +// schema.release(&schema); +// return {std::move(ret)}; +// } +// +// TEST_F(FromArrowTest, ChunkedArray) +//{ +// auto int64array = get_arrow_array({1, 2, 3, 4, 5}); +// auto int32array_1 = get_arrow_array({1, 2}, {1, 0}); +// auto int32array_2 = get_arrow_array({5, 2, 7}, {1, 1, 1}); +// auto string_array_1 = get_arrow_array({ +// "fff", +// "aaa", +// "", +// }); +// auto string_array_2 = get_arrow_array( +// { +// "fff", +// "ccc", +// }, +// {0, 1}); +// auto large_string_array_1 = get_arrow_large_string_array( +// { +// "", +// "abc", +// "def", +// "1", +// "2", +// }, +// {0, 1, 1, 1, 1}); +// auto dict_array1 = get_arrow_dict_array({1, 2, 5, 7}, {0, 1, 2}, {1, 0, 1}); +// auto dict_array2 = get_arrow_dict_array({1, 2, 5, 7}, {1, 3}); +// +// auto int64_chunked_array = std::make_shared(int64array); +// auto int32_chunked_array = std::make_shared( +// std::vector>{int32array_1, int32array_2}); +// auto string_chunked_array = std::make_shared( +// std::vector>{string_array_1, string_array_2}); +// auto dict_chunked_array = std::make_shared( +// std::vector>{dict_array1, dict_array2}); +// auto boolean_array = +// get_arrow_array({true, false, true, false, true}, {true, false, true, true, false}); +// auto boolean_chunked_array = std::make_shared(boolean_array); +// auto large_string_chunked_array = std::make_shared( +// std::vector>{large_string_array_1}); +// +// std::vector> schema_vector( +// {arrow::field("a", int32_chunked_array->type()), +// arrow::field("b", int64array->type()), +// arrow::field("c", string_array_1->type()), +// arrow::field("d", dict_chunked_array->type()), +// arrow::field("e", boolean_chunked_array->type()), +// arrow::field("f", large_string_array_1->type())}); +// auto schema = std::make_shared(schema_vector); +// +// auto arrow_table = arrow::Table::Make(schema, +// {int32_chunked_array, +// int64_chunked_array, +// string_chunked_array, +// dict_chunked_array, +// boolean_chunked_array, +// large_string_chunked_array}); +// +// auto expected_cudf_table = get_cudf_table(); +// +// auto got_cudf_table = export_table(arrow_table); +// ASSERT_TRUE(got_cudf_table.has_value()); +// +// CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table.value()->view()); +// } + +struct ArrowColumnTest : public cudf::test::BaseFixture {}; + +TEST_F(ArrowColumnTest, TwoWayConversion) +{ + cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; + auto col = cudf::column(int_col); + auto arrow_from_cudf = cudf::arrow_column(std::move(col)); + + //// Should be able to create an arrow_column from an ArrowDeviceArray. + //ArrowDeviceArray arr; + //auto tmp1 = cudf::arrow_column(&arr); + // + //// Should be able to create an arrow_column from cudf::column. It always takes ownership. + //cudf::column col; + + //// Should be able to create an arrow_table from an ArrowDeviceArray. + //ArrowDeviceArray arr2; + //cudf::arrow_table(&arr2); + // + //// Should be able to create an arrow_table from cudf::table. It always takes ownership. + //cudf::table tbl; + //cudf::arrow_table(std::move(tbl)); +} From 7661570b05b7eb917ef7d1918050e9726137d9b0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 00:51:15 +0000 Subject: [PATCH 02/37] Get basic conversion working starting with a cudf column --- cpp/include/cudf/interop.hpp | 31 ++- cpp/src/interop/arrow_column.cpp | 356 ++++++++++++++++-------- cpp/tests/interop/arrow_column_test.cpp | 40 +-- 3 files changed, 282 insertions(+), 145 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 93f4603d33a..175d0a0ad93 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -16,7 +16,6 @@ #pragma once -#include "nanoarrow/nanoarrow_device.h" #include #include #include @@ -40,6 +39,10 @@ struct ArrowArray; struct ArrowArrayStream; +typedef int32_t ArrowDeviceType; + +#define ARROW_DEVICE_CUDA 2 + namespace CUDF_EXPORT cudf { /** * @addtogroup interop_dlpack @@ -125,21 +128,25 @@ struct arrow_column_container; class arrow_column { public: - arrow_column(ArrowSchema const* schema, - ArrowDeviceArray* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - arrow_column(ArrowSchema const* schema, - ArrowArray* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + // arrow_column(ArrowSchema const* schema, + // ArrowDeviceArray* input, + // rmm::cuda_stream_view stream = cudf::get_default_stream(), + // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + // arrow_column(ArrowSchema const* schema, + // ArrowArray* input, + // rmm::cuda_stream_view stream = cudf::get_default_stream(), + // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); arrow_column(cudf::column&& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + void to_arrow_schema(ArrowSchema* output, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); void to_arrow(ArrowDeviceArray* output, - ArrowDeviceType device_type = ARROW_DEVICE_CUDA, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + ArrowDeviceType device_type = ARROW_DEVICE_CUDA, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); private: // Using a shared_ptr allows re-export via to_arrow diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 6787eac8747..a220fb27891 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -14,156 +14,282 @@ * limitations under the License. */ -// #include "arrow_utilities.hpp" -// -// #include -// #include -// #include +//// #include "arrow_utilities.hpp" +//// +//// #include +//// #include +//// #include +#include "io/parquet/ipc/Schema_generated.h" +#include "nanoarrow/common/inline_types.h" + #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// -// #include -// #include -// #include -// +//// #include +//// #include +//// #include +//// #include +//// #include +//// #include +//// #include +//// #include +//// #include +//// #include +//// #include +//// +//// #include +//// #include +//// #include +//// #include #include #include - -#include +// +// #include +#include #include #include -#include - +// #include +// /* * Notes on ownership * - * If you start with a cudf object, it has sole ownership and we have complete control of its provenance. We can convert it to an ArrowDeviceArray that has sole ownership of each piece of data. We can also (if desired) decompose a cudf::table down into columns and create a separate ArrowDeviceArray for each column. In this case, we would also be able to export and maintain the lifetimes of each column separately. The nanoarrow array creation routines will produce an ArrowArray for each column that has its own deleter that deletes each buffer, so we could give each buffer ownership of the corresponding cudf data buffer and then we wouldn't really need any private data attribute. That would be more work than what I was initially proposing to do, which is to just have a single ArrowDeviceArray that owns all the data for the whole table, but not much different. It would also mean a bit more work during the conversion since we wouldn't simply be tying lifetimes to an underlying unique_ptr to a cudf type (wrapped through a shared pointer to a containing structure), which is a quick and dirty way to do it. + * If you start with a cudf object, it has sole ownership and we have complete control of its + * provenance. We can convert it to an ArrowDeviceArray that has sole ownership of each piece of + * data. We can also (if desired) decompose a cudf::table down into columns and create a separate + * ArrowDeviceArray for each column. In this case, we would also be able to export and maintain the + * lifetimes of each column separately. The nanoarrow array creation routines will produce an + * ArrowArray for each column that has its own deleter that deletes each buffer, so we could give + * each buffer ownership of the corresponding cudf data buffer and then we wouldn't really need any + * private data attribute. That would be more work than what I was initially proposing to do, which + * is to just have a single ArrowDeviceArray that owns all the data for the whole table, but not + * much different. It would also mean a bit more work during the conversion since we wouldn't simply + * be tying lifetimes to an underlying unique_ptr to a cudf type (wrapped through a shared pointer + * to a containing structure), which is a quick and dirty way to do it. * - * If we start with an ArrowDeviceArray from another source, though, we have no guarantees about who owns what within the array. Assuming that it's a struct array representing a table, each child array could own its own data via its buffers, or all of the data could be collectively owned by the private data with each buffer having no ownership on its own. In that case, you would always need to keep the whole private data alive in order to keep any individual column alive. + * If we start with an ArrowDeviceArray from another source, though, we have no guarantees about who + * owns what within the array. Assuming that it's a struct array representing a table, each child + * array could own its own data via its buffers, or all of the data could be collectively owned by + * the private data with each buffer having no ownership on its own. In that case, you would always + * need to keep the whole private data alive in order to keep any individual column alive. * - * I think the best long-term option is to decompose as much as possible so that it is in principle possible to minimize the amount of + * I think the best long-term option is to decompose as much as possible so that it is in principle + * possible to minimize the amount of */ namespace cudf { +// arrow_column::arrow_column( +// ArrowSchema const* schema, +// ArrowDeviceArray* input, +// rmm::cuda_stream_view stream, +// rmm::device_async_resource_ref mr) +//{ +// //switch (input->device_type) { +// // case ARROW_DEVICE_CUDA: +// // case ARROW_DEVICE_CUDA_HOST: +// // case ARROW_DEVICE_CUDA_MANAGED: { +// // // In this case, we have an ArrowDeviceArray with CUDA data as the +// // // owner. When converting we may generate some owning cudf::column +// // // objects for the Arrow data types that we support in a way that is +// // // not zero-copy (e.g. bytes to bits). We need to own both the new +// // // columns and the original ArrowDeviceArray, and we may as well cache +// // // the column_view since we're generating it up front. +// // auto view = from_arrow_device_column(schema, input, stream, mr); +// // auto deleter = view.get_deleter(); +// // // TODO: We shouldn't have to reach into the deleter to get the owned +// // // columns, but we can clean that up by refactoring the internals of +// // // the existing from_arrow* implementations later. +// // std::pair owner{}; +// // ArrowDeviceArrayMove(input, &owner.first); +// // owner.second = std::move(deleter.owned_mem_); +// // container->arr = &owner.first; +// // container->owner = std::move(owner); +// // // This is copy-constructing +// // container->view = *view.get(); +// // // We take ownership of the provided array so that we can share control +// // // of the lifetime of the source data. +// // // We rely on the deleter of the unique_column_view_t to delete the +// // // column_view, and since we've already moved the owned_mem_ out from +// // // underneath the deleter that memory will stay alive for us. +// // } +// // case ARROW_DEVICE_CPU: { +// // auto col = from_arrow_host_column(schema, input, stream, mr); +// // container->owner = std::shared_ptr(col.release()); +// // } +// // default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); +// // } +// } +// arrow_column::arrow_column( +// ArrowSchema const* schema, +// ArrowArray* input, +// rmm::cuda_stream_view stream, +// rmm::device_async_resource_ref mr) +//{ +// //// The copy initialization of .array means that the release callback will be +// //// called on that object, so we don't need to call it on the `input` in this +// //// function. +// //ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; +// //// TODO: Merge with the ARROW_DEVICE_CPU case above with a helper function. +// //auto col = from_arrow_host_column(schema, &arr, stream, mr); +// //container->owner = std::shared_ptr(col.release()); +// } + struct arrow_column_container_deleter { - void operator()(std::pair data) { + void operator()(std::pair data) + { data.first.array.release(&data.first.array); - // The owned_columns_t } }; // Class to manage lifetime semantics and allow re-export. struct arrow_column_container { - // An ArrowDeviceArray used to view the data. This can just be a pointer into the one owned by the - // owner. - ArrowDeviceArray* arr; + //// Also need a member that holds column views (and one for mutable?) + // cudf::column_view view; - // Also need a member that holds column views (and one for mutable?) - cudf::column_view view; - - // Declare the union - std::variant, - ArrowDeviceArray, - std::shared_ptr> - owner; + // TODO: Generalize to other possible owned data + ArrowDeviceArray owner; + ArrowSchema schema; // Question: When the input data was host data, we could presumably release // immediately. Do we care? If so, how should we implement that? - ~arrow_column_container() { + ~arrow_column_container() + { + // TODO: Do we have to sync before releasing? + ArrowArrayRelease(&owner.array); } }; -arrow_column::arrow_column( - ArrowSchema const* schema, - ArrowDeviceArray* input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +arrow_column::arrow_column(cudf::column&& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : container{std::make_shared()} +{ + // The output ArrowDeviceArray here will own all the data, so we don't need to save a column + // TODO: metadata should be provided by the user + auto meta = cudf::column_metadata{}; + auto table_meta = std::vector{meta}; + auto tv = cudf::table_view{{input.view()}}; + auto schema = cudf::to_arrow_schema(tv, table_meta); + // ArrowSchema test_schema; + ArrowSchemaMove(schema.get(), &(container->schema)); + auto output = cudf::to_arrow_device(std::move(input)); + ArrowDeviceArrayMove(output.get(), &container->owner); +} + +struct ArrowSchemaPrivateData { + std::shared_ptr parent; + std::vector> children; + std::vector children_raw; +}; + +void SchemaReleaseCallback(ArrowSchema* schema) { - //switch (input->device_type) { - // case ARROW_DEVICE_CUDA: - // case ARROW_DEVICE_CUDA_HOST: - // case ARROW_DEVICE_CUDA_MANAGED: { - // // In this case, we have an ArrowDeviceArray with CUDA data as the - // // owner. When converting we may generate some owning cudf::column - // // objects for the Arrow data types that we support in a way that is - // // not zero-copy (e.g. bytes to bits). We need to own both the new - // // columns and the original ArrowDeviceArray, and we may as well cache - // // the column_view since we're generating it up front. - // auto view = from_arrow_device_column(schema, input, stream, mr); - // auto deleter = view.get_deleter(); - // // TODO: We shouldn't have to reach into the deleter to get the owned - // // columns, but we can clean that up by refactoring the internals of - // // the existing from_arrow* implementations later. - // std::pair owner{}; - // ArrowDeviceArrayMove(input, &owner.first); - // owner.second = std::move(deleter.owned_mem_); - // container->arr = &owner.first; - // container->owner = std::move(owner); - // // This is copy-constructing - // container->view = *view.get(); - // // We take ownership of the provided array so that we can share control - // // of the lifetime of the source data. - // // We rely on the deleter of the unique_column_view_t to delete the - // // column_view, and since we've already moved the owned_mem_ out from - // // underneath the deleter that memory will stay alive for us. - // } - // case ARROW_DEVICE_CPU: { - // auto col = from_arrow_host_column(schema, input, stream, mr); - // container->owner = std::shared_ptr(col.release()); - // } - // default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); - // } + auto private_data = reinterpret_cast(schema->private_data); + for (auto& child : private_data->children) { + child->release(child.get()); + } + delete private_data; + schema->release = nullptr; } -arrow_column::arrow_column( - ArrowSchema const* schema, - ArrowArray* input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + +void copy_schema(ArrowSchema* output, + ArrowSchema* input, + std::shared_ptr container) { - //// The copy initialization of .array means that the release callback will be - //// called on that object, so we don't need to call it on the `input` in this - //// function. - //ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; - //// TODO: Merge with the ARROW_DEVICE_CPU case above with a helper function. - //auto col = from_arrow_host_column(schema, &arr, stream, mr); - //container->owner = std::shared_ptr(col.release()); + auto private_data = new ArrowSchemaPrivateData{container}; + output->format = input->format; + output->name = input->name; + output->metadata = input->metadata; + output->flags = input->flags; + output->n_children = input->n_children; + if (input->n_children > 0) { + // TODO: Give each child its own private data. + private_data->children_raw.resize(input->n_children); + for (auto i = 0; i < input->n_children; ++i) { + private_data->children.push_back(std::make_unique()); + private_data->children_raw[i] = private_data->children.back().get(); + copy_schema(private_data->children_raw[i], input->children[i], container); + } + } + output->children = private_data->children_raw.data(); + // TODO: This is probably not quite right but we don't actually support + // dictionaries so not sure it's worth fixing. + output->dictionary = input->dictionary; + output->release = SchemaReleaseCallback; + output->private_data = private_data; } -arrow_column::arrow_column( - cudf::column&& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + +void arrow_column::to_arrow_schema(ArrowSchema* output, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { - // The output ArrowDeviceArray here will own all the data, so we don't need to save a column - auto output = cudf::to_arrow_device(std::move(input)); - ArrowDeviceArrayMove(output.get(), &std::get(container->owner)); - container->arr = &std::get(container->owner); + auto& schema = container->schema; + copy_schema(output, &schema, container); +} + +struct ArrowArrayPrivateData { + std::shared_ptr parent; + std::vector> children; + std::vector children_raw; +}; + +void ArrayReleaseCallback(ArrowArray* array) +{ + auto private_data = reinterpret_cast(array->private_data); + for (auto& child : private_data->children) { + child->release(child.get()); + } + delete private_data; + array->release = nullptr; +} + +void copy_array(ArrowArray* output, + ArrowArray* input, + std::shared_ptr container) +{ + auto private_data = new ArrowArrayPrivateData{container}; + output->length = input->length; + output->null_count = input->null_count; + output->offset = input->offset; + output->n_buffers = input->n_buffers; + output->n_children = input->n_children; + output->buffers = input->buffers; + + if (input->n_children > 0) { + private_data->children_raw.resize(input->n_children); + for (auto i = 0; i < input->n_children; ++i) { + private_data->children.push_back(std::make_unique()); + copy_array(private_data->children.back().get(), input->children[i], container); + } + } + output->children = private_data->children_raw.data(); + // TODO: This is probably not quite right but we don't actually support + // dictionaries so not sure it's worth fixing. + output->dictionary = input->dictionary; + output->release = ArrayReleaseCallback; + output->private_data = private_data; } -arrow_column::to_arrow(ArrowDeviceArray* output, - ArrowDeviceType device_type, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) { - switch (ArrowDeviceType) { - case ARROW_DEVICE_CUDA: - case ARROW_DEVICE_CUDA_HOST: - case ARROW_DEVICE_CUDA_MANAGED: { - auto out = cudf::to_arrow_device(container->view, output, device_type, stream, mr); - // The existing to_arrow_device functions come in two flavors. One accepts a cudf::column and hands over ownership of all data. The other takes a column_view and the resulting ArrowDeviceArray only owns data if there were columns that needed to be translated on the - } - case ARROW_DEVICE_CPU: { - auto out = cudf::to_arrow_host(container->view, output, stream, mr); - } +void arrow_column::to_arrow(ArrowDeviceArray* output, + ArrowDeviceType device_type, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + switch (device_type) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + auto device_arr = container->owner; + copy_array(&output->array, &device_arr.array, container); + output->device_id = device_arr.device_id; + // We can reuse the sync event by reference from the input. The + // destruction of that event is managed by the destruction of + // the underlying ArrowDeviceArray of this column. + output->sync_event = device_arr.sync_event; + output->device_type = device_type; + } + // case ARROW_DEVICE_CPU: { + // auto out = cudf::to_arrow_host(container->view, output, stream, mr); + // } + } } // arrow_table::arrow_table(ArrowSchema const* schema,ArrowDeviceArray* input, diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 2cf6a8884dc..f6689d1cc82 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -14,22 +14,21 @@ * limitations under the License. */ - #include - -#include -#include -#include - -#include -#include - - #include "nanoarrow/common/inline_types.h" #include "nanoarrow/nanoarrow.h" #include "nanoarrow/nanoarrow.hpp" #include "nanoarrow/nanoarrow_device.h" #include "tests/interop/nanoarrow_utils.hpp" +#include + +#include +#include + +#include +#include +#include + // #include // #include // #include @@ -182,21 +181,26 @@ struct ArrowColumnTest : public cudf::test::BaseFixture {}; TEST_F(ArrowColumnTest, TwoWayConversion) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - auto col = cudf::column(int_col); + auto col = cudf::column(int_col); auto arrow_from_cudf = cudf::arrow_column(std::move(col)); + // Now we can extract an ArrowDeviceArray from the arrow_column + ArrowSchema schema; + arrow_from_cudf.to_arrow_schema(&schema); + ArrowDeviceArray arr; + arrow_from_cudf.to_arrow(&arr, ARROW_DEVICE_CUDA); + //// Should be able to create an arrow_column from an ArrowDeviceArray. - //ArrowDeviceArray arr; - //auto tmp1 = cudf::arrow_column(&arr); + // auto tmp1 = cudf::arrow_column(&arr); // //// Should be able to create an arrow_column from cudf::column. It always takes ownership. - //cudf::column col; + // cudf::column col; //// Should be able to create an arrow_table from an ArrowDeviceArray. - //ArrowDeviceArray arr2; - //cudf::arrow_table(&arr2); + // ArrowDeviceArray arr2; + // cudf::arrow_table(&arr2); // //// Should be able to create an arrow_table from cudf::table. It always takes ownership. - //cudf::table tbl; - //cudf::arrow_table(std::move(tbl)); + // cudf::table tbl; + // cudf::arrow_table(std::move(tbl)); } From f01a07b50b13670afcceb3b69f5ddd2c639ddf25 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 03:47:40 +0000 Subject: [PATCH 03/37] Enable construction from an ArrowDeviceArray --- cpp/include/cudf/interop.hpp | 8 +- cpp/src/interop/arrow_column.cpp | 190 ++++++++++++------------ cpp/tests/interop/arrow_column_test.cpp | 18 ++- 3 files changed, 113 insertions(+), 103 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 175d0a0ad93..aee1017c4e9 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -128,10 +128,10 @@ struct arrow_column_container; class arrow_column { public: - // arrow_column(ArrowSchema const* schema, - // ArrowDeviceArray* input, - // rmm::cuda_stream_view stream = cudf::get_default_stream(), - // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + arrow_column(ArrowSchema const* schema, + ArrowDeviceArray* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); // arrow_column(ArrowSchema const* schema, // ArrowArray* input, // rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index a220fb27891..f14d71ed016 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -19,8 +19,6 @@ //// #include //// #include //// #include -#include "io/parquet/ipc/Schema_generated.h" -#include "nanoarrow/common/inline_types.h" #include //// #include @@ -76,69 +74,6 @@ */ namespace cudf { -// arrow_column::arrow_column( -// ArrowSchema const* schema, -// ArrowDeviceArray* input, -// rmm::cuda_stream_view stream, -// rmm::device_async_resource_ref mr) -//{ -// //switch (input->device_type) { -// // case ARROW_DEVICE_CUDA: -// // case ARROW_DEVICE_CUDA_HOST: -// // case ARROW_DEVICE_CUDA_MANAGED: { -// // // In this case, we have an ArrowDeviceArray with CUDA data as the -// // // owner. When converting we may generate some owning cudf::column -// // // objects for the Arrow data types that we support in a way that is -// // // not zero-copy (e.g. bytes to bits). We need to own both the new -// // // columns and the original ArrowDeviceArray, and we may as well cache -// // // the column_view since we're generating it up front. -// // auto view = from_arrow_device_column(schema, input, stream, mr); -// // auto deleter = view.get_deleter(); -// // // TODO: We shouldn't have to reach into the deleter to get the owned -// // // columns, but we can clean that up by refactoring the internals of -// // // the existing from_arrow* implementations later. -// // std::pair owner{}; -// // ArrowDeviceArrayMove(input, &owner.first); -// // owner.second = std::move(deleter.owned_mem_); -// // container->arr = &owner.first; -// // container->owner = std::move(owner); -// // // This is copy-constructing -// // container->view = *view.get(); -// // // We take ownership of the provided array so that we can share control -// // // of the lifetime of the source data. -// // // We rely on the deleter of the unique_column_view_t to delete the -// // // column_view, and since we've already moved the owned_mem_ out from -// // // underneath the deleter that memory will stay alive for us. -// // } -// // case ARROW_DEVICE_CPU: { -// // auto col = from_arrow_host_column(schema, input, stream, mr); -// // container->owner = std::shared_ptr(col.release()); -// // } -// // default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); -// // } -// } -// arrow_column::arrow_column( -// ArrowSchema const* schema, -// ArrowArray* input, -// rmm::cuda_stream_view stream, -// rmm::device_async_resource_ref mr) -//{ -// //// The copy initialization of .array means that the release callback will be -// //// called on that object, so we don't need to call it on the `input` in this -// //// function. -// //ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; -// //// TODO: Merge with the ARROW_DEVICE_CPU case above with a helper function. -// //auto col = from_arrow_host_column(schema, &arr, stream, mr); -// //container->owner = std::shared_ptr(col.release()); -// } - -struct arrow_column_container_deleter { - void operator()(std::pair data) - { - data.first.array.release(&data.first.array); - } -}; - // Class to manage lifetime semantics and allow re-export. struct arrow_column_container { //// Also need a member that holds column views (and one for mutable?) @@ -157,23 +92,7 @@ struct arrow_column_container { } }; -arrow_column::arrow_column(cudf::column&& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - : container{std::make_shared()} -{ - // The output ArrowDeviceArray here will own all the data, so we don't need to save a column - // TODO: metadata should be provided by the user - auto meta = cudf::column_metadata{}; - auto table_meta = std::vector{meta}; - auto tv = cudf::table_view{{input.view()}}; - auto schema = cudf::to_arrow_schema(tv, table_meta); - // ArrowSchema test_schema; - ArrowSchemaMove(schema.get(), &(container->schema)); - auto output = cudf::to_arrow_device(std::move(input)); - ArrowDeviceArrayMove(output.get(), &container->owner); -} - +namespace { struct ArrowSchemaPrivateData { std::shared_ptr parent; std::vector> children; @@ -191,10 +110,15 @@ void SchemaReleaseCallback(ArrowSchema* schema) } void copy_schema(ArrowSchema* output, - ArrowSchema* input, + ArrowSchema const* input, std::shared_ptr container) { - auto private_data = new ArrowSchemaPrivateData{container}; + auto private_data = new ArrowSchemaPrivateData{container}; + // TODO: Just deep copy these. They are small and we should not rely on the + // lifetime of the input schema (which is currently the case) . The spec + // doesn't say anything about moving schemas, so it's really only arrays + // where we have this support. For the lifetimes we can toss std:strings into + // private_data. output->format = input->format; output->name = input->name; output->metadata = input->metadata; @@ -217,14 +141,6 @@ void copy_schema(ArrowSchema* output, output->private_data = private_data; } -void arrow_column::to_arrow_schema(ArrowSchema* output, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto& schema = container->schema; - copy_schema(output, &schema, container); -} - struct ArrowArrayPrivateData { std::shared_ptr parent; std::vector> children; @@ -242,7 +158,7 @@ void ArrayReleaseCallback(ArrowArray* array) } void copy_array(ArrowArray* output, - ArrowArray* input, + ArrowArray const* input, std::shared_ptr container) { auto private_data = new ArrowArrayPrivateData{container}; @@ -268,6 +184,93 @@ void copy_array(ArrowArray* output, output->private_data = private_data; } +struct arrow_column_container_deleter { + void operator()(std::pair data) + { + data.first.array.release(&data.first.array); + } +}; + +} // namespace + +arrow_column::arrow_column(cudf::column&& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : container{std::make_shared()} +{ + // The output ArrowDeviceArray here will own all the data, so we don't need to save a column + // TODO: metadata should be provided by the user + auto meta = cudf::column_metadata{}; + auto table_meta = std::vector{meta}; + auto tv = cudf::table_view{{input.view()}}; + auto schema = cudf::to_arrow_schema(tv, table_meta); + // ArrowSchema test_schema; + ArrowSchemaMove(schema.get(), &(container->schema)); + auto output = cudf::to_arrow_device(std::move(input)); + ArrowDeviceArrayMove(output.get(), &container->owner); +} + +// IMPORTANT: This constructor will move the input array if it is device data. +// Probably won't for host data, though... is that asymmetry okay? +arrow_column::arrow_column(ArrowSchema const* schema, + ArrowDeviceArray* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : container{std::make_shared()} +{ + switch (input->device_type) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + // In this case, we have an ArrowDeviceArray with CUDA data as the + // owner. We can simply move it into our container and safe it now. We + // do need to copy the schema, though. + copy_schema(&container->schema, schema, container); + auto device_arr = container->owner; + // This behavior is different than the old from_arrow_device function + // because now we are not create a non-owning column_view but an + // arrow_column that can manage lifetimes. + ArrowArrayMove(&input->array, &device_arr.array); + device_arr.device_type = input->device_type; + // Pointing to the existing sync event is safe assuming that the + // underlying event is managed by the private data and the release + // callback. + device_arr.sync_event = input->sync_event; + device_arr.device_id = input->device_id; + break; + } + case ARROW_DEVICE_CPU: { + throw std::runtime_error("ArrowDeviceArray with CPU data not supported"); + // auto col = from_arrow_host_column(schema, input, stream, mr); + // container->owner = std::shared_ptr(col.release()); + // break; + } + default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); + } +} +// arrow_column::arrow_column( +// ArrowSchema const* schema, +// ArrowArray* input, +// rmm::cuda_stream_view stream, +// rmm::device_async_resource_ref mr) +//{ +// //// The copy initialization of .array means that the release callback will be +// //// called on that object, so we don't need to call it on the `input` in this +// //// function. +// //ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; +// //// TODO: Merge with the ARROW_DEVICE_CPU case above with a helper function. +// //auto col = from_arrow_host_column(schema, &arr, stream, mr); +// //container->owner = std::shared_ptr(col.release()); +// } + +void arrow_column::to_arrow_schema(ArrowSchema* output, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto& schema = container->schema; + copy_schema(output, &schema, container); +} + void arrow_column::to_arrow(ArrowDeviceArray* output, ArrowDeviceType device_type, rmm::cuda_stream_view stream, @@ -285,6 +288,7 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, // the underlying ArrowDeviceArray of this column. output->sync_event = device_arr.sync_event; output->device_type = device_type; + break; } // case ARROW_DEVICE_CPU: { // auto out = cudf::to_arrow_host(container->view, output, stream, mr); diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index f6689d1cc82..797071f1117 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -181,14 +181,20 @@ struct ArrowColumnTest : public cudf::test::BaseFixture {}; TEST_F(ArrowColumnTest, TwoWayConversion) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - auto col = cudf::column(int_col); - auto arrow_from_cudf = cudf::arrow_column(std::move(col)); + auto col = cudf::column(int_col); + auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); // Now we can extract an ArrowDeviceArray from the arrow_column - ArrowSchema schema; - arrow_from_cudf.to_arrow_schema(&schema); - ArrowDeviceArray arr; - arrow_from_cudf.to_arrow(&arr, ARROW_DEVICE_CUDA); + ArrowSchema arrow_schema_from_cudf_column; + arrow_column_from_cudf_column.to_arrow_schema(&arrow_schema_from_cudf_column); + ArrowDeviceArray arrow_array_from_arrow_column; + arrow_column_from_cudf_column.to_arrow(&arrow_array_from_arrow_column, ARROW_DEVICE_CUDA); + + // Now let's convert it back to an arrow_column + auto arrow_column_from_arrow_array = + cudf::arrow_column(&arrow_schema_from_cudf_column, &arrow_array_from_arrow_column); + + // Now do some assertions //// Should be able to create an arrow_column from an ArrowDeviceArray. // auto tmp1 = cudf::arrow_column(&arr); From 851d4dc32edd5516c42440b01433f0c1be6bba48 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 18:49:26 +0000 Subject: [PATCH 04/37] CMake testing changes --- cpp/CMakeLists.txt | 4 ++++ cpp/tests/CMakeLists.txt | 3 +++ 2 files changed, 7 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d6b66a19224..704b220f6a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -784,6 +784,10 @@ add_library( src/utilities/type_checks.cpp src/utilities/type_dispatcher.cpp ) +set_source_files_properties( + src/interop/arrow_column.cpp + PROPERTIES COMPILE_OPTIONS "-g;-O0") + # Anything that includes jitify needs to be compiled with _FILE_OFFSET_BITS=64 due to a limitation # in how conda builds glibc diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 153c7286a07..7c31b6979f1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -296,6 +296,9 @@ ConfigureTest( nanoarrow ${ARROW_LIBRARIES} ) +set_target_properties( + INTEROP_TEST + PROPERTIES COMPILE_OPTIONS "-g;-O0") # ################################################################################################## # * io tests -------------------------------------------------------------------------------------- From a09e87d361a453f019405b8de2ea9074bcab3bf9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 18:49:35 +0000 Subject: [PATCH 05/37] Use nanoarray for copying schema --- cpp/src/interop/arrow_column.cpp | 51 ++------------------------------ 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index f14d71ed016..6a618809d97 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -93,53 +93,6 @@ struct arrow_column_container { }; namespace { -struct ArrowSchemaPrivateData { - std::shared_ptr parent; - std::vector> children; - std::vector children_raw; -}; - -void SchemaReleaseCallback(ArrowSchema* schema) -{ - auto private_data = reinterpret_cast(schema->private_data); - for (auto& child : private_data->children) { - child->release(child.get()); - } - delete private_data; - schema->release = nullptr; -} - -void copy_schema(ArrowSchema* output, - ArrowSchema const* input, - std::shared_ptr container) -{ - auto private_data = new ArrowSchemaPrivateData{container}; - // TODO: Just deep copy these. They are small and we should not rely on the - // lifetime of the input schema (which is currently the case) . The spec - // doesn't say anything about moving schemas, so it's really only arrays - // where we have this support. For the lifetimes we can toss std:strings into - // private_data. - output->format = input->format; - output->name = input->name; - output->metadata = input->metadata; - output->flags = input->flags; - output->n_children = input->n_children; - if (input->n_children > 0) { - // TODO: Give each child its own private data. - private_data->children_raw.resize(input->n_children); - for (auto i = 0; i < input->n_children; ++i) { - private_data->children.push_back(std::make_unique()); - private_data->children_raw[i] = private_data->children.back().get(); - copy_schema(private_data->children_raw[i], input->children[i], container); - } - } - output->children = private_data->children_raw.data(); - // TODO: This is probably not quite right but we don't actually support - // dictionaries so not sure it's worth fixing. - output->dictionary = input->dictionary; - output->release = SchemaReleaseCallback; - output->private_data = private_data; -} struct ArrowArrayPrivateData { std::shared_ptr parent; @@ -225,7 +178,7 @@ arrow_column::arrow_column(ArrowSchema const* schema, // In this case, we have an ArrowDeviceArray with CUDA data as the // owner. We can simply move it into our container and safe it now. We // do need to copy the schema, though. - copy_schema(&container->schema, schema, container); + ArrowSchemaDeepCopy(schema, &container->schema); auto device_arr = container->owner; // This behavior is different than the old from_arrow_device function // because now we are not create a non-owning column_view but an @@ -268,7 +221,7 @@ void arrow_column::to_arrow_schema(ArrowSchema* output, rmm::device_async_resource_ref mr) { auto& schema = container->schema; - copy_schema(output, &schema, container); + ArrowSchemaDeepCopy(&schema, output); } void arrow_column::to_arrow(ArrowDeviceArray* output, From e9865bb42bea1e4cf32332f530b1255b9687c698 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 22:35:21 +0000 Subject: [PATCH 06/37] Fix reference bug --- cpp/src/interop/arrow_column.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 6a618809d97..0c53ab7b5b2 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -179,7 +179,7 @@ arrow_column::arrow_column(ArrowSchema const* schema, // owner. We can simply move it into our container and safe it now. We // do need to copy the schema, though. ArrowSchemaDeepCopy(schema, &container->schema); - auto device_arr = container->owner; + auto& device_arr = container->owner; // This behavior is different than the old from_arrow_device function // because now we are not create a non-owning column_view but an // arrow_column that can manage lifetimes. From a07b2ae238d3613e1e91fc1568f9a225ca9d6503 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 22:35:38 +0000 Subject: [PATCH 07/37] More CMake testing changes --- cpp/cmake/thirdparty/get_nanoarrow.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake index 6765202cc5e..a4868aeac78 100644 --- a/cpp/cmake/thirdparty/get_nanoarrow.cmake +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -29,7 +29,8 @@ function(find_and_configure_nanoarrow) GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git GIT_TAG 4bf5a9322626e95e3717e43de7616c0a256179eb GIT_SHALLOW FALSE - OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ${_exclude_from_all} + OPTIONS "CMAKE_BUILD_TYPE Debug" "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" + ${_exclude_from_all} ) set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) rapids_export_find_package_root(BUILD nanoarrow "${nanoarrow_BINARY_DIR}" EXPORT_SET cudf-exports) From bd65da6d7b3f93162d0f098596c3b871f59ea3e5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 19 Feb 2025 23:24:18 +0000 Subject: [PATCH 08/37] Enable getting views and try asserting equivalence --- cpp/include/cudf/interop.hpp | 122 ++++++++++++------------ cpp/src/interop/arrow_column.cpp | 6 ++ cpp/tests/interop/arrow_column_test.cpp | 2 + 3 files changed, 71 insertions(+), 59 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index aee1017c4e9..be2934b948f 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -124,6 +124,65 @@ struct column_metadata { column_metadata() = default; }; +/** + * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter + * + */ +using unique_schema_t = std::unique_ptr; + +/** + * @brief typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter + * + */ +using unique_device_array_t = std::unique_ptr; + +/** + * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray + * + */ +using owned_columns_t = std::vector>; + +/** + * @brief functor for a custom deleter to a unique_ptr of table_view + * + * When converting from an ArrowDeviceArray, there are cases where data can't + * be zero-copy (i.e. bools or non-UINT32 dictionary indices). This custom deleter + * is used to maintain ownership over the data allocated since a `cudf::table_view` + * doesn't hold ownership. + */ +template +struct custom_view_deleter { + /** + * @brief Construct a new custom view deleter object + * + * @param owned Vector of owning columns + */ + explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {} + + /** + * @brief operator to delete the unique_ptr + * + * @param ptr Pointer to the object to be deleted + */ + void operator()(ViewType* ptr) const { delete ptr; } + + owned_columns_t owned_mem_; ///< Owned columns that must be deleted. +}; + +/** + * @brief typedef for a unique_ptr to a `cudf::table_view` with custom deleter + * + */ +using unique_table_view_t = + std::unique_ptr>; + +/** + * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter + * + */ +using unique_column_view_t = + std::unique_ptr>; + struct arrow_column_container; class arrow_column { @@ -148,6 +207,10 @@ class arrow_column { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + unique_column_view_t view( + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + private: // Using a shared_ptr allows re-export via to_arrow std::shared_ptr container; @@ -169,18 +232,6 @@ class arrow_column { // std::shared_ptr container; // }; -/** - * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter - * - */ -using unique_schema_t = std::unique_ptr; - -/** - * @brief typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter - * - */ -using unique_device_array_t = std::unique_ptr; - /** * @brief Create ArrowSchema from cudf table and metadata * @@ -476,46 +527,6 @@ std::unique_ptr from_arrow_host_column( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); -/** - * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray - * - */ -using owned_columns_t = std::vector>; - -/** - * @brief functor for a custom deleter to a unique_ptr of table_view - * - * When converting from an ArrowDeviceArray, there are cases where data can't - * be zero-copy (i.e. bools or non-UINT32 dictionary indices). This custom deleter - * is used to maintain ownership over the data allocated since a `cudf::table_view` - * doesn't hold ownership. - */ -template -struct custom_view_deleter { - /** - * @brief Construct a new custom view deleter object - * - * @param owned Vector of owning columns - */ - explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {} - - /** - * @brief operator to delete the unique_ptr - * - * @param ptr Pointer to the object to be deleted - */ - void operator()(ViewType* ptr) const { delete ptr; } - - owned_columns_t owned_mem_; ///< Owned columns that must be deleted. -}; - -/** - * @brief typedef for a unique_ptr to a `cudf::table_view` with custom deleter - * - */ -using unique_table_view_t = - std::unique_ptr>; - /** * @brief Create `cudf::table_view` from given `ArrowDeviceArray` and `ArrowSchema` * @@ -557,13 +568,6 @@ unique_table_view_t from_arrow_device( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); -/** - * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter - * - */ -using unique_column_view_t = - std::unique_ptr>; - /** * @brief Create `cudf::column_view` from given `ArrowDeviceArray` and `ArrowSchema` * diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 0c53ab7b5b2..46dcbcc61a9 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -249,6 +249,12 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, } } +unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return from_arrow_device_column(&container->schema, &container->owner, stream, mr); +} + // arrow_table::arrow_table(ArrowSchema const* schema,ArrowDeviceArray* input, // rmm::cuda_stream_view stream = cudf::get_default_stream(), // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 797071f1117..638a7dbbf4a 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -195,6 +195,8 @@ TEST_F(ArrowColumnTest, TwoWayConversion) cudf::arrow_column(&arrow_schema_from_cudf_column, &arrow_array_from_arrow_column); // Now do some assertions + auto view = arrow_column_from_arrow_array.view(); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_cudf_column.view()); //// Should be able to create an arrow_column from an ArrowDeviceArray. // auto tmp1 = cudf::arrow_column(&arr); From a55e5086eaa5590614e56d18c0b5122988629830 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 00:37:04 +0000 Subject: [PATCH 09/37] Fully passing first round of tests. --- cpp/src/interop/arrow_column.cpp | 3 +- cpp/tests/interop/arrow_column_test.cpp | 182 ++---------------------- 2 files changed, 12 insertions(+), 173 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 46dcbcc61a9..567ab756798 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -157,8 +157,7 @@ arrow_column::arrow_column(cudf::column&& input, auto table_meta = std::vector{meta}; auto tv = cudf::table_view{{input.view()}}; auto schema = cudf::to_arrow_schema(tv, table_meta); - // ArrowSchema test_schema; - ArrowSchemaMove(schema.get(), &(container->schema)); + ArrowSchemaMove(schema->children[0], &(container->schema)); auto output = cudf::to_arrow_device(std::move(input)); ArrowDeviceArrayMove(output.get(), &container->owner); } diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 638a7dbbf4a..0e492b25ed8 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -14,176 +14,33 @@ * limitations under the License. */ -#include "nanoarrow/common/inline_types.h" -#include "nanoarrow/nanoarrow.h" -#include "nanoarrow/nanoarrow.hpp" -#include "nanoarrow/nanoarrow_device.h" -#include "tests/interop/nanoarrow_utils.hpp" - #include #include #include +#include #include #include #include -// #include -// #include -// #include -// #include -// -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// -// #include -// -// #include -// -// std::unique_ptr get_cudf_table() -//{ -// std::vector> columns; -// columns.emplace_back(cudf::test::fixed_width_column_wrapper( -// {1, 2, 5, 2, 7}, {true, false, true, true, true}) -// .release()); -// columns.emplace_back(cudf::test::fixed_width_column_wrapper({1, 2, 3, 4, -// 5}).release()); columns.emplace_back(cudf::test::strings_column_wrapper({"fff", "aaa", "", -// "fff", "ccc"}, -// {true, true, true, false, true}) -// .release()); -// auto col4 = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, -// {true, false, true, true, true}); -// columns.emplace_back(cudf::dictionary::encode(col4)); -// columns.emplace_back(cudf::test::fixed_width_column_wrapper( -// {true, false, true, false, true}, {true, false, true, true, false}) -// .release()); -// columns.emplace_back(cudf::test::strings_column_wrapper( -// { -// "", -// "abc", -// "def", -// "1", -// "2", -// }, -// {0, 1, 1, 1, 1}) -// .release()); -// // columns.emplace_back(cudf::test::lists_column_wrapper({{1, 2}, {3, 4}, {}, {6}, {7, 8, -// // 9}}).release()); -// return std::make_unique(std::move(columns)); -// } -// -// std::shared_ptr get_arrow_large_string_array( -// std::vector const& data, std::vector const& mask = {}) -//{ -// std::shared_ptr large_string_array; -// arrow::LargeStringBuilder large_string_builder; -// -// CUDF_EXPECTS(large_string_builder.AppendValues(data, mask.data()).ok(), -// "Failed to append values to string builder"); -// CUDF_EXPECTS(large_string_builder.Finish(&large_string_array).ok(), -// "Failed to create arrow string array"); -// -// return large_string_array; -// } -// -// struct FromArrowTest : public cudf::test::BaseFixture {}; -// -// std::optional> export_table(std::shared_ptr -// arrow_table) -//{ -// ArrowSchema schema; -// if (!arrow::ExportSchema(*arrow_table->schema(), &schema).ok()) { return std::nullopt; } -// auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); -// ArrowArray arr; -// if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { return std::nullopt; } -// auto ret = cudf::from_arrow(&schema, &arr); -// arr.release(&arr); -// schema.release(&schema); -// return {std::move(ret)}; -// } -// -// TEST_F(FromArrowTest, ChunkedArray) -//{ -// auto int64array = get_arrow_array({1, 2, 3, 4, 5}); -// auto int32array_1 = get_arrow_array({1, 2}, {1, 0}); -// auto int32array_2 = get_arrow_array({5, 2, 7}, {1, 1, 1}); -// auto string_array_1 = get_arrow_array({ -// "fff", -// "aaa", -// "", -// }); -// auto string_array_2 = get_arrow_array( -// { -// "fff", -// "ccc", -// }, -// {0, 1}); -// auto large_string_array_1 = get_arrow_large_string_array( -// { -// "", -// "abc", -// "def", -// "1", -// "2", -// }, -// {0, 1, 1, 1, 1}); -// auto dict_array1 = get_arrow_dict_array({1, 2, 5, 7}, {0, 1, 2}, {1, 0, 1}); -// auto dict_array2 = get_arrow_dict_array({1, 2, 5, 7}, {1, 3}); -// -// auto int64_chunked_array = std::make_shared(int64array); -// auto int32_chunked_array = std::make_shared( -// std::vector>{int32array_1, int32array_2}); -// auto string_chunked_array = std::make_shared( -// std::vector>{string_array_1, string_array_2}); -// auto dict_chunked_array = std::make_shared( -// std::vector>{dict_array1, dict_array2}); -// auto boolean_array = -// get_arrow_array({true, false, true, false, true}, {true, false, true, true, false}); -// auto boolean_chunked_array = std::make_shared(boolean_array); -// auto large_string_chunked_array = std::make_shared( -// std::vector>{large_string_array_1}); -// -// std::vector> schema_vector( -// {arrow::field("a", int32_chunked_array->type()), -// arrow::field("b", int64array->type()), -// arrow::field("c", string_array_1->type()), -// arrow::field("d", dict_chunked_array->type()), -// arrow::field("e", boolean_chunked_array->type()), -// arrow::field("f", large_string_array_1->type())}); -// auto schema = std::make_shared(schema_vector); -// -// auto arrow_table = arrow::Table::Make(schema, -// {int32_chunked_array, -// int64_chunked_array, -// string_chunked_array, -// dict_chunked_array, -// boolean_chunked_array, -// large_string_chunked_array}); -// -// auto expected_cudf_table = get_cudf_table(); -// -// auto got_cudf_table = export_table(arrow_table); -// ASSERT_TRUE(got_cudf_table.has_value()); -// -// CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table.value()->view()); -// } +#include +#include struct ArrowColumnTest : public cudf::test::BaseFixture {}; TEST_F(ArrowColumnTest, TwoWayConversion) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - auto col = cudf::column(int_col); + // This column will be moved into the arrow_column, making it invalid to + // access, but the original int_col stays valid for the remainder of the test + // scope for comparison. + auto col = cudf::column(int_col); + auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column.view()); + // Now we can extract an ArrowDeviceArray from the arrow_column ArrowSchema arrow_schema_from_cudf_column; arrow_column_from_cudf_column.to_arrow_schema(&arrow_schema_from_cudf_column); @@ -193,22 +50,5 @@ TEST_F(ArrowColumnTest, TwoWayConversion) // Now let's convert it back to an arrow_column auto arrow_column_from_arrow_array = cudf::arrow_column(&arrow_schema_from_cudf_column, &arrow_array_from_arrow_column); - - // Now do some assertions - auto view = arrow_column_from_arrow_array.view(); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_cudf_column.view()); - - //// Should be able to create an arrow_column from an ArrowDeviceArray. - // auto tmp1 = cudf::arrow_column(&arr); - // - //// Should be able to create an arrow_column from cudf::column. It always takes ownership. - // cudf::column col; - - //// Should be able to create an arrow_table from an ArrowDeviceArray. - // ArrowDeviceArray arr2; - // cudf::arrow_table(&arr2); - // - //// Should be able to create an arrow_table from cudf::table. It always takes ownership. - // cudf::table tbl; - // cudf::arrow_table(std::move(tbl)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); } From 943eb2bbebe9f765a6ef764a6300afe543b03296 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 01:15:20 +0000 Subject: [PATCH 10/37] Add explicit test of lifetime management --- cpp/src/interop/arrow_column.cpp | 5 +++ cpp/tests/interop/arrow_column_test.cpp | 48 +++++++++++++++++++------ 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 567ab756798..863b23df210 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -248,6 +248,11 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, } } +// TODO: Consider just doing this work on construction of the container and +// storing the unique_column_view_t in the container. Then the container can +// safely return copies of the view ad infinitum without needing this call, and +// this call can be stream- and mr-free, matching the cudf::column::view +// method. unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 0e492b25ed8..696dff3f0dd 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -14,21 +14,29 @@ * limitations under the License. */ -#include - #include #include -#include -#include #include -#include #include #include +#include +#include + struct ArrowColumnTest : public cudf::test::BaseFixture {}; +auto export_to_arrow(cudf::arrow_column& col) +{ + // Now we can extract an ArrowDeviceArray from the arrow_column + auto schema = std::make_unique(); + col.to_arrow_schema(schema.get()); + auto array = std::make_unique(); + col.to_arrow(array.get(), ARROW_DEVICE_CUDA); + return std::make_pair(std::move(schema), std::move(array)); +} + TEST_F(ArrowColumnTest, TwoWayConversion) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; @@ -42,13 +50,33 @@ TEST_F(ArrowColumnTest, TwoWayConversion) CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column.view()); // Now we can extract an ArrowDeviceArray from the arrow_column - ArrowSchema arrow_schema_from_cudf_column; - arrow_column_from_cudf_column.to_arrow_schema(&arrow_schema_from_cudf_column); - ArrowDeviceArray arrow_array_from_arrow_column; - arrow_column_from_cudf_column.to_arrow(&arrow_array_from_arrow_column, ARROW_DEVICE_CUDA); + auto [arrow_schema_from_cudf_column, arrow_array_from_arrow_column] = + export_to_arrow(arrow_column_from_cudf_column); + arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_cudf_column.get()); + arrow_column_from_cudf_column.to_arrow(arrow_array_from_arrow_column.get(), ARROW_DEVICE_CUDA); // Now let's convert it back to an arrow_column auto arrow_column_from_arrow_array = - cudf::arrow_column(&arrow_schema_from_cudf_column, &arrow_array_from_arrow_column); + cudf::arrow_column(arrow_schema_from_cudf_column.get(), arrow_array_from_arrow_column.get()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); } + +TEST_F(ArrowColumnTest, LifetimeManagement) +{ + cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; + auto col = std::make_unique(int_col); + auto arrow_column_from_cudf_column = std::make_unique(std::move(*col)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column->view()); + + auto [schema1, array1] = export_to_arrow(*arrow_column_from_cudf_column); + auto [schema2, array2] = export_to_arrow(*arrow_column_from_cudf_column); + + // Delete the original owner of the data, then reimport and ensure that we + // are still referencing the same valid original data. + arrow_column_from_cudf_column.reset(); + auto col1 = std::make_unique(schema1.get(), array1.get()); + auto col2 = std::make_unique(schema2.get(), array2.get()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *col1->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col1->view(), *col2->view()); +} From 461c6c826116eaf5c562382e2e1cea6470b0b75c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 01:33:34 +0000 Subject: [PATCH 11/37] Basic arrow table --- cpp/include/cudf/interop.hpp | 40 ++++++++++++---------- cpp/src/interop/arrow_column.cpp | 45 ++++++++++++++----------- cpp/tests/interop/arrow_column_test.cpp | 38 ++++++++++++++++----- 3 files changed, 79 insertions(+), 44 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index be2934b948f..fe15d54e0f1 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -183,7 +183,7 @@ using unique_table_view_t = using unique_column_view_t = std::unique_ptr>; -struct arrow_column_container; +struct arrow_array_container; class arrow_column { public: @@ -213,24 +213,30 @@ class arrow_column { private: // Using a shared_ptr allows re-export via to_arrow - std::shared_ptr container; + std::shared_ptr container; }; -// class arrow_table { -// public: -// arrow_table(ArrowSchema const* schema,ArrowDeviceArray* input, -// rmm::cuda_stream_view stream = cudf::get_default_stream(), -// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); -// arrow_table(ArrowSchema const* schema,ArrowArray* input, -// rmm::cuda_stream_view stream = cudf::get_default_stream(), -// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); -// arrow_table(ArrowSchema const* schema,cudf::table&& input, -// rmm::cuda_stream_view stream = cudf::get_default_stream(), -// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); -// private: -// // Using a shared_ptr allows re-export via to_arrow -// std::shared_ptr container; -// }; +class arrow_table { + public: + // arrow_table(ArrowSchema const* schema, + // ArrowDeviceArray* input, + // rmm::cuda_stream_view stream = cudf::get_default_stream(), + // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + // arrow_table(ArrowSchema const* schema, + // ArrowArray* input, + // rmm::cuda_stream_view stream = cudf::get_default_stream(), + // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + arrow_table(cudf::table&& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + unique_table_view_t view( + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + private: + // Using a shared_ptr allows re-export via to_arrow + std::shared_ptr container; +}; /** * @brief Create ArrowSchema from cudf table and metadata diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 863b23df210..351443be040 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -75,7 +75,7 @@ namespace cudf { // Class to manage lifetime semantics and allow re-export. -struct arrow_column_container { +struct arrow_array_container { //// Also need a member that holds column views (and one for mutable?) // cudf::column_view view; @@ -85,7 +85,7 @@ struct arrow_column_container { // Question: When the input data was host data, we could presumably release // immediately. Do we care? If so, how should we implement that? - ~arrow_column_container() + ~arrow_array_container() { // TODO: Do we have to sync before releasing? ArrowArrayRelease(&owner.array); @@ -95,7 +95,7 @@ struct arrow_column_container { namespace { struct ArrowArrayPrivateData { - std::shared_ptr parent; + std::shared_ptr parent; std::vector> children; std::vector children_raw; }; @@ -112,7 +112,7 @@ void ArrayReleaseCallback(ArrowArray* array) void copy_array(ArrowArray* output, ArrowArray const* input, - std::shared_ptr container) + std::shared_ptr container) { auto private_data = new ArrowArrayPrivateData{container}; output->length = input->length; @@ -137,19 +137,12 @@ void copy_array(ArrowArray* output, output->private_data = private_data; } -struct arrow_column_container_deleter { - void operator()(std::pair data) - { - data.first.array.release(&data.first.array); - } -}; - } // namespace arrow_column::arrow_column(cudf::column&& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : container{std::make_shared()} + : container{std::make_shared()} { // The output ArrowDeviceArray here will own all the data, so we don't need to save a column // TODO: metadata should be provided by the user @@ -168,7 +161,7 @@ arrow_column::arrow_column(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : container{std::make_shared()} + : container{std::make_shared()} { switch (input->device_type) { case ARROW_DEVICE_CUDA: @@ -259,10 +252,6 @@ unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, return from_arrow_device_column(&container->schema, &container->owner, stream, mr); } -// arrow_table::arrow_table(ArrowSchema const* schema,ArrowDeviceArray* input, -// rmm::cuda_stream_view stream = cudf::get_default_stream(), -// rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { -// ArrowArrayMove(input, container->arr); } // arrow_table::arrow_table(ArrowArray* input) { // ArrowDeviceArray arr{ // .array = *input, @@ -275,8 +264,26 @@ unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, // ArrowArrayMove(output.get(), container->arr); // } -// cudf::column_view view(); -// cudf::mutable_column_view mutable_view(); +arrow_table::arrow_table(cudf::table&& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : container{std::make_shared()} +{ + // The output ArrowDeviceArray here will own all the data, so we don't need to save a column + // TODO: metadata should be provided by the user + auto meta = cudf::column_metadata{}; + auto table_meta = std::vector{meta}; + auto schema = cudf::to_arrow_schema(input.view(), table_meta); + ArrowSchemaMove(schema.get(), &(container->schema)); + auto output = cudf::to_arrow_device(std::move(input)); + ArrowDeviceArrayMove(output.get(), &container->owner); +} + +unique_table_view_t arrow_table::view(rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return from_arrow_device(&container->schema, &container->owner, stream, mr); +} // Create Array whose private_data contains a shared_ptr to this->container // The output should be consumer-allocated, see diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 696dff3f0dd..219c9835cc5 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -15,7 +15,9 @@ */ #include +#include #include +#include #include @@ -40,22 +42,15 @@ auto export_to_arrow(cudf::arrow_column& col) TEST_F(ArrowColumnTest, TwoWayConversion) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - // This column will be moved into the arrow_column, making it invalid to - // access, but the original int_col stays valid for the remainder of the test - // scope for comparison. - auto col = cudf::column(int_col); - + auto col = cudf::column(int_col); auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column.view()); - // Now we can extract an ArrowDeviceArray from the arrow_column auto [arrow_schema_from_cudf_column, arrow_array_from_arrow_column] = export_to_arrow(arrow_column_from_cudf_column); arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_cudf_column.get()); arrow_column_from_cudf_column.to_arrow(arrow_array_from_arrow_column.get(), ARROW_DEVICE_CUDA); - // Now let's convert it back to an arrow_column auto arrow_column_from_arrow_array = cudf::arrow_column(arrow_schema_from_cudf_column.get(), arrow_array_from_arrow_column.get()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); @@ -80,3 +75,30 @@ TEST_F(ArrowColumnTest, LifetimeManagement) CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *col1->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col1->view(), *col2->view()); } + +struct ArrowTableTest : public cudf::test::BaseFixture {}; + +TEST_F(ArrowTableTest, TwoWayConversion) +{ + cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; + cudf::test::fixed_width_column_wrapper float_col{ + {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}}; + auto original_view = cudf::table_view{{int_col, float_col}}; + cudf::table table{cudf::table_view{{int_col, float_col}}}; + auto arrow_table_from_cudf_table = cudf::arrow_table(std::move(table)); + + CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_cudf_table.view()); + + // // Now we can extract an ArrowDeviceArray from the arrow_column + // auto [arrow_schema_from_cudf_column, arrow_array_from_arrow_column] = + // export_to_arrow(arrow_column_from_cudf_column); + // arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_cudf_column.get()); + // arrow_column_from_cudf_column.to_arrow(arrow_array_from_arrow_column.get(), + // ARROW_DEVICE_CUDA); + // + // // Now let's convert it back to an arrow_column + // auto arrow_column_from_arrow_array = + // cudf::arrow_column(arrow_schema_from_cudf_column.get(), + // arrow_array_from_arrow_column.get()); + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); +} From 362529d55a6e036c58fb247cb8af7fb3910c455a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 02:03:12 +0000 Subject: [PATCH 12/37] Add arrow conversions --- cpp/include/cudf/interop.hpp | 8 +++++ cpp/src/interop/arrow_column.cpp | 40 +++++++++++++++++++++---- cpp/tests/interop/arrow_column_test.cpp | 28 ++++++++--------- 3 files changed, 56 insertions(+), 20 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index fe15d54e0f1..36111559c06 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -233,6 +233,14 @@ class arrow_table { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + void to_arrow_schema(ArrowSchema* output, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + void to_arrow(ArrowDeviceArray* output, + ArrowDeviceType device_type = ARROW_DEVICE_CUDA, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + private: // Using a shared_ptr allows re-export via to_arrow std::shared_ptr container; diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 351443be040..2f56d2625e6 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -285,11 +285,41 @@ unique_table_view_t arrow_table::view(rmm::cuda_stream_view stream, return from_arrow_device(&container->schema, &container->owner, stream, mr); } -// Create Array whose private_data contains a shared_ptr to this->container -// The output should be consumer-allocated, see -// https://arrow.apache.org/docs/format/CDataInterface.html#member-allocation -// Note: May need stream/mr depending on where we put what logic. -// void to_arrow(ArrowDeviceArray* output); +void arrow_table::to_arrow_schema(ArrowSchema* output, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto& schema = container->schema; + ArrowSchemaDeepCopy(&schema, output); +} + +// TODO: Based on the similarity of a lot of this functionality, we can +// probably define it in the arrow_array_container class and then just call it +// from here. Only a few things really need column vs table specializations. +void arrow_table::to_arrow(ArrowDeviceArray* output, + ArrowDeviceType device_type, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + switch (device_type) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + auto device_arr = container->owner; + copy_array(&output->array, &device_arr.array, container); + output->device_id = device_arr.device_id; + // We can reuse the sync event by reference from the input. The + // destruction of that event is managed by the destruction of + // the underlying ArrowDeviceArray of this table. + output->sync_event = device_arr.sync_event; + output->device_type = device_type; + break; + } + // case ARROW_DEVICE_CPU: { + // auto out = cudf::to_arrow_host(container->view, output, stream, mr); + // } + } +} // class arrow_table { // public: diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 219c9835cc5..1641de98bf8 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -29,13 +29,14 @@ struct ArrowColumnTest : public cudf::test::BaseFixture {}; -auto export_to_arrow(cudf::arrow_column& col) +template +auto export_to_arrow(T& obj) { // Now we can extract an ArrowDeviceArray from the arrow_column auto schema = std::make_unique(); - col.to_arrow_schema(schema.get()); + obj.to_arrow_schema(schema.get()); auto array = std::make_unique(); - col.to_arrow(array.get(), ARROW_DEVICE_CUDA); + obj.to_arrow(array.get(), ARROW_DEVICE_CUDA); return std::make_pair(std::move(schema), std::move(array)); } @@ -89,16 +90,13 @@ TEST_F(ArrowTableTest, TwoWayConversion) CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_cudf_table.view()); - // // Now we can extract an ArrowDeviceArray from the arrow_column - // auto [arrow_schema_from_cudf_column, arrow_array_from_arrow_column] = - // export_to_arrow(arrow_column_from_cudf_column); - // arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_cudf_column.get()); - // arrow_column_from_cudf_column.to_arrow(arrow_array_from_arrow_column.get(), - // ARROW_DEVICE_CUDA); - // - // // Now let's convert it back to an arrow_column - // auto arrow_column_from_arrow_array = - // cudf::arrow_column(arrow_schema_from_cudf_column.get(), - // arrow_array_from_arrow_column.get()); - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); + auto [arrow_schema_from_cudf_table, arrow_array_from_arrow_table] = + export_to_arrow(arrow_table_from_cudf_table); + arrow_table_from_cudf_table.to_arrow_schema(arrow_schema_from_cudf_table.get()); + arrow_table_from_cudf_table.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CUDA); + + // auto arrow_table_from_arrow_array = + // cudf::arrow_table(arrow_schema_from_cudf_table.get(), + // arrow_array_from_arrow_table.get()); + // CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_table_from_arrow_array.view()); } From fda4ca4ef78befed0c1074ada370ed6410c440b2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 02:08:42 +0000 Subject: [PATCH 13/37] Support construction from device array --- cpp/include/cudf/interop.hpp | 9 +++-- cpp/src/interop/arrow_column.cpp | 49 ++++++++++++++++++------- cpp/tests/interop/arrow_column_test.cpp | 7 ++-- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 36111559c06..d49f4b9a758 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -207,6 +207,7 @@ class arrow_column { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + // TODO: mutable_view unique_column_view_t view( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -218,10 +219,10 @@ class arrow_column { class arrow_table { public: - // arrow_table(ArrowSchema const* schema, - // ArrowDeviceArray* input, - // rmm::cuda_stream_view stream = cudf::get_default_stream(), - // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + arrow_table(ArrowSchema const* schema, + ArrowDeviceArray* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); // arrow_table(ArrowSchema const* schema, // ArrowArray* input, // rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 2f56d2625e6..fde3d104714 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -321,19 +321,42 @@ void arrow_table::to_arrow(ArrowDeviceArray* output, } } -// class arrow_table { -// public: -// arrow_table(std::vector < std::shared_ptr columns) : columns{columns} {} -// cudf::table_view view(); -// cudf::mutable_table_view mutable_view(); -// // Create Array whose private_data contains shared_ptrs to all the underlying -// // arrow_array_containers -// void to_arrow(ArrowDeviceArray* output); -// -// private: -// // Would allow arrow_columns being in multiple arrow_tables -// std::vector < std::shared_ptr columns; -// }; +arrow_table::arrow_table(ArrowSchema const* schema, + ArrowDeviceArray* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + : container{std::make_shared()} +{ + switch (input->device_type) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + // In this case, we have an ArrowDeviceArray with CUDA data as the + // owner. We can simply move it into our container and safe it now. We + // do need to copy the schema, though. + ArrowSchemaDeepCopy(schema, &container->schema); + auto& device_arr = container->owner; + // This behavior is different than the old from_arrow_device function + // because now we are not create a non-owning table_view but an + // arrow_table that can manage lifetimes. + ArrowArrayMove(&input->array, &device_arr.array); + device_arr.device_type = input->device_type; + // Pointing to the existing sync event is safe assuming that the + // underlying event is managed by the private data and the release + // callback. + device_arr.sync_event = input->sync_event; + device_arr.device_id = input->device_id; + break; + } + case ARROW_DEVICE_CPU: { + throw std::runtime_error("ArrowDeviceArray with CPU data not supported"); + // auto col = from_arrow_host_table(schema, input, stream, mr); + // container->owner = std::shared_ptr(col.release()); + // break; + } + default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); + } +} //// ArrowArrayStream and ArrowArray overloads (they can be overloads now instead //// of separate functions) are trivial wrappers around this function. Also need versions diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 1641de98bf8..06ee7d6d690 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -95,8 +95,7 @@ TEST_F(ArrowTableTest, TwoWayConversion) arrow_table_from_cudf_table.to_arrow_schema(arrow_schema_from_cudf_table.get()); arrow_table_from_cudf_table.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CUDA); - // auto arrow_table_from_arrow_array = - // cudf::arrow_table(arrow_schema_from_cudf_table.get(), - // arrow_array_from_arrow_table.get()); - // CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_table_from_arrow_array.view()); + auto arrow_table_from_arrow_array = + cudf::arrow_table(arrow_schema_from_cudf_table.get(), arrow_array_from_arrow_table.get()); + CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_arrow_array.view()); } From 7c07058f8831c894950883a3692cbd42de9cd471 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 02:11:53 +0000 Subject: [PATCH 14/37] Some cleanup --- cpp/src/interop/arrow_column.cpp | 41 ++++++++------------------------ 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index fde3d104714..5a98e7f63fd 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -14,39 +14,22 @@ * limitations under the License. */ -//// #include "arrow_utilities.hpp" -//// -//// #include -//// #include -//// #include - +#include +#include #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// #include -//// -//// #include -//// #include -//// #include -//// +#include +#include + +#include +#include + #include #include #include -// -// #include + #include #include -#include -// #include -// + /* * Notes on ownership * @@ -259,10 +242,6 @@ unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, // .device_type = ARROW_DEVICE_CPU}; // ArrowArrayMove(&arr, container->arr); // } -// arrow_table::arrow_table(cudf::table &&input) { -// auto output = cudf::to_arrow_device(input, container->arr); -// ArrowArrayMove(output.get(), container->arr); -// } arrow_table::arrow_table(cudf::table&& input, rmm::cuda_stream_view stream, From 11ec3d9d35a69effc26758bacbb4e920446b087d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Feb 2025 02:43:22 +0000 Subject: [PATCH 15/37] Add documentation --- cpp/include/cudf/interop.hpp | 111 +++++++++++++++++++++++++++++-- cpp/src/interop/arrow_column.cpp | 32 +++++++-- 2 files changed, 132 insertions(+), 11 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index d49f4b9a758..080f3a8705b 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -39,9 +39,13 @@ struct ArrowArray; struct ArrowArrayStream; +#ifndef DOXYGEN_SHOULD_SKIP_THIS +// These are types from arrow that we are forward declaring for our API to +// avoid needing to include nanoarrow headers. typedef int32_t ArrowDeviceType; #define ARROW_DEVICE_CUDA 2 +#endif namespace CUDF_EXPORT cudf { /** @@ -183,10 +187,29 @@ using unique_table_view_t = using unique_column_view_t = std::unique_ptr>; +/** + * @brief A wrapper around ArrowDeviceArray data used for flexible lifetime management. + */ struct arrow_array_container; +/** + * @brief A standard interchange medium for ArrowDeviceArray data in cudf. + * + * This class provides a way to work with ArrowDeviceArray data in cudf without + * sacrificing the APIs expected of a cudf column. On the other end, it + * provides the shared lifetime management expected by arrow consumers rather + * than the single-owner mechanism of cudf::column. + */ class arrow_column { public: + /** + * @brief Construct a new arrow column object + * + * @param schema Arrow schema for the column + * @param input ArrowDeviceArray data for the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ arrow_column(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -195,30 +218,77 @@ class arrow_column { // ArrowArray* input, // rmm::cuda_stream_view stream = cudf::get_default_stream(), // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** + * @brief Construct a new arrow column object + * + * @param input cudf column to convert to arrow + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ arrow_column(cudf::column&& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** + * @brief Convert the column to an ArrowSchema + * + * @param output ArrowSchema to populate with the column's schema + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ void to_arrow_schema(ArrowSchema* output, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + /** + * @brief Convert the column to an ArrowDeviceArray + * + * @param output ArrowDeviceArray to populate with the column's data + * @param device_type ArrowDeviceType to set on the output + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ void to_arrow(ArrowDeviceArray* output, ArrowDeviceType device_type = ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); // TODO: mutable_view + /** + * @brief Get a view of the column data + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return unique_column_view_t containing a view of the column data + */ unique_column_view_t view( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); private: - // Using a shared_ptr allows re-export via to_arrow - std::shared_ptr container; + std::shared_ptr + container; ///< Shared pointer to container for the ArrowDeviceArray data; shared_ptr allows + ///< re-export via to_arrow }; +/** + * @brief A standard interchange medium for ArrowDeviceArray data in cudf. + * + * This class provides a way to work with ArrowDeviceArray data in cudf without + * sacrificing the APIs expected of a cudf table. On the other end, it + * provides the shared lifetime management expected by arrow consumers rather + * than the single-owner mechanism of cudf::table. + */ class arrow_table { public: + /** + * @brief Construct a new arrow table object + * + * @param schema Arrow schema for the table + * @param input ArrowDeviceArray data for the table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ arrow_table(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -227,24 +297,57 @@ class arrow_table { // ArrowArray* input, // rmm::cuda_stream_view stream = cudf::get_default_stream(), // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + /** + * @brief Construct a new arrow table object + * + * @param input cudf table to convert to arrow + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ arrow_table(cudf::table&& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + /** + * @brief Get a view of the table data + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return unique_table_view_t containing a view of the table data + */ unique_table_view_t view( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** + * @brief Convert the table to an ArrowSchema + * + * @param output ArrowSchema to populate with the table's schema + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ void to_arrow_schema(ArrowSchema* output, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + /** + * @brief Convert the table to an ArrowDeviceArray + * + * @param output ArrowDeviceArray to populate with the table's data + * @param device_type ArrowDeviceType to set on the output + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ void to_arrow(ArrowDeviceArray* output, ArrowDeviceType device_type = ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); private: - // Using a shared_ptr allows re-export via to_arrow - std::shared_ptr container; + std::shared_ptr + container; ///< Shared pointer to container for the ArrowDeviceArray data; shared_ptr allows + ///< re-export via to_arrow }; /** diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 5a98e7f63fd..7db35987e8c 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -57,14 +57,9 @@ */ namespace cudf { -// Class to manage lifetime semantics and allow re-export. struct arrow_array_container { - //// Also need a member that holds column views (and one for mutable?) - // cudf::column_view view; - - // TODO: Generalize to other possible owned data - ArrowDeviceArray owner; - ArrowSchema schema; + ArrowDeviceArray owner; //< ArrowDeviceArray that owns the data + ArrowSchema schema; //< ArrowSchema that describes the data // Question: When the input data was host data, we could presumably release // immediately. Do we care? If so, how should we implement that? @@ -77,12 +72,25 @@ struct arrow_array_container { namespace { +/** + * @brief Private data for an ArrowArray that contains a struct array. + * + * This struct is used to manage the lifetimes of the children of a struct array. + */ struct ArrowArrayPrivateData { std::shared_ptr parent; std::vector> children; std::vector children_raw; }; +/** + * @brief Release callback for an ArrowArray that contains a struct array. + * + * This function is called when the ArrowArray is released. It releases all of the children of the + * struct array. + * + * @param array The ArrowArray to release + */ void ArrayReleaseCallback(ArrowArray* array) { auto private_data = reinterpret_cast(array->private_data); @@ -93,6 +101,16 @@ void ArrayReleaseCallback(ArrowArray* array) array->release = nullptr; } +/** + * @brief Copy an ArrowArray. + * + * This function copies an ArrowArray and all of its children. It is used to + * export cudf arrow objects to user-provided ArrowDeviceArrays. + * + * @param output The ArrowArray to copy to + * @param input The ArrowArray to copy from + * @param container The container that owns the data + */ void copy_array(ArrowArray* output, ArrowArray const* input, std::shared_ptr container) From 2c7e78a46e97cff062cfa9d38e499302375eda35 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 00:20:04 +0000 Subject: [PATCH 16/37] Fix some bugs --- cpp/src/interop/arrow_column.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 7db35987e8c..69e33f02a13 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -127,7 +127,8 @@ void copy_array(ArrowArray* output, private_data->children_raw.resize(input->n_children); for (auto i = 0; i < input->n_children; ++i) { private_data->children.push_back(std::make_unique()); - copy_array(private_data->children.back().get(), input->children[i], container); + private_data->children_raw[i] = private_data->children.back().get(); + copy_array(private_data->children_raw[i], input->children[i], container); } } output->children = private_data->children_raw.data(); @@ -268,8 +269,8 @@ arrow_table::arrow_table(cudf::table&& input, { // The output ArrowDeviceArray here will own all the data, so we don't need to save a column // TODO: metadata should be provided by the user - auto meta = cudf::column_metadata{}; - auto table_meta = std::vector{meta}; + auto table_meta = std::vector{static_cast(input.num_columns()), + cudf::column_metadata{}}; auto schema = cudf::to_arrow_schema(input.view(), table_meta); ArrowSchemaMove(schema.get(), &(container->schema)); auto output = cudf::to_arrow_device(std::move(input)); From f75b17bd8b4ac47689a696055c49e81f463499c6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 00:22:48 +0000 Subject: [PATCH 17/37] More cleanup --- cpp/src/interop/arrow_column.cpp | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 69e33f02a13..a441a7a0d3e 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -30,31 +30,6 @@ #include #include -/* - * Notes on ownership - * - * If you start with a cudf object, it has sole ownership and we have complete control of its - * provenance. We can convert it to an ArrowDeviceArray that has sole ownership of each piece of - * data. We can also (if desired) decompose a cudf::table down into columns and create a separate - * ArrowDeviceArray for each column. In this case, we would also be able to export and maintain the - * lifetimes of each column separately. The nanoarrow array creation routines will produce an - * ArrowArray for each column that has its own deleter that deletes each buffer, so we could give - * each buffer ownership of the corresponding cudf data buffer and then we wouldn't really need any - * private data attribute. That would be more work than what I was initially proposing to do, which - * is to just have a single ArrowDeviceArray that owns all the data for the whole table, but not - * much different. It would also mean a bit more work during the conversion since we wouldn't simply - * be tying lifetimes to an underlying unique_ptr to a cudf type (wrapped through a shared pointer - * to a containing structure), which is a quick and dirty way to do it. - * - * If we start with an ArrowDeviceArray from another source, though, we have no guarantees about who - * owns what within the array. Assuming that it's a struct array representing a table, each child - * array could own its own data via its buffers, or all of the data could be collectively owned by - * the private data with each buffer having no ownership on its own. In that case, you would always - * need to keep the whole private data alive in order to keep any individual column alive. - * - * I think the best long-term option is to decompose as much as possible so that it is in principle - * possible to minimize the amount of - */ namespace cudf { struct arrow_array_container { From 951914dd594ba36f2017fa5d9b4a3ad6daa8e4a0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 05:36:24 +0000 Subject: [PATCH 18/37] More CMake testing --- cpp/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 704b220f6a8..55ba444f83a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -785,9 +785,9 @@ add_library( src/utilities/type_dispatcher.cpp ) set_source_files_properties( - src/interop/arrow_column.cpp - PROPERTIES COMPILE_OPTIONS "-g;-O0") - + src/interop/arrow_column.cpp src/interop/to_arrow_schema.cpp src/interop/from_arrow_host.cu + src/interop/to_arrow_device.cu PROPERTIES COMPILE_OPTIONS "-g;-O0" +) # Anything that includes jitify needs to be compiled with _FILE_OFFSET_BITS=64 due to a limitation # in how conda builds glibc From 31f134ef78396b8e79b36f402acffbbc75accf37 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 05:36:33 +0000 Subject: [PATCH 19/37] Get tests passing with complex nanoarrow host tables --- cpp/src/interop/arrow_column.cpp | 37 +++++++++++++++++++------ cpp/tests/interop/arrow_column_test.cpp | 36 ++++++++++++++++++++---- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index a441a7a0d3e..16a5687ce85 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -41,7 +41,7 @@ struct arrow_array_container { ~arrow_array_container() { // TODO: Do we have to sync before releasing? - ArrowArrayRelease(&owner.array); + if (owner.array.release != nullptr) { ArrowArrayRelease(&owner.array); } } }; @@ -116,6 +116,15 @@ void copy_array(ArrowArray* output, } // namespace +cudf::column_metadata get_column_metadata(cudf::column_view const& input) +{ + cudf::column_metadata meta{}; + for (auto i = 0; i < input.num_children(); ++i) { + meta.children_meta.push_back(get_column_metadata(input.child(i))); + } + return meta; +} + arrow_column::arrow_column(cudf::column&& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -123,7 +132,7 @@ arrow_column::arrow_column(cudf::column&& input, { // The output ArrowDeviceArray here will own all the data, so we don't need to save a column // TODO: metadata should be provided by the user - auto meta = cudf::column_metadata{}; + auto meta = get_column_metadata(input.view()); auto table_meta = std::vector{meta}; auto tv = cudf::table_view{{input.view()}}; auto schema = cudf::to_arrow_schema(tv, table_meta); @@ -237,6 +246,15 @@ unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, // ArrowArrayMove(&arr, container->arr); // } +std::vector get_table_metadata(cudf::table_view const& input) +{ + auto meta = std::vector{}; + for (auto i = 0; i < input.num_columns(); ++i) { + meta.push_back(get_column_metadata(input.column(i))); + } + return meta; +} + arrow_table::arrow_table(cudf::table&& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -244,8 +262,7 @@ arrow_table::arrow_table(cudf::table&& input, { // The output ArrowDeviceArray here will own all the data, so we don't need to save a column // TODO: metadata should be provided by the user - auto table_meta = std::vector{static_cast(input.num_columns()), - cudf::column_metadata{}}; + auto table_meta = get_table_metadata(input.view()); auto schema = cudf::to_arrow_schema(input.view(), table_meta); ArrowSchemaMove(schema.get(), &(container->schema)); auto output = cudf::to_arrow_device(std::move(input)); @@ -322,10 +339,14 @@ arrow_table::arrow_table(ArrowSchema const* schema, break; } case ARROW_DEVICE_CPU: { - throw std::runtime_error("ArrowDeviceArray with CPU data not supported"); - // auto col = from_arrow_host_table(schema, input, stream, mr); - // container->owner = std::shared_ptr(col.release()); - // break; + // I'm not sure if there is a more efficient approach than doing this + // back-and-forth conversion without writing a lot of bespoke logic. I + // suspect that the overhead of the memory copies will dwarf any extra + // work here, but it's worth benchmarking to be sure. + auto tbl = from_arrow_host(schema, input, stream, mr); + auto tmp_table = arrow_table(std::move(*tbl)); + container = tmp_table.container; + break; } default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); } diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 06ee7d6d690..e1aba340b88 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "nanoarrow_utils.hpp" + #include #include #include @@ -47,13 +49,13 @@ TEST_F(ArrowColumnTest, TwoWayConversion) auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column.view()); - auto [arrow_schema_from_cudf_column, arrow_array_from_arrow_column] = + auto [arrow_schema_from_arrow_column, arrow_array_from_arrow_column] = export_to_arrow(arrow_column_from_cudf_column); - arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_cudf_column.get()); + arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_arrow_column.get()); arrow_column_from_cudf_column.to_arrow(arrow_array_from_arrow_column.get(), ARROW_DEVICE_CUDA); auto arrow_column_from_arrow_array = - cudf::arrow_column(arrow_schema_from_cudf_column.get(), arrow_array_from_arrow_column.get()); + cudf::arrow_column(arrow_schema_from_arrow_column.get(), arrow_array_from_arrow_column.get()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); } @@ -90,12 +92,34 @@ TEST_F(ArrowTableTest, TwoWayConversion) CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_cudf_table.view()); - auto [arrow_schema_from_cudf_table, arrow_array_from_arrow_table] = + auto [arrow_schema_from_arrow_table, arrow_array_from_arrow_table] = export_to_arrow(arrow_table_from_cudf_table); - arrow_table_from_cudf_table.to_arrow_schema(arrow_schema_from_cudf_table.get()); + arrow_table_from_cudf_table.to_arrow_schema(arrow_schema_from_arrow_table.get()); arrow_table_from_cudf_table.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CUDA); auto arrow_table_from_arrow_array = - cudf::arrow_table(arrow_schema_from_cudf_table.get(), arrow_array_from_arrow_table.get()); + cudf::arrow_table(arrow_schema_from_arrow_table.get(), arrow_array_from_arrow_table.get()); CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_arrow_array.view()); } + +TEST_F(ArrowTableTest, ComplexNanoarrowHostTables) +{ + auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); + ArrowDeviceArray device_arr{ + .array = *arr.get(), + .device_id = -1, + .device_type = ARROW_DEVICE_CPU, + }; + auto arrow_table_from_nanoarrow_array = cudf::arrow_table(schema.get(), &device_arr); + + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_nanoarrow_array.view()); + + auto [arrow_schema_from_nanoarrow_array, arrow_array_from_arrow_table] = + export_to_arrow(arrow_table_from_nanoarrow_array); + arrow_table_from_nanoarrow_array.to_arrow_schema(arrow_schema_from_nanoarrow_array.get()); + arrow_table_from_nanoarrow_array.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CUDA); + + auto arrow_table_from_arrow_array = + cudf::arrow_table(arrow_schema_from_nanoarrow_array.get(), arrow_array_from_arrow_table.get()); + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_arrow_array.view()); +} From a76c3b980f5b030aab951a66cfbfe7bbfc7bff55 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 20:03:46 +0000 Subject: [PATCH 20/37] More CMake testing --- cpp/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 55ba444f83a..b67aa8a5ae3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -786,7 +786,8 @@ add_library( ) set_source_files_properties( src/interop/arrow_column.cpp src/interop/to_arrow_schema.cpp src/interop/from_arrow_host.cu - src/interop/to_arrow_device.cu PROPERTIES COMPILE_OPTIONS "-g;-O0" + src/interop/from_arrow_device.cu src/interop/to_arrow_device.cu src/column/column_view.cpp + PROPERTIES COMPILE_OPTIONS "-g;-O0" ) # Anything that includes jitify needs to be compiled with _FILE_OFFSET_BITS=64 due to a limitation From 6a59f4124e222a0fd9b053ab31d93460ff591245 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 20:10:55 +0000 Subject: [PATCH 21/37] Support single columns from arrow host data --- cpp/src/interop/arrow_column.cpp | 8 ++++---- cpp/tests/interop/arrow_column_test.cpp | 26 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 16a5687ce85..ca4cbf42bbf 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -171,10 +171,10 @@ arrow_column::arrow_column(ArrowSchema const* schema, break; } case ARROW_DEVICE_CPU: { - throw std::runtime_error("ArrowDeviceArray with CPU data not supported"); - // auto col = from_arrow_host_column(schema, input, stream, mr); - // container->owner = std::shared_ptr(col.release()); - // break; + auto col = from_arrow_host_column(schema, input, stream, mr); + auto tmp_column = arrow_column(std::move(*col)); + container = tmp_column.container; + break; } default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); } diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index e1aba340b88..95b1e2afdfd 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -79,6 +79,32 @@ TEST_F(ArrowColumnTest, LifetimeManagement) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col1->view(), *col2->view()); } +TEST_F(ArrowColumnTest, ComplexNanoarrowHostTables) +{ + auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); + for (auto i = 0; i < tbl->num_columns(); i++) { + auto& col = tbl->get_column(i); + + ArrowDeviceArray device_arr{ + .array = *arr->children[i], + .device_id = -1, + .device_type = ARROW_DEVICE_CPU, + }; + auto arrow_column_from_nanoarrow_array = cudf::arrow_column(schema->children[i], &device_arr); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_nanoarrow_array.view()); + + auto [arrow_schema_from_nanoarrow_array, arrow_array_from_arrow_column] = + export_to_arrow(arrow_column_from_nanoarrow_array); + arrow_column_from_nanoarrow_array.to_arrow_schema(arrow_schema_from_nanoarrow_array.get()); + arrow_column_from_nanoarrow_array.to_arrow(arrow_array_from_arrow_column.get(), + ARROW_DEVICE_CUDA); + + auto arrow_column_from_arrow_array = cudf::arrow_column(arrow_schema_from_nanoarrow_array.get(), + arrow_array_from_arrow_column.get()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_arrow_array.view()); + } +} + struct ArrowTableTest : public cudf::test::BaseFixture {}; TEST_F(ArrowTableTest, TwoWayConversion) From 9600c3c4c60d84d37826af4a9600728a050b8669 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 20:33:28 +0000 Subject: [PATCH 22/37] Also test nanoarrow device data --- cpp/tests/interop/arrow_column_test.cpp | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 95b1e2afdfd..d743c0282ab 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -79,6 +79,32 @@ TEST_F(ArrowColumnTest, LifetimeManagement) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col1->view(), *col2->view()); } +TEST_F(ArrowColumnTest, ComplexNanoarrowDeviceTables) +{ + auto [tbl, schema, arr] = get_nanoarrow_tables(100); + for (auto i = 0; i < tbl->num_columns(); i++) { + auto& col = tbl->get_column(i); + + ArrowDeviceArray device_arr{ + .array = *arr->children[i], + .device_id = 0, + .device_type = ARROW_DEVICE_CUDA, + }; + auto arrow_column_from_nanoarrow_array = cudf::arrow_column(schema->children[i], &device_arr); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_nanoarrow_array.view()); + + auto [arrow_schema_from_nanoarrow_array, arrow_array_from_arrow_column] = + export_to_arrow(arrow_column_from_nanoarrow_array); + arrow_column_from_nanoarrow_array.to_arrow_schema(arrow_schema_from_nanoarrow_array.get()); + arrow_column_from_nanoarrow_array.to_arrow(arrow_array_from_arrow_column.get(), + ARROW_DEVICE_CUDA); + + auto arrow_column_from_arrow_array = cudf::arrow_column(arrow_schema_from_nanoarrow_array.get(), + arrow_array_from_arrow_column.get()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_arrow_array.view()); + } +} + TEST_F(ArrowColumnTest, ComplexNanoarrowHostTables) { auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); @@ -128,6 +154,28 @@ TEST_F(ArrowTableTest, TwoWayConversion) CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_arrow_array.view()); } +TEST_F(ArrowTableTest, ComplexNanoarrowDeviceTables) +{ + auto [tbl, schema, arr] = get_nanoarrow_tables(100); + ArrowDeviceArray device_arr{ + .array = *arr.get(), + .device_id = 0, + .device_type = ARROW_DEVICE_CUDA, + }; + auto arrow_table_from_nanoarrow_array = cudf::arrow_table(schema.get(), &device_arr); + + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_nanoarrow_array.view()); + + auto [arrow_schema_from_nanoarrow_array, arrow_array_from_arrow_table] = + export_to_arrow(arrow_table_from_nanoarrow_array); + arrow_table_from_nanoarrow_array.to_arrow_schema(arrow_schema_from_nanoarrow_array.get()); + arrow_table_from_nanoarrow_array.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CUDA); + + auto arrow_table_from_arrow_array = + cudf::arrow_table(arrow_schema_from_nanoarrow_array.get(), arrow_array_from_arrow_table.get()); + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_arrow_array.view()); +} + TEST_F(ArrowTableTest, ComplexNanoarrowHostTables) { auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); From 68fb9d8423f4a8dde1283d54b5ae8337254faa9b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 22:25:33 +0000 Subject: [PATCH 23/37] Implement conversion to host array --- cpp/src/interop/arrow_column.cpp | 24 +++++++++++---- cpp/tests/interop/arrow_column_test.cpp | 39 +++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index ca4cbf42bbf..e6cf97b546b 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -221,9 +221,15 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, output->device_type = device_type; break; } - // case ARROW_DEVICE_CPU: { - // auto out = cudf::to_arrow_host(container->view, output, stream, mr); - // } + case ARROW_DEVICE_CPU: { + auto out = cudf::to_arrow_host(*view().get(), stream, mr); + ArrowArrayMove(&out->array, &output->array); + output->device_id = -1; + output->sync_event = nullptr; + output->device_type = ARROW_DEVICE_CPU; + break; + } + default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); } } @@ -305,9 +311,15 @@ void arrow_table::to_arrow(ArrowDeviceArray* output, output->device_type = device_type; break; } - // case ARROW_DEVICE_CPU: { - // auto out = cudf::to_arrow_host(container->view, output, stream, mr); - // } + case ARROW_DEVICE_CPU: { + auto out = cudf::to_arrow_host(*view().get(), stream, mr); + ArrowArrayMove(&out->array, &output->array); + output->device_id = -1; + output->sync_event = nullptr; + output->device_type = ARROW_DEVICE_CPU; + break; + } + default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); } } diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index d743c0282ab..ec985f11bb9 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -32,13 +32,13 @@ struct ArrowColumnTest : public cudf::test::BaseFixture {}; template -auto export_to_arrow(T& obj) +auto export_to_arrow(T& obj, ArrowDeviceType device_type = ARROW_DEVICE_CUDA) { // Now we can extract an ArrowDeviceArray from the arrow_column auto schema = std::make_unique(); obj.to_arrow_schema(schema.get()); auto array = std::make_unique(); - obj.to_arrow(array.get(), ARROW_DEVICE_CUDA); + obj.to_arrow(array.get(), device_type); return std::make_pair(std::move(schema), std::move(array)); } @@ -131,6 +131,22 @@ TEST_F(ArrowColumnTest, ComplexNanoarrowHostTables) } } +TEST_F(ArrowColumnTest, ToFromHost) +{ + cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; + auto col = cudf::column(int_col); + auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); + + auto [arrow_schema_from_arrow_column, arrow_array_from_arrow_column] = + export_to_arrow(arrow_column_from_cudf_column, ARROW_DEVICE_CPU); + arrow_column_from_cudf_column.to_arrow_schema(arrow_schema_from_arrow_column.get()); + arrow_column_from_cudf_column.to_arrow(arrow_array_from_arrow_column.get(), ARROW_DEVICE_CPU); + + auto arrow_column_from_arrow_array = + cudf::arrow_column(arrow_schema_from_arrow_column.get(), arrow_array_from_arrow_column.get()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_arrow_array.view()); +} + struct ArrowTableTest : public cudf::test::BaseFixture {}; TEST_F(ArrowTableTest, TwoWayConversion) @@ -197,3 +213,22 @@ TEST_F(ArrowTableTest, ComplexNanoarrowHostTables) cudf::arrow_table(arrow_schema_from_nanoarrow_array.get(), arrow_array_from_arrow_table.get()); CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_arrow_array.view()); } + +TEST_F(ArrowTableTest, ToFromHost) +{ + cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; + cudf::test::fixed_width_column_wrapper float_col{ + {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}}; + auto original_view = cudf::table_view{{int_col, float_col}}; + cudf::table table{cudf::table_view{{int_col, float_col}}}; + auto arrow_table_from_cudf_table = cudf::arrow_table(std::move(table)); + + auto [arrow_schema_from_arrow_table, arrow_array_from_arrow_table] = + export_to_arrow(arrow_table_from_cudf_table, ARROW_DEVICE_CPU); + arrow_table_from_cudf_table.to_arrow_schema(arrow_schema_from_arrow_table.get()); + arrow_table_from_cudf_table.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CPU); + + auto arrow_table_from_arrow_array = + cudf::arrow_table(arrow_schema_from_arrow_table.get(), arrow_array_from_arrow_table.get()); + CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_arrow_array.view()); +} From 36bbd3675a536f26c3bf7f0e1b1da09f1a7eb653 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Feb 2025 23:30:16 +0000 Subject: [PATCH 24/37] Support direct ingestion of ArrowArray data --- cpp/include/cudf/interop.hpp | 35 ++++++++++++---- cpp/src/interop/arrow_column.cpp | 56 ++++++++++--------------- cpp/tests/interop/arrow_column_test.cpp | 39 +++++++++++++++++ 3 files changed, 87 insertions(+), 43 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 080f3a8705b..ccf8c7cc7ef 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -214,10 +214,20 @@ class arrow_column { ArrowDeviceArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - // arrow_column(ArrowSchema const* schema, - // ArrowArray* input, - // rmm::cuda_stream_view stream = cudf::get_default_stream(), - // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + /** + * @brief Construct a new arrow column object + * + * @param schema Arrow schema for the column + * @param input ArrowArray data for the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ + arrow_column(ArrowSchema const* schema, + ArrowArray* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** * @brief Construct a new arrow column object * @@ -293,10 +303,19 @@ class arrow_table { ArrowDeviceArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - // arrow_table(ArrowSchema const* schema, - // ArrowArray* input, - // rmm::cuda_stream_view stream = cudf::get_default_stream(), - // rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + + /** + * @brief Construct a new arrow table object + * + * @param schema Arrow schema for the table + * @param input ArrowArray data for the table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ + arrow_table(ArrowSchema const* schema, + ArrowArray* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new arrow table object diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index e6cf97b546b..29184563373 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -143,6 +143,7 @@ arrow_column::arrow_column(cudf::column&& input, // IMPORTANT: This constructor will move the input array if it is device data. // Probably won't for host data, though... is that asymmetry okay? +// We should switch to just also releasing for the host data case. arrow_column::arrow_column(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, @@ -179,20 +180,16 @@ arrow_column::arrow_column(ArrowSchema const* schema, default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); } } -// arrow_column::arrow_column( -// ArrowSchema const* schema, -// ArrowArray* input, -// rmm::cuda_stream_view stream, -// rmm::device_async_resource_ref mr) -//{ -// //// The copy initialization of .array means that the release callback will be -// //// called on that object, so we don't need to call it on the `input` in this -// //// function. -// //ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; -// //// TODO: Merge with the ARROW_DEVICE_CPU case above with a helper function. -// //auto col = from_arrow_host_column(schema, &arr, stream, mr); -// //container->owner = std::shared_ptr(col.release()); -// } + +arrow_column::arrow_column(ArrowSchema const* schema, + ArrowArray* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + auto tmp = arrow_column(schema, &arr, stream, mr); + container = tmp.container; +} void arrow_column::to_arrow_schema(ArrowSchema* output, rmm::cuda_stream_view stream, @@ -244,14 +241,6 @@ unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, return from_arrow_device_column(&container->schema, &container->owner, stream, mr); } -// arrow_table::arrow_table(ArrowArray* input) { -// ArrowDeviceArray arr{ -// .array = *input, -// .device_id = -1, -// .device_type = ARROW_DEVICE_CPU}; -// ArrowArrayMove(&arr, container->arr); -// } - std::vector get_table_metadata(cudf::table_view const& input) { auto meta = std::vector{}; @@ -364,21 +353,18 @@ arrow_table::arrow_table(ArrowSchema const* schema, } } +arrow_table::arrow_table(ArrowSchema const* schema, + ArrowArray* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + auto tmp = arrow_table(schema, &arr, stream, mr); + container = tmp.container; +} + //// ArrowArrayStream and ArrowArray overloads (they can be overloads now instead //// of separate functions) are trivial wrappers around this function. Also need versions //// of all three that return an arrow_column instead of an arrow_table. -// std::unique_ptr from_arrow(ArrowSchema const* schema, -// ArrowDeviceArray* input, -// rmm::cuda_stream_view stream, -// rmm::mr::device_memory_resource mr); -// -//// Produce an ArrowDeviceArray and then create an arrow_column around it. -// std::unique_ptr to_arrow( -// // Question: Do we really need a column_view overload? If we're going this -// // route, I think it's OK to always require a transfer of ownership to the -// // arrow_table, but there is potentially some small overhead there. -// std::unique_ptr input, -// rmm::cuda_stream_view stream = cudf::get_default_stream(), -// rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); } // namespace cudf diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index ec985f11bb9..2d87a6e7422 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -131,6 +131,28 @@ TEST_F(ArrowColumnTest, ComplexNanoarrowHostTables) } } +TEST_F(ArrowColumnTest, ComplexNanoarrowHostArrowArrayTables) +{ + auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); + for (auto i = 0; i < tbl->num_columns(); i++) { + auto& col = tbl->get_column(i); + + auto arrow_column_from_nanoarrow_array = + cudf::arrow_column(schema->children[i], arr->children[i]); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_nanoarrow_array.view()); + + auto [arrow_schema_from_nanoarrow_array, arrow_array_from_arrow_column] = + export_to_arrow(arrow_column_from_nanoarrow_array); + arrow_column_from_nanoarrow_array.to_arrow_schema(arrow_schema_from_nanoarrow_array.get()); + arrow_column_from_nanoarrow_array.to_arrow(arrow_array_from_arrow_column.get(), + ARROW_DEVICE_CUDA); + + auto arrow_column_from_arrow_array = cudf::arrow_column(arrow_schema_from_nanoarrow_array.get(), + arrow_array_from_arrow_column.get()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_arrow_array.view()); + } +} + TEST_F(ArrowColumnTest, ToFromHost) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; @@ -214,6 +236,23 @@ TEST_F(ArrowTableTest, ComplexNanoarrowHostTables) CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_arrow_array.view()); } +TEST_F(ArrowTableTest, ComplexNanoarrowHostArrowArrayTables) +{ + auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); + auto arrow_table_from_nanoarrow_array = cudf::arrow_table(schema.get(), arr.get()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_nanoarrow_array.view()); + + auto [arrow_schema_from_nanoarrow_array, arrow_array_from_arrow_table] = + export_to_arrow(arrow_table_from_nanoarrow_array); + arrow_table_from_nanoarrow_array.to_arrow_schema(arrow_schema_from_nanoarrow_array.get()); + arrow_table_from_nanoarrow_array.to_arrow(arrow_array_from_arrow_table.get(), ARROW_DEVICE_CUDA); + + auto arrow_table_from_arrow_array = + cudf::arrow_table(arrow_schema_from_nanoarrow_array.get(), arrow_array_from_arrow_table.get()); + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_arrow_array.view()); +} + TEST_F(ArrowTableTest, ToFromHost) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; From 0bc93893b5336b35f7e34df7df6ca8b9aa586a2e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Sat, 22 Feb 2025 02:00:08 +0000 Subject: [PATCH 25/37] Support construction from streams --- cpp/include/cudf/interop.hpp | 11 +++++ cpp/src/interop/arrow_column.cpp | 13 +++-- cpp/tests/interop/arrow_column_test.cpp | 9 ++++ cpp/tests/interop/from_arrow_stream_test.cpp | 50 +++++--------------- cpp/tests/interop/nanoarrow_utils.hpp | 40 ++++++++++++++++ 5 files changed, 82 insertions(+), 41 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index ccf8c7cc7ef..a93ff06fbcc 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -304,6 +304,17 @@ class arrow_table { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** + * @brief Construct a new arrow table object + * + * @param input ArrowArrayStream data for the table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + */ + arrow_table(ArrowArrayStream* input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** * @brief Construct a new arrow table object * diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 29184563373..6da4b56e26b 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -363,8 +363,13 @@ arrow_table::arrow_table(ArrowSchema const* schema, container = tmp.container; } -//// ArrowArrayStream and ArrowArray overloads (they can be overloads now instead -//// of separate functions) are trivial wrappers around this function. Also need versions -//// of all three that return an arrow_column instead of an arrow_table. - +// TODO: Make sure stream and mr are forwarded everywhere in this file. +arrow_table::arrow_table(ArrowArrayStream* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto tbl = from_arrow_stream(input, stream, mr); + auto tmp = arrow_table(std::move(*tbl), stream, mr); + container = tmp.container; +} } // namespace cudf diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index 2d87a6e7422..e241f289cd8 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -271,3 +271,12 @@ TEST_F(ArrowTableTest, ToFromHost) cudf::arrow_table(arrow_schema_from_arrow_table.get(), arrow_array_from_arrow_table.get()); CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_arrow_array.view()); } + +TEST_F(ArrowTableTest, FromArrowArrayStream) +{ + auto num_copies = 3; + auto [tbl, sch, stream] = get_nanoarrow_stream(num_copies); + + auto result = cudf::arrow_table(&stream); + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *result.view()); +} diff --git a/cpp/tests/interop/from_arrow_stream_test.cpp b/cpp/tests/interop/from_arrow_stream_test.cpp index 3916025bf22..1fee115fff8 100644 --- a/cpp/tests/interop/from_arrow_stream_test.cpp +++ b/cpp/tests/interop/from_arrow_stream_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,38 +25,6 @@ #include #include -struct VectorOfArrays { - std::vector arrays; - nanoarrow::UniqueSchema schema; - size_t index{0}; - - static int get_schema(ArrowArrayStream* stream, ArrowSchema* out_schema) - { - auto private_data = static_cast(stream->private_data); - - [[maybe_unused]] auto rc = ArrowSchemaDeepCopy(private_data->schema.get(), out_schema); - return 0; - } - - static int get_next(ArrowArrayStream* stream, ArrowArray* out_array) - { - auto private_data = static_cast(stream->private_data); - if (private_data->index >= private_data->arrays.size()) { - out_array->release = nullptr; - return 0; - } - ArrowArrayMove(private_data->arrays[private_data->index++].get(), out_array); - return 0; - } - - static const char* get_last_error(ArrowArrayStream* stream) { return nullptr; } - - static void release(ArrowArrayStream* stream) - { - delete static_cast(stream->private_data); - } -}; - struct FromArrowStreamTest : public cudf::test::BaseFixture {}; void makeStreamFromArrays(std::vector arrays, @@ -71,15 +39,15 @@ void makeStreamFromArrays(std::vector arrays, out->private_data = private_data; } -TEST_F(FromArrowStreamTest, BasicTest) +std::tuple, nanoarrow::UniqueSchema, ArrowArrayStream> +get_nanoarrow_stream(int num_copies) { - constexpr auto num_copies = 3; std::vector> tables; // The schema is unique across all tables. nanoarrow::UniqueSchema schema; std::vector arrays; for (auto i = 0; i < num_copies; ++i) { - auto [tbl, sch, arr] = get_nanoarrow_host_tables(0); + auto [tbl, sch, arr] = get_nanoarrow_host_tables(3); tables.push_back(std::move(tbl)); arrays.push_back(std::move(arr)); if (i == 0) { sch.move(schema.get()); } @@ -92,8 +60,16 @@ TEST_F(FromArrowStreamTest, BasicTest) ArrowArrayStream stream; makeStreamFromArrays(std::move(arrays), std::move(schema), &stream); + return std::make_tuple(std::move(expected), std::move(schema), stream); +} + +TEST_F(FromArrowStreamTest, BasicTest) +{ + constexpr auto num_copies = 3; + auto [tbl, sch, stream] = get_nanoarrow_stream(num_copies); + auto result = cudf::from_arrow_stream(&stream); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), result->view()); } TEST_F(FromArrowStreamTest, EmptyTest) diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp index a1211a16e10..d7a4b44a005 100644 --- a/cpp/tests/interop/nanoarrow_utils.hpp +++ b/cpp/tests/interop/nanoarrow_utils.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -416,3 +417,42 @@ get_decimal_precision() else return cudf::detail::max_precision(); } + +struct VectorOfArrays { + std::vector arrays; + nanoarrow::UniqueSchema schema; + size_t index{0}; + + static int get_schema(ArrowArrayStream* stream, ArrowSchema* out_schema) + { + auto private_data = static_cast(stream->private_data); + + [[maybe_unused]] auto rc = ArrowSchemaDeepCopy(private_data->schema.get(), out_schema); + return 0; + } + + static int get_next(ArrowArrayStream* stream, ArrowArray* out_array) + { + auto private_data = static_cast(stream->private_data); + if (private_data->index >= private_data->arrays.size()) { + out_array->release = nullptr; + return 0; + } + ArrowArrayMove(private_data->arrays[private_data->index++].get(), out_array); + return 0; + } + + static const char* get_last_error(ArrowArrayStream* stream) { return nullptr; } + + static void release(ArrowArrayStream* stream) + { + delete static_cast(stream->private_data); + } +}; + +void makeStreamFromArrays(std::vector arrays, + nanoarrow::UniqueSchema schema, + ArrowArrayStream* out); + +std::tuple, nanoarrow::UniqueSchema, ArrowArrayStream> +get_nanoarrow_stream(int num_copies); From 2b2e77fc6306c0af2dc7d574bac32290aea36551 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Feb 2025 16:58:55 +0000 Subject: [PATCH 26/37] Make ownership semantics consistent across types --- cpp/include/cudf/interop.hpp | 18 ++++++++++ cpp/src/interop/arrow_column.cpp | 46 +++++++++---------------- cpp/tests/interop/arrow_column_test.cpp | 12 ++++--- 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index a93ff06fbcc..5c7bf5f1645 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -205,6 +205,9 @@ class arrow_column { /** * @brief Construct a new arrow column object * + * The input array will be moved into the arrow_column, so it is no longer + * suitable for use afterwards. + * * @param schema Arrow schema for the column * @param input ArrowDeviceArray data for the column * @param stream CUDA stream used for device memory operations and kernel launches @@ -218,6 +221,9 @@ class arrow_column { /** * @brief Construct a new arrow column object * + * The input array will be released, so it is no longer suitable for use + * afterwards. + * * @param schema Arrow schema for the column * @param input ArrowArray data for the column * @param stream CUDA stream used for device memory operations and kernel launches @@ -231,6 +237,9 @@ class arrow_column { /** * @brief Construct a new arrow column object * + * The input column will be moved into the arrow_column, so it is no longer + * suitable for use afterwards. + * * @param input cudf column to convert to arrow * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion @@ -294,6 +303,9 @@ class arrow_table { /** * @brief Construct a new arrow table object * + * The input array will be moved into the arrow_table, so it is no longer + * suitable for use afterwards. + * * @param schema Arrow schema for the table * @param input ArrowDeviceArray data for the table * @param stream CUDA stream used for device memory operations and kernel launches @@ -318,6 +330,9 @@ class arrow_table { /** * @brief Construct a new arrow table object * + * The input array will be released, so it is no longer suitable for use + * afterwards. + * * @param schema Arrow schema for the table * @param input ArrowArray data for the table * @param stream CUDA stream used for device memory operations and kernel launches @@ -331,6 +346,9 @@ class arrow_table { /** * @brief Construct a new arrow table object * + * The input table will be moved into the arrow_table, so it is no longer + * suitable for use afterwards. + * * @param input cudf table to convert to arrow * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 6da4b56e26b..14d18781d02 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -36,11 +36,8 @@ struct arrow_array_container { ArrowDeviceArray owner; //< ArrowDeviceArray that owns the data ArrowSchema schema; //< ArrowSchema that describes the data - // Question: When the input data was host data, we could presumably release - // immediately. Do we care? If so, how should we implement that? ~arrow_array_container() { - // TODO: Do we have to sync before releasing? if (owner.array.release != nullptr) { ArrowArrayRelease(&owner.array); } } }; @@ -79,8 +76,8 @@ void ArrayReleaseCallback(ArrowArray* array) /** * @brief Copy an ArrowArray. * - * This function copies an ArrowArray and all of its children. It is used to - * export cudf arrow objects to user-provided ArrowDeviceArrays. + * This function shallow copies an ArrowArray and all of its children. It is + * used to export cudf arrow objects to user-provided ArrowDeviceArrays. * * @param output The ArrowArray to copy to * @param input The ArrowArray to copy from @@ -141,9 +138,6 @@ arrow_column::arrow_column(cudf::column&& input, ArrowDeviceArrayMove(output.get(), &container->owner); } -// IMPORTANT: This constructor will move the input array if it is device data. -// Probably won't for host data, though... is that asymmetry okay? -// We should switch to just also releasing for the host data case. arrow_column::arrow_column(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, @@ -154,19 +148,12 @@ arrow_column::arrow_column(ArrowSchema const* schema, case ARROW_DEVICE_CUDA: case ARROW_DEVICE_CUDA_HOST: case ARROW_DEVICE_CUDA_MANAGED: { - // In this case, we have an ArrowDeviceArray with CUDA data as the - // owner. We can simply move it into our container and safe it now. We - // do need to copy the schema, though. ArrowSchemaDeepCopy(schema, &container->schema); auto& device_arr = container->owner; - // This behavior is different than the old from_arrow_device function - // because now we are not create a non-owning column_view but an - // arrow_column that can manage lifetimes. ArrowArrayMove(&input->array, &device_arr.array); device_arr.device_type = input->device_type; - // Pointing to the existing sync event is safe assuming that the - // underlying event is managed by the private data and the release - // callback. + // Pointing to the existing sync event is safe because the underlying + // event must be managed by the private data and the release callback. device_arr.sync_event = input->sync_event; device_arr.device_id = input->device_id; break; @@ -175,6 +162,9 @@ arrow_column::arrow_column(ArrowSchema const* schema, auto col = from_arrow_host_column(schema, input, stream, mr); auto tmp_column = arrow_column(std::move(*col)); container = tmp_column.container; + // Should always be non-null unless we're in some odd multithreaded + // context but best to be safe. + if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); @@ -186,7 +176,8 @@ arrow_column::arrow_column(ArrowSchema const* schema, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + ArrowDeviceArray arr{.array = {}, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + ArrowArrayMove(input, &arr.array); auto tmp = arrow_column(schema, &arr, stream, mr); container = tmp.container; } @@ -234,7 +225,9 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, // storing the unique_column_view_t in the container. Then the container can // safely return copies of the view ad infinitum without needing this call, and // this call can be stream- and mr-free, matching the cudf::column::view -// method. +// method. Also doing this on construction would allow us to cache column data +// for the types where the representation is not identical between arrow and +// cudf (like bools) and avoiding constant back-and-forth conversion. unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -322,19 +315,12 @@ arrow_table::arrow_table(ArrowSchema const* schema, case ARROW_DEVICE_CUDA: case ARROW_DEVICE_CUDA_HOST: case ARROW_DEVICE_CUDA_MANAGED: { - // In this case, we have an ArrowDeviceArray with CUDA data as the - // owner. We can simply move it into our container and safe it now. We - // do need to copy the schema, though. ArrowSchemaDeepCopy(schema, &container->schema); auto& device_arr = container->owner; - // This behavior is different than the old from_arrow_device function - // because now we are not create a non-owning table_view but an - // arrow_table that can manage lifetimes. ArrowArrayMove(&input->array, &device_arr.array); device_arr.device_type = input->device_type; - // Pointing to the existing sync event is safe assuming that the - // underlying event is managed by the private data and the release - // callback. + // Pointing to the existing sync event is safe because the underlying + // event must be managed by the private data and the release callback. device_arr.sync_event = input->sync_event; device_arr.device_id = input->device_id; break; @@ -358,7 +344,9 @@ arrow_table::arrow_table(ArrowSchema const* schema, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - ArrowDeviceArray arr{.array = *input, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + ArrowDeviceArray arr{.array = {}, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; + ArrowArrayMove(input, &arr.array); + auto tmp = arrow_table(schema, &arr, stream, mr); container = tmp.container; } diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index e241f289cd8..a00fb04319c 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -86,10 +86,11 @@ TEST_F(ArrowColumnTest, ComplexNanoarrowDeviceTables) auto& col = tbl->get_column(i); ArrowDeviceArray device_arr{ - .array = *arr->children[i], + .array = {}, .device_id = 0, .device_type = ARROW_DEVICE_CUDA, }; + ArrowArrayMove(arr->children[i], &device_arr.array); auto arrow_column_from_nanoarrow_array = cudf::arrow_column(schema->children[i], &device_arr); CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_nanoarrow_array.view()); @@ -112,10 +113,11 @@ TEST_F(ArrowColumnTest, ComplexNanoarrowHostTables) auto& col = tbl->get_column(i); ArrowDeviceArray device_arr{ - .array = *arr->children[i], + .array = {}, .device_id = -1, .device_type = ARROW_DEVICE_CPU, }; + ArrowArrayMove(arr->children[i], &device_arr.array); auto arrow_column_from_nanoarrow_array = cudf::arrow_column(schema->children[i], &device_arr); CUDF_TEST_EXPECT_COLUMNS_EQUAL(col.view(), *arrow_column_from_nanoarrow_array.view()); @@ -196,10 +198,11 @@ TEST_F(ArrowTableTest, ComplexNanoarrowDeviceTables) { auto [tbl, schema, arr] = get_nanoarrow_tables(100); ArrowDeviceArray device_arr{ - .array = *arr.get(), + .array = {}, .device_id = 0, .device_type = ARROW_DEVICE_CUDA, }; + ArrowArrayMove(arr.get(), &device_arr.array); auto arrow_table_from_nanoarrow_array = cudf::arrow_table(schema.get(), &device_arr); CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_nanoarrow_array.view()); @@ -218,10 +221,11 @@ TEST_F(ArrowTableTest, ComplexNanoarrowHostTables) { auto [tbl, schema, arr] = get_nanoarrow_host_tables(100); ArrowDeviceArray device_arr{ - .array = *arr.get(), + .array = {}, .device_id = -1, .device_type = ARROW_DEVICE_CPU, }; + ArrowArrayMove(arr.get(), &device_arr.array); auto arrow_table_from_nanoarrow_array = cudf::arrow_table(schema.get(), &device_arr); CUDF_TEST_EXPECT_TABLES_EQUAL(tbl->view(), *arrow_table_from_nanoarrow_array.view()); From 89b34c3b450110fd4cb34e059822a47ffc4569c0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Feb 2025 17:01:10 +0000 Subject: [PATCH 27/37] Make sure stream and mr are forwarded everywhere --- cpp/src/interop/arrow_column.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index 14d18781d02..a6810b67d95 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -134,7 +134,7 @@ arrow_column::arrow_column(cudf::column&& input, auto tv = cudf::table_view{{input.view()}}; auto schema = cudf::to_arrow_schema(tv, table_meta); ArrowSchemaMove(schema->children[0], &(container->schema)); - auto output = cudf::to_arrow_device(std::move(input)); + auto output = cudf::to_arrow_device(std::move(input), stream, mr); ArrowDeviceArrayMove(output.get(), &container->owner); } @@ -160,7 +160,7 @@ arrow_column::arrow_column(ArrowSchema const* schema, } case ARROW_DEVICE_CPU: { auto col = from_arrow_host_column(schema, input, stream, mr); - auto tmp_column = arrow_column(std::move(*col)); + auto tmp_column = arrow_column(std::move(*col), stream, mr); container = tmp_column.container; // Should always be non-null unless we're in some odd multithreaded // context but best to be safe. @@ -253,7 +253,7 @@ arrow_table::arrow_table(cudf::table&& input, auto table_meta = get_table_metadata(input.view()); auto schema = cudf::to_arrow_schema(input.view(), table_meta); ArrowSchemaMove(schema.get(), &(container->schema)); - auto output = cudf::to_arrow_device(std::move(input)); + auto output = cudf::to_arrow_device(std::move(input), stream, mr); ArrowDeviceArrayMove(output.get(), &container->owner); } @@ -331,7 +331,7 @@ arrow_table::arrow_table(ArrowSchema const* schema, // suspect that the overhead of the memory copies will dwarf any extra // work here, but it's worth benchmarking to be sure. auto tbl = from_arrow_host(schema, input, stream, mr); - auto tmp_table = arrow_table(std::move(*tbl)); + auto tmp_table = arrow_table(std::move(*tbl), stream, mr); container = tmp_table.container; break; } @@ -351,7 +351,6 @@ arrow_table::arrow_table(ArrowSchema const* schema, container = tmp.container; } -// TODO: Make sure stream and mr are forwarded everywhere in this file. arrow_table::arrow_table(ArrowArrayStream* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) From eb932626b31b3e884c3309fd21413c5c35c7600e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Feb 2025 00:30:48 +0000 Subject: [PATCH 28/37] Centralize as much logic as possible --- cpp/include/cudf/interop.hpp | 34 ++++ cpp/src/interop/arrow_column.cpp | 235 +++++++++++------------- cpp/tests/interop/arrow_column_test.cpp | 21 ++- 3 files changed, 157 insertions(+), 133 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 5c7bf5f1645..b7337f218bc 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -192,6 +192,36 @@ using unique_column_view_t = */ struct arrow_array_container; +/** + * @brief Helper function to generate empty column metadata (column with no + * name) for arrow conversion. + * + * This function is helpful for internal conversions between host and device + * data using existing arrow functions. It is also convenient for external + * usage of the libcudf Arrow APIs to produce the canonical mapping from cudf + * column names to Arrow column names (i.e. empty names with appropriate + * nesting). + * + * @param input The column to generate metadata for + * @return The metadata for the column + */ +cudf::column_metadata get_column_metadata(cudf::column_view const& input); + +/** + * @brief Helper function to generate empty table metadata (all columns with no + * names) for arrow conversion. + * + * This function is helpful for internal conversions between host and device + * data using existing arrow functions. It is also convenient for external + * usage of the libcudf Arrow APIs to produce the canonical mapping from cudf + * column names to Arrow column names (i.e. empty names with appropriate + * nesting). + * + * @param input The table to generate metadata for + * @return The metadata for the table + */ +std::vector get_table_metadata(cudf::table_view const& input); + /** * @brief A standard interchange medium for ArrowDeviceArray data in cudf. * @@ -241,10 +271,12 @@ class arrow_column { * suitable for use afterwards. * * @param input cudf column to convert to arrow + * @param metadata Column metadata for the column * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion */ arrow_column(cudf::column&& input, + column_metadata const& metadata, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -350,10 +382,12 @@ class arrow_table { * suitable for use afterwards. * * @param input cudf table to convert to arrow + * @param metadata The hierarchy of names of columns and children * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion */ arrow_table(cudf::table&& input, + cudf::host_span metadata, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index a6810b67d95..b2fc13291bc 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -33,15 +33,68 @@ namespace cudf { struct arrow_array_container { - ArrowDeviceArray owner; //< ArrowDeviceArray that owns the data - ArrowSchema schema; //< ArrowSchema that describes the data + arrow_array_container() = default; + template + arrow_array_container(ArrowSchema* schema_, + T input_, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + { + ArrowSchemaMove(schema_, &schema); + auto output = cudf::to_arrow_device(std::move(input_), stream, mr); + ArrowDeviceArrayMove(output.get(), &owner); + } + + arrow_array_container(ArrowSchema const* schema_, + ArrowDeviceArray* input_, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) + { + switch (input_->device_type) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + ArrowSchemaDeepCopy(schema_, &schema); + auto& device_arr = owner; + ArrowArrayMove(&input_->array, &device_arr.array); + device_arr.device_type = input_->device_type; + // Pointing to the existing sync event is safe because the underlying + // event must be managed by the private data and the release callback. + device_arr.sync_event = input_->sync_event; + device_arr.device_id = input_->device_id; + break; + } + default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); + } + } ~arrow_array_container() { if (owner.array.release != nullptr) { ArrowArrayRelease(&owner.array); } } + + ArrowDeviceArray owner{}; //< ArrowDeviceArray that owns the data + ArrowSchema schema{}; //< ArrowSchema that describes the data }; +cudf::column_metadata get_column_metadata(cudf::column_view const& input) +{ + cudf::column_metadata meta{}; + for (auto i = 0; i < input.num_children(); ++i) { + meta.children_meta.push_back(get_column_metadata(input.child(i))); + } + return meta; +} + +std::vector get_table_metadata(cudf::table_view const& input) +{ + auto meta = std::vector{}; + for (auto i = 0; i < input.num_columns(); ++i) { + meta.push_back(get_column_metadata(input.column(i))); + } + return meta; +} + namespace { /** @@ -111,63 +164,72 @@ void copy_array(ArrowArray* output, output->private_data = private_data; } -} // namespace - -cudf::column_metadata get_column_metadata(cudf::column_view const& input) +template +void arrow_obj_to_arrow(T& obj, + std::shared_ptr container, + ArrowDeviceArray* output, + ArrowDeviceType device_type, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { - cudf::column_metadata meta{}; - for (auto i = 0; i < input.num_children(); ++i) { - meta.children_meta.push_back(get_column_metadata(input.child(i))); + switch (device_type) { + case ARROW_DEVICE_CUDA: + case ARROW_DEVICE_CUDA_HOST: + case ARROW_DEVICE_CUDA_MANAGED: { + auto& device_arr = container->owner; + copy_array(&output->array, &device_arr.array, container); + output->device_id = device_arr.device_id; + // We can reuse the sync event by reference from the input. The + // destruction of that event is managed by the destruction of + // the underlying ArrowDeviceArray of this table. + output->sync_event = device_arr.sync_event; + output->device_type = device_type; + break; + } + case ARROW_DEVICE_CPU: { + auto out = cudf::to_arrow_host(*obj.view().get(), stream, mr); + ArrowArrayMove(&out->array, &output->array); + output->device_id = -1; + output->sync_event = nullptr; + output->device_type = ARROW_DEVICE_CPU; + break; + } + default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); } - return meta; } +} // namespace + arrow_column::arrow_column(cudf::column&& input, + column_metadata const& metadata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : container{std::make_shared()} + : container{[&]() { + auto table_meta = std::vector{metadata}; + auto tv = cudf::table_view{{input.view()}}; + auto schema = cudf::to_arrow_schema(tv, table_meta); + return std::make_shared( + schema->children[0], std::move(input), stream, mr); + }()} { - // The output ArrowDeviceArray here will own all the data, so we don't need to save a column - // TODO: metadata should be provided by the user - auto meta = get_column_metadata(input.view()); - auto table_meta = std::vector{meta}; - auto tv = cudf::table_view{{input.view()}}; - auto schema = cudf::to_arrow_schema(tv, table_meta); - ArrowSchemaMove(schema->children[0], &(container->schema)); - auto output = cudf::to_arrow_device(std::move(input), stream, mr); - ArrowDeviceArrayMove(output.get(), &container->owner); } arrow_column::arrow_column(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : container{std::make_shared()} { switch (input->device_type) { - case ARROW_DEVICE_CUDA: - case ARROW_DEVICE_CUDA_HOST: - case ARROW_DEVICE_CUDA_MANAGED: { - ArrowSchemaDeepCopy(schema, &container->schema); - auto& device_arr = container->owner; - ArrowArrayMove(&input->array, &device_arr.array); - device_arr.device_type = input->device_type; - // Pointing to the existing sync event is safe because the underlying - // event must be managed by the private data and the release callback. - device_arr.sync_event = input->sync_event; - device_arr.device_id = input->device_id; - break; - } case ARROW_DEVICE_CPU: { auto col = from_arrow_host_column(schema, input, stream, mr); - auto tmp_column = arrow_column(std::move(*col), stream, mr); + auto tmp_column = arrow_column(std::move(*col), get_column_metadata(col->view()), stream, mr); container = tmp_column.container; // Should always be non-null unless we're in some odd multithreaded // context but best to be safe. if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } - default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); + default: container = std::make_shared(schema, input, stream, mr); } } @@ -186,8 +248,7 @@ void arrow_column::to_arrow_schema(ArrowSchema* output, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto& schema = container->schema; - ArrowSchemaDeepCopy(&schema, output); + ArrowSchemaDeepCopy(&container->schema, output); } void arrow_column::to_arrow(ArrowDeviceArray* output, @@ -195,30 +256,7 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - switch (device_type) { - case ARROW_DEVICE_CUDA: - case ARROW_DEVICE_CUDA_HOST: - case ARROW_DEVICE_CUDA_MANAGED: { - auto device_arr = container->owner; - copy_array(&output->array, &device_arr.array, container); - output->device_id = device_arr.device_id; - // We can reuse the sync event by reference from the input. The - // destruction of that event is managed by the destruction of - // the underlying ArrowDeviceArray of this column. - output->sync_event = device_arr.sync_event; - output->device_type = device_type; - break; - } - case ARROW_DEVICE_CPU: { - auto out = cudf::to_arrow_host(*view().get(), stream, mr); - ArrowArrayMove(&out->array, &output->array); - output->device_id = -1; - output->sync_event = nullptr; - output->device_type = ARROW_DEVICE_CPU; - break; - } - default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); - } + arrow_obj_to_arrow(*this, container, output, device_type, stream, mr); } // TODO: Consider just doing this work on construction of the container and @@ -234,27 +272,15 @@ unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, return from_arrow_device_column(&container->schema, &container->owner, stream, mr); } -std::vector get_table_metadata(cudf::table_view const& input) -{ - auto meta = std::vector{}; - for (auto i = 0; i < input.num_columns(); ++i) { - meta.push_back(get_column_metadata(input.column(i))); - } - return meta; -} - arrow_table::arrow_table(cudf::table&& input, + cudf::host_span metadata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : container{std::make_shared()} + : container{[&]() { + auto schema = cudf::to_arrow_schema(input.view(), metadata); + return std::make_shared(schema.get(), std::move(input), stream, mr); + }()} { - // The output ArrowDeviceArray here will own all the data, so we don't need to save a column - // TODO: metadata should be provided by the user - auto table_meta = get_table_metadata(input.view()); - auto schema = cudf::to_arrow_schema(input.view(), table_meta); - ArrowSchemaMove(schema.get(), &(container->schema)); - auto output = cudf::to_arrow_device(std::move(input), stream, mr); - ArrowDeviceArrayMove(output.get(), &container->owner); } unique_table_view_t arrow_table::view(rmm::cuda_stream_view stream, @@ -267,75 +293,35 @@ void arrow_table::to_arrow_schema(ArrowSchema* output, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto& schema = container->schema; - ArrowSchemaDeepCopy(&schema, output); + ArrowSchemaDeepCopy(&container->schema, output); } -// TODO: Based on the similarity of a lot of this functionality, we can -// probably define it in the arrow_array_container class and then just call it -// from here. Only a few things really need column vs table specializations. void arrow_table::to_arrow(ArrowDeviceArray* output, ArrowDeviceType device_type, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - switch (device_type) { - case ARROW_DEVICE_CUDA: - case ARROW_DEVICE_CUDA_HOST: - case ARROW_DEVICE_CUDA_MANAGED: { - auto device_arr = container->owner; - copy_array(&output->array, &device_arr.array, container); - output->device_id = device_arr.device_id; - // We can reuse the sync event by reference from the input. The - // destruction of that event is managed by the destruction of - // the underlying ArrowDeviceArray of this table. - output->sync_event = device_arr.sync_event; - output->device_type = device_type; - break; - } - case ARROW_DEVICE_CPU: { - auto out = cudf::to_arrow_host(*view().get(), stream, mr); - ArrowArrayMove(&out->array, &output->array); - output->device_id = -1; - output->sync_event = nullptr; - output->device_type = ARROW_DEVICE_CPU; - break; - } - default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); - } + arrow_obj_to_arrow(*this, container, output, device_type, stream, mr); } arrow_table::arrow_table(ArrowSchema const* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - : container{std::make_shared()} { switch (input->device_type) { - case ARROW_DEVICE_CUDA: - case ARROW_DEVICE_CUDA_HOST: - case ARROW_DEVICE_CUDA_MANAGED: { - ArrowSchemaDeepCopy(schema, &container->schema); - auto& device_arr = container->owner; - ArrowArrayMove(&input->array, &device_arr.array); - device_arr.device_type = input->device_type; - // Pointing to the existing sync event is safe because the underlying - // event must be managed by the private data and the release callback. - device_arr.sync_event = input->sync_event; - device_arr.device_id = input->device_id; - break; - } case ARROW_DEVICE_CPU: { // I'm not sure if there is a more efficient approach than doing this // back-and-forth conversion without writing a lot of bespoke logic. I // suspect that the overhead of the memory copies will dwarf any extra // work here, but it's worth benchmarking to be sure. auto tbl = from_arrow_host(schema, input, stream, mr); - auto tmp_table = arrow_table(std::move(*tbl), stream, mr); + auto tmp_table = arrow_table(std::move(*tbl), get_table_metadata(tbl->view()), stream, mr); container = tmp_table.container; + if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } - default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); + default: container = std::make_shared(schema, input, stream, mr); } } @@ -346,7 +332,6 @@ arrow_table::arrow_table(ArrowSchema const* schema, { ArrowDeviceArray arr{.array = {}, .device_id = -1, .device_type = ARROW_DEVICE_CPU}; ArrowArrayMove(input, &arr.array); - auto tmp = arrow_table(schema, &arr, stream, mr); container = tmp.container; } @@ -356,7 +341,7 @@ arrow_table::arrow_table(ArrowArrayStream* input, rmm::device_async_resource_ref mr) { auto tbl = from_arrow_stream(input, stream, mr); - auto tmp = arrow_table(std::move(*tbl), stream, mr); + auto tmp = arrow_table(std::move(*tbl), get_table_metadata(tbl->view()), stream, mr); container = tmp.container; } } // namespace cudf diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_column_test.cpp index a00fb04319c..0a2ff87901d 100644 --- a/cpp/tests/interop/arrow_column_test.cpp +++ b/cpp/tests/interop/arrow_column_test.cpp @@ -45,8 +45,9 @@ auto export_to_arrow(T& obj, ArrowDeviceType device_type = ARROW_DEVICE_CUDA) TEST_F(ArrowColumnTest, TwoWayConversion) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - auto col = cudf::column(int_col); - auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); + auto col = cudf::column(int_col); + auto arrow_column_from_cudf_column = + cudf::arrow_column(std::move(col), cudf::get_column_metadata(int_col)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column.view()); auto [arrow_schema_from_arrow_column, arrow_array_from_arrow_column] = @@ -62,8 +63,9 @@ TEST_F(ArrowColumnTest, TwoWayConversion) TEST_F(ArrowColumnTest, LifetimeManagement) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - auto col = std::make_unique(int_col); - auto arrow_column_from_cudf_column = std::make_unique(std::move(*col)); + auto col = std::make_unique(int_col); + auto arrow_column_from_cudf_column = + std::make_unique(std::move(*col), cudf::get_column_metadata(int_col)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col, *arrow_column_from_cudf_column->view()); @@ -158,8 +160,9 @@ TEST_F(ArrowColumnTest, ComplexNanoarrowHostArrowArrayTables) TEST_F(ArrowColumnTest, ToFromHost) { cudf::test::fixed_width_column_wrapper int_col{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}; - auto col = cudf::column(int_col); - auto arrow_column_from_cudf_column = cudf::arrow_column(std::move(col)); + auto col = cudf::column(int_col); + auto arrow_column_from_cudf_column = + cudf::arrow_column(std::move(col), cudf::get_column_metadata(int_col)); auto [arrow_schema_from_arrow_column, arrow_array_from_arrow_column] = export_to_arrow(arrow_column_from_cudf_column, ARROW_DEVICE_CPU); @@ -180,7 +183,8 @@ TEST_F(ArrowTableTest, TwoWayConversion) {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}}; auto original_view = cudf::table_view{{int_col, float_col}}; cudf::table table{cudf::table_view{{int_col, float_col}}}; - auto arrow_table_from_cudf_table = cudf::arrow_table(std::move(table)); + auto arrow_table_from_cudf_table = + cudf::arrow_table(std::move(table), cudf::get_table_metadata(original_view)); CUDF_TEST_EXPECT_TABLES_EQUAL(original_view, *arrow_table_from_cudf_table.view()); @@ -264,7 +268,8 @@ TEST_F(ArrowTableTest, ToFromHost) {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}}; auto original_view = cudf::table_view{{int_col, float_col}}; cudf::table table{cudf::table_view{{int_col, float_col}}}; - auto arrow_table_from_cudf_table = cudf::arrow_table(std::move(table)); + auto arrow_table_from_cudf_table = + cudf::arrow_table(std::move(table), cudf::get_table_metadata(original_view)); auto [arrow_schema_from_arrow_table, arrow_array_from_arrow_table] = export_to_arrow(arrow_table_from_cudf_table, ARROW_DEVICE_CPU); From 374e7ac4aab4ac97b5a8b64045cc6aa5b7c31898 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Feb 2025 01:26:03 +0000 Subject: [PATCH 29/37] Dictionary behavior is correct since we are just pointing back to existing private data --- cpp/src/interop/arrow_column.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index b2fc13291bc..f19798d92f6 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -156,9 +156,7 @@ void copy_array(ArrowArray* output, copy_array(private_data->children_raw[i], input->children[i], container); } } - output->children = private_data->children_raw.data(); - // TODO: This is probably not quite right but we don't actually support - // dictionaries so not sure it's worth fixing. + output->children = private_data->children_raw.data(); output->dictionary = input->dictionary; output->release = ArrayReleaseCallback; output->private_data = private_data; From 7014f30b50fe8b899ee3cd7b41df726d9398214d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Feb 2025 01:27:48 +0000 Subject: [PATCH 30/37] Update comments --- cpp/src/interop/arrow_column.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_column.cpp index f19798d92f6..7a083ba7446 100644 --- a/cpp/src/interop/arrow_column.cpp +++ b/cpp/src/interop/arrow_column.cpp @@ -257,13 +257,13 @@ void arrow_column::to_arrow(ArrowDeviceArray* output, arrow_obj_to_arrow(*this, container, output, device_type, stream, mr); } -// TODO: Consider just doing this work on construction of the container and -// storing the unique_column_view_t in the container. Then the container can -// safely return copies of the view ad infinitum without needing this call, and -// this call can be stream- and mr-free, matching the cudf::column::view -// method. Also doing this on construction would allow us to cache column data -// for the types where the representation is not identical between arrow and -// cudf (like bools) and avoiding constant back-and-forth conversion. +// If it proves to be a bottleneck we could do this work on construction of the +// container and store the extra columns in the container. Then the container +// can safely return copies of the view ad infinitum and this call can be +// stream- and mr-free, matching the cudf::column::view method. Also doing this +// on construction would allow us to cache column data for the types where the +// representation is not identical between arrow and cudf (like bools) and +// avoiding constant back-and-forth conversion. unique_column_view_t arrow_column::view(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { From 4410262879661b6891e2c2c96e6380577df3b9f1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Feb 2025 01:28:49 +0000 Subject: [PATCH 31/37] Revert debugging changes --- cpp/CMakeLists.txt | 5 ----- cpp/cmake/thirdparty/get_nanoarrow.cmake | 3 +-- cpp/tests/CMakeLists.txt | 3 --- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b67aa8a5ae3..d6b66a19224 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -784,11 +784,6 @@ add_library( src/utilities/type_checks.cpp src/utilities/type_dispatcher.cpp ) -set_source_files_properties( - src/interop/arrow_column.cpp src/interop/to_arrow_schema.cpp src/interop/from_arrow_host.cu - src/interop/from_arrow_device.cu src/interop/to_arrow_device.cu src/column/column_view.cpp - PROPERTIES COMPILE_OPTIONS "-g;-O0" -) # Anything that includes jitify needs to be compiled with _FILE_OFFSET_BITS=64 due to a limitation # in how conda builds glibc diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake index a4868aeac78..6765202cc5e 100644 --- a/cpp/cmake/thirdparty/get_nanoarrow.cmake +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -29,8 +29,7 @@ function(find_and_configure_nanoarrow) GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git GIT_TAG 4bf5a9322626e95e3717e43de7616c0a256179eb GIT_SHALLOW FALSE - OPTIONS "CMAKE_BUILD_TYPE Debug" "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" - ${_exclude_from_all} + OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ${_exclude_from_all} ) set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) rapids_export_find_package_root(BUILD nanoarrow "${nanoarrow_BINARY_DIR}" EXPORT_SET cudf-exports) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 7c31b6979f1..153c7286a07 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -296,9 +296,6 @@ ConfigureTest( nanoarrow ${ARROW_LIBRARIES} ) -set_target_properties( - INTEROP_TEST - PROPERTIES COMPILE_OPTIONS "-g;-O0") # ################################################################################################## # * io tests -------------------------------------------------------------------------------------- From dab19cf868208f1d1fd848735c215519330537e6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Feb 2025 01:32:49 +0000 Subject: [PATCH 32/37] Rename file --- cpp/CMakeLists.txt | 2 +- cpp/src/interop/{arrow_column.cpp => arrow_data_structures.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/src/interop/{arrow_column.cpp => arrow_data_structures.cpp} (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d6b66a19224..57c8c1a8105 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -463,7 +463,7 @@ add_library( src/hash/xxhash_64.cu src/interop/dlpack.cpp src/interop/arrow_utilities.cpp - src/interop/arrow_column.cpp + src/interop/arrow_data_structures.cpp src/interop/to_arrow_device.cu src/interop/to_arrow_host.cu src/interop/from_arrow_device.cu diff --git a/cpp/src/interop/arrow_column.cpp b/cpp/src/interop/arrow_data_structures.cpp similarity index 100% rename from cpp/src/interop/arrow_column.cpp rename to cpp/src/interop/arrow_data_structures.cpp From 57b7cac3b1b09135c34bd0065cfa7c2be73d03e0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 26 Feb 2025 02:47:19 +0000 Subject: [PATCH 33/37] Rename test --- cpp/tests/CMakeLists.txt | 2 +- .../{arrow_column_test.cpp => arrow_data_structures_test.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/tests/interop/{arrow_column_test.cpp => arrow_data_structures_test.cpp} (100%) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 153c7286a07..fbe514a69d5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -283,7 +283,7 @@ ConfigureTest( # * interop tests ------------------------------------------------------------------------- ConfigureTest( INTEROP_TEST - interop/arrow_column_test.cpp + interop/arrow_data_structures_test.cpp interop/to_arrow_device_test.cpp interop/to_arrow_test.cpp interop/to_arrow_host_test.cpp diff --git a/cpp/tests/interop/arrow_column_test.cpp b/cpp/tests/interop/arrow_data_structures_test.cpp similarity index 100% rename from cpp/tests/interop/arrow_column_test.cpp rename to cpp/tests/interop/arrow_data_structures_test.cpp From e3fce98ff44e79748c12f340dceca24d4566b5e7 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 28 Feb 2025 19:16:11 +0000 Subject: [PATCH 34/37] Use protocol for dlpack instead of deprecated function --- python/cudf/cudf/tests/test_dlpack.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index 20c24bd7564..187a5524e8e 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. import itertools from contextlib import ExitStack as does_not_raise @@ -140,7 +140,7 @@ def test_to_dlpack_cupy_2d(data_2d): def test_from_dlpack_cupy_1d(data_1d): cupy_array = cupy.array(data_1d) cupy_host_array = cupy_array.get() - dlt = cupy_array.toDlpack() + dlt = cupy_array.__dlpack__() gs = cudf.from_dlpack(dlt) cudf_host_array = gs.to_numpy(na_value=np.nan) @@ -151,7 +151,7 @@ def test_from_dlpack_cupy_1d(data_1d): def test_from_dlpack_cupy_2d(data_2d): cupy_array = cupy.array(data_2d, order="F") cupy_host_array = cupy_array.get().flatten() - dlt = cupy_array.toDlpack() + dlt = cupy_array.__dlpack__() gdf = cudf.from_dlpack(dlt) cudf_host_array = np.array(gdf.to_pandas()).flatten() From fd5b3ccebafd519b073d61090f97b299fbe92bd4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 4 Mar 2025 06:45:03 +0000 Subject: [PATCH 35/37] Address most PR comments --- cpp/include/cudf/interop.hpp | 6 ++++++ cpp/src/interop/arrow_data_structures.cpp | 21 ++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 3a5b0889593..93debd0e71a 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -283,6 +283,9 @@ class arrow_column { /** * @brief Convert the column to an ArrowSchema * + * The resulting schema is a deep copy of the arrow_column's schema and is + * not tied to its lifetime. + * * @param output ArrowSchema to populate with the column's schema * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion @@ -405,6 +408,9 @@ class arrow_table { /** * @brief Convert the table to an ArrowSchema * + * The resulting schema is a deep copy of the arrow_column's schema and is + * not tied to its lifetime. + * * @param output ArrowSchema to populate with the table's schema * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion diff --git a/cpp/src/interop/arrow_data_structures.cpp b/cpp/src/interop/arrow_data_structures.cpp index 7a083ba7446..3f211c8ae9e 100644 --- a/cpp/src/interop/arrow_data_structures.cpp +++ b/cpp/src/interop/arrow_data_structures.cpp @@ -132,6 +132,9 @@ void ArrayReleaseCallback(ArrowArray* array) * This function shallow copies an ArrowArray and all of its children. It is * used to export cudf arrow objects to user-provided ArrowDeviceArrays. * + * The @p input must be the ``owner`` member of the @p container OR a child of + * the ``owner`` member of the @p container. If not, the behavior is undefined. + * * @param output The ArrowArray to copy to * @param input The ArrowArray to copy from * @param container The container that owns the data @@ -140,7 +143,7 @@ void copy_array(ArrowArray* output, ArrowArray const* input, std::shared_ptr container) { - auto private_data = new ArrowArrayPrivateData{container}; + auto private_data = new ArrowArrayPrivateData{std::move(container)}; output->length = input->length; output->null_count = input->null_count; output->offset = input->offset; @@ -227,7 +230,13 @@ arrow_column::arrow_column(ArrowSchema const* schema, if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } - default: container = std::make_shared(schema, input, stream, mr); + default: { + // Move to a temporary before forwarding to ensure that the caller's release callback has been + // nullified. + ArrowDeviceArray tmp; + ArrowDeviceArrayMove(input, &tmp); + container = std::make_shared(schema, &tmp, stream, mr); + } } } @@ -319,7 +328,13 @@ arrow_table::arrow_table(ArrowSchema const* schema, if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } - default: container = std::make_shared(schema, input, stream, mr); + default: { + // Move to a temporary before forwarding to ensure that the caller's release callback has been + // nullified. + ArrowDeviceArray tmp; + ArrowDeviceArrayMove(input, &tmp); + container = std::make_shared(schema, &tmp, stream, mr); + } } } From 03e049bf5d942eb433b1034da408f6f8b9b760eb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 4 Mar 2025 06:54:16 +0000 Subject: [PATCH 36/37] Stop accepting ArrowSchemas as const and move from them --- cpp/include/cudf/interop.hpp | 8 ++--- cpp/src/interop/arrow_data_structures.cpp | 38 ++++++----------------- 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 93debd0e71a..a2ada532a0e 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -243,7 +243,7 @@ class arrow_column { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion */ - arrow_column(ArrowSchema const* schema, + arrow_column(ArrowSchema* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -259,7 +259,7 @@ class arrow_column { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion */ - arrow_column(ArrowSchema const* schema, + arrow_column(ArrowSchema* schema, ArrowArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -346,7 +346,7 @@ class arrow_table { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion */ - arrow_table(ArrowSchema const* schema, + arrow_table(ArrowSchema* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -373,7 +373,7 @@ class arrow_table { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used for any allocations during conversion */ - arrow_table(ArrowSchema const* schema, + arrow_table(ArrowSchema* schema, ArrowArray* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/src/interop/arrow_data_structures.cpp b/cpp/src/interop/arrow_data_structures.cpp index 3f211c8ae9e..61af5539fba 100644 --- a/cpp/src/interop/arrow_data_structures.cpp +++ b/cpp/src/interop/arrow_data_structures.cpp @@ -41,12 +41,12 @@ struct arrow_array_container { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - ArrowSchemaMove(schema_, &schema); auto output = cudf::to_arrow_device(std::move(input_), stream, mr); + ArrowSchemaMove(schema_, &schema); ArrowDeviceArrayMove(output.get(), &owner); } - arrow_array_container(ArrowSchema const* schema_, + arrow_array_container(ArrowSchema* schema_, ArrowDeviceArray* input_, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -55,14 +55,8 @@ struct arrow_array_container { case ARROW_DEVICE_CUDA: case ARROW_DEVICE_CUDA_HOST: case ARROW_DEVICE_CUDA_MANAGED: { - ArrowSchemaDeepCopy(schema_, &schema); - auto& device_arr = owner; - ArrowArrayMove(&input_->array, &device_arr.array); - device_arr.device_type = input_->device_type; - // Pointing to the existing sync event is safe because the underlying - // event must be managed by the private data and the release callback. - device_arr.sync_event = input_->sync_event; - device_arr.device_id = input_->device_id; + ArrowSchemaMove(schema_, &schema); + ArrowDeviceArrayMove(input_, &owner); break; } default: throw std::runtime_error("Unsupported ArrowDeviceArray type"); @@ -215,7 +209,7 @@ arrow_column::arrow_column(cudf::column&& input, { } -arrow_column::arrow_column(ArrowSchema const* schema, +arrow_column::arrow_column(ArrowSchema* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -230,17 +224,11 @@ arrow_column::arrow_column(ArrowSchema const* schema, if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } - default: { - // Move to a temporary before forwarding to ensure that the caller's release callback has been - // nullified. - ArrowDeviceArray tmp; - ArrowDeviceArrayMove(input, &tmp); - container = std::make_shared(schema, &tmp, stream, mr); - } + default: container = std::make_shared(schema, input, stream, mr); } } -arrow_column::arrow_column(ArrowSchema const* schema, +arrow_column::arrow_column(ArrowSchema* schema, ArrowArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -311,7 +299,7 @@ void arrow_table::to_arrow(ArrowDeviceArray* output, arrow_obj_to_arrow(*this, container, output, device_type, stream, mr); } -arrow_table::arrow_table(ArrowSchema const* schema, +arrow_table::arrow_table(ArrowSchema* schema, ArrowDeviceArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -328,17 +316,11 @@ arrow_table::arrow_table(ArrowSchema const* schema, if (input->array.release != nullptr) { ArrowArrayRelease(&input->array); } break; } - default: { - // Move to a temporary before forwarding to ensure that the caller's release callback has been - // nullified. - ArrowDeviceArray tmp; - ArrowDeviceArrayMove(input, &tmp); - container = std::make_shared(schema, &tmp, stream, mr); - } + default: container = std::make_shared(schema, input, stream, mr); } } -arrow_table::arrow_table(ArrowSchema const* schema, +arrow_table::arrow_table(ArrowSchema* schema, ArrowArray* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) From ea5bcdd6736a587fcc207d277c460c7c0bf45de7 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 4 Mar 2025 17:26:12 +0000 Subject: [PATCH 37/37] CI fixes --- cpp/include/cudf/interop.hpp | 2 +- cpp/src/interop/arrow_data_structures.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index a2ada532a0e..636db1b2111 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -42,7 +42,7 @@ struct ArrowArrayStream; #ifndef DOXYGEN_SHOULD_SKIP_THIS // These are types from arrow that we are forward declaring for our API to // avoid needing to include nanoarrow headers. -typedef int32_t ArrowDeviceType; +typedef int32_t ArrowDeviceType; // NOLINT #define ARROW_DEVICE_CUDA 2 #endif diff --git a/cpp/src/interop/arrow_data_structures.cpp b/cpp/src/interop/arrow_data_structures.cpp index 61af5539fba..d4e886bd03f 100644 --- a/cpp/src/interop/arrow_data_structures.cpp +++ b/cpp/src/interop/arrow_data_structures.cpp @@ -137,7 +137,7 @@ void copy_array(ArrowArray* output, ArrowArray const* input, std::shared_ptr container) { - auto private_data = new ArrowArrayPrivateData{std::move(container)}; + auto private_data = new ArrowArrayPrivateData{container}; output->length = input->length; output->null_count = input->null_count; output->offset = input->offset;