From 4d5f9f02279260c36b2e6750d00b5c7a8aa9a012 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Tue, 4 Mar 2025 19:10:17 +0100 Subject: [PATCH 1/2] Refactor --- thrust/thrust/iterator/iterator_adaptor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/thrust/thrust/iterator/iterator_adaptor.h b/thrust/thrust/iterator/iterator_adaptor.h index 0cd1c31ee0a..e7594c58cd5 100644 --- a/thrust/thrust/iterator/iterator_adaptor.h +++ b/thrust/thrust/iterator/iterator_adaptor.h @@ -192,7 +192,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES iterator_adaptor private: // Core iterator interface for iterator_facade _CCCL_EXEC_CHECK_DISABLE - _CCCL_HOST_DEVICE typename iterator_adaptor::reference dereference() const + _CCCL_HOST_DEVICE reference dereference() const { return *m_iterator; } @@ -205,7 +205,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES iterator_adaptor } _CCCL_EXEC_CHECK_DISABLE - _CCCL_HOST_DEVICE void advance(typename iterator_adaptor::difference_type n) + _CCCL_HOST_DEVICE void advance(difference_type n) { // XXX statically assert on random_access_traversal_tag @@ -229,7 +229,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES iterator_adaptor _CCCL_EXEC_CHECK_DISABLE template - _CCCL_HOST_DEVICE typename iterator_adaptor::difference_type + _CCCL_HOST_DEVICE difference_type distance_to(iterator_adaptor const& y) const { return y.base() - m_iterator; From 0a998b744a5ef773f080f0f841eb5ec1ba4175a1 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Tue, 4 Mar 2025 19:10:30 +0100 Subject: [PATCH 2/2] Add strided_iterator and counting_iterator with a Step --- thrust/testing/counting_iterator.cu | 64 +++++++++ thrust/testing/strided_iterator.cu | 82 +++++++++++ thrust/thrust/iterator/counting_iterator.h | 138 ++++++++++++++++--- thrust/thrust/iterator/strided_iterator.h | 151 +++++++++++++++++++++ 4 files changed, 418 insertions(+), 17 deletions(-) create mode 100644 thrust/testing/strided_iterator.cu create mode 100644 thrust/thrust/iterator/strided_iterator.h diff --git a/thrust/testing/counting_iterator.cu b/thrust/testing/counting_iterator.cu index 8a7a07bc7dd..adde568718f 100644 --- a/thrust/testing/counting_iterator.cu +++ b/thrust/testing/counting_iterator.cu @@ -3,10 +3,13 @@ #include #include +#include #include #include +#include #include +#include #include @@ -285,4 +288,65 @@ void TestCountingIteratorDifference() } DECLARE_UNITTEST(TestCountingIteratorDifference); +void TestCountingIteratorDynamicStride() +{ + auto iter = thrust::make_counting_iterator(0, 2); + static_assert(sizeof(iter) == 2 * sizeof(int)); + + ASSERT_EQUAL(*iter, 0); + iter++; + ASSERT_EQUAL(*iter, 2); + iter++; + iter++; + ASSERT_EQUAL(*iter, 6); + iter += 5; + ASSERT_EQUAL(*iter, 16); + iter -= 10; + ASSERT_EQUAL(*iter, -4); +} +DECLARE_UNITTEST(TestCountingIteratorDynamicStride); + +void TestCountingIteratorStaticStride() +{ + auto iter = thrust::make_counting_iterator<2>(0); + static_assert(sizeof(decltype(iter)) == sizeof(int)); + + ASSERT_EQUAL(*iter, 0); + iter++; + ASSERT_EQUAL(*iter, 2); + iter++; + iter++; + ASSERT_EQUAL(*iter, 6); + iter += 5; + ASSERT_EQUAL(*iter, 16); + iter -= 10; + ASSERT_EQUAL(*iter, -4); +} +DECLARE_UNITTEST(TestCountingIteratorStaticStride); + +void TestCountingIteratorPointer() +{ + int arr[11]; + std::iota(arr, arr + 11, 0); + + auto iter = thrust::make_counting_iterator(&arr[2]); + + ASSERT_EQUAL(*iter, &arr[2]); + ASSERT_EQUAL(**iter, 2); + iter++; + ASSERT_EQUAL(*iter, &arr[3]); + ASSERT_EQUAL(**iter, 3); + iter++; + iter++; + ASSERT_EQUAL(*iter, &arr[5]); + ASSERT_EQUAL(**iter, 5); + iter += 5; + ASSERT_EQUAL(*iter, &arr[10]); + ASSERT_EQUAL(**iter, 10); + iter -= 10; + ASSERT_EQUAL(*iter, &arr[0]); + ASSERT_EQUAL(**iter, 0); +} +DECLARE_UNITTEST(TestCountingIteratorPointer); + _CCCL_DIAG_POP diff --git a/thrust/testing/strided_iterator.cu b/thrust/testing/strided_iterator.cu new file mode 100644 index 00000000000..8ea40aa817e --- /dev/null +++ b/thrust/testing/strided_iterator.cu @@ -0,0 +1,82 @@ +#include +#include + +#include +#include + +#include +#include + +#include + +void TestReadingStridedIterator() +{ + thrust::host_vector v(21); + std::iota(v.begin(), v.end(), -4); + auto iter = thrust::make_strided_iterator(v.begin() + 4, 2); + + ASSERT_EQUAL(*iter, 0); + iter++; + ASSERT_EQUAL(*iter, 2); + iter++; + iter++; + ASSERT_EQUAL(*iter, 6); + iter += 5; + ASSERT_EQUAL(*iter, 16); + iter -= 10; + ASSERT_EQUAL(*iter, -4); +} +DECLARE_UNITTEST(TestReadingStridedIterator); + +template +void TestWritingStridedIterator() +{ + // iterate over all second elements (runtime stride) + { + Vector v(10); + auto iter = thrust::make_strided_iterator(v.begin(), 2); + ASSERT_EQUAL(v, (Vector{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); + *iter = 33; + ASSERT_EQUAL(v, (Vector{33, 0, 0, 0, 0, 0, 0, 0, 0, 0})); + auto iter2 = iter + 1; + *iter2 = 34; + ASSERT_EQUAL(v, (Vector{33, 0, 34, 0, 0, 0, 0, 0, 0, 0})); + thrust::fill(iter + 2, iter + 4, 42); + ASSERT_EQUAL(v, (Vector{33, 0, 34, 0, 42, 0, 42, 0, 0, 0})); + } + + // iterate over all second elements (static stride) + { + Vector v(10); + auto iter = thrust::make_strided_iterator<2>(v.begin()); + thrust::fill(iter, iter + 3, 42); + ASSERT_EQUAL(v, (Vector{42, 0, 42, 0, 42, 0, 0, 0, 0, 0})); + } +} +DECLARE_INTEGRAL_VECTOR_UNITTEST(TestWritingStridedIterator); + +void TestWritingStridedIteratorToStructMember() +{ + using pair = ::cuda::std::pair; + using arr_of_pairs = ::cuda::std::array; + const auto data = arr_of_pairs{{{1, 2}, {3, 4}, {5, 6}, {7, 8}}}; + const auto reference = arr_of_pairs{{{1, 1337}, {3, 1337}, {5, 1337}, {7, 1337}}}; + constexpr auto stride = sizeof(pair) / sizeof(double); + + // iterate over all second elements (runtime stride) + { + auto arr = data; + auto iter = thrust::make_strided_iterator(&arr[0].second, stride); + thrust::fill(iter, iter + 4, 1337); + ASSERT_EQUAL(arr == reference, true); + } + + // iterate over all second elements (static stride) + { + auto arr = data; + auto iter = thrust::make_strided_iterator(&arr[0].second); + thrust::fill(iter, iter + 4, 1337); + ASSERT_EQUAL(arr == reference, true); + } +} +DECLARE_UNITTEST(TestWritingStridedIteratorToStructMember); diff --git a/thrust/thrust/iterator/counting_iterator.h b/thrust/thrust/iterator/counting_iterator.h index f06499b9d37..9a2385091ba 100644 --- a/thrust/thrust/iterator/counting_iterator.h +++ b/thrust/thrust/iterator/counting_iterator.h @@ -51,7 +51,7 @@ THRUST_NAMESPACE_BEGIN -template +template class counting_iterator; namespace detail @@ -60,7 +60,7 @@ template using counting_iterator_difference_type = ::cuda::std::_If<::cuda::std::is_integral_v && sizeof(Number) < sizeof(int), int, ::cuda::std::ptrdiff_t>; -template +template struct make_counting_iterator_base { using system = @@ -75,7 +75,7 @@ struct make_counting_iterator_base // to the internal state of an iterator causes subtle bugs (consider the temporary // iterator created in the expression *(iter + i)) and has no compelling use case using type = - iterator_adaptor, + iterator_adaptor, Incrementable, Incrementable, system, @@ -83,6 +83,9 @@ struct make_counting_iterator_base Incrementable, difference>; }; + +struct empty +{}; } // namespace detail //! \addtogroup iterators @@ -164,14 +167,17 @@ struct make_counting_iterator_base //! //! \see make_counting_iterator template + typename System = use_default, + typename Traversal = use_default, + typename Difference = use_default, + typename StrideHolder = detail::empty> class _CCCL_DECLSPEC_EMPTY_BASES counting_iterator - : public detail::make_counting_iterator_base::type + : public detail::make_counting_iterator_base::type + , StrideHolder { //! \cond - using super_t = typename detail::make_counting_iterator_base::type; + using super_t = + typename detail::make_counting_iterator_base::type; friend class iterator_core_access; public: @@ -187,12 +193,14 @@ class _CCCL_DECLSPEC_EMPTY_BASES counting_iterator //! Copy constructor copies the value of another counting_iterator with related System type. //! //! \param rhs The \p counting_iterator to copy. - template >::type, - typename iterator_system::type, - int> = 0> - _CCCL_HOST_DEVICE counting_iterator(counting_iterator const& rhs) + template < + class OtherSystem, + detail::enable_if_convertible_t< + typename iterator_system>::type, + typename iterator_system::type, + int> = 0> + _CCCL_HOST_DEVICE + counting_iterator(counting_iterator const& rhs) : super_t(rhs.base()) {} @@ -204,18 +212,72 @@ class _CCCL_DECLSPEC_EMPTY_BASES counting_iterator : super_t(x) {} + _CCCL_HOST_DEVICE explicit counting_iterator(Incrementable x, StrideHolder stride) + : super_t(x) + , StrideHolder(stride) + {} + //! \cond private: + template + auto stride() const + { + return static_cast(*this).value; + } + + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE void advance(difference_type n) + { + if constexpr (::cuda::std::is_same_v) + { + this->base_reference() = static_cast(this->base_reference() + n); + } + else + { + this->base_reference() += n * stride(); + } + } + + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE void increment() + { + if constexpr (::cuda::std::is_same_v) + { + ++this->base_reference(); + } + else + { + this->base_reference() += stride(); + } + } + + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE void decrement() + { + if constexpr (::cuda::std::is_same_v) + { + --this->base_reference(); + } + else + { + this->base_reference() -= stride(); + } + } + _CCCL_HOST_DEVICE reference dereference() const { return this->base_reference(); } // note that we implement equal specially for floating point counting_iterator - template + template _CCCL_HOST_DEVICE bool - equal(counting_iterator const& y) const + equal(counting_iterator const& y) + const { if constexpr (::cuda::is_floating_point_v) { @@ -229,7 +291,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES counting_iterator template _CCCL_HOST_DEVICE difference_type - distance_to(counting_iterator const& y) const + distance_to(counting_iterator const& y) const { if constexpr (::cuda::std::is_integral::value) { @@ -255,6 +317,48 @@ inline _CCCL_HOST_DEVICE counting_iterator make_counting_iterator return counting_iterator(x); } +namespace detail +{ +// Holds a runtime stride +template +struct runtime_stride_holder +{ + T value; +}; + +// Holds a compile-time stride +// (we cannot use ::cuda::std::integral_constant, because it has a conversion operator to T that causes an ambiguity +// with operator+(counting_iterator, counting_iterator::difference_type) in any expression `counting_iterator + +// integral`. +template +struct compile_time_stride_holder +{ + static constexpr T value = Value; +}; +} // namespace detail + +//! Constructs a counting_iterator with a runtime stride +template +_CCCL_HOST_DEVICE auto make_counting_iterator(Incrementable x, Stride stride) +{ + return counting_iterator>(x, {stride}); +} + +//! Constructs a counting_iterator with a compile-time stride +template +_CCCL_HOST_DEVICE auto make_counting_iterator(Incrementable x) +{ + return counting_iterator>(x, {}); +} + //! \} // end fancyiterators //! \} // end iterators diff --git a/thrust/thrust/iterator/strided_iterator.h b/thrust/thrust/iterator/strided_iterator.h new file mode 100644 index 00000000000..92051c06867 --- /dev/null +++ b/thrust/thrust/iterator/strided_iterator.h @@ -0,0 +1,151 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +THRUST_NAMESPACE_BEGIN + +//! \addtogroup iterators +//! \{ + +//! \addtogroup fancyiterator Fancy Iterators +//! \ingroup iterators +//! \{ + +//! Holds a runtime value +template +struct runtime_value +{ + T value; +}; + +//! Holds a compile-time value +// we cannot use ::cuda::std::integral_constant, because it has a conversion operator to T that causes an ambiguity +// with operator+(counting_iterator, counting_iterator::difference_type) in any expression `counting_iterator + +// integral`. +template +struct compile_time_value +{ + static constexpr decltype(Value) value = Value; +}; + +namespace detail +{ +template +_CCCL_INLINE_VAR constexpr bool is_compile_time_value = false; + +template +_CCCL_INLINE_VAR constexpr bool is_compile_time_value> = true; +} // namespace detail + +//! A \p strided_iterator wraps another iterator and moves it by a specified stride each time it is incremented or +//! decremented. +//! +//! \param RandomAccessIterator A random access iterator +//! \param StrideHolder Either a \ref runtime_value or a \ref compile_time_value specifying the stride +template +class _CCCL_DECLSPEC_EMPTY_BASES strided_iterator + : public iterator_adaptor, RandomAccessIterator> + , StrideHolder +{ + //! \cond + using super_t = iterator_adaptor; + friend class iterator_core_access; + +public: + using difference_type = typename super_t::difference_type; + //! \endcond + + static_assert(::cuda::std::random_access_iterator, + "The iterator underlying a strided_iterator must be a random access iterator."); + static_assert(::cuda::std::is_same_v, random_access_traversal_tag>); + static_assert(::cuda::std::is_convertible_v, + "The stride must be convertible to the iterator's difference_type"); + + strided_iterator() = default; + + //! Creates a strided_iterator from an existing iterator and a stride. + _CCCL_HOST_DEVICE strided_iterator(RandomAccessIterator it, StrideHolder stride = {}) + : super_t(it) + , StrideHolder(stride) + {} + + static constexpr bool has_static_stride = detail::is_compile_time_value; + + //! Returns either the \ref runtime_value or the \ref compile_time_value holding the stride's value + _CCCL_HOST_DEVICE const auto& stride_holder() const + { + return static_cast(*this); + } + + //! Returns the stride's value + _CCCL_HOST_DEVICE auto stride() const -> difference_type + { + return static_cast>(stride_holder().value); + } + +private: + //! \cond + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE void advance(difference_type n) + { + this->base_reference() += n * stride(); + } + + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE void increment() + { + this->base_reference() += stride(); + } + + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE void decrement() + { + this->base_reference() -= stride(); + } + + template + _CCCL_HOST_DEVICE bool equal(strided_iterator const& other) const + { + return this->base() == other.base(); + } + + _CCCL_HOST_DEVICE difference_type distance_to(strided_iterator const& other) const + { + const difference_type dist = other.base() - this->base(); + _CCCL_ASSERT(dist % stride() == 0, "Underlying iterator difference must be divisible by the stride"); + return dist / stride(); + } + //! \endcond +}; + +//! Constructs a strided_iterator with a runtime stride +template +_CCCL_HOST_DEVICE auto make_strided_iterator(Iterator it, Stride stride) +{ + return strided_iterator>(it, {stride}); +} + +//! Constructs a strided_iterator with a compile-time stride +template +_CCCL_HOST_DEVICE auto make_strided_iterator(Iterator it) +{ + return strided_iterator>(it, {}); +} + +//! \} // end fancyiterators +//! \} // end iterators + +THRUST_NAMESPACE_END