diff --git a/thrust/testing/cuda/mismatch.cu b/thrust/testing/cuda/mismatch.cu index 540dccdbb96..1fcf097953e 100644 --- a/thrust/testing/cuda/mismatch.cu +++ b/thrust/testing/cuda/mismatch.cu @@ -93,3 +93,36 @@ void TestMismatchCudaStreams() cudaStreamDestroy(s); } DECLARE_UNITTEST(TestMismatchCudaStreams); + +// see https://github.com/NVIDIA/cccl/issues/3591 +template +class Wrapper +{ +public: + Wrapper() + { + ++my_count; + } + + _CCCL_HOST_DEVICE bool operator==(const Wrapper& input) const + { + return true; + } + + ~Wrapper() + { + --my_count; + } + +private: + static std::atomic my_count; + T dummy; +}; + +void TestMismatchBug3591() +{ + using T = Wrapper; + T* p = nullptr; + thrust::mismatch(thrust::device, p, p, p, cuda::std::equal_to()); +} +DECLARE_UNITTEST(TestMismatchBug3591); diff --git a/thrust/thrust/system/cuda/detail/mismatch.h b/thrust/thrust/system/cuda/detail/mismatch.h index 9987799faca..63e64b722ec 100644 --- a/thrust/thrust/system/cuda/detail/mismatch.h +++ b/thrust/thrust/system/cuda/detail/mismatch.h @@ -63,17 +63,145 @@ THRUST_NAMESPACE_END THRUST_NAMESPACE_BEGIN namespace cuda_cub { +namespace detail +{ +template +struct transform_pair_of_input_iterators_t +{ + using self_t = transform_pair_of_input_iterators_t; + using difference_type = typename iterator_traits::difference_type; + using value_type = ValueType; + using pointer = void; + using reference = value_type; + using iterator_category = std::random_access_iterator_tag; + + InputIt1 input1; + InputIt2 input2; + mutable BinaryOp op; + + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE + transform_pair_of_input_iterators_t(InputIt1 input1_, InputIt2 input2_, BinaryOp op_) + : input1(input1_) + , input2(input2_) + , op(op_) + {} + + transform_pair_of_input_iterators_t(const self_t&) = default; + + // BinaryOp might not be copy assignable, such as when it is a lambda. + // Define an explicit copy assignment operator that doesn't try to assign it. + _CCCL_HOST_DEVICE self_t& operator=(const self_t& o) + { + input1 = o.input1; + input2 = o.input2; + return *this; + } + + /// Postfix increment + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++(int) + { + self_t retval = *this; + ++input1; + ++input2; + return retval; + } + + /// Prefix increment + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++() + { + ++input1; + ++input2; + return *this; + } + + /// Indirection + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() const + { + return op(*input1, *input2); + } + /// Indirection + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() + { + return op(*input1, *input2); + } + + /// Addition + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator+(difference_type n) const + { + return self_t(input1 + n, input2 + n, op); + } + + /// Addition assignment + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t& operator+=(difference_type n) + { + input1 += n; + input2 += n; + return *this; + } + + /// Subtraction + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator-(difference_type n) const + { + return self_t(input1 - n, input2 - n, op); + } + + /// Subtraction assignment + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t& operator-=(difference_type n) + { + input1 -= n; + input2 -= n; + return *this; + } + + /// Distance + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE difference_type operator-(self_t other) const + { + return input1 - other.input1; + } + + /// Array subscript + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator[](difference_type n) const + { + return op(input1[n], input2[n]); + } + + /// Equal to + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator==(const self_t& rhs) const + { + return (input1 == rhs.input1) && (input2 == rhs.input2); + } + + /// Not equal to + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator!=(const self_t& rhs) const + { + return (input1 != rhs.input1) || (input2 != rhs.input2); + } + +}; // struct transform_pair_of_input_iterators_t +} // namespace detail template pair _CCCL_HOST_DEVICE mismatch(execution_policy& policy, InputIt1 first1, InputIt1 last1, InputIt2 first2, BinaryPred binary_pred) { + const auto transform_first = + detail::transform_pair_of_input_iterators_t(first1, first2, binary_pred); + const auto result = cuda_cub::find_if_not( + policy, transform_first, transform_first + thrust::distance(first1, last1), ::cuda::std::__identity{}); + return thrust::make_pair(first1 + thrust::distance(transform_first, result), + first2 + thrust::distance(transform_first, result)); + + // FIXME(bgruber): the following code should be equivalent and not require a dedicated iterator. However, it + // additionally requires the value_type to constructible/destructible on the device, which should be fixed at some + // point. See also: https://github.com/NVIDIA/cccl/issues/3591 +# if 0 const auto n = thrust::distance(first1, last1); const auto first = make_zip_iterator(first1, first2); const auto last = make_zip_iterator(last1, first2 + n); const auto mismatch_pos = cuda_cub::find_if_not(policy, first, last, make_zip_function(binary_pred)); const auto dist = thrust::distance(first, mismatch_pos); return thrust::make_pair(first1 + dist, first2 + dist); +# endif } template