Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert Thrust mismatch implementation #3899

Merged
merged 1 commit into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions thrust/testing/cuda/mismatch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,36 @@ void TestMismatchCudaStreams()
cudaStreamDestroy(s);
}
DECLARE_UNITTEST(TestMismatchCudaStreams);

// Instrumented wrapper type with a non-trivial host-only constructor and
// destructor, used to reproduce https://github.com/NVIDIA/cccl/issues/3591:
// thrust::mismatch must not require constructing/destroying the value_type
// on the device.
template <typename T>
class Wrapper
{
public:
  Wrapper()
  {
    ++my_count;
  }

  // The implicitly-generated copy constructor would not increment my_count,
  // so destroying a copy and its source would drive the counter negative.
  // Count copies explicitly and forward the wrapped value.
  Wrapper(const Wrapper& other)
      : dummy(other.dummy)
  {
    ++my_count;
  }

  Wrapper& operator=(const Wrapper&) = default;

  _CCCL_HOST_DEVICE bool operator==(const Wrapper& input) const
  {
    return true;
  }

  ~Wrapper()
  {
    --my_count;
  }

private:
  // C++17 inline definition: the member was previously only declared, yet
  // ++/-- odr-use it, which would leave it undefined at link time.
  static inline std::atomic<size_t> my_count{0};
  T dummy;
};

void TestMismatchBug3591()
{
using T = Wrapper<int32_t>;
T* p = nullptr;
thrust::mismatch(thrust::device, p, p, p, cuda::std::equal_to<T>());
}
DECLARE_UNITTEST(TestMismatchBug3591);
128 changes: 128 additions & 0 deletions thrust/thrust/system/cuda/detail/mismatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,145 @@ THRUST_NAMESPACE_END
THRUST_NAMESPACE_BEGIN
namespace cuda_cub
{
namespace detail
{
// Iterator adapter that fuses two input sequences through a binary functor:
// dereferencing yields `op(*input1, *input2)` as a prvalue of type ValueType.
// This lets mismatch() run find_if_not over the sequence of predicate results
// without materializing them, and without requiring the underlying value
// types to be constructible/destructible on the device
// (see https://github.com/NVIDIA/cccl/issues/3591).
template <class ValueType, class InputIt1, class InputIt2, class BinaryOp>
struct transform_pair_of_input_iterators_t
{
  using self_t = transform_pair_of_input_iterators_t;
  // Distances are measured on the first sequence only; both sequences are
  // assumed to advance in lockstep.
  using difference_type = typename iterator_traits<InputIt1>::difference_type;
  using value_type      = ValueType;
  using pointer         = void; // no operator-> is provided
  using reference       = value_type; // dereference returns by value
  using iterator_category = std::random_access_iterator_tag;

  InputIt1 input1;
  InputIt2 input2;
  // mutable so the const overloads of operator* / operator[] can invoke a
  // functor with a non-const call operator
  mutable BinaryOp op;

  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE
  transform_pair_of_input_iterators_t(InputIt1 input1_, InputIt2 input2_, BinaryOp op_)
      : input1(input1_)
      , input2(input2_)
      , op(op_)
  {}

  transform_pair_of_input_iterators_t(const self_t&) = default;

  // BinaryOp might not be copy assignable, such as when it is a lambda.
  // Define an explicit copy assignment operator that doesn't try to assign it.
  _CCCL_HOST_DEVICE self_t& operator=(const self_t& o)
  {
    input1 = o.input1;
    input2 = o.input2;
    return *this;
  }

  /// Postfix increment
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++(int)
  {
    self_t retval = *this;
    ++input1;
    ++input2;
    return retval;
  }

  /// Prefix increment
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++()
  {
    ++input1;
    ++input2;
    return *this;
  }

  /// Indirection
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() const
  {
    return op(*input1, *input2);
  }
  /// Indirection
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*()
  {
    return op(*input1, *input2);
  }

  /// Addition
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator+(difference_type n) const
  {
    return self_t(input1 + n, input2 + n, op);
  }

  /// Addition assignment
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t& operator+=(difference_type n)
  {
    input1 += n;
    input2 += n;
    return *this;
  }

  /// Subtraction
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator-(difference_type n) const
  {
    return self_t(input1 - n, input2 - n, op);
  }

  /// Subtraction assignment
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t& operator-=(difference_type n)
  {
    input1 -= n;
    input2 -= n;
    return *this;
  }

  /// Distance — measured on the first sequence only; valid because both
  /// sequences always advance together.
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE difference_type operator-(self_t other) const
  {
    return input1 - other.input1;
  }

  /// Array subscript
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator[](difference_type n) const
  {
    return op(input1[n], input2[n]);
  }

  /// Equal to
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator==(const self_t& rhs) const
  {
    return (input1 == rhs.input1) && (input2 == rhs.input2);
  }

  /// Not equal to
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator!=(const self_t& rhs) const
  {
    return (input1 != rhs.input1) || (input2 != rhs.input2);
  }

}; // struct transform_pair_of_input_iterators_t
} // namespace detail

// Returns iterators to the first position at which the two sequences differ
// (per binary_pred), or {last1, first2 + (last1 - first1)} if they match.
template <class Derived, class InputIt1, class InputIt2, class BinaryPred>
pair<InputIt1, InputIt2> _CCCL_HOST_DEVICE
mismatch(execution_policy<Derived>& policy, InputIt1 first1, InputIt1 last1, InputIt2 first2, BinaryPred binary_pred)
{
  // Fuse both sequences through binary_pred and search for the first
  // position where the predicate is false. The fused iterator yields plain
  // bools, so the underlying value types never need to be constructed or
  // destroyed on the device.
  const auto transform_first =
    detail::transform_pair_of_input_iterators_t<bool, InputIt1, InputIt2, BinaryPred>(first1, first2, binary_pred);
  const auto result = cuda_cub::find_if_not(
    policy, transform_first, transform_first + thrust::distance(first1, last1), ::cuda::std::__identity{});
  // Compute the mismatch offset once instead of once per returned iterator.
  const auto dist = thrust::distance(transform_first, result);
  return thrust::make_pair(first1 + dist, first2 + dist);

  // FIXME(bgruber): the following code should be equivalent and not require a dedicated iterator. However, it
  // additionally requires the value_type to constructible/destructible on the device, which should be fixed at some
  // point. See also: https://github.com/NVIDIA/cccl/issues/3591
# if 0
  const auto n            = thrust::distance(first1, last1);
  const auto first        = make_zip_iterator(first1, first2);
  const auto last         = make_zip_iterator(last1, first2 + n);
  const auto mismatch_pos = cuda_cub::find_if_not(policy, first, last, make_zip_function(binary_pred));
  const auto dist         = thrust::distance(first, mismatch_pos);
  return thrust::make_pair(first1 + dist, first2 + dist);
# endif
}

template <class Derived, class InputIt1, class InputIt2>
Expand Down
Loading