Skip to content

Commit

Permalink
Revert Thrust mismatch implementation to please nvc++ (#3899)
Browse files Browse the repository at this point in the history
Fixes: #3591
  • Loading branch information
bernhardmgruber committed Mar 5, 2025
1 parent 6d02e11 commit e5b9da0
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 0 deletions.
33 changes: 33 additions & 0 deletions thrust/testing/cuda/mismatch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,36 @@ void TestMismatchCudaStreams()
cudaStreamDestroy(s);
}
DECLARE_UNITTEST(TestMismatchCudaStreams);

// see https://github.com/NVIDIA/cccl/issues/3591
// Wrapper around a value of type T that tracks the number of live instances
// in a static atomic counter. Crucially, the constructor and destructor are
// deliberately NOT marked _CCCL_HOST_DEVICE: they are host-only, so any code
// path that tries to construct or destroy this type on the device fails to
// compile. That is exactly what the regression test below relies on.
template <typename T>
class Wrapper
{
public:
// Host-only: increments the live-instance counter.
Wrapper()
{
++my_count;
}

// Comparison is usable from device code (unlike ctor/dtor); always equal,
// since the test only cares about compilability, not values.
_CCCL_HOST_DEVICE bool operator==(const Wrapper& input) const
{
return true;
}

// Host-only: decrements the live-instance counter.
~Wrapper()
{
--my_count;
}

private:
// NOTE(review): no out-of-class definition of my_count is visible in this
// chunk; an odr-use would need one (or C++17 `inline`). The test below never
// constructs a Wrapper, so the counter is likely never odr-used — confirm.
static std::atomic<size_t> my_count;
T dummy;
};

void TestMismatchBug3591()
{
using T = Wrapper<int32_t>;
T* p = nullptr;
thrust::mismatch(thrust::device, p, p, p, cuda::std::equal_to<T>());
}
DECLARE_UNITTEST(TestMismatchBug3591);
128 changes: 128 additions & 0 deletions thrust/thrust/system/cuda/detail/mismatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,145 @@ THRUST_NAMESPACE_END
THRUST_NAMESPACE_BEGIN
namespace cuda_cub
{
namespace detail
{
// Random-access iterator adaptor that fuses two input sequences with a binary
// operation: dereferencing at position i yields op(input1[i], input2[i]).
// mismatch() below instantiates it with ValueType = bool so the pairwise
// comparison of two ranges becomes a single sequence for find_if_not.
// NOTE(review): this hand-rolled iterator replaces a zip_iterator-based
// implementation to work around an nvc++ issue (see the commit title and the
// FIXME in mismatch() referencing https://github.com/NVIDIA/cccl/issues/3591);
// avoid restyling it without re-testing under nvc++.
template <class ValueType, class InputIt1, class InputIt2, class BinaryOp>
struct transform_pair_of_input_iterators_t
{
using self_t = transform_pair_of_input_iterators_t;
using difference_type = typename iterator_traits<InputIt1>::difference_type;
using value_type = ValueType;
using pointer = void;
// Dereferencing returns a prvalue (the result of op), not a true reference.
using reference = value_type;
using iterator_category = std::random_access_iterator_tag;

InputIt1 input1;
InputIt2 input2;
// mutable so the const overloads of operator* / operator[] can invoke a
// BinaryOp whose call operator is not const-qualified (e.g. mutable lambdas).
mutable BinaryOp op;

_CCCL_HOST_DEVICE _CCCL_FORCEINLINE
transform_pair_of_input_iterators_t(InputIt1 input1_, InputIt2 input2_, BinaryOp op_)
: input1(input1_)
, input2(input2_)
, op(op_)
{}

transform_pair_of_input_iterators_t(const self_t&) = default;

// BinaryOp might not be copy assignable, such as when it is a lambda.
// Define an explicit copy assignment operator that doesn't try to assign it.
_CCCL_HOST_DEVICE self_t& operator=(const self_t& o)
{
input1 = o.input1;
input2 = o.input2;
return *this;
}

/// Postfix increment
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++(int)
{
self_t retval = *this;
++input1;
++input2;
return retval;
}

/// Prefix increment
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++()
{
++input1;
++input2;
return *this;
}

/// Indirection
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() const
{
return op(*input1, *input2);
}
/// Indirection
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*()
{
return op(*input1, *input2);
}

/// Addition
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator+(difference_type n) const
{
return self_t(input1 + n, input2 + n, op);
}

/// Addition assignment
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t& operator+=(difference_type n)
{
input1 += n;
input2 += n;
return *this;
}

/// Subtraction
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator-(difference_type n) const
{
return self_t(input1 - n, input2 - n, op);
}

/// Subtraction assignment
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t& operator-=(difference_type n)
{
input1 -= n;
input2 -= n;
return *this;
}

/// Distance — measured on the first sequence only; both sequences are always
/// advanced in lockstep, so input2 would give the same answer.
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE difference_type operator-(self_t other) const
{
return input1 - other.input1;
}

/// Array subscript
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator[](difference_type n) const
{
return op(input1[n], input2[n]);
}

/// Equal to
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator==(const self_t& rhs) const
{
return (input1 == rhs.input1) && (input2 == rhs.input2);
}

/// Not equal to
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator!=(const self_t& rhs) const
{
return (input1 != rhs.input1) || (input2 != rhs.input2);
}

}; // struct transform_pair_of_input_iterators_t
} // namespace detail

/// Finds the first position where the two ranges [first1, last1) and
/// [first2, ...) differ under binary_pred, returning the pair of iterators at
/// that position (or the pair of end positions if the ranges are equal).
/// Implemented by fusing both ranges into a bool-valued sequence, where
/// element i is binary_pred(first1[i], first2[i]), and searching for the
/// first false with find_if_not.
template <class Derived, class InputIt1, class InputIt2, class BinaryPred>
pair<InputIt1, InputIt2> _CCCL_HOST_DEVICE
mismatch(execution_policy<Derived>& policy, InputIt1 first1, InputIt1 last1, InputIt2 first2, BinaryPred binary_pred)
{
const auto transform_first =
detail::transform_pair_of_input_iterators_t<bool, InputIt1, InputIt2, BinaryPred>(first1, first2, binary_pred);
const auto result = cuda_cub::find_if_not(
policy, transform_first, transform_first + thrust::distance(first1, last1), ::cuda::std::__identity{});
// Compute the mismatch offset once instead of evaluating the iterator
// distance twice in the return expression.
const auto dist = thrust::distance(transform_first, result);
return thrust::make_pair(first1 + dist, first2 + dist);

// FIXME(bgruber): the following code should be equivalent and not require a dedicated iterator. However, it
// additionally requires the value_type to constructible/destructible on the device, which should be fixed at some
// point. See also: https://github.com/NVIDIA/cccl/issues/3591
#  if 0
const auto n = thrust::distance(first1, last1);
const auto first = make_zip_iterator(first1, first2);
const auto last = make_zip_iterator(last1, first2 + n);
const auto mismatch_pos = cuda_cub::find_if_not(policy, first, last, make_zip_function(binary_pred));
const auto dist = thrust::distance(first, mismatch_pos);
return thrust::make_pair(first1 + dist, first2 + dist);
#  endif
}

template <class Derived, class InputIt1, class InputIt2>
Expand Down

0 comments on commit e5b9da0

Please sign in to comment.