Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extended <cuda/bit> operations: bitfield_insert, bitfield_extract, bit_reverse, bitmask #3941

Merged
merged 40 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
4e3ad46
add bitfield_insert and bitfield_extract
fbusato Feb 26, 2025
139ec3b
add bit_reverse
fbusato Feb 26, 2025
fe1e0ff
fix macros
fbusato Feb 26, 2025
3d85c5a
add bit_reverse header
fbusato Feb 26, 2025
113e3a7
add bitfield test
fbusato Feb 26, 2025
7ba0e87
fix bit_reverse implementation
fbusato Feb 26, 2025
be3a126
add bfe/bfi
fbusato Feb 26, 2025
836192c
fix asm statements
fbusato Feb 27, 2025
f3bf56f
try to remove macros for device code
fbusato Feb 27, 2025
3f6c2f5
protect asm statement from MSVC
fbusato Feb 27, 2025
8636b60
add documentation
fbusato Feb 27, 2025
4ecb37c
add bitmask function
fbusato Feb 27, 2025
fe63dd8
use bitmask and refactor bitfield_insert
fbusato Feb 27, 2025
3457737
refactor documentation
fbusato Feb 27, 2025
79d5c25
fix MSVC warning
fbusato Feb 27, 2025
31c8d93
fix mask generation in tests
fbusato Feb 27, 2025
45cfcb9
fix MSVC warning
fbusato Feb 28, 2025
8b91731
fix documentation typos
fbusato Feb 28, 2025
d612fe1
optmize shift
fbusato Feb 28, 2025
5a3544a
update docs
fbusato Feb 28, 2025
bc231d1
improve bitmask implementation
fbusato Feb 28, 2025
1238a7c
add new tests
fbusato Feb 28, 2025
2973b85
fix bfi
fbusato Feb 28, 2025
ee3e41d
add _CCCL_BUILTIN_BITREVERSE
fbusato Mar 3, 2025
f7f2532
fix _CCCL_BUILTIN_BITREVERSE
fbusato Mar 3, 2025
e870511
replace __CUDA_ARCH__ with __cccl_ptx_isa
fbusato Mar 3, 2025
20aaf70
increase _CCCL_HAS_INT128 scope
fbusato Mar 3, 2025
5b00c97
exclude CLANG from __builtin_bitreverse on device
fbusato Mar 3, 2025
d4d675c
modify _CCCL_BUILTIN_BITREVERSE32 guard
fbusato Mar 3, 2025
3c2963b
disable __builtin_bitreverse8 with nvcc
fbusato Mar 3, 2025
60f29d6
Add include
miscco Mar 4, 2025
0eaf2bf
Update libcudacxx/include/cuda/bit
fbusato Mar 4, 2025
e4186ed
Update docs/libcudacxx/extended_api/bit/bitmask.rst
fbusato Mar 4, 2025
47fc5ac
Update docs/libcudacxx/extended_api/bit/bitfield_insert.rst
fbusato Mar 4, 2025
34b0d48
Update docs/libcudacxx/extended_api/bit/bitfield_extract.rst
fbusato Mar 4, 2025
4b180ae
Update docs/libcudacxx/extended_api/bit/bitfield_extract.rst
fbusato Mar 4, 2025
98e52cd
Update docs/libcudacxx/extended_api/bit/bit_reverse.rst
fbusato Mar 4, 2025
3baa1eb
Update docs/libcudacxx/extended_api/bit/bitfield_insert.rst
fbusato Mar 4, 2025
15031ed
improve documentation
fbusato Mar 4, 2025
442beb5
Merge branch 'main' into extended-bit-operations
fbusato Mar 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions libcudacxx/include/cuda/__bit/bitfield.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA___BIT_BITFILED_INSERT_EXTRACT_H
#define _CUDA___BIT_BITFILED_INSERT_EXTRACT_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/__ptx/instructions/bmsk.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__type_traits/is_constant_evaluated.h>
#include <cuda/std/__type_traits/is_unsigned_integer.h>
#include <cuda/std/cstdint>
#include <cuda/std/limits>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

//! @brief Sets a contiguous run of bits in a value.
//!
//! Returns @p __value with the @p __width bits starting at bit position @p __start set to one
//! (bit 0 is the least significant bit). All other bits of @p __value are returned unchanged.
//!
//! @tparam _Tp     Unsigned integer type (enforced by a static_assert).
//! @param __value  Input value.
//! @param __start  Index of the lowest bit of the field. Precondition: 0 <= __start < digits.
//! @param __width  Number of bits in the field (default 1). Precondition: 0 < __width <= digits and
//!                 __start + __width <= digits, where digits is numeric_limits<_Tp>::digits.
//! @return @p __value OR-ed with the field mask.
template <typename _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp
bitfield_insert(const _Tp __value, int __start, int __width = 1) noexcept
{
  static_assert(_CUDA_VSTD::__cccl_is_unsigned_integer_v<_Tp>, "bitfield_insert() requires unsigned integer");
  constexpr auto __digits = _CUDA_VSTD::numeric_limits<_Tp>::digits;
  _CCCL_ASSERT(__width > 0 && __width <= __digits, "width out of range");
  _CCCL_ASSERT(__start >= 0 && __start < __digits, "start position out of range");
  _CCCL_ASSERT(__start + __width <= __digits, "start position + width out of range");
  if constexpr (sizeof(_Tp) <= sizeof(_CUDA_VSTD::uint32_t))
  {
    // The PTX path cannot be used during constant evaluation.
    if (!_CUDA_VSTD::__cccl_default_is_constant_evaluated())
    {
      // Device-only fast path, taken on SM70+ targets. bmsk_clamp presumably wraps the PTX `bmsk.clamp.b32`
      // instruction (verify against <cuda/__ptx/instructions/bmsk.h>). The 32-bit result is cast back to _Tp
      // explicitly so that narrow types do not rely on an implicit narrowing conversion.
      NV_IF_TARGET(NV_PROVIDES_SM_70,
                   (return static_cast<_Tp>(__value | _CUDA_VPTX::bmsk_clamp(__start, __width));))
    }
  }
  // A full-width field would require shifting by `digits`, which is undefined behavior; handle it separately.
  if (__width == __digits)
  {
    return static_cast<_Tp>(~_Tp{0});
  }
  // For types narrower than int the arithmetic below is carried out in int (integral promotion); the casts bring the
  // results back to _Tp. The preconditions guarantee that no set bit is shifted past the width of _Tp.
  const auto __mask = static_cast<_Tp>((_Tp{1} << __width) - 1);
  return static_cast<_Tp>(__value | (__mask << __start));
}

//! @brief Isolates a contiguous run of bits of a value.
//!
//! Returns @p __value with every bit outside the field [__start, __start + __width) cleared (bit 0 is the least
//! significant bit). The field is kept at its original position: the result is NOT shifted down to bit 0.
//!
//! @tparam _Tp     Unsigned integer type (enforced by a static_assert).
//! @param __value  Input value.
//! @param __start  Index of the lowest bit of the field. Precondition: 0 <= __start < digits.
//! @param __width  Number of bits in the field (default 1). Precondition: 0 < __width <= digits and
//!                 __start + __width <= digits, where digits is numeric_limits<_Tp>::digits.
//! @return @p __value AND-ed with the field mask.
template <typename _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp
bitfield_extract(const _Tp __value, int __start, int __width = 1) noexcept
{
  static_assert(_CUDA_VSTD::__cccl_is_unsigned_integer_v<_Tp>, "bitfield_extract() requires unsigned integer");
  constexpr auto __digits = _CUDA_VSTD::numeric_limits<_Tp>::digits;
  _CCCL_ASSERT(__width > 0 && __width <= __digits, "width out of range");
  _CCCL_ASSERT(__start >= 0 && __start < __digits, "start position out of range");
  _CCCL_ASSERT(__start + __width <= __digits, "start position + width out of range");
  if constexpr (sizeof(_Tp) <= sizeof(_CUDA_VSTD::uint32_t))
  {
    // The PTX path cannot be used during constant evaluation.
    if (!_CUDA_VSTD::__cccl_default_is_constant_evaluated())
    {
      // Device-only fast path, taken on SM70+ targets. bmsk_clamp presumably wraps the PTX `bmsk.clamp.b32`
      // instruction (verify against <cuda/__ptx/instructions/bmsk.h>). The 32-bit result is cast back to _Tp
      // explicitly so that narrow types do not rely on an implicit narrowing conversion.
      NV_IF_TARGET(NV_PROVIDES_SM_70,
                   (return static_cast<_Tp>(__value & _CUDA_VPTX::bmsk_clamp(__start, __width));))
    }
  }
  // A full-width field would require shifting by `digits`, which is undefined behavior; the whole value is the field.
  if (__width == __digits)
  {
    return __value;
  }
  // For types narrower than int the arithmetic below is carried out in int (integral promotion); the casts bring the
  // results back to _Tp. The preconditions guarantee that no set bit is shifted past the width of _Tp.
  const auto __mask = static_cast<_Tp>((_Tp{1} << __width) - 1);
  return static_cast<_Tp>(__value & (__mask << __start));
}

_LIBCUDACXX_END_NAMESPACE_CUDA

#endif // _CUDA___BIT_BITFILED_INSERT_EXTRACT_H
26 changes: 26 additions & 0 deletions libcudacxx/include/cuda/bit
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_BIT
#define _CUDA_BIT

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/__bit/bitfield.h>

#endif // _CUDA_BIT
Loading