28
28
* limitations under the License.
29
29
*/
30
30
31
- #include " lidar_centerpoint/preprocess/preprocess_kernel .hpp"
32
-
31
+ #include < lidar_centerpoint/cuda_utils .hpp>
32
+ # include < lidar_centerpoint/preprocess/preprocess_kernel.hpp >
33
33
#include < lidar_centerpoint/utils.hpp>
34
34
35
+ #include < cassert>
35
36
namespace
36
37
{
37
38
const std::size_t MAX_POINT_IN_VOXEL_SIZE = 32 ; // the same as max_point_in_voxel_size_ in config
@@ -41,6 +42,50 @@ const std::size_t ENCODER_IN_FEATURE_SIZE = 9; // the same as encoder_in_featur
41
42
42
43
namespace centerpoint
43
44
{
45
+
46
/**
 * @brief Transform the points of one sweep and append a time-lag channel.
 *
 * One thread processes one point. It reads x, y, z from `input_points`
 * (consecutive points are `input_point_step` floats apart), computes
 *   out_x = t[0] * x + t[1] * y + t[2]  * z + t[3]
 *   out_y = t[4] * x + t[5] * y + t[6]  * z + t[7]
 *   out_z = t[8] * x + t[9] * y + t[10] * z + t[11]
 * with t = `transform_array` (the three rows of a 3x4 affine transform stored
 * row by row), and writes out_x, out_y, out_z, `time_lag` to `output_points`
 * (consecutive points are `num_features` floats apart).
 *
 * Expected launch layout: 1-D grid of 1-D blocks providing at least
 * `points_size` threads; surplus threads return without touching memory.
 * No shared memory is used.
 *
 * @param input_points     source points; dereferenced on the device
 * @param points_size      number of points to process
 * @param input_point_step stride between input points, in floats (>= 3 is read)
 * @param time_lag         value stored in the 4th output channel of every point
 * @param transform_array  at least 12 floats; dereferenced on the device
 * @param num_features     stride between output points, in floats; four values
 *                         are written per point
 * @param output_points    destination buffer; dereferenced on the device
 */
__global__ void generateSweepPoints_kernel(
  const float * input_points, size_t points_size, int input_point_step, float time_lag,
  const float * transform_array, int num_features, float * output_points)
{
  // Use a 64-bit index: this avoids the signed/unsigned comparison against
  // points_size and keeps the offset products below from wrapping in 32 bits.
  const size_t point_idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (point_idx >= points_size) {
    return;
  }

  const float * src = input_points + point_idx * static_cast<size_t>(input_point_step);
  float * dst = output_points + point_idx * static_cast<size_t>(num_features);

  const float input_x = src[0];
  const float input_y = src[1];
  const float input_z = src[2];

  dst[0] = transform_array[0] * input_x + transform_array[1] * input_y +
           transform_array[2] * input_z + transform_array[3];
  dst[1] = transform_array[4] * input_x + transform_array[5] * input_y +
           transform_array[6] * input_z + transform_array[7];
  dst[2] = transform_array[8] * input_x + transform_array[9] * input_y +
           transform_array[10] * input_z + transform_array[11];
  dst[3] = time_lag;
}
68
+
69
/**
 * @brief Host-side launcher for generateSweepPoints_kernel.
 *
 * Copies 16 floats from `transform_array` (host memory) into a temporary
 * device buffer on `stream`, then launches the kernel on the same stream with
 * 256 threads per block and enough blocks to cover `points_size` points.
 * The function does not synchronize the stream itself.
 *
 * @param input_points     device pointer to the source points
 * @param points_size      number of points; 0 is accepted and is a no-op
 * @param input_point_step stride between input points, in floats
 * @param time_lag         value written to the 4th output channel
 * @param transform_array  host pointer to at least 16 floats (all 16 are
 *                         copied; the kernel reads the first 12)
 * @param num_features     stride between output points, in floats; must be 4
 * @param output_points    device pointer to the destination buffer
 * @param stream           CUDA stream used for the copy and the launch
 * @return cudaSuccess when there is nothing to do, otherwise the result of
 *         cudaGetLastError() after the launch. A failing transform copy is
 *         reported through CHECK_CUDA_ERROR rather than the return value.
 */
cudaError_t generateSweepPoints_launch(
  const float * input_points, size_t points_size, int input_point_step, float time_lag,
  const float * transform_array, int num_features, float * output_points, cudaStream_t stream)
{
  // Precondition: the kernel writes exactly four floats per output point.
  // Checked before any CUDA work is queued.
  assert(num_features == 4);

  // An empty sweep would yield a grid of 0 blocks, which is an invalid launch
  // configuration. Treat it as a successful no-op and skip the allocation/copy.
  if (points_size == 0) {
    return cudaSuccess;
  }

  const size_t threads_per_block = 256;

  // NOTE(review): transform_d is released when this function returns, while the
  // copy and kernel may still be queued on `stream`. This presumably relies on
  // the deleter behind cuda::make_unique (e.g. cudaFree) ordering itself after
  // outstanding work - confirm in cuda_utils.hpp.
  auto transform_d = cuda::make_unique<float[]>(16);
  CHECK_CUDA_ERROR(cudaMemcpyAsync(
    transform_d.get(), transform_array, 16 * sizeof(float), cudaMemcpyHostToDevice, stream));

  // Ceil-divide so the tail of the point array is covered.
  dim3 blocks(static_cast<unsigned int>((points_size + threads_per_block - 1) / threads_per_block));
  dim3 threads(static_cast<unsigned int>(threads_per_block));

  generateSweepPoints_kernel<<<blocks, threads, 0, stream>>>(
    input_points, points_size, input_point_step, time_lag, transform_d.get(), num_features,
    output_points);

  // Kernel launches do not return a status; launch-configuration errors are
  // picked up here.
  return cudaGetLastError();
}
88
+
44
89
__global__ void generateVoxels_random_kernel (
45
90
const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
46
91
float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
0 commit comments