Skip to content

Commit be583bc

Browse files
committed
pp refactor and add ONNX converter
Signed-off-by: Kaan Çolak <kaancolak95@gmail.com>
1 parent 5c0613b commit be583bc

File tree

3 files changed

+490
-28
lines changed

3 files changed

+490
-28
lines changed
+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
_base_ = [
2+
'../_base_/datasets/nus-3d.py',
3+
'../_base_/models/centerpoint_pillar02_second_secfpn_nus.py',
4+
'../_base_/schedules/cyclic-20e.py', '../_base_/default_runtime.py'
5+
]
6+
7+
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
8+
9+
class_names = [
10+
"car",
11+
"truck",
12+
"bus",
13+
"bicycle",
14+
"pedestrian",
15+
]
16+
17+
data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')
18+
19+
out_size_factor = 1
20+
model = dict(
21+
data_preprocessor=dict(
22+
voxel_layer=dict(
23+
point_cloud_range=point_cloud_range)),
24+
pts_voxel_encoder=dict(
25+
point_cloud_range=point_cloud_range,
26+
in_channels=4,
27+
feat_channels=[32, 32],
28+
use_voxel_center_z=False),
29+
pts_middle_encoder=dict(
30+
in_channels=32),
31+
pts_backbone=dict(
32+
in_channels=32,
33+
layer_strides=[1, 2, 2],),
34+
pts_neck=dict(
35+
upsample_strides=[1, 2, 4], ),
36+
pts_bbox_head=dict(
37+
tasks=[dict(
38+
num_class=len(class_names),
39+
class_names=class_names)],
40+
bbox_coder=dict(
41+
out_size_factor=out_size_factor,
42+
pc_range=point_cloud_range[:2])),
43+
# model training and testing settings
44+
train_cfg=dict(
45+
pts=dict(
46+
point_cloud_range=point_cloud_range,
47+
out_size_factor=out_size_factor)),
48+
test_cfg=dict(
49+
pts=dict(
50+
pc_range=point_cloud_range[:2],
51+
nms_type='circle',
52+
out_size_factor=out_size_factor,)))
53+
54+
dataset_type = 'NuScenesDataset'
55+
data_root = 'data/nuscenes/'
56+
backend_args = None
57+
58+
point_load_dim = 5
59+
point_use_dim = [0, 1, 2, 4]
60+
61+
db_sampler = dict(
62+
data_root=data_root,
63+
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
64+
rate=1.0,
65+
prepare=dict(
66+
filter_by_difficulty=[-1],
67+
filter_by_min_points=dict(
68+
car=5,
69+
truck=5,
70+
bus=5,
71+
bicycle=5,
72+
pedestrian=5)),
73+
classes=class_names,
74+
sample_groups=dict(
75+
car=2,
76+
truck=3,
77+
bus=4,
78+
bicycle=6,
79+
pedestrian=2),
80+
points_loader=dict(
81+
type='LoadPointsFromFile',
82+
coord_type='LIDAR',
83+
load_dim=point_load_dim,
84+
use_dim=point_use_dim,
85+
backend_args=backend_args),
86+
backend_args=backend_args)
87+
88+
train_pipeline = [
89+
dict(
90+
type='LoadPointsFromFile',
91+
coord_type='LIDAR',
92+
load_dim=point_load_dim,
93+
use_dim=5,
94+
backend_args=backend_args),
95+
dict(
96+
type='LoadPointsFromMultiSweeps',
97+
sweeps_num=9,
98+
use_dim=point_use_dim,
99+
pad_empty_sweeps=True,
100+
remove_close=True,
101+
backend_args=backend_args),
102+
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
103+
dict(type='ObjectSample', db_sampler=db_sampler),
104+
dict(
105+
type='GlobalRotScaleTrans',
106+
rot_range=[-0.3925, 0.3925],
107+
scale_ratio_range=[0.95, 1.05],
108+
translation_std=[0, 0, 0]),
109+
dict(
110+
type='RandomFlip3D',
111+
sync_2d=False,
112+
flip_ratio_bev_horizontal=0.5,
113+
flip_ratio_bev_vertical=0.5),
114+
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
115+
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
116+
dict(type='ObjectNameFilter', classes=class_names),
117+
dict(type='PointShuffle'),
118+
dict(
119+
type='Pack3DDetInputs',
120+
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
121+
]
122+
test_pipeline = [
123+
dict(
124+
type='LoadPointsFromFile',
125+
coord_type='LIDAR',
126+
load_dim=point_load_dim,
127+
use_dim=5,
128+
backend_args=backend_args),
129+
dict(
130+
type='LoadPointsFromMultiSweeps',
131+
sweeps_num=9,
132+
use_dim=point_use_dim,
133+
pad_empty_sweeps=True,
134+
remove_close=True,
135+
backend_args=backend_args),
136+
dict(
137+
type='MultiScaleFlipAug3D',
138+
img_scale=(1333, 800),
139+
pts_scale_ratio=1,
140+
flip=False,
141+
transforms=[
142+
dict(
143+
type='GlobalRotScaleTrans',
144+
rot_range=[0, 0],
145+
scale_ratio_range=[1., 1.],
146+
translation_std=[0, 0, 0]),
147+
dict(type='RandomFlip3D')
148+
]),
149+
dict(type='Pack3DDetInputs', keys=['points'])
150+
]
151+
152+
train_dataloader = dict(
153+
_delete_=True,
154+
batch_size=2,
155+
num_workers=4,
156+
persistent_workers=True,
157+
sampler=dict(type='DefaultSampler', shuffle=True),
158+
dataset=dict(
159+
type='CBGSDataset',
160+
dataset=dict(
161+
type=dataset_type,
162+
data_root=data_root,
163+
ann_file='nuscenes_infos_train.pkl',
164+
pipeline=train_pipeline,
165+
metainfo=dict(classes=class_names),
166+
test_mode=False,
167+
data_prefix=data_prefix,
168+
use_valid_flag=True,
169+
box_type_3d='LiDAR',
170+
backend_args=backend_args)))
171+
test_dataloader = dict(
172+
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
173+
val_dataloader = dict(
174+
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
175+
176+
train_cfg = dict(val_interval=5)
177+
default_hooks = dict(
178+
checkpoint=dict(
179+
type='CheckpointHook',
180+
interval=1,
181+
save_optimizer=True))

mmdet3d/models/voxel_encoders/pillar_encoder.py

+36-28
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class PillarFeatureNet(nn.Module):
4040

4141
def __init__(self,
4242
in_channels: Optional[int] = 4,
43-
feat_channels: Optional[tuple] = (64, ),
43+
feat_channels: Optional[tuple] = (64,),
4444
with_distance: Optional[bool] = False,
4545
with_cluster_center: Optional[bool] = True,
4646
with_voxel_center: Optional[bool] = True,
@@ -50,14 +50,18 @@ def __init__(self,
5050
norm_cfg: Optional[dict] = dict(
5151
type='BN1d', eps=1e-3, momentum=0.01),
5252
mode: Optional[str] = 'max',
53-
legacy: Optional[bool] = True):
53+
legacy: Optional[bool] = True,
54+
use_voxel_center_z: Optional[bool] = True, ):
5455
super(PillarFeatureNet, self).__init__()
5556
assert len(feat_channels) > 0
5657
self.legacy = legacy
58+
self.use_voxel_center_z = use_voxel_center_z
5759
if with_cluster_center:
5860
in_channels += 3
5961
if with_voxel_center:
60-
in_channels += 3
62+
in_channels += 2
63+
if self.use_voxel_center_z:
64+
in_channels += 1
6165
if with_distance:
6266
in_channels += 1
6367
self._with_distance = with_distance
@@ -110,35 +114,38 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
110114
if self._with_cluster_center:
111115
points_mean = features[:, :, :3].sum(
112116
dim=1, keepdim=True) / num_points.type_as(features).view(
113-
-1, 1, 1)
117+
-1, 1, 1)
114118
f_cluster = features[:, :, :3] - points_mean
115119
features_ls.append(f_cluster)
116120

117121
# Find distance of x, y, and z from pillar center
118122
dtype = features.dtype
119123
if self._with_voxel_center:
124+
center_feature_size = 3 if self.use_voxel_center_z else 2
120125
if not self.legacy:
121-
f_center = torch.zeros_like(features[:, :, :3])
126+
f_center = torch.zeros_like(features[:, :, :center_feature_size])
122127
f_center[:, :, 0] = features[:, :, 0] - (
123-
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
124-
self.x_offset)
128+
coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
129+
self.x_offset)
125130
f_center[:, :, 1] = features[:, :, 1] - (
126-
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
127-
self.y_offset)
128-
f_center[:, :, 2] = features[:, :, 2] - (
129-
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
130-
self.z_offset)
131+
coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
132+
self.y_offset)
133+
if self.use_voxel_center_z:
134+
f_center[:, :, 2] = features[:, :, 2] - (
135+
coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
136+
self.z_offset)
131137
else:
132-
f_center = features[:, :, :3]
138+
f_center = features[:, :, :center_feature_size]
133139
f_center[:, :, 0] = f_center[:, :, 0] - (
134-
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
135-
self.x_offset)
140+
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
141+
self.x_offset)
136142
f_center[:, :, 1] = f_center[:, :, 1] - (
137-
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
138-
self.y_offset)
139-
f_center[:, :, 2] = f_center[:, :, 2] - (
140-
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
141-
self.z_offset)
143+
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
144+
self.y_offset)
145+
if self.use_voxel_center_z:
146+
f_center[:, :, 2] = f_center[:, :, 2] - (
147+
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
148+
self.z_offset)
142149
features_ls.append(f_center)
143150

144151
if self._with_distance:
@@ -193,7 +200,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
193200

194201
def __init__(self,
195202
in_channels: Optional[int] = 4,
196-
feat_channels: Optional[tuple] = (64, ),
203+
feat_channels: Optional[tuple] = (64,),
197204
with_distance: Optional[bool] = False,
198205
with_cluster_center: Optional[bool] = True,
199206
with_voxel_center: Optional[bool] = True,
@@ -264,15 +271,15 @@ def map_voxel_center_to_point(self, pts_coors: Tensor, voxel_mean: Tensor,
264271
canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)
265272
# Only include non-empty pillars
266273
indices = (
267-
voxel_coors[:, 0] * canvas_y * canvas_x +
268-
voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
274+
voxel_coors[:, 0] * canvas_y * canvas_x +
275+
voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
269276
# Scatter the blob back to the canvas
270277
canvas[:, indices.long()] = voxel_mean.t()
271278

272279
# Step 2: get voxel mean for each point
273280
voxel_index = (
274-
pts_coors[:, 0] * canvas_y * canvas_x +
275-
pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
281+
pts_coors[:, 0] * canvas_y * canvas_x +
282+
pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
276283
center_per_point = canvas[:, voxel_index.long()].t()
277284
return center_per_point
278285

@@ -301,11 +308,11 @@ def forward(self, features: Tensor, coors: Tensor) -> Tensor:
301308
if self._with_voxel_center:
302309
f_center = features.new_zeros(size=(features.size(0), 3))
303310
f_center[:, 0] = features[:, 0] - (
304-
coors[:, 3].type_as(features) * self.vx + self.x_offset)
311+
coors[:, 3].type_as(features) * self.vx + self.x_offset)
305312
f_center[:, 1] = features[:, 1] - (
306-
coors[:, 2].type_as(features) * self.vy + self.y_offset)
313+
coors[:, 2].type_as(features) * self.vy + self.y_offset)
307314
f_center[:, 2] = features[:, 2] - (
308-
coors[:, 1].type_as(features) * self.vz + self.z_offset)
315+
coors[:, 1].type_as(features) * self.vz + self.z_offset)
309316
features_ls.append(f_center)
310317

311318
if self._with_distance:
@@ -324,3 +331,4 @@ def forward(self, features: Tensor, coors: Tensor) -> Tensor:
324331
features = torch.cat([point_feats, feat_per_point], dim=1)
325332

326333
return voxel_feats, voxel_coors
334+

0 commit comments

Comments
 (0)