forked from ultralytics/yolov3
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbop_to_yolov3_converter.py
95 lines (81 loc) · 4.29 KB
/
bop_to_yolov3_converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import json
import argparse
import numpy as np
def clamp(val, size):
    """Limit ``val`` to the closed interval ``[0, size]``."""
    # Apply the lower bound first, then the upper bound.
    lower_bounded = val if val > 0 else 0
    return size if size < lower_bounded else lower_bounded
def bb_reformat(bboxin, bbox_vis, render_width, render_height):
    """Convert a pixel-space bounding box into a normalised YOLO box.

    Args:
        bboxin: Box as ``(top_left_x, top_left_y, width, height)`` in pixels.
            It may extend beyond the image borders.
        bbox_vis: Box of the visible part of the object. Accepted so callers
            can pass it, but deliberately unused: boxes should not shrink
            because of occlusions, only because of the image border.
        render_width: Image width in pixels.
        render_height: Image height in pixels.

    Returns:
        ``[cx, cy, w, h]`` where the centre and size are expressed as
        fractions of the image width/height.
    """
    tlx, tly, w, h = bboxin

    # YOLOv3 cannot handle boxes that reach outside the image, so clip the
    # box to the image rectangle. Both corners are clipped independently,
    # starting from the unclipped coordinates, so a box that overhangs the
    # left/top border is cut rather than shifted into the image.
    left = min(max(0, tlx), render_width)
    right = min(max(0, tlx + w), render_width)
    top = min(max(0, tly), render_height)
    bottom = min(max(0, tly + h), render_height)

    w = right - left
    h = bottom - top
    cx = left + w / 2
    cy = top + h / 2

    # Height is normalised by the image height (and width by the width).
    return [cx / render_width, cy / render_height, w / render_width, h / render_height]
def convert_dataset(bop_dir, output_dir, val_frac = 0.1, width=720, height=540):
    """Convert a BOP scene-wise dataset into the Ultralytics YOLOv3 layout.

    Every immediate sub-directory of ``bop_dir`` is treated as one scene and
    must contain ``scene_gt.json``, ``scene_gt_info.json`` and an ``rgb``
    folder with images named ``<6-digit image id>.jpg``.

    The following is written below ``output_dir``:

    * ``images/<scene>_<image id>.jpg`` - copies of the source images,
    * ``labels/<scene>_<image id>.txt`` - one ``class cx cy w h`` line per
      annotated object whose ``bbox_obj`` is not ``[-1, -1, -1, -1]``,
    * ``train.txt`` / ``validation.txt`` - lists of image paths. These files
      are opened in append mode, so converting into an existing output
      directory adds to the lists already there.

    Args:
        bop_dir: Path to the BOP dataset directory holding the scene folders.
        output_dir: Destination directory; created when missing.
        val_frac: Probability with which each image is put in the validation
            list instead of the training list.
        width: Image width in pixels, used to normalise the boxes.
        height: Image height in pixels, used to normalise the boxes.

    Raises:
        FileNotFoundError: If ``bop_dir`` is not a directory, or an
            annotation file or image referenced by the annotations is missing.
    """
    # Imported locally so this function does not depend on the module header.
    import shutil

    if not os.path.isdir(bop_dir):
        raise FileNotFoundError(f"BOP dataset directory not found: {bop_dir}")

    os.makedirs(output_dir, exist_ok=True)
    img_dir = os.path.join(output_dir, "images")
    os.makedirs(img_dir, exist_ok=True)
    label_dir = os.path.join(output_dir, "labels")
    os.makedirs(label_dir, exist_ok=True)
    train_file = os.path.join(output_dir, "train.txt")
    val_file = os.path.join(output_dir, "validation.txt")

    # Only the immediate sub-directories of bop_dir are scenes.
    scene_dirs = next(os.walk(bop_dir))[1]

    # First pass: collect every object id that occurs anywhere in the dataset,
    # because the class indices must be known before any label is written.
    obj_ids = set()
    for scene_dir in scene_dirs:
        with open(os.path.join(bop_dir, scene_dir, 'scene_gt.json'), 'r') as f:
            scene_gt = json.load(f)
        for annotations in scene_gt.values():
            obj_ids.update(ann["obj_id"] for ann in annotations)

    # yolov3 needs 0-indexed class markers; sort so the mapping is stable.
    cls2clsind = {obj_id: index for index, obj_id in enumerate(sorted(obj_ids))}

    # Second pass: copy the images and write the list and label files.
    with open(train_file, 'a') as train_list, open(val_file, 'a') as val_list:
        for n, scene_dir in enumerate(scene_dirs):
            with open(os.path.join(bop_dir, scene_dir, 'scene_gt.json'), 'r') as f:
                scene_gt = json.load(f)
            with open(os.path.join(bop_dir, scene_dir, 'scene_gt_info.json'), 'r') as f:
                scene_gt_info = json.load(f)

            for im_id, annotations in scene_gt.items():
                im_name = str(im_id).zfill(6)
                # Prefix with the scene name so images of different scenes
                # cannot collide in the flat output folder.
                base_filename = str(scene_dir).zfill(6) + "_" + im_name
                image_filename = base_filename + '.jpg'
                image_path = os.path.join(bop_dir, scene_dir, 'rgb', im_name + '.jpg')

                # shutil instead of a shell `cp`: works for paths containing
                # spaces or shell metacharacters, is portable, and raises
                # when the source image is missing.
                shutil.copy(image_path, os.path.join(img_dir, image_filename))

                # Randomly assign the image to the validation or training list.
                use_for_val = np.random.rand() < val_frac
                image_list = val_list if use_for_val else train_list
                image_list.write(os.path.join("./images/", image_filename) + "\n")

                label_path_txt = os.path.join(label_dir, base_filename + '.txt')
                infos = scene_gt_info[im_id]
                with open(label_path_txt, "w") as label_file:
                    # The i-th entry of scene_gt_info belongs to the i-th
                    # annotation of the same image.
                    for i, ann in enumerate(annotations):
                        bbox = infos[i]["bbox_obj"]
                        bbox_vis = infos[i]["bbox_visib"]
                        # Entries with an all -1 box are not written.
                        if bbox != [-1, -1, -1, -1]:
                            cx, cy, w, h = bb_reformat(bbox, bbox_vis, width, height)
                            label_file.write("{} {} {} {} {}\n".format(cls2clsind[ann["obj_id"]], cx, cy, w, h))

            print(f"{n+1} out of {len(scene_dirs)} directories converted")

    print('Conversion completed successfully.')
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run the conversion.
    parser = argparse.ArgumentParser(description='Convert BOP-scenewise dataset format to Ultralytics YOLOv3 dataset format')
    parser.add_argument('bop_dir', help='Path to the BOP-scenewise dataset directory')
    parser.add_argument('output_dir', help='Output directory for the converted dataset')
    # Optional positional argument; falls back to the default when omitted.
    parser.add_argument('val_frac', help='Fraction of data to use for validation', nargs='?', type=float, default=0.1)
    # The image size is needed to normalise the boxes; the defaults match the
    # defaults of convert_dataset, so existing invocations behave as before.
    parser.add_argument('--width', help='Width of the dataset images in pixels', type=int, default=720)
    parser.add_argument('--height', help='Height of the dataset images in pixels', type=int, default=540)
    args = parser.parse_args()
    convert_dataset(args.bop_dir, args.output_dir, args.val_frac, width=args.width, height=args.height)