main.py
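"""Evaluate, post-training quantize (with Intel Neural Compressor), and benchmark a BEiT ONNX model on the ImageNet validation set."""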
import os
import tqdm
import onnx
import torch
import logging
import argparse
import onnxruntime as ort
from timm.utils import accuracy
from torchvision import datasets, transforms
from timm.data.constants import (
    IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD)

logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.WARN)


def build_eval_transform(input_size=224, imagenet_default_mean_and_std=False, crop_pct=None):
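    """Build the BEiT evaluation preprocessing pipeline: resize (bicubic), center crop, ToTensor, normalize."""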
    resize_im = input_size > 32
    mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN
    std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD
    t = []
    if resize_im:
        if crop_pct is None:
            if input_size < 384:
                crop_pct = 224 / 256
            else:
                crop_pct = 1.0
        size = int(input_size / crop_pct)
        t.append(
            # interpolation=3 is PIL bicubic; resize the short side so the crop keeps the same ratio w.r.t. 224 images
            transforms.Resize(size, interpolation=3),
        )
        t.append(transforms.CenterCrop(input_size))
    t.append(transforms.ToTensor())
    t.append(transforms.Normalize(mean, std))
    return transforms.Compose(t)


def build_val_dataset(data_path):
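    """Build an ImageFolder dataset over the 'val' split of the given ImageNet directory."""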
    transform = build_eval_transform()
    root = os.path.join(data_path, 'val')
    dataset = datasets.ImageFolder(root, transform=transform)
    return dataset


def evaluate_func(data_loader, model):
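    """Run the ONNX model over the loader with ONNX Runtime and return top-1 accuracy (averaged per batch)."""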
    session = ort.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
    top1, top5 = 0, 0
    for batch in tqdm.tqdm(data_loader, desc='eval'):
        images = batch[0].cpu().detach().numpy()
        target = batch[-1]
        # the exported model is expected to take a single input tensor named 'image'
        output = session.run(None, {'image': images})[0]
        acc1, acc5 = accuracy(torch.from_numpy(output), target, topk=(1, 5))
        top1 += acc1.cpu().detach().numpy()
        top5 += acc5.cpu().detach().numpy()
    top1 = top1 / len(data_loader)
    top5 = top5 / len(data_loader)
    print('* Acc@1 {:.3f} Acc@5 {:.3f}'.format(top1, top5))
    return top1


if __name__ == '__main__':
    logger.info("Evaluating ONNXRuntime full precision accuracy and performance:")
    parser = argparse.ArgumentParser(
        description="BEiT quantization examples.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '--model_path',
        type=str,
        help="path to the pre-trained ONNX model file"
    )
    parser.add_argument(
        '--dataset_location',
        type=str,
        help="ImageNet data path (expects a 'val' subfolder)"
    )
    parser.add_argument(
        '--benchmark',
        action='store_true',
        default=False,
        help="whether to benchmark the model"
    )
    parser.add_argument(
        '--tune',
        action='store_true',
        default=False,
        help="whether to quantize the model"
    )
    parser.add_argument(
        '--output_model',
        type=str,
        help="output model path"
    )
    parser.add_argument(
        '--quant_format',
        type=str,
        default='default',
        choices=['default', 'QDQ', 'QOperator'],
        help="quantization format"
    )
    parser.add_argument(
        '--mode',
        type=str,
        help="benchmark mode: 'performance' or 'accuracy'"
    )
    parser.add_argument(
        "--batch_size",
        default=64,
        type=int,
    )
    parser.add_argument(
        "--num_workers",
        default=10,
        type=int,
    )
    args = parser.parse_args()

    val_dataset = build_val_dataset(args.dataset_location)
    val_sampler = torch.utils.data.SequentialSampler(val_dataset)
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, sampler=val_sampler,
        batch_size=int(1.5 * args.batch_size),  # inference-only, so a batch larger than --batch_size is used
        num_workers=args.num_workers,
        drop_last=False
    )

    def eval_fn(model):  # renamed from `eval` to avoid shadowing the builtin
        return evaluate_func(val_data_loader, model)

    model = onnx.load(args.model_path)
    if args.tune:
        from neural_compressor import PostTrainingQuantConfig, quantization
        from neural_compressor.utils.constant import FP32
        # keep Conv ops and each block's fc2 MatMul in FP32, and exclude MatMul outputs from quantization
        config = PostTrainingQuantConfig(approach="static",
                                         quant_format=args.quant_format,
                                         op_type_dict={'Conv': FP32},
                                         op_name_dict={'/blocks.*/mlp/fc2/MatMul': FP32},
                                         recipes={'optypes_to_exclude_output_quant': ['MatMul']},
                                         )
        q_model = quantization.fit(model,
                                   config,
                                   calib_dataloader=val_data_loader,
                                   eval_func=eval_fn)
        q_model.save(args.output_model)
    if args.benchmark:
        if args.mode == 'performance':
            from neural_compressor.benchmark import fit
            from neural_compressor.config import BenchmarkConfig
            conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1)
            fit(model, conf, b_dataloader=val_data_loader)
        elif args.mode == 'accuracy':
            acc_result = evaluate_func(val_data_loader, model)
            print("Batch size = %d" % val_data_loader.batch_size)
            print("Accuracy: %.5f" % acc_result)