Commit 5943eae

Fix Accuracy Difference during Benchmark and Quantization on TF Examples (#1632)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
1 parent: a5e5f5f

File tree

3 files changed: +14 −20 lines
  • examples/tensorflow
    • graph_networks/graphsage/quantization/ptq
    • image_recognition/tensorflow_models/vision_transformer/quantization/ptq
    • nlp/bert_large_squad/quantization/ptq


examples/tensorflow/graph_networks/graphsage/quantization/ptq/main.py

+1 −1

@@ -138,7 +138,7 @@ def eval_func(size, output_tensor, minibatch, test):
             labels.append(batch_labels)
             iter_num += 1
             total_time += time_consume
-            if iteration and iter_num >= iteration:
+            if iteration != -1 and iter_num >= iteration:
                 break
         tf_logging.warn('\n---> Stop iteration {0}'.format(str(iter_num)))
         val_preds = np.vstack(val_preds)
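
Why this matters: `iteration` uses `-1` as a sentinel for "evaluate the whole dataset", but `-1` is truthy in Python, so the old guard `if iteration and iter_num >= iteration:` broke out of the loop after the very first batch on exactly the no-limit accuracy path. That is one source of the benchmark/quantization accuracy mismatch this commit fixes. A minimal, self-contained sketch of the two behaviors (the `range(10)` loop is a stand-in for the real minibatch loop):

iteration = -1                # sentinel: "evaluate the whole dataset"

iter_num = 0
for batch in range(10):       # stand-in for the real dataloader
    iter_num += 1
    if iteration and iter_num >= iteration:   # old guard: bool(-1) is True,
        break                                 # and iter_num >= -1 always holds
assert iter_num == 1          # old behavior: accuracy measured on one batch

iter_num = 0
for batch in range(10):
    iter_num += 1
    if iteration != -1 and iter_num >= iteration:  # fixed guard: -1 means no cap
        break
assert iter_num == 10         # fixed behavior: full pass over the data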

examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/main.py

+7 −12

@@ -52,7 +52,7 @@
 arg_parser.add_argument('--int8', dest='int8', action='store_true', help='whether to use int8 model for benchmark')
 args = arg_parser.parse_args()

-def evaluate(model, eval_dataloader, metric, postprocess=None):
+def evaluate(model, eval_dataloader, postprocess=None):
     """Custom evaluate function to estimate the accuracy of the model.

     Args:
@@ -61,12 +61,14 @@ def evaluate(model, eval_dataloader, metric, postprocess=None):
     Returns:
         accuracy (float): evaluation result, the larger is better.
     """
+    from neural_compressor import METRICS
     from neural_compressor.model import Model
     model = Model(model)
     input_tensor = model.input_tensor
     output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
                         model.output_tensor[0]
     iteration = -1
+    metric = METRICS('tensorflow')['topk']()
     if args.benchmark and args.mode == 'performance':
         iteration = args.iters

@@ -136,9 +138,6 @@ def run(self):
                 accuracy_criterion = AccuracyCriterion(tolerable_loss=0.01),
                 op_type_dict={'conv2d':{ 'weight':{'dtype':['fp32']}, 'activation':{'dtype':['fp32']} }}
             )
-            from neural_compressor import METRICS
-            metrics = METRICS('tensorflow')
-            top1 = metrics['topk']()
             from tensorflow.core.protobuf import saved_model_pb2
             sm = saved_model_pb2.SavedModel()
             with tf.io.gfile.GFile(args.input_graph, "rb") as f:
@@ -147,10 +146,9 @@ def run(self):
             from neural_compressor.data import TensorflowShiftRescale
             postprocess = TensorflowShiftRescale()
             def eval(model):
-                return evaluate(model, eval_dataloader, top1, postprocess)
-            q_model = quantization.fit(graph_def, conf=conf, calib_dataloader=calib_dataloader,
-                                       # eval_dataloader=eval_dataloader, eval_metric=top1)
-                                       eval_func=eval)
+                return evaluate(model, eval_dataloader, postprocess)
+            q_model = quantization.fit(graph_def, conf=conf, eval_func=eval,
+                                       calib_dataloader=calib_dataloader)
             q_model.save(args.output_graph)

         if args.benchmark:
@@ -163,9 +161,6 @@ def eval(model):
                 'filter': None
             }
             dataloader = create_dataloader('tensorflow', dataloader_args)
-            from neural_compressor import METRICS
-            metrics = METRICS('tensorflow')
-            top1 = metrics['topk']()

             if args.int8 or args.input_graph.endswith("-tune.pb"):
                 input_graph = args.input_graph
@@ -180,7 +175,7 @@ def eval(model):
             from neural_compressor.data import TensorflowShiftRescale
             postprocess = TensorflowShiftRescale()
             def eval(model):
-                return evaluate(model, dataloader, top1, postprocess)
+                return evaluate(model, dataloader, postprocess)

             if args.mode == 'performance':
                 from neural_compressor.benchmark import fit

examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py

+6 −7

@@ -51,7 +51,7 @@
 flags.DEFINE_integer("iters", 100, "The iteration used for benchmark.")


-def evaluate(model, dataloader, metric, postprocess):
+def evaluate(model, dataloader, postprocess):
     """Custom evaluate function to estimate the accuracy of the bert model.

     Args:
@@ -60,6 +60,7 @@ def evaluate(model, dataloader, metric, postprocess):
     Returns:
         accuracy (float): evaluation result, the larger is better.
     """
+    from neural_compressor.metric import SquadF1
     from neural_compressor.adaptor.tf_utils.util import iterator_sess_run
     from neural_compressor.objective import Performance
     from neural_compressor.model import Model, BaseModel
@@ -70,12 +71,12 @@ def evaluate(model, dataloader, metric, postprocess):
     input_tensor = model.input_tensor
     output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
                         model.output_tensor[0]
+    warmup = 5
     iteration = -1
+    metric = SquadF1()
+    measurer = Performance()
     if FLAGS.benchmark and FLAGS.mode == 'performance':
         iteration = FLAGS.iters
-        measurer = Performance()
-
-    warmup = 5
     for idx, (inputs, labels) in enumerate(dataloader):
         # dataloader should keep the order and len of inputs same with input_tensor
         assert len(input_tensor) == len(inputs), \
@@ -125,8 +126,6 @@ def strip_iterator(graph_def):

 def main(_):
     tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
-    from neural_compressor.metric import SquadF1
-    metric = SquadF1()
     from neural_compressor.utils.create_obj_from_config import create_dataloader
     data_path = os.path.join(FLAGS.dataset_location, 'eval.tf_record')
     label_path = os.path.join(FLAGS.dataset_location, 'dev-v1.1.json')
@@ -142,7 +141,7 @@ def main(_):
     from neural_compressor.data import TFSquadV1PostTransform
     postprocess = TFSquadV1PostTransform(label_file=label_path, vocab_file=vocab_path)
     def eval(model):
-        return evaluate(model, dataloader, metric, postprocess)
+        return evaluate(model, dataloader, postprocess)
     if FLAGS.benchmark:
         if FLAGS.mode == 'performance':
             from neural_compressor.benchmark import fit
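
Taken together, the three files converge on one pattern: the eval closure passed to quantization.fit(eval_func=...) and the benchmark path both go through the same evaluate(), which now owns metric construction (SquadF1 here, top-1 for the ViT example), so the two paths cannot score differently because of shared metric state. A self-contained sketch of that pattern, with SquadF1Stub as a hypothetical stand-in for neural_compressor.metric.SquadF1 and a trivial model/dataloader in place of the real TF session:

class SquadF1Stub:
    """Hypothetical stand-in for neural_compressor.metric.SquadF1."""
    def __init__(self):
        self.scores = []
    def update(self, preds, labels):
        self.scores.append(1.0 if preds == labels else 0.0)
    def result(self):
        return sum(self.scores) / len(self.scores)

def evaluate(model, dataloader, postprocess=None):
    metric = SquadF1Stub()              # fresh metric per evaluation (the fix)
    for inputs, labels in dataloader:
        preds = model(inputs)           # stand-in for the TF session run
        metric.update(preds, labels)
    return metric.result()

def make_eval(dataloader, postprocess=None):
    # One closure serves both quantization.fit(eval_func=...) and the
    # accuracy benchmark, so both paths score the model identically.
    def eval_fn(model):
        return evaluate(model, dataloader, postprocess)
    return eval_fn

data = [(1, 1), (2, 2), (3, 0)]         # (input, label) pairs
eval_fn = make_eval(data)
print(eval_fn(lambda x: x))             # 0.666..., identical on every call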
