    "_comment": "bert_fp32_inference",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=bert --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --num-cores=28 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --data-location=/dataset --num-inter-threads=1 --num-intra-threads=28 --disable-tcmalloc=True --task_name=XNLI --max_seq_length=128 --batch-size=8 --learning_rate=5e-5",
    "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/run_classifier.py --data_dir=/dataset --output_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_file=/checkpoints/vocab.txt --bert_config_file=/checkpoints/bert_config.json --init_checkpoint=/checkpoints/bert_model.ckpt --task_name=XNLI --max_seq_length=128 --eval_batch_size=8 --learning_rate=5e-05 --num_inter_threads=1 --num_intra_threads=28 --do_train=false --do_eval=true"
  },
  {
    "_comment": "bert_large_fp32_squad_inference",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD",
    "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_fp32_squad_profile",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD --profile=True",
    "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=profile --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_fp32_squad_accuracy",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD --accuracy-only",
    "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_int8_squad_inference",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=32 --data-location=/dataset --infer-option=SQuAD",
    "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_int8_inference_optional_args",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --data-location=/dataset --infer-option=SQuAD --num-intra-threads=28 --num-inter-threads=1 --benchmark-only --doc-stride=128 --max-seq-length=384",
    "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=benchmark --doc_stride=128 --max_seq_length=384 --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_int8_squad_accuracy",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=32 --data-location=/dataset --infer-option=SQuAD --accuracy-only",
    "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_bfloat16_squad_inference",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --benchmark-only",
    "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=benchmark --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_bfloat16_squad_profile",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --profile=True",
    "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=profile --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  },
  {
    "_comment": "bert_large_bfloat16_squad_accuracy",
    "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --accuracy-only",
    "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30"
  }
]