diff --git a/.github/workflows/test_api_cpu.yaml b/.github/workflows/test_api_cpu.yaml
index 126e500b..44f70269 100644
--- a/.github/workflows/test_api_cpu.yaml
+++ b/.github/workflows/test_api_cpu.yaml
@@ -47,8 +47,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]

       - name: Run tests
+        run: |
+          pytest tests/test_api.py -s -k "api and cpu"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cpu
+
+      - name: Run examples
         run: |
-          pytest tests/test_api.py -s -k "api and cpu"
+          pytest tests/test_examples.py -s -k "api and cpu"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml
index c8be0ece..abaf3111 100644
--- a/.github/workflows/test_api_cuda.yaml
+++ b/.github/workflows/test_api_cuda.yaml
@@ -45,8 +45,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]

       - name: Run tests
+        run: |
+          pytest tests/test_api.py -x -s -k "api and cuda"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cuda
+
+      - name: Run examples
         run: |
-          pytest tests/test_api.py -x -s -k "api and cuda"
+          pytest tests/test_examples.py -x -s -k "api and cuda"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/test_cli_cpu_ipex.yaml b/.github/workflows/test_cli_cpu_ipex.yaml
index d6b94d3e..c064f765 100644
--- a/.github/workflows/test_cli_cpu_ipex.yaml
+++ b/.github/workflows/test_cli_cpu_ipex.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml
index 05d43683..50622cea 100644
--- a/.github/workflows/test_cli_cpu_llama_cpp.yaml
+++ b/.github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "llama_cpp"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "llama_cpp"
diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml
index 21e65235..cb3085a0 100644
--- a/.github/workflows/test_cli_cpu_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cpu_onnxruntime.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and onnxruntime"
diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml
index 4612370c..442f0cd1 100644
--- a/.github/workflows/test_cli_cpu_openvino.yaml
+++ b/.github/workflows/test_cli_cpu_openvino.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
diff --git a/.github/workflows/test_cli_cpu_py_txi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml
index d07f6170..bf1b5adb 100644
--- a/.github/workflows/test_cli_cpu_py_txi.yaml
+++ b/.github/workflows/test_cli_cpu_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and py_txi"
diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml
index fef2a772..c2b9b720 100644
--- a/.github/workflows/test_cli_cpu_pytorch.yaml
+++ b/.github/workflows/test_cli_cpu_pytorch.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and pytorch"
diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml
index 0584665c..5044f9c6 100644
--- a/.github/workflows/test_cli_cuda_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -48,3 +48,6 @@ jobs:
       - name: Run tests
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and onnxruntime"
diff --git a/.github/workflows/test_cli_cuda_py_txi.yaml b/.github/workflows/test_cli_cuda_py_txi.yaml
index 7339b98e..4d21cd1c 100644
--- a/.github/workflows/test_cli_cuda_py_txi.yaml
+++ b/.github/workflows/test_cli_cuda_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml
index 0bc5dfaf..329c97ab 100644
--- a/.github/workflows/test_cli_cuda_pytorch.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"

+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
+
   run_cli_cuda_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
index acb04fe2..3c9c2925 100644
--- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
+++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"

+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
+
   cli_cuda_tensorrt_llm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml
index ee886e8c..06320b7c 100644
--- a/.github/workflows/test_cli_cuda_torch_ort.yaml
+++ b/.github/workflows/test_cli_cuda_torch_ort.yaml
@@ -51,6 +51,10 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"

+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"
+
   run_cli_cuda_torch_ort_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml
index 732513d2..c44c79a0 100644
--- a/.github/workflows/test_cli_cuda_vllm.yaml
+++ b/.github/workflows/test_cli_cuda_vllm.yaml
@@ -50,6 +50,10 @@ jobs:
         run: |
           FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"

+      - name: Run examples (sequential)
+        run: |
+          FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
+
   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_energy_star.yaml b/.github/workflows/test_energy_star.yaml
similarity index 84%
rename from .github/workflows/test_cli_energy_star.yaml
rename to .github/workflows/test_energy_star.yaml
index 24c487f6..91f7b14b 100644
--- a/.github/workflows/test_cli_energy_star.yaml
+++ b/.github/workflows/test_energy_star.yaml
@@ -20,13 +20,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

 jobs:
-  run_cli_energy_star_tests:
+  run_energy_star_tests:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'energy_star') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_energy_star')
+      contains( github.event.pull_request.labels.*.name, 'energy_star')
     }}

     runs-on:
diff --git a/examples/energy_star/_base_.yaml b/energy_star/_base_.yaml
similarity index 100%
rename from examples/energy_star/_base_.yaml
rename to energy_star/_base_.yaml
diff --git a/examples/energy_star/automatic_speech_recognition.yaml b/energy_star/automatic_speech_recognition.yaml
similarity index 100%
rename from examples/energy_star/automatic_speech_recognition.yaml
rename to energy_star/automatic_speech_recognition.yaml
diff --git a/examples/energy_star/image_classification.yaml b/energy_star/image_classification.yaml
similarity index 100%
rename from examples/energy_star/image_classification.yaml
rename to energy_star/image_classification.yaml
diff --git a/examples/energy_star/image_to_text.yaml b/energy_star/image_to_text.yaml
similarity index 100%
rename from examples/energy_star/image_to_text.yaml
rename to energy_star/image_to_text.yaml
diff --git a/examples/energy_star/object_detection.yaml b/energy_star/object_detection.yaml
similarity index 100%
rename from examples/energy_star/object_detection.yaml
rename to energy_star/object_detection.yaml
diff --git a/examples/energy_star/question_answering.yaml b/energy_star/question_answering.yaml
similarity index 100%
rename from examples/energy_star/question_answering.yaml
rename to energy_star/question_answering.yaml
diff --git a/examples/energy_star/sentence_similarity.yaml b/energy_star/sentence_similarity.yaml
similarity index 100%
rename from examples/energy_star/sentence_similarity.yaml
rename to energy_star/sentence_similarity.yaml
diff --git a/examples/energy_star/summarization.yaml b/energy_star/summarization.yaml
similarity index 100%
rename from examples/energy_star/summarization.yaml
rename to energy_star/summarization.yaml
diff --git a/examples/energy_star/t5_question_answering.yaml b/energy_star/t5_question_answering.yaml
similarity index 100%
rename from examples/energy_star/t5_question_answering.yaml
rename to energy_star/t5_question_answering.yaml
diff --git a/examples/energy_star/t5_summarization.yaml b/energy_star/t5_summarization.yaml
similarity index 100%
rename from examples/energy_star/t5_summarization.yaml
rename to energy_star/t5_summarization.yaml
diff --git a/examples/energy_star/t5_text_classification.yaml b/energy_star/t5_text_classification.yaml
similarity index 100%
rename from examples/energy_star/t5_text_classification.yaml
rename to energy_star/t5_text_classification.yaml
diff --git a/examples/energy_star/t5_text_generation.yaml b/energy_star/t5_text_generation.yaml
similarity index 100%
rename from examples/energy_star/t5_text_generation.yaml
rename to energy_star/t5_text_generation.yaml
diff --git a/examples/energy_star/text_classification.yaml b/energy_star/text_classification.yaml
similarity index 100%
rename from examples/energy_star/text_classification.yaml
rename to energy_star/text_classification.yaml
diff --git a/examples/energy_star/text_generation.yaml b/energy_star/text_generation.yaml
similarity index 100%
rename from examples/energy_star/text_generation.yaml
rename to energy_star/text_generation.yaml
diff --git a/examples/energy_star/text_to_image.yaml b/energy_star/text_to_image.yaml
similarity index 100%
rename from examples/energy_star/text_to_image.yaml
rename to energy_star/text_to_image.yaml
diff --git a/examples/ipex_bert.yaml b/examples/cpu_ipex_bert.yaml
similarity index 86%
rename from examples/ipex_bert.yaml
rename to examples/cpu_ipex_bert.yaml
index e549da0a..4f6f7fc5 100644
--- a/examples/ipex_bert.yaml
+++ b/examples/cpu_ipex_bert.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_bert
+name: cpu_ipex_bert

 launcher:
   numactl: true
@@ -14,16 +14,17 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: false
-  export: true
-  torch_dtype: bfloat16
-  model: bert-base-uncased
diff --git a/examples/ipex_llama.yaml b/examples/cpu_ipex_llama.yaml
similarity index 95%
rename from examples/ipex_llama.yaml
rename to examples/cpu_ipex_llama.yaml
index b564316b..6fad7a65 100644
--- a/examples/ipex_llama.yaml
+++ b/examples/cpu_ipex_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_llama
+name: cpu_ipex_llama

 launcher:
   numactl: true
@@ -14,24 +14,25 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+
 scenario:
-  latency: true
   memory: true
+  latency: true

   warmup_runs: 10
   iterations: 10
   duration: 10
-
+
   input_shapes:
     batch_size: 1
     sequence_length: 256
+
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
-
-backend:
-  device: cpu
-  export: true
-  no_weights: false
-  torch_dtype: bfloat16
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
diff --git a/examples/llama_cpp_embedding.yaml b/examples/cpu_llama_cpp_embedding.yaml
similarity index 72%
rename from examples/llama_cpp_embedding.yaml
rename to examples/cpu_llama_cpp_embedding.yaml
index bdd86cce..c5f6f628 100644
--- a/examples/llama_cpp_embedding.yaml
+++ b/examples/cpu_llama_cpp_embedding.yaml
@@ -1,26 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_embedding

 backend:
-  device: mps
-  model: nomic-ai/nomic-embed-text-v1.5-GGUF
+  device: cpu
   task: feature-extraction
+  model: nomic-ai/nomic-embed-text-v1.5-GGUF
   filename: nomic-embed-text-v1.5.Q4_0.gguf

 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
-    vocab_size: 30000
-    type_vocab_size: 1
-    max_position_embeddings: 512
+
   generate_kwargs:
     max_new_tokens: 100
     min_new_tokens: 100
diff --git a/examples/llama_cpp_text_generation.yaml b/examples/cpu_llama_cpp_text_generation.yaml
similarity index 82%
rename from examples/llama_cpp_text_generation.yaml
rename to examples/cpu_llama_cpp_text_generation.yaml
index 96def950..9edcd5c3 100644
--- a/examples/llama_cpp_text_generation.yaml
+++ b/examples/cpu_llama_cpp_text_generation.yaml
@@ -1,25 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_text_generation

 backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
+  device: cpu
   task: text-generation
+  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
-

 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
-    vocab_size: 32000
+
   generate_kwargs:
     max_new_tokens: 100
     min_new_tokens: 100
diff --git a/examples/onnxruntime_static_quant_vit.yaml b/examples/cpu_onnxruntime_static_quant_vit.yaml
similarity index 84%
rename from examples/onnxruntime_static_quant_vit.yaml
rename to examples/cpu_onnxruntime_static_quant_vit.yaml
index 3d298473..283ecb60 100644
--- a/examples/onnxruntime_static_quant_vit.yaml
+++ b/examples/cpu_onnxruntime_static_quant_vit.yaml
@@ -6,10 +6,11 @@ defaults:
   - _base_
   - _self_

-name: onnxruntime_static_quant_vit
+name: cpu_onnxruntime_static_quant_vit

 backend:
   device: cpu
+  export: true
   no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
diff --git a/examples/onnxruntime_timm.yaml b/examples/cpu_onnxruntime_timm.yaml
similarity index 82%
rename from examples/onnxruntime_timm.yaml
rename to examples/cpu_onnxruntime_timm.yaml
index 165fc28a..963f44f0 100644
--- a/examples/onnxruntime_timm.yaml
+++ b/examples/cpu_onnxruntime_timm.yaml
@@ -10,7 +10,8 @@ name: onnxruntime_timm

 backend:
   device: cpu
-  model: timm/mobilenetv3_large_100.ra_in1k
+  export: true
+  model: timm/tiny_vit_21m_224.in1k

 scenario:
   memory: true
diff --git a/examples/openvino_static_quant_bert.yaml b/examples/cpu_openvino_8bit.yaml
similarity index 66%
rename from examples/openvino_static_quant_bert.yaml
rename to examples/cpu_openvino_8bit.yaml
index caa4363a..02cd578c 100644
--- a/examples/openvino_static_quant_bert.yaml
+++ b/examples/cpu_openvino_8bit.yaml
@@ -6,16 +6,16 @@ defaults:
   - _base_
   - _self_

-name: openvino_static_quant_bert
+name: openvino_static_quant

 backend:
   device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  quantization: true
-  calibration: true
   reshape: true
+  no_weights: true
+  load_in_8bit: true
+  model: google-bert/bert-base-uncased

 scenario:
   input_shapes:
     batch_size: 1
+    sequence_length: 16
diff --git a/examples/openvino_diffusion.yaml b/examples/cpu_openvino_diffusion.yaml
similarity index 94%
rename from examples/openvino_diffusion.yaml
rename to examples/cpu_openvino_diffusion.yaml
index f0501101..0c2008db 100644
--- a/examples/openvino_diffusion.yaml
+++ b/examples/cpu_openvino_diffusion.yaml
@@ -9,11 +9,10 @@ defaults:
 name: openvino_diffusion

 backend:
+  half: true
   device: cpu
-  model: stabilityai/stable-diffusion-2-1
-  reshape: true
   export: true
-  half: true
+  model: stabilityai/stable-diffusion-2-1

 scenario:
   input_shapes:
diff --git a/examples/pytorch_bert.py b/examples/cuda_pytorch_bert.py
similarity index 93%
rename from examples/pytorch_bert.py
rename to examples/cuda_pytorch_bert.py
index 09f62b8d..82e1d56c 100644
--- a/examples/pytorch_bert.py
+++ b/examples/cuda_pytorch_bert.py
@@ -11,12 +11,13 @@
     print(f"Failed to get username from Hugging Face Hub: {e}")
     USERNAME = None

-BENCHMARK_NAME = "pytorch_bert"
+BENCHMARK_NAME = "cuda_pytorch_bert"
+MODEL = "google-bert/bert-base-uncased"


 def run_benchmark():
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
device_ids="0", no_weights=True, model="bert-base-uncased") + backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL) scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128}) benchmark_config = BenchmarkConfig( name=BENCHMARK_NAME, diff --git a/examples/pytorch_bert.yaml b/examples/cuda_pytorch_bert.yaml similarity index 100% rename from examples/pytorch_bert.yaml rename to examples/cuda_pytorch_bert.yaml diff --git a/examples/pytorch_llama.py b/examples/cuda_pytorch_llama.py similarity index 96% rename from examples/pytorch_llama.py rename to examples/cuda_pytorch_llama.py index fe732bfa..b515019c 100644 --- a/examples/pytorch_llama.py +++ b/examples/cuda_pytorch_llama.py @@ -11,7 +11,8 @@ print(f"Failed to get username from Hugging Face Hub: {e}") USERNAME = None -BENCHMARK_NAME = "pytorch-llama" +BENCHMARK_NAME = "cuda_pytorch_llama" +MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" WEIGHTS_CONFIGS = { "float16": { @@ -40,10 +41,10 @@ def run_benchmark(weight_config: str): launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn") backend_config = PyTorchConfig( + model=MODEL, device="cuda", device_ids="0", no_weights=True, - model="gpt2", **WEIGHTS_CONFIGS[weight_config], ) scenario_config = InferenceConfig( diff --git a/examples/pytorch_llama.yaml b/examples/cuda_pytorch_llama.yaml similarity index 83% rename from examples/pytorch_llama.yaml rename to examples/cuda_pytorch_llama.yaml index becd1f2e..aaf46098 100644 --- a/examples/pytorch_llama.yaml +++ b/examples/cuda_pytorch_llama.yaml @@ -6,16 +6,17 @@ defaults: - _base_ - _self_ -name: pytorch_llama +name: cuda_pytorch_llama launcher: device_isolation: true device_isolation_action: warn backend: - model: gpt2 device: cuda + no_weights: true torch_dtype: float16 + model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: memory: true diff --git a/examples/pytorch_vlm.yaml b/examples/cuda_pytorch_vlm.yaml similarity index 96% rename from examples/pytorch_vlm.yaml rename to examples/cuda_pytorch_vlm.yaml index a39f8c8a..fa3b4e99 100644 --- a/examples/pytorch_vlm.yaml +++ b/examples/cuda_pytorch_vlm.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: pytorch_vlm +name: cuda_pytorch_vlm launcher: device_isolation: true diff --git a/examples/tgi_llama.yaml b/examples/cuda_tgi_llama.yaml similarity index 71% rename from examples/tgi_llama.yaml rename to examples/cuda_tgi_llama.yaml index 399667fb..df1ab8ec 100644 --- a/examples/tgi_llama.yaml +++ b/examples/cuda_tgi_llama.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: tgi_llama +name: cuda_tgi_llama launcher: device_isolation: true @@ -14,14 +14,15 @@ launcher: backend: device: cuda - device_ids: 4 - # no_weights: true + device_ids: 0 + no_weights: true model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: input_shapes: batch_size: 4 - sequence_length: 256 + sequence_length: 64 + generate_kwargs: - max_new_tokens: 100 - min_new_tokens: 100 + max_new_tokens: 16 + min_new_tokens: 16 diff --git a/examples/trt_llama.yaml b/examples/cuda_trt_llama.yaml similarity index 79% rename from examples/trt_llama.yaml rename to examples/cuda_trt_llama.yaml index 30cb600a..7ed79f31 100644 --- a/examples/trt_llama.yaml +++ b/examples/cuda_trt_llama.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: trt_llama +name: cuda_trt_llama launcher: device_isolation: true @@ -21,7 +21,8 @@ backend: scenario: input_shapes: batch_size: 4 - sequence_length: 256 + sequence_length: 64 + 
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 16
+    min_new_tokens: 16
diff --git a/examples/vllm_llama.yaml b/examples/cuda_vllm_llama.yaml
similarity index 77%
rename from examples/vllm_llama.yaml
rename to examples/cuda_vllm_llama.yaml
index 8bbb4025..044928a3 100644
--- a/examples/vllm_llama.yaml
+++ b/examples/cuda_vllm_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: vllm_llama
+name: cuda_vllm_llama

 launcher:
   device_isolation: true
@@ -15,7 +15,7 @@ launcher:
 backend:
   device: cuda
   device_ids: 0
-  no_weights: false
+  no_weights: true
   serving_mode: offline
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
   engine_args:
@@ -24,7 +24,8 @@ backend:
 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 16
+    min_new_tokens: 16
diff --git a/examples/pytorch_bert_mps.yaml b/examples/mps_pytorch_bert.yaml
similarity index 67%
rename from examples/pytorch_bert_mps.yaml
rename to examples/mps_pytorch_bert.yaml
index 4d4dc6e3..8c6bae9a 100644
--- a/examples/pytorch_bert_mps.yaml
+++ b/examples/mps_pytorch_bert.yaml
@@ -1,15 +1,12 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: process # launcher: inline works,
+  - launcher: inline # mps has problems with multi processing (process launcher)
   - backend: pytorch
   - _base_
   - _self_

-name: pytorch_bert
-
-# launcher:
-#   start_method: spawn
+name: mps_pytorch_bert

 scenario:
   latency: true
@@ -19,8 +16,6 @@ scenario:
     sequence_length: 128

 backend:
-  device: cpu
+  device: mps
   no_weights: true
   model: bert-base-uncased
-
-
diff --git a/examples/neural_compressor_ptq_bert.yaml b/examples/neural_compressor_ptq_bert.yaml
deleted file mode 100644
index cbc32590..00000000
--- a/examples/neural_compressor_ptq_bert.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-defaults:
-  - benchmark
-  - backend: neural-compressor
-  - scenario: inference
-  - launcher: process
-  - _base_
-  - _self_
-
-name: neural_compressor_ptq_bert
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  ptq_quantization: true
-  calibration: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
diff --git a/examples/numactl_bert.yaml b/examples/numactl_bert.yaml
deleted file mode 100644
index 7add65e7..00000000
--- a/examples/numactl_bert.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: process
-  - backend: pytorch
-  - _base_
-  - _self_
-
-name: pytorch_bert
-
-launcher:
-  numactl: true
-  numactl_kwargs:
-    cpunodebind: 0
-    membind: 0
-
-scenario:
-  latency: true
-  memory: true
-  input_shapes:
-    batch_size: 1
-    sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
diff --git a/examples/tei_bge.yaml b/examples/tei_bge.yaml
deleted file mode 100644
index dbbab7d5..00000000
--- a/examples/tei_bge.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: py-txi
-  - _self_
-
-name: tei_bert
-
-launcher:
-  device_isolation: true
-  device_isolation_action: warn
-
-backend:
-  device: cpu
-  model: BAAI/bge-base-en-v1.5
-
-scenario:
-  input_shapes:
-    batch_size: 64
-    sequence_length: 128
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 4b26266b..5af0723b 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -10,12 +10,10 @@
     Benchmark,
     BenchmarkConfig,
     EnergyStarConfig,
-    INCConfig,
     InferenceConfig,
     InlineConfig,
     IPEXConfig,
     LlamaCppConfig,
-    LLMSwarmConfig,
     ORTConfig,
     OVConfig,
     ProcessConfig,
@@ -43,9 +41,7 @@
 cs.store(group="backend", name=ORTConfig.name, node=ORTConfig)
 cs.store(group="backend", name=TorchORTConfig.name, node=TorchORTConfig)
 cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig)
-cs.store(group="backend", name=INCConfig.name, node=INCConfig)
 cs.store(group="backend", name=PyTXIConfig.name, node=PyTXIConfig)
-cs.store(group="backend", name=LLMSwarmConfig.name, node=LLMSwarmConfig)
 cs.store(group="backend", name=VLLMConfig.name, node=VLLMConfig)
 cs.store(group="backend", name=LlamaCppConfig.name, node=LlamaCppConfig)
 # scenarios configurations
diff --git a/tests/test_energy_star.py b/tests/test_energy_star.py
index bbb83f55..6e6d1f5e 100644
--- a/tests/test_energy_star.py
+++ b/tests/test_energy_star.py
@@ -9,7 +9,8 @@

 LOGGER = getLogger("test-cli")

-TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples/energy_star"
+TEST_CONFIG_DIR = Path(__file__).parent.parent / "energy_star"
+
 TEST_CONFIG_NAMES = [
     config.split(".")[0]
     for config in os.listdir(TEST_CONFIG_DIR)
diff --git a/tests/test_examples.py b/tests/test_examples.py
new file mode 100644
index 00000000..d395a4bb
--- /dev/null
+++ b/tests/test_examples.py
@@ -0,0 +1,49 @@
+import os
+from logging import getLogger
+from pathlib import Path
+
+import pytest
+
+from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output
+
+LOGGER = getLogger("test-examples")
+
+
+TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples"
+
+TEST_CONFIG_NAMES = [
+    config.split(".")[0]
+    for config in os.listdir(TEST_CONFIG_DIR)
+    if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_"))
+]
+
+TEST_SCRIPT_PATHS = [TEST_CONFIG_DIR / filename for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py")]
+
+ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None)
+CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None)
+
+
+@pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES)
+def test_cli_configs(config_name):
+    args = ["optimum-benchmark", "--config-dir", TEST_CONFIG_DIR, "--config-name", config_name]
+
+    if ROCR_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"']
+    elif CUDA_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"']
+
+    popen = run_subprocess_and_log_stream_output(LOGGER, args)
+    assert popen.returncode == 0, f"Failed to run {config_name}"
+
+
+@pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS)
+def test_api_scripts(script_path):
+    args = ["python", script_path]
+
+    if ROCR_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"']
+    elif CUDA_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"']
+
+    popen = run_subprocess_and_log_stream_output(LOGGER, args)
+    assert popen.returncode == 0, f"Failed to run {script_path}"