diff --git a/.github/workflows/test_api_cpu.yaml b/.github/workflows/test_api_cpu.yaml
index 126e500b..44f70269 100644
--- a/.github/workflows/test_api_cpu.yaml
+++ b/.github/workflows/test_api_cpu.yaml
@@ -47,8 +47,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]

       - name: Run tests
+        run: |
+          pytest tests/test_api.py -s -k "api and cpu"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cpu
+
+      - name: Run examples
         run: |
-          pytest tests/test_api.py -s -k "api and cpu"
+          pytest tests/test_examples.py -s -k "api and cpu"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml
index c8be0ece..abaf3111 100644
--- a/.github/workflows/test_api_cuda.yaml
+++ b/.github/workflows/test_api_cuda.yaml
@@ -45,8 +45,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]

       - name: Run tests
+        run: |
+          pytest tests/test_api.py -x -s -k "api and cuda"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cuda
+
+      - name: Run examples
         run: |
-          pytest tests/test_api.py -x -s -k "api and cuda"
+          pytest tests/test_examples.py -x -s -k "api and cuda"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/test_cli_cpu_ipex.yaml b/.github/workflows/test_cli_cpu_ipex.yaml
index d6b94d3e..c064f765 100644
--- a/.github/workflows/test_cli_cpu_ipex.yaml
+++ b/.github/workflows/test_cli_cpu_ipex.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml
index 05d43683..50622cea 100644
--- a/.github/workflows/test_cli_cpu_llama_cpp.yaml
+++ b/.github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "llama_cpp"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "llama_cpp"
diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml
index 21e65235..cb3085a0 100644
--- a/.github/workflows/test_cli_cpu_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cpu_onnxruntime.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and onnxruntime"
diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml
index 4612370c..442f0cd1 100644
--- a/.github/workflows/test_cli_cpu_openvino.yaml
+++ b/.github/workflows/test_cli_cpu_openvino.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
diff --git a/.github/workflows/test_cli_cpu_py_txi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml
index d07f6170..bf1b5adb 100644
--- a/.github/workflows/test_cli_cpu_py_txi.yaml
+++ b/.github/workflows/test_cli_cpu_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and py_txi"
diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml
index fef2a772..c2b9b720 100644
--- a/.github/workflows/test_cli_cpu_pytorch.yaml
+++ b/.github/workflows/test_cli_cpu_pytorch.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and pytorch"
diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml
index 0584665c..5044f9c6 100644
--- a/.github/workflows/test_cli_cuda_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -48,3 +48,6 @@ jobs:
       - name: Run tests
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and onnxruntime"
diff --git a/.github/workflows/test_cli_cuda_py_txi.yaml b/.github/workflows/test_cli_cuda_py_txi.yaml
index 7339b98e..4d21cd1c 100644
--- a/.github/workflows/test_cli_cuda_py_txi.yaml
+++ b/.github/workflows/test_cli_cuda_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:

       - name: Run tests
         run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml
index 0bc5dfaf..329c97ab 100644
--- a/.github/workflows/test_cli_cuda_pytorch.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"

+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
+
   run_cli_cuda_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
index acb04fe2..3c9c2925 100644
--- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
+++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"

+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
+
   cli_cuda_tensorrt_llm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml
index ee886e8c..06320b7c 100644
--- a/.github/workflows/test_cli_cuda_torch_ort.yaml
+++ b/.github/workflows/test_cli_cuda_torch_ort.yaml
@@ -51,6 +51,10 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"

+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"
+
   run_cli_cuda_torch_ort_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml
index 732513d2..c44c79a0 100644
--- a/.github/workflows/test_cli_cuda_vllm.yaml
+++ b/.github/workflows/test_cli_cuda_vllm.yaml
@@ -50,6 +50,10 @@ jobs:
         run: |
           FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"

+      - name: Run examples (sequential)
+        run: |
+          FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
+
   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_energy_star.yaml b/.github/workflows/test_energy_star.yaml
similarity index 84%
rename from .github/workflows/test_cli_energy_star.yaml
rename to .github/workflows/test_energy_star.yaml
index 24c487f6..91f7b14b 100644
--- a/.github/workflows/test_cli_energy_star.yaml
+++ b/.github/workflows/test_energy_star.yaml
@@ -20,13 +20,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

 jobs:
-  run_cli_energy_star_tests:
+  run_energy_star_tests:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'energy_star') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_energy_star')
+      contains( github.event.pull_request.labels.*.name, 'energy_star')
     }}

     runs-on:
diff --git a/examples/energy_star/_base_.yaml b/energy_star/_base_.yaml
similarity index 100%
rename from examples/energy_star/_base_.yaml
rename to energy_star/_base_.yaml
diff --git a/examples/energy_star/automatic_speech_recognition.yaml b/energy_star/automatic_speech_recognition.yaml
similarity index 100%
rename from examples/energy_star/automatic_speech_recognition.yaml
rename to energy_star/automatic_speech_recognition.yaml
diff --git a/examples/energy_star/image_classification.yaml b/energy_star/image_classification.yaml
similarity index 100%
rename from examples/energy_star/image_classification.yaml
rename to energy_star/image_classification.yaml
diff --git a/examples/energy_star/image_to_text.yaml b/energy_star/image_to_text.yaml
similarity index 100%
rename from examples/energy_star/image_to_text.yaml
rename to energy_star/image_to_text.yaml
diff --git a/examples/energy_star/object_detection.yaml b/energy_star/object_detection.yaml
similarity index 100%
rename from examples/energy_star/object_detection.yaml
rename to energy_star/object_detection.yaml
diff --git a/examples/energy_star/question_answering.yaml b/energy_star/question_answering.yaml
similarity index 100%
rename from examples/energy_star/question_answering.yaml
rename to energy_star/question_answering.yaml
diff --git a/examples/energy_star/sentence_similarity.yaml b/energy_star/sentence_similarity.yaml
similarity index 100%
rename from examples/energy_star/sentence_similarity.yaml
rename to energy_star/sentence_similarity.yaml
diff --git a/examples/energy_star/summarization.yaml b/energy_star/summarization.yaml
similarity index 100%
rename from examples/energy_star/summarization.yaml
rename to energy_star/summarization.yaml
diff --git a/examples/energy_star/t5_question_answering.yaml b/energy_star/t5_question_answering.yaml
similarity index 100%
rename from examples/energy_star/t5_question_answering.yaml
rename to energy_star/t5_question_answering.yaml
diff --git a/examples/energy_star/t5_summarization.yaml b/energy_star/t5_summarization.yaml
similarity index 100%
rename from examples/energy_star/t5_summarization.yaml
rename to energy_star/t5_summarization.yaml
diff --git a/examples/energy_star/t5_text_classification.yaml b/energy_star/t5_text_classification.yaml
similarity index 100%
rename from examples/energy_star/t5_text_classification.yaml
rename to energy_star/t5_text_classification.yaml
diff --git a/examples/energy_star/t5_text_generation.yaml b/energy_star/t5_text_generation.yaml
similarity index 100%
rename from examples/energy_star/t5_text_generation.yaml
rename to energy_star/t5_text_generation.yaml
diff --git a/examples/energy_star/text_classification.yaml b/energy_star/text_classification.yaml
similarity index 100%
rename from examples/energy_star/text_classification.yaml
rename to energy_star/text_classification.yaml
diff --git a/examples/energy_star/text_generation.yaml b/energy_star/text_generation.yaml
similarity index 100%
rename from examples/energy_star/text_generation.yaml
rename to energy_star/text_generation.yaml
diff --git a/examples/energy_star/text_to_image.yaml b/energy_star/text_to_image.yaml
similarity index 100%
rename from examples/energy_star/text_to_image.yaml
rename to energy_star/text_to_image.yaml
diff --git a/examples/ipex_bert.yaml b/examples/cpu_ipex_bert.yaml
similarity index 86%
rename from examples/ipex_bert.yaml
rename to examples/cpu_ipex_bert.yaml
index e549da0a..4f6f7fc5 100644
--- a/examples/ipex_bert.yaml
+++ b/examples/cpu_ipex_bert.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_bert
+name: cpu_ipex_bert

 launcher:
   numactl: true
@@ -14,16 +14,17 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: false
-  export: true
-  torch_dtype: bfloat16
-  model: bert-base-uncased
diff --git a/examples/ipex_llama.yaml b/examples/cpu_ipex_llama.yaml
similarity index 95%
rename from examples/ipex_llama.yaml
rename to examples/cpu_ipex_llama.yaml
index b564316b..6fad7a65 100644
--- a/examples/ipex_llama.yaml
+++ b/examples/cpu_ipex_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_llama
+name: cpu_ipex_llama

 launcher:
   numactl: true
@@ -14,24 +14,25 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+
 scenario:
-  latency: true
   memory: true
+  latency: true

   warmup_runs: 10
   iterations: 10
   duration: 10
-
+
   input_shapes:
     batch_size: 1
     sequence_length: 256
+
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
-
-backend:
-  device: cpu
-  export: true
-  no_weights: false
-  torch_dtype: bfloat16
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
diff --git a/examples/llama_cpp_embedding.yaml b/examples/cpu_llama_cpp_embedding.yaml
similarity index 72%
rename from examples/llama_cpp_embedding.yaml
rename to examples/cpu_llama_cpp_embedding.yaml
index bdd86cce..c5f6f628 100644
--- a/examples/llama_cpp_embedding.yaml
+++ b/examples/cpu_llama_cpp_embedding.yaml
@@ -1,26 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_embedding

 backend:
-  device: mps
-  model: nomic-ai/nomic-embed-text-v1.5-GGUF
+  device: cpu
   task: feature-extraction
+  model: nomic-ai/nomic-embed-text-v1.5-GGUF
   filename: nomic-embed-text-v1.5.Q4_0.gguf

 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
-    vocab_size: 30000
-    type_vocab_size: 1
-    max_position_embeddings: 512
+
   generate_kwargs:
     max_new_tokens: 100
     min_new_tokens: 100
diff --git a/examples/llama_cpp_text_generation.yaml b/examples/cpu_llama_cpp_text_generation.yaml
similarity index 82%
rename from examples/llama_cpp_text_generation.yaml
rename to examples/cpu_llama_cpp_text_generation.yaml
index 96def950..9edcd5c3 100644
--- a/examples/llama_cpp_text_generation.yaml
+++ b/examples/cpu_llama_cpp_text_generation.yaml
@@ -1,25 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_text_generation

 backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
+  device: cpu
   task: text-generation
+  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
-

 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
-    vocab_size: 32000
+
   generate_kwargs:
     max_new_tokens: 100
     min_new_tokens: 100
diff --git a/examples/onnxruntime_static_quant_vit.yaml b/examples/cpu_onnxruntime_static_quant_vit.yaml
similarity index 84%
rename from examples/onnxruntime_static_quant_vit.yaml
rename to examples/cpu_onnxruntime_static_quant_vit.yaml
index 3d298473..283ecb60 100644
--- a/examples/onnxruntime_static_quant_vit.yaml
+++ b/examples/cpu_onnxruntime_static_quant_vit.yaml
@@ -6,10 +6,11 @@ defaults:
   - _base_
   - _self_

-name: onnxruntime_static_quant_vit
+name: cpu_onnxruntime_static_quant_vit

 backend:
   device: cpu
+  export: true
   no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
diff --git a/examples/onnxruntime_timm.yaml b/examples/cpu_onnxruntime_timm.yaml
similarity index 82%
rename from examples/onnxruntime_timm.yaml
rename to examples/cpu_onnxruntime_timm.yaml
index 165fc28a..963f44f0 100644
--- a/examples/onnxruntime_timm.yaml
+++ b/examples/cpu_onnxruntime_timm.yaml
@@ -10,7 +10,8 @@ name: onnxruntime_timm

 backend:
   device: cpu
-  model: timm/mobilenetv3_large_100.ra_in1k
+  export: true
+  model: timm/tiny_vit_21m_224.in1k

 scenario:
   memory: true
diff --git a/examples/openvino_static_quant_bert.yaml b/examples/cpu_openvino_8bit.yaml
similarity index 66%
rename from examples/openvino_static_quant_bert.yaml
rename to examples/cpu_openvino_8bit.yaml
index caa4363a..02cd578c 100644
--- a/examples/openvino_static_quant_bert.yaml
+++ b/examples/cpu_openvino_8bit.yaml
@@ -6,16 +6,16 @@ defaults:
   - _base_
   - _self_

-name: openvino_static_quant_bert
+name: openvino_static_quant

 backend:
   device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  quantization: true
-  calibration: true
   reshape: true
+  no_weights: true
+  load_in_8bit: true
+  model: google-bert/bert-base-uncased

 scenario:
   input_shapes:
     batch_size: 1
+    sequence_length: 16
diff --git a/examples/openvino_diffusion.yaml b/examples/cpu_openvino_diffusion.yaml
similarity index 94%
rename from examples/openvino_diffusion.yaml
rename to examples/cpu_openvino_diffusion.yaml
index f0501101..0c2008db 100644
--- a/examples/openvino_diffusion.yaml
+++ b/examples/cpu_openvino_diffusion.yaml
@@ -9,11 +9,10 @@ defaults:
 name: openvino_diffusion

 backend:
+  half: true
   device: cpu
-  model: stabilityai/stable-diffusion-2-1
-  reshape: true
   export: true
-  half: true
+  model: stabilityai/stable-diffusion-2-1

 scenario:
   input_shapes:
diff --git a/examples/pytorch_bert.py b/examples/cuda_pytorch_bert.py
similarity index 93%
rename from examples/pytorch_bert.py
rename to examples/cuda_pytorch_bert.py
index 09f62b8d..82e1d56c 100644
--- a/examples/pytorch_bert.py
+++ b/examples/cuda_pytorch_bert.py
@@ -11,12 +11,13 @@
     print(f"Failed to get username from Hugging Face Hub: {e}")
     USERNAME = None

-BENCHMARK_NAME = "pytorch_bert"
+BENCHMARK_NAME = "cuda_pytorch_bert"
+MODEL = "google-bert/bert-base-uncased"


 def run_benchmark():
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
device_ids="0", no_weights=True, model="bert-base-uncased") + backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL) scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128}) benchmark_config = BenchmarkConfig( name=BENCHMARK_NAME, diff --git a/examples/pytorch_bert.yaml b/examples/cuda_pytorch_bert.yaml similarity index 100% rename from examples/pytorch_bert.yaml rename to examples/cuda_pytorch_bert.yaml diff --git a/examples/pytorch_llama.py b/examples/cuda_pytorch_llama.py similarity index 96% rename from examples/pytorch_llama.py rename to examples/cuda_pytorch_llama.py index fe732bfa..b515019c 100644 --- a/examples/pytorch_llama.py +++ b/examples/cuda_pytorch_llama.py @@ -11,7 +11,8 @@ print(f"Failed to get username from Hugging Face Hub: {e}") USERNAME = None -BENCHMARK_NAME = "pytorch-llama" +BENCHMARK_NAME = "cuda_pytorch_llama" +MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" WEIGHTS_CONFIGS = { "float16": { @@ -40,10 +41,10 @@ def run_benchmark(weight_config: str): launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn") backend_config = PyTorchConfig( + model=MODEL, device="cuda", device_ids="0", no_weights=True, - model="gpt2", **WEIGHTS_CONFIGS[weight_config], ) scenario_config = InferenceConfig( diff --git a/examples/pytorch_llama.yaml b/examples/cuda_pytorch_llama.yaml similarity index 83% rename from examples/pytorch_llama.yaml rename to examples/cuda_pytorch_llama.yaml index becd1f2e..aaf46098 100644 --- a/examples/pytorch_llama.yaml +++ b/examples/cuda_pytorch_llama.yaml @@ -6,16 +6,17 @@ defaults: - _base_ - _self_ -name: pytorch_llama +name: cuda_pytorch_llama launcher: device_isolation: true device_isolation_action: warn backend: - model: gpt2 device: cuda + no_weights: true torch_dtype: float16 + model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: memory: true diff --git a/examples/pytorch_vlm.yaml b/examples/cuda_pytorch_vlm.yaml similarity index 96% rename from examples/pytorch_vlm.yaml rename to examples/cuda_pytorch_vlm.yaml index a39f8c8a..fa3b4e99 100644 --- a/examples/pytorch_vlm.yaml +++ b/examples/cuda_pytorch_vlm.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: pytorch_vlm +name: cuda_pytorch_vlm launcher: device_isolation: true diff --git a/examples/tgi_llama.yaml b/examples/cuda_tgi_llama.yaml similarity index 71% rename from examples/tgi_llama.yaml rename to examples/cuda_tgi_llama.yaml index 399667fb..df1ab8ec 100644 --- a/examples/tgi_llama.yaml +++ b/examples/cuda_tgi_llama.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: tgi_llama +name: cuda_tgi_llama launcher: device_isolation: true @@ -14,14 +14,15 @@ launcher: backend: device: cuda - device_ids: 4 - # no_weights: true + device_ids: 0 + no_weights: true model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: input_shapes: batch_size: 4 - sequence_length: 256 + sequence_length: 64 + generate_kwargs: - max_new_tokens: 100 - min_new_tokens: 100 + max_new_tokens: 16 + min_new_tokens: 16 diff --git a/examples/trt_llama.yaml b/examples/cuda_trt_llama.yaml similarity index 79% rename from examples/trt_llama.yaml rename to examples/cuda_trt_llama.yaml index 30cb600a..7ed79f31 100644 --- a/examples/trt_llama.yaml +++ b/examples/cuda_trt_llama.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: trt_llama +name: cuda_trt_llama launcher: device_isolation: true @@ -21,7 +21,8 @@ backend: scenario: input_shapes: batch_size: 4 - sequence_length: 256 + sequence_length: 64 + 
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 16
+    min_new_tokens: 16
diff --git a/examples/vllm_llama.yaml b/examples/cuda_vllm_llama.yaml
similarity index 77%
rename from examples/vllm_llama.yaml
rename to examples/cuda_vllm_llama.yaml
index 8bbb4025..044928a3 100644
--- a/examples/vllm_llama.yaml
+++ b/examples/cuda_vllm_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: vllm_llama
+name: cuda_vllm_llama

 launcher:
   device_isolation: true
@@ -15,7 +15,7 @@ launcher:
 backend:
   device: cuda
   device_ids: 0
-  no_weights: false
+  no_weights: true
   serving_mode: offline
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
   engine_args:
@@ -24,7 +24,8 @@ backend:
 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 16
+    min_new_tokens: 16
diff --git a/examples/pytorch_bert_mps.yaml b/examples/mps_pytorch_bert.yaml
similarity index 67%
rename from examples/pytorch_bert_mps.yaml
rename to examples/mps_pytorch_bert.yaml
index 4d4dc6e3..8c6bae9a 100644
--- a/examples/pytorch_bert_mps.yaml
+++ b/examples/mps_pytorch_bert.yaml
@@ -1,15 +1,12 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: process # launcher: inline works,
+  - launcher: inline # mps has problems with multi processing (process launcher)
   - backend: pytorch
   - _base_
   - _self_

-name: pytorch_bert
-
-# launcher:
-#   start_method: spawn
+name: mps_pytorch_bert

 scenario:
   latency: true
@@ -19,8 +16,6 @@ scenario:
     sequence_length: 128

 backend:
-  device: cpu
+  device: mps
   no_weights: true
   model: bert-base-uncased
-
-
diff --git a/examples/neural_compressor_ptq_bert.yaml b/examples/neural_compressor_ptq_bert.yaml
deleted file mode 100644
index cbc32590..00000000
--- a/examples/neural_compressor_ptq_bert.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-defaults:
-  - benchmark
-  - backend: neural-compressor
-  - scenario: inference
-  - launcher: process
-  - _base_
-  - _self_
-
-name: neural_compressor_ptq_bert
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  ptq_quantization: true
-  calibration: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
diff --git a/examples/numactl_bert.yaml b/examples/numactl_bert.yaml
deleted file mode 100644
index 7add65e7..00000000
--- a/examples/numactl_bert.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: process
-  - backend: pytorch
-  - _base_
-  - _self_
-
-name: pytorch_bert
-
-launcher:
-  numactl: true
-  numactl_kwargs:
-    cpunodebind: 0
-    membind: 0
-
-scenario:
-  latency: true
-  memory: true
-  input_shapes:
-    batch_size: 1
-    sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
diff --git a/examples/tei_bge.yaml b/examples/tei_bge.yaml
deleted file mode 100644
index dbbab7d5..00000000
--- a/examples/tei_bge.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: py-txi
-  - _self_
-
-name: tei_bert
-
-launcher:
-  device_isolation: true
-  device_isolation_action: warn
-
-backend:
-  device: cpu
-  model: BAAI/bge-base-en-v1.5
-
-scenario:
-  input_shapes:
-    batch_size: 64
-    sequence_length: 128
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 4b26266b..5af0723b 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -10,12 +10,10 @@
     Benchmark,
     BenchmarkConfig,
     EnergyStarConfig,
-    INCConfig,
     InferenceConfig,
     InlineConfig,
     IPEXConfig,
     LlamaCppConfig,
-    LLMSwarmConfig,
     ORTConfig,
     OVConfig,
     ProcessConfig,
@@ -43,9 +41,7 @@
 cs.store(group="backend", name=ORTConfig.name, node=ORTConfig)
 cs.store(group="backend", name=TorchORTConfig.name, node=TorchORTConfig)
 cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig)
-cs.store(group="backend", name=INCConfig.name, node=INCConfig)
 cs.store(group="backend", name=PyTXIConfig.name, node=PyTXIConfig)
-cs.store(group="backend", name=LLMSwarmConfig.name, node=LLMSwarmConfig)
 cs.store(group="backend", name=VLLMConfig.name, node=VLLMConfig)
 cs.store(group="backend", name=LlamaCppConfig.name, node=LlamaCppConfig)
 # scenarios configurations
diff --git a/tests/test_energy_star.py b/tests/test_energy_star.py
index bbb83f55..6e6d1f5e 100644
--- a/tests/test_energy_star.py
+++ b/tests/test_energy_star.py
@@ -9,7 +9,8 @@

 LOGGER = getLogger("test-cli")

-TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples/energy_star"
+TEST_CONFIG_DIR = Path(__file__).parent.parent / "energy_star"
+
 TEST_CONFIG_NAMES = [
     config.split(".")[0]
     for config in os.listdir(TEST_CONFIG_DIR)
diff --git a/tests/test_examples.py b/tests/test_examples.py
new file mode 100644
index 00000000..d395a4bb
--- /dev/null
+++ b/tests/test_examples.py
@@ -0,0 +1,49 @@
+import os
+from logging import getLogger
+from pathlib import Path
+
+import pytest
+
+from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output
+
+LOGGER = getLogger("test-examples")
+
+
+TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples"
+
+TEST_CONFIG_NAMES = [
+    config.split(".")[0]
+    for config in os.listdir(TEST_CONFIG_DIR)
+    if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_"))
+]
+
+TEST_SCRIPT_PATHS = [TEST_CONFIG_DIR / filename for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py")]
+
+ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None)
+CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None)
+
+
+@pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES)
+def test_cli_configs(config_name):
+    args = ["optimum-benchmark", "--config-dir", TEST_CONFIG_DIR, "--config-name", config_name]
+
+    if ROCR_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"']
+    elif CUDA_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"']
+
+    popen = run_subprocess_and_log_stream_output(LOGGER, args)
+    assert popen.returncode == 0, f"Failed to run {config_name}"
+
+
+@pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS)
+def test_api_scripts(script_path):
+    args = ["python", script_path]
+
+    if ROCR_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"']
+    elif CUDA_VISIBLE_DEVICES is not None:
+        args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"']
+
+    popen = run_subprocess_and_log_stream_output(LOGGER, args)
+    assert popen.returncode == 0, f"Failed to run {script_path}"