Skip to content

Commit c567e43

Browse files
authored
Remove OpenVINO support (#3153)
1 parent 9cd57b6 commit c567e43

File tree

7 files changed

+5
-81
lines changed

7 files changed

+5
-81
lines changed

.github/workflows/test-daily-integration.yml

-23
This file was deleted.

docs/huggingface_models.md

-14
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,3 @@ helm-run \
4747
--suite v1 \
4848
--max-eval-instances 10
4949
```
50-
51-
To use Optimum Intel, add `--openvino` flag to `helm-run`. Optimum Intel provides a simple interface to optimize Transformer models and convert them to OpenVINO™ Intermediate Representation format to accelerate end-to-end pipelines on Intel® architectures using OpenVINO™ runtime. It runs the model on the CPU.
52-
53-
Examples:
54-
55-
```bash
56-
# Run boolq on stanford-crfm/BioMedLM optimized by Optimum Intel OpenVINO
57-
helm-run \
58-
--run-entries boolq:model=stanford-crfm/BioMedLM \
59-
--enable-huggingface-models stanford-crfm/BioMedLM \
60-
--suite v1 \
61-
--max-eval-instances 10 \
62-
--openvino
63-
```

requirements.txt

-3
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,6 @@ open_clip_torch==2.26.1
180180
openai==1.48.0
181181
opencv-python==4.8.1.78
182182
opencv-python-headless==4.10.0.84
183-
openvino==2024.4.0
184-
openvino-telemetry==2024.1.0
185-
openvino-tokenizers==2024.4.0.0
186183
opt-einsum==3.3.0
187184
optax==0.2.3
188185
optimum==1.22.0

setup.cfg

-4
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,6 @@ aleph-alpha =
126126
aleph-alpha-client~=2.14.0
127127
tokenizers>=0.13.3
128128

129-
openvino =
130-
optimum[openvino]~=1.19
131-
132129
allenai =
133130
ai2-olmo~=0.2
134131

@@ -175,7 +172,6 @@ models =
175172
crfm-helm[reka]
176173
crfm-helm[together]
177174
crfm-helm[yandex]
178-
crfm-helm[openvino]
179175

180176
reka =
181177
reka-api~=2.0.0

src/helm/benchmark/huggingface_registration.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,10 @@ def register_huggingface_model(
2020
helm_model_name: str,
2121
pretrained_model_name_or_path: str,
2222
revision: Optional[str] = None,
23-
openvino: Optional[bool] = False,
2423
) -> None:
2524
object_spec_args: Dict[str, Union[str, bool]] = {"pretrained_model_name_or_path": pretrained_model_name_or_path}
2625
if revision:
2726
object_spec_args["revision"] = revision
28-
if openvino:
29-
object_spec_args["openvino"] = openvino
3027

3128
# Auto-infer model properties from the tokenizer.
3229
create_tokenizer_args: Dict[str, str] = {"pretrained_model_name_or_path": pretrained_model_name_or_path}
@@ -79,7 +76,7 @@ def register_huggingface_model(
7976
register_tokenizer_config(tokenizer_config)
8077

8178

82-
def register_huggingface_hub_model_from_flag_value(raw_model_string: str, openvino=False) -> None:
79+
def register_huggingface_hub_model_from_flag_value(raw_model_string: str) -> None:
8380
raw_model_string_parts = raw_model_string.split("@")
8481
pretrained_model_name_or_path: str
8582
revision: Optional[str]
@@ -96,17 +93,15 @@ def register_huggingface_hub_model_from_flag_value(raw_model_string: str, openvi
9693
helm_model_name=raw_model_string,
9794
pretrained_model_name_or_path=pretrained_model_name_or_path,
9895
revision=revision,
99-
openvino=openvino,
10096
)
10197

10298

103-
def register_huggingface_local_model_from_flag_value(path: str, openvino=False) -> None:
99+
def register_huggingface_local_model_from_flag_value(path: str) -> None:
104100
if not path:
105101
raise ValueError("Path to Hugging Face model must be non-empty")
106102
path_parts = os.path.split(path)
107103
helm_model_name = f"huggingface/{path_parts[-1]}"
108104
register_huggingface_model(
109105
helm_model_name=helm_model_name,
110106
pretrained_model_name_or_path=path,
111-
openvino=openvino,
112107
)

src/helm/benchmark/run.py

+2-15
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,6 @@ def main():
266266
default=None,
267267
help="Full class name of the Runner class to use. If unset, uses the default Runner.",
268268
)
269-
parser.add_argument(
270-
"--openvino",
271-
action="store_true",
272-
default=False,
273-
help="Experimental: Apply openvino optimization to Hugging Face AutoModelForCausalLM models "
274-
"specified with the --enable-huggingface-models and --enable-local-huggingface-models flags.",
275-
)
276269
add_run_args(parser)
277270
args = parser.parse_args()
278271
validate_args(args)
@@ -284,19 +277,13 @@ def main():
284277
from helm.benchmark.huggingface_registration import register_huggingface_hub_model_from_flag_value
285278

286279
for huggingface_model_name in args.enable_huggingface_models:
287-
if args.openvino:
288-
register_huggingface_hub_model_from_flag_value(huggingface_model_name, args.openvino)
289-
else:
290-
register_huggingface_hub_model_from_flag_value(huggingface_model_name)
280+
register_huggingface_hub_model_from_flag_value(huggingface_model_name)
291281

292282
if args.enable_local_huggingface_models:
293283
from helm.benchmark.huggingface_registration import register_huggingface_local_model_from_flag_value
294284

295285
for huggingface_model_path in args.enable_local_huggingface_models:
296-
if args.openvino:
297-
register_huggingface_local_model_from_flag_value(huggingface_model_path, args.openvino)
298-
else:
299-
register_huggingface_local_model_from_flag_value(huggingface_model_path)
286+
register_huggingface_local_model_from_flag_value(huggingface_model_path)
300287

301288
run_entries: List[RunEntry] = []
302289
if args.conf_paths:

src/helm/clients/huggingface_client.py

+1-15
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ def __init__(
5959
self,
6060
pretrained_model_name_or_path: str,
6161
wrapped_tokenizer: WrappedPreTrainedTokenizer,
62-
openvino: bool = False,
6362
**kwargs,
6463
):
6564
self.device: Optional[str]
@@ -92,20 +91,7 @@ def __init__(
9291

9392
with htrack_block(f"Loading Hugging Face model {pretrained_model_name_or_path}"):
9493
# WARNING this may fail if your GPU does not have enough memory
95-
if openvino:
96-
# Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
97-
# OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
98-
# Intel® architectures using OpenVINO™ runtime.
99-
try:
100-
from optimum.intel.openvino import OVModelForCausalLM
101-
except ModuleNotFoundError as e:
102-
handle_module_not_found_error(e, ["openvino"])
103-
104-
self.device = "cpu"
105-
self.model = OVModelForCausalLM.from_pretrained(
106-
pretrained_model_name_or_path, export=True, **kwargs
107-
).to(self.device)
108-
elif self.device is None:
94+
if self.device is None:
10995
# kwargs contains device_map=auto
11096
# Do not call to() because accelerate will take care of model device placement.
11197
self.model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, **kwargs)

0 commit comments

Comments
 (0)