[fix] Allow ORTQuantizer over models with subfolder ONNX files (#2094)

tomaarsen · web-flow · commit 400bb82f3120 · 2024-11-18T15:05:37.000+01:00
* Allow ORTQuantizer over models with subfolder ONNX files

* Also catch ValueError, as that seems to be a common failure when calling AutoConfig.from_pretrained("does/not/exist")

* Use test case that previously failed
diff --git a/optimum/onnxruntime/quantization.py b/optimum/onnxruntime/quantization.py
@@ -100,7 +100,7 @@ def __init__(self, onnx_model_path: Path, config: Optional["PretrainedConfig"] =
         if self.config is None:
             try:
                 self.config = AutoConfig.from_pretrained(self.onnx_model_path.parent)
-            except OSError:
+            except (OSError, ValueError):
                 LOGGER.warning(
                     f"Could not load the config for {self.onnx_model_path} automatically, this might make "
                     "the quantized model harder to use because it will not be able to be loaded by an ORTModel without "
@@ -134,6 +134,7 @@ def from_pretrained(
             model_or_path = Path(model_or_path)
 
         path = None
+        config = None
         if isinstance(model_or_path, ORTModelForConditionalGeneration):
             raise NotImplementedError(ort_quantizer_error_message)
         elif isinstance(model_or_path, Path) and file_name is None:
@@ -147,13 +148,13 @@ def from_pretrained(
             file_name = onnx_files[0].name
 
         if isinstance(model_or_path, ORTModel):
-            if path is None:
-                path = Path(model_or_path.model._model_path)
+            path = Path(model_or_path.model._model_path)
+            config = model_or_path.config
         elif os.path.isdir(model_or_path):
             path = Path(model_or_path) / file_name
         else:
             raise ValueError(f"Unable to load model from {model_or_path}.")
-        return cls(path)
+        return cls(path, config=config)
 
     def fit(
         self,
diff --git a/tests/onnxruntime/test_quantization.py b/tests/onnxruntime/test_quantization.py
@@ -30,6 +30,7 @@
     AutoQuantizationConfig,
     ORTConfig,
     ORTModelForCausalLM,
+    ORTModelForFeatureExtraction,
     ORTModelForSeq2SeqLM,
     ORTModelForSequenceClassification,
     ORTQuantizer,
@@ -52,6 +53,13 @@ class ORTQuantizerTest(unittest.TestCase):
                 "optimum/distilbert-base-uncased-finetuned-sst-2-english"
             )
         },
+        "ort_model_with_onnx_model_in_subfolder": {
+            "model_or_path": ORTModelForFeatureExtraction.from_pretrained(
+                "sentence-transformers/all-MiniLM-L6-v2",
+                subfolder="onnx",
+                file_name="model.onnx",
+            )
+        },
     }
 
     @parameterized.expand(LOAD_CONFIGURATION.items())