openvinotoolkit · alexsu52 · Feb 24, 2025 · Feb 4, 2025 · Feb 4, 2025 · Feb 4, 2025
@@ -2,8 +2,8 @@
 openvino==2025.0.0
 
 # Pytorch
-torch==2.5.1
-torchvision==0.20.1
+torch==2.6.0
+torchvision==0.21.0
 
 # ONNX
 onnx==1.17.0

@@ -49,7 +49,7 @@ as well as the supported versions of Python:
 
 | NNCF      | OpenVINO   | PyTorch  | ONNX     | TensorFlow | Python |
 |-----------|------------|----------|----------|------------|--------|
-| `develop` | `2025.0.0` | `2.5.1`  | `1.17.0` | `2.15.1`   | `3.10` |
+| `develop` | `2025.0.0` | `2.6.0`  | `1.17.0` | `2.15.1`   | `3.10` |
 | `2.15.0`  | `2025.0.0` | `2.5.1`  | `1.17.0` | `2.15.1`   | `3.10` |
 | `2.14.1`  | `2024.6.0` | `2.5.1`  | `1.17.0` | `2.15.1`   | `3.10` |
 | `2.14.0`  | `2024.5.0` | `2.5.1`  | `1.17.0` | `2.15.1`   | `3.10` |

@@ -140,9 +140,9 @@ def validate_model_fn_top1(model_, loader_):
     nncf_network = create_nncf_network(model, nncf_config)
 
     if config.search_mode_active:
-        compression_state = torch.load(config.search_elasticity_state_path)
+        compression_state = torch.load(config.search_elasticity_state_path, weights_only=False)
         model, elasticity_ctrl = resume_compression_from_state(nncf_network, compression_state)
-        model_weights = torch.load(config.search_supernet_weights)
+        model_weights = torch.load(config.search_supernet_weights, weights_only=False)
 
         load_state(model, model_weights, is_resume=True)
 

diff --git a/examples/llm_compression/openvino/smollm2_360m_fp8/requirements.txt b/examples/llm_compression/openvino/smollm2_360m_fp8/requirements.txt
@@ -1,5 +1,5 @@
 datasets
 openvino==2024.6
-optimum-intel[openvino]
-transformers
+optimum-intel[openvino]>=1.22.0
+transformers>=4.48.0
 onnx==1.17.0
diff --git a/examples/llm_compression/openvino/tiny_llama/requirements.txt b/examples/llm_compression/openvino/tiny_llama/requirements.txt
@@ -1,5 +1,5 @@
-transformers
+transformers>=4.48.0
 datasets==2.14.7
 openvino==2025.0
-optimum-intel[openvino]
+optimum-intel[openvino]>=1.22.0
 onnx==1.17.0
diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt
@@ -2,6 +2,6 @@ datasets
 whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai#subdirectory=tools/who_what_benchmark
 numpy>=1.23.5,<2
 openvino==2025.0
-optimum-intel>=1.13.0
-transformers>=4.35.2
+optimum-intel>=1.22.0
+transformers>=4.48.0
 onnx==1.17.0
@@ -1,7 +1,7 @@
-torch==2.5.1
+torch==2.6.0
 datasets==3.0.1
 numpy>=1.23.5,<2
 openvino==2025.0
-optimum-intel>=1.13.0
-transformers>=4.35.2
+optimum-intel>=1.22.0
+transformers>=4.48.0
 onnx==1.17.0
@@ -1,6 +1,6 @@
 fastdownload==0.0.7
 openvino==2025.0
 scikit-learn
-torch==2.5.1
-torchvision==0.20.1
+torch==2.6.0
+torchvision==0.21.0
 setuptools<=72.1.0
@@ -2,8 +2,8 @@ fastdownload==0.0.7
 onnx==1.17.0
 openvino==2025.0
 pycocotools==2.0.7
-torch==2.5.1
+torch==2.6.0
 torchmetrics==1.0.1
-torchvision==0.20.1
+torchvision==0.21.0
 numpy<2
 setuptools<=72.1.0
@@ -1,4 +1,4 @@
 fastdownload==0.0.7
 openvino==2025.0
-torch==2.5.1
-torchvision==0.20.1
+torch==2.6.0
+torchvision==0.21.0
@@ -293,7 +293,7 @@ def transform_fn(data_item):
             acc1_int8_best = acc1_int8
 
     # Load quantization modules and parameters from best checkpoint to the source model.
-    ckpt = torch.load(ROOT / BEST_CKPT_NAME)
+    ckpt = torch.load(ROOT / BEST_CKPT_NAME, weights_only=False)
     quantized_model = nncf.torch.load_from_config(
         deepcopy(model), ckpt["compression_config"], torch.ones((1, 3, IMAGE_SIZE, IMAGE_SIZE)).to(device)
     )

@@ -1,5 +1,5 @@
 fastdownload==0.0.7
 openvino==2025.0
-torch==2.5.1
-torchvision==0.20.1
+torch==2.6.0
+torchvision==0.21.0
 setuptools<=72.1.0
@@ -57,7 +57,7 @@ def load_model(
         # Check if provided path is a url and download the checkpoint if yes
         if is_url(weights_path):
             weights_path = download_checkpoint(weights_path)
-        sd = torch.load(weights_path, map_location="cpu", pickle_module=restricted_pickle_module)
+        sd = torch.load(weights_path, map_location="cpu", pickle_module=restricted_pickle_module, weights_only=False)
         if MODEL_STATE_ATTR in sd:
             sd = sd[MODEL_STATE_ATTR]
         load_state(loaded_model, sd, is_resume=False)
@@ -71,7 +71,9 @@ def load_model(
 def load_resuming_checkpoint(resuming_checkpoint_path: str):
     if osp.isfile(resuming_checkpoint_path):
         logger.info(f"=> loading checkpoint '{resuming_checkpoint_path}'")
-        checkpoint = torch.load(resuming_checkpoint_path, map_location="cpu", pickle_module=restricted_pickle_module)
+        checkpoint = torch.load(
+            resuming_checkpoint_path, map_location="cpu", pickle_module=restricted_pickle_module, weights_only=False
+        )
         return checkpoint
     msg = f"no checkpoint found at '{resuming_checkpoint_path}'"
     raise FileNotFoundError(msg)

@@ -143,6 +143,6 @@ def mobilenet_v2_cifar10(pretrained=False, progress=True, device="cpu", **kwargs
     model = MobileNetV2(**kwargs)
     if pretrained:
         script_dir = os.path.dirname(__file__)
-        state_dict = torch.load(script_dir + "/state_dicts/mobilenet_v2.pt", map_location=device)
+        state_dict = torch.load(script_dir + "/state_dicts/mobilenet_v2.pt", map_location=device, weights_only=False)
         model.load_state_dict(state_dict)
     return model
@@ -270,7 +270,7 @@ def _resnet(arch, block, layers, pretrained, progress, device, **kwargs):
     model = ResNet(block, layers, **kwargs)
     if pretrained:
         script_dir = os.path.dirname(__file__)
-        state_dict = torch.load(script_dir + "/state_dicts/" + arch + ".pt", map_location=device)
+        state_dict = torch.load(script_dir + "/state_dicts/" + arch + ".pt", map_location=device, weights_only=False)
         model.load_state_dict(state_dict)
     return model
 

@@ -136,7 +136,7 @@ def _vgg(arch, cfg, batch_norm, pretrained, progress, device, **kwargs):
     model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
     if pretrained:
         script_dir = os.path.dirname(__file__)
-        state_dict = torch.load(script_dir + "/state_dicts/" + arch + ".pt", map_location=device)
+        state_dict = torch.load(script_dir + "/state_dicts/" + arch + ".pt", map_location=device, weights_only=False)
         model.load_state_dict(state_dict)
     return model
 

@@ -361,7 +361,7 @@ def create_model(config: SampleConfig):
     ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)
     weights = config.get("weights")
     if weights:
-        sd = torch.load(weights, map_location="cpu", pickle_module=restricted_pickle_module)
+        sd = torch.load(weights, map_location="cpu", pickle_module=restricted_pickle_module, weights_only=False)
         sd = sd["state_dict"]
         load_state(ssd_net, sd)
 

@@ -109,7 +109,9 @@ def build_ssd_mobilenet(cfg, size, num_classes, config):
         # may be used to perform arbitrary code execution during unpickling. Only load the data you
         # trust.
         #
-        basenet_weights = torch.load(config.basenet, pickle_module=restricted_pickle_module)["state_dict"]
+        basenet_weights = torch.load(config.basenet, pickle_module=restricted_pickle_module, weights_only=False)[
+            "state_dict"
+        ]
         new_weights = {}
         for wn, wv in basenet_weights.items():
             wn = wn.replace("model.", "")

@@ -81,7 +81,12 @@ def load_weights(self, base_file):
             # trust.
             #
             self.load_state_dict(
-                torch.load(base_file, map_location=lambda storage, loc: storage, pickle_module=restricted_pickle_module)
+                torch.load(
+                    base_file,
+                    weights_only=False,
+                    map_location=lambda storage, loc: storage,
+                    pickle_module=restricted_pickle_module,
+                )
             )
             logger.debug("Finished!")
         else:
@@ -170,7 +175,7 @@ def build_ssd_vgg(cfg, size, num_classes, config):
 
     if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
         logger.debug("Loading base network...")
-        basenet_weights = torch.load(config.basenet, pickle_module=restricted_pickle_module)
+        basenet_weights = torch.load(config.basenet, pickle_module=restricted_pickle_module, weights_only=False)
         new_weights = {}
         for wn, wv in basenet_weights.items():
             wn = wn.replace("features.", "")

@@ -556,7 +556,7 @@ def __init__(
         self._filter_importance_fn = filter_importance_fn
         self._external_importance = None
         if external_importance_path is not None:
-            self._external_importance = torch.load(external_importance_path)
+            self._external_importance = torch.load(external_importance_path, weights_only=False)
             nncf_logger.debug("Loaded custom external weight importance.")
         self._weights_normalizer_fn = weights_normalizer_fn
         self._add_dynamic_inputs = add_dynamic_inputs

@@ -60,9 +60,13 @@ def from_checkpoint(
         :return: SuperNetwork with wrapped functionality.
         """
         nncf_network = create_nncf_network(model, nncf_config)
-        compression_state = torch.load(supernet_elasticity_path, map_location=torch.device(nncf_config.device))
+        compression_state = torch.load(
+            supernet_elasticity_path, map_location=torch.device(nncf_config.device), weights_only=False
+        )
         model, elasticity_ctrl = resume_compression_from_state(nncf_network, compression_state)
-        model_weights = torch.load(supernet_weights_path, map_location=torch.device(nncf_config.device))
+        model_weights = torch.load(
+            supernet_weights_path, map_location=torch.device(nncf_config.device), weights_only=False
+        )
         load_state(model, model_weights, is_resume=True)
         elasticity_ctrl.multi_elasticity_handler.activate_maximum_subnet()
         return TrainedSuperNet(elasticity_ctrl, model)

@@ -255,7 +255,7 @@ def from_checkpoint(
             msg = f"no checkpoint found at '{resuming_checkpoint_path}'"
             raise FileNotFoundError(msg)
         nncf_logger.info(f"=> loading checkpoint '{resuming_checkpoint_path}'")
-        checkpoint = torch.load(resuming_checkpoint_path, map_location="cpu")
+        checkpoint = torch.load(resuming_checkpoint_path, map_location="cpu", weights_only=False)
 
         training_state = checkpoint[cls._state_names.TRAINING_ALGO_STATE]
         nncf_config = NNCFConfig()

@@ -64,7 +64,6 @@ def multi_head_attention_forward(
 ) -> Tuple[Tensor, Optional[Tensor]]:
 
     is_batched = _mha_shape_check(query, key, value, key_padding_mask, attn_mask, num_heads)
-
     if not is_batched:
         query = query.unsqueeze(1)
         key = key.unsqueeze(1)
@@ -91,7 +90,6 @@ def multi_head_attention_forward(
         )
 
     if is_causal and key_padding_mask is None and not need_weights:
-
         attn_mask = None
     else:
         attn_mask = _canonical_mask(
@@ -104,7 +102,6 @@ def multi_head_attention_forward(
         )
 
         if key_padding_mask is not None:
-
             is_causal = False
 
     assert (
@@ -133,7 +130,17 @@ def multi_head_attention_forward(
             b_q = b_k = b_v = None
         else:
             b_q, b_k, b_v = in_proj_bias.chunk(3)
-        q, k, v = _in_projection(query, key, value, q_proj_weight, k_proj_weight, v_proj_weight, b_q, b_k, b_v)
+        q, k, v = _in_projection(
+            query,
+            key,
+            value,
+            q_proj_weight,
+            k_proj_weight,
+            v_proj_weight,
+            b_q,
+            b_k,
+            b_v,
+        )
 
     if attn_mask is not None:
         if attn_mask.dim() == 2:
@@ -195,10 +202,9 @@ def multi_head_attention_forward(
     src_len = k.size(1)
 
     if key_padding_mask is not None:
-        assert key_padding_mask.shape == (
-            bsz,
-            src_len,
-        ), f"expecting key_padding_mask shape of {(bsz, src_len)}, but got {key_padding_mask.shape}"
+        if not torch.jit.is_scripting() and not torch.jit.is_tracing():  # type: ignore
+            _check_key_padding_mask(key_padding_mask, src_len, bsz)  # type: ignore
+
         key_padding_mask = (
             key_padding_mask.view(bsz, 1, 1, src_len).expand(-1, num_heads, -1, -1).reshape(bsz * num_heads, 1, src_len)
         )
@@ -211,7 +217,7 @@ def multi_head_attention_forward(
         dropout_p = 0.0
 
     if need_weights:
-        B, Nt, E = q.shape  # noqa: F841
+        _B, _Nt, E = q.shape  # noqa: F841
         q_scaled = q * math.sqrt(1.0 / float(E))
 
         assert not (is_causal and attn_mask is None), "FIXME: is_causal not implemented for need_weights"

@@ -122,7 +122,7 @@ def _load_checkpoint(self, model, checkpoint_path):
         if self._load_checkpoint_fn is not None:
             self._load_checkpoint_fn(model, checkpoint_path)
         else:
-            resuming_checkpoint = torch.load(checkpoint_path, map_location="cpu")
+            resuming_checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
             resuming_model_state_dict = resuming_checkpoint.get("state_dict", resuming_checkpoint)
             load_state(model, resuming_model_state_dict, is_resume=True)
 

@@ -269,9 +269,9 @@ def load_weights(self, output):
         if output is None:
             return
 
-        self.actor.load_state_dict(torch.load(f"{output}/actor.pkl"))
+        self.actor.load_state_dict(torch.load(f"{output}/actor.pkl", weights_only=False))
 
-        self.critic.load_state_dict(torch.load(f"{output}/critic.pkl"))
+        self.critic.load_state_dict(torch.load(f"{output}/critic.pkl", weights_only=False))
 
     def save_model(self, output):
         torch.save(self.actor.state_dict(), f"{output}/actor.pkl")

@@ -81,7 +81,7 @@ at::Tensor q_forward(
 
     at::Tensor output;
 
-    DISPATCH_TENSOR_DATA_TYPES(input.type(), "q_cpu_forward", ([&] {
+    DISPATCH_TENSOR_DATA_TYPES(input.scalar_type(), "q_cpu_forward", ([&] {
       output = q_cpu_forward<scalar_t>(input, input_low, input_range, levels);
     }));
 
@@ -103,7 +103,7 @@ std::vector<at::Tensor> q_backward(
     CHECK_INPUT(input_range);
 
     std::vector<at::Tensor> results;
-    DISPATCH_TENSOR_DATA_TYPES(input.type(), "q_cpu_backward", ([&] {
+    DISPATCH_TENSOR_DATA_TYPES(input.scalar_type(), "q_cpu_backward", ([&] {
         results = q_cpu_backward<scalar_t>(grad_output, input, input_low, input_range, levels, level_low, level_high, is_asymmetric);
     }));
 

@@ -478,7 +478,7 @@ def _calc_traces(
         tolerance: float,
     ) -> TracesPerLayer:
         if self._traces_per_layer_path:
-            return TracesPerLayer(torch.load(self._traces_per_layer_path).to(self._init_device))
+            return TracesPerLayer(torch.load(self._traces_per_layer_path, weights_only=False).to(self._init_device))
 
         quantizers_switcher = QuantizersSwitcher(list(self._all_quantizers_per_scope.values()))
         params_to_restore = self.disable_all_gradients_except_weights_of_quantized_modules(

@@ -12,6 +12,6 @@
 __version__ = "2.16.0"
 
 
-BKC_TORCH_SPEC = "==2.5.*"
+BKC_TORCH_SPEC = "==2.6.*"
 BKC_TF_SPEC = "==2.15.*"
 STRICT_TF_SPEC = ">=2.9.3,<2.16.0"
diff --git a/tests/openvino/native/quantization/test_weights_compression_statistics_caching.py b/tests/openvino/native/quantization/test_weights_compression_statistics_caching.py
@@ -14,6 +14,7 @@
 from typing import Tuple
 
 import datasets
+import numpy as np
 import openvino as ov
 from optimum.intel.openvino import OVModelForCausalLM
 from transformers import AutoTokenizer
@@ -37,6 +38,11 @@ def transform_fn(data, model=model, tokenizer=tokenizer):
         input_ids = tokenized_text["input_ids"]
         inputs = {"input_ids": input_ids, "attention_mask": tokenized_text["attention_mask"]}
 
+        if "position_ids" in model.input_names:
+            position_ids = np.cumsum(inputs["attention_mask"], axis=1) - 1
+            position_ids[inputs["attention_mask"] == 0] = 1
+            inputs["position_ids"] = position_ids
+
         batch_size = input_ids.shape[0]
         if hasattr(model, "key_value_input_names"):
             for input_name in model.key_value_input_names:

diff --git a/tests/openvino/requirements.txt b/tests/openvino/requirements.txt
@@ -13,6 +13,6 @@ addict>=2.4.0
 timm==0.9.2
 efficientnet_pytorch==0.7.1
 datasets==3.0.1
-transformers==4.45.2
-optimum-intel==1.20.0
-optimum==1.23.1
+transformers==4.48.3
+optimum-intel==1.22.0
+optimum==1.24.0