Skip to content

Commit 36e7539

Browse files
authored
Merge branch 'master' into cvs_160853
2 parents 4fcee65 + 3e7eeb6 commit 36e7539

File tree

28 files changed

+2535
-1058
lines changed

28 files changed

+2535
-1058
lines changed

.github/dependabot.yml

-3
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,11 @@ updates:
1515
timezone: "Poland"
1616
open-pull-requests-limit: 3
1717
assignees:
18-
- "jiwaszki"
1918
- "p-wysocki"
2019
- "akuporos"
2120
- "rkazants"
2221
- "ceciliapeng2011"
2322
- "meiyang-intel"
24-
- "mbencer"
25-
- "tomdol"
2623
- "jane-intel"
2724
versioning-strategy: increase-if-necessary
2825

docs/articles_en/documentation/compatibility-and-support/supported-models.rst

+3-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ models from OpenVINO-supported frameworks may also work properly but have not be
66

77
**AI Models that run on Intel® Core Ultra™ Processors with OpenVINO™ toolkit:**
88

9-
.. data-table::
9+
.. data-table::
1010
:class: modeldata stripe
1111
:name: supportedModelsTable
1212
:header-rows: 1
@@ -16,13 +16,11 @@ models from OpenVINO-supported frameworks may also work properly but have not be
1616
:data-page-length: 10
1717

1818

19-
| Marked cells indicate models that passed inference with no errors. Empty cells indicate
20-
models that were not tested. No failing runs producing an error have been recorded.
19+
| Marked cells indicate models that passed inference with no errors.
2120
|
2221
| In the precision column, the "optimum-intel default" label corresponds to FP32 for small models
2322
and INT8 for models greater than 1B parameters.
24-
|
25-
| The results as of June 17 2024, for OpenVINO version 2024.2.
23+
| The results as of February 25 2025, for OpenVINO version 2025.0.
2624
| The models come from different public model repositories, such as Pytorch Model Zoo and
2725
HuggingFace; they were executed on the designated hardware with OpenVINO either natively or
2826
as a backend.

docs/sphinx_setup/_static/download/supported_models.csv

+1,792-910
Large diffs are not rendered by default.

src/bindings/python/src/openvino/_ov_api.py

+27
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,33 @@ def __dir__(cls) -> list:
3131

3232
class Model(object, metaclass=ModelMeta):
3333
def __init__(self, *args: Any, **kwargs: Any) -> None:
34+
if not args and not kwargs:
35+
36+
constructors = [
37+
"1. openvino.Model(other: openvino.Model)",
38+
"2. openvino.Model(results: list[openvino.op.Result], sinks: list[openvino.Node], parameters: list[openvino.op.Parameter], name: str = '')",
39+
"3. openvino.Model(results: list[openvino.Node], parameters: list[openvino.op.Parameter], name: str = '')",
40+
"4. openvino.Model(result: openvino.Node, parameters: list[openvino.op.Parameter], name: str = '')",
41+
"5. openvino.Model(results: list[openvino.Output], parameters: list[openvino.op.Parameter], name: str = '')",
42+
"6. openvino.Model(results: list[openvino.Output], sinks: list[openvino.Node], parameters: list[openvino.op.Parameter], name: str = '')",
43+
"7. openvino.Model(results: list[openvino.Output], sinks: list[openvino.Output], parameters: list[openvino.op.Parameter], name: str = '')",
44+
"8. openvino.Model(results: list[openvino.Output], sinks: list[openvino.Output], parameters: list[openvino.op.Parameter], \
45+
variables: list[openvino.op.util.Variable], name: str = '')",
46+
"9. openvino.Model(results: list[openvino.op.Result], sinks: list[openvino.Output], parameters: list[openvino.op.Parameter], name: str = '')",
47+
"10. openvino.Model(results: list[openvino.op.Result], sinks: list[openvino.Output], parameters: list[openvino.op.Parameter], \
48+
variables: list[openvino.op.util.Variable], name: str = '')",
49+
"11. openvino.Model(results: list[openvino.op.Result], sinks: list[openvino.Node], parameters: list[openvino.op.Parameter], \
50+
variables: list[openvino.op.util.Variable], name: str = '')",
51+
"12. openvino.Model(results: list[openvino.Output], sinks: list[openvino.Node], parameters: list[openvino.op.Parameter], \
52+
variables: list[openvino.op.util.Variable], name: str = '')",
53+
"13. openvino.Model(results: list[openvino.op.Result], parameters: list[openvino.op.Parameter], \
54+
variables: list[openvino.op.util.Variable], name: str = '')",
55+
"14. openvino.Model(results: list[openvino.Output], parameters: list[openvino.op.Parameter], \
56+
variables: list[openvino.op.util.Variable], name: str = '')",
57+
]
58+
59+
constructor_info = "\n".join(f" - {ctor}" for ctor in constructors)
60+
raise ValueError(f"Model cannot be instantiated without arguments.\n\nAvailable constructors:\n{constructor_info}")
3461
if args and not kwargs:
3562
if isinstance(args[0], ModelBase):
3663
self.__model = ModelBase(args[0])

src/bindings/python/src/openvino/frontend/pytorch/patch_model.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,12 @@ def __make_16bit_traceable(model: torch.nn.Module,
8484
- Replace known list of modules with ModuleExtension.
8585
- Convert other modules with weights to FP32.
8686
"""
87+
supported = {torch.float16, torch.bfloat16, torch.float8_e4m3fn, torch.float8_e5m2}
8788
if patch_condition is None:
8889
def patch_condition(module):
89-
supported = {torch.float32, torch.float16, torch.bfloat16}
90+
dtype_to_patch = {torch.float32, *supported}
9091
weight = getattr(module, "weight", None)
91-
return weight is not None and weight.dtype in supported
92+
return weight is not None and weight.dtype in dtype_to_patch
9293

9394
def fp32_tensor(*shape):
9495
return torch.full(shape, 0.5, dtype=torch.float32)
@@ -123,10 +124,9 @@ def fp32_tensor(*shape):
123124
except ImportError:
124125
pass
125126
patch_model(model, extensions, orig_forward_name)
126-
dtype_to_patch = {torch.float16, torch.bfloat16}
127127
for _, module in model.named_modules():
128128
if (module.__class__ not in extensions and
129-
(any(p.dtype in dtype_to_patch for p in module.parameters(False))
130-
or any(b.dtype in dtype_to_patch for b in module.buffers(False)))):
129+
(any(p.dtype in supported for p in module.parameters(False))
130+
or any(b.dtype in supported for b in module.buffers(False)))):
131131
log.debug("Casting module %s to float32", module)
132132
module.float()

src/bindings/python/tests/test_runtime/test_model.py

+5
Original file line numberDiff line numberDiff line change
@@ -857,3 +857,8 @@ def test_model_dir():
857857

858858
assert type(dir(model)) == list
859859
assert len(dir(model)) >= num_of_attrs
860+
861+
862+
def test_model_without_arguments():
863+
with pytest.raises(ValueError, match="Model cannot be instantiated without arguments."):
864+
Model()

src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ class ov::pass::KeepConstantsPrecisionAndAddConverts : public MatcherPass {
6262

6363
/**
6464
* @ingroup ov_transformation_common_api
65-
* @brief Prevents ConstantFolding for f16/bf16 Const + Convert_To_FP32 to keep original FW float Constants.
65+
* @brief Prevents ConstantFolding for low precision Const + Convert_To_FP32 to keep original FW float Constants.
6666
* Original precision should be kept as long as possible, this prevents redundant conversions and saves memory.
6767
* E.g. if original FW model was already compressed no need to upcast during CF, store intermediate f32 consts and
68-
* then again compress them to f16 during save_model.
68+
* then again compress them to low precision during save_model.
6969
*/
7070
class ov::pass::MarkCompressedFloatConstants : public MatcherPass {
7171
public:

src/common/transformations/src/transformations/fp16_compression/mark_decompression_convert_constant_folding.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,10 @@ pass::MarkCompressedFloatConstants::MarkCompressedFloatConstants() {
135135
if (convert_node->get_destination_type() != element::f32)
136136
return false;
137137
if (const_node->get_output_element_type(0) != element::f16 &&
138-
const_node->get_output_element_type(0) != element::bf16)
138+
const_node->get_output_element_type(0) != element::bf16 &&
139+
const_node->get_output_element_type(0) != element::f8e4m3 &&
140+
const_node->get_output_element_type(0) != element::f8e5m2 &&
141+
const_node->get_output_element_type(0) != element::f8e8m0)
139142
return false;
140143

141144
mark_as_decompression(convert_node);

src/core/dev_api/openvino/runtime/shared_buffer.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ template <typename T>
1313
class SharedBuffer : public ov::AlignedBuffer {
1414
public:
1515
SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) {
16-
m_allocated_buffer = data;
16+
m_allocated_buffer = nullptr;
1717
m_aligned_buffer = data;
1818
m_byte_size = size;
1919
}

src/core/include/openvino/core/type.hpp

-4
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,7 @@ class ConversionExtensionBase;
8383

8484
template <typename T>
8585
constexpr bool use_ov_dynamic_cast() {
86-
#if defined(__ANDROID__) || defined(ANDROID)
87-
return true;
88-
#else
8986
return std::is_base_of_v<ov::frontend::ConversionExtensionBase, T>;
90-
#endif
9187
}
9288

9389
/// \brief Tests if value is a pointer/shared_ptr that can be statically cast to a

src/core/include/openvino/runtime/tensor.hpp

+16
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
*/
1010
#pragma once
1111

12+
#include <filesystem>
1213
#include <type_traits>
1314

1415
#include "openvino/core/coordinate.hpp"
16+
#include "openvino/core/partial_shape.hpp"
1517
#include "openvino/core/rtti.hpp"
1618
#include "openvino/core/shape.hpp"
1719
#include "openvino/core/type/element_type.hpp"
@@ -259,4 +261,18 @@ class OPENVINO_API Tensor {
259261
*/
260262
using TensorVector = std::vector<Tensor>;
261263

264+
/// \brief Read a tensor content from a file. Only raw data is loaded.
265+
/// \param file_name Path to file to read.
266+
/// \param element_type Element type; when not specified, it is assumed to be element::u8.
267+
/// \param shape Shape for resulting tensor. If provided shape is static, specified number of elements is read only.
268+
/// File should contain enough bytes, an exception is raised otherwise.
269+
/// One of the dimensions can be dynamic. In this case it will be determined automatically based on the
270+
/// length of the file content and `offset`. Default value is [?].
271+
/// \param offset_in_bytes Read file starting from specified offset. Default is 0. The remaining size of the file should
272+
/// be compatible with shape.
273+
OPENVINO_API
274+
Tensor read_tensor_data(const std::filesystem::path& file_name,
275+
const element::Type& element_type = element::u8,
276+
const PartialShape& shape = PartialShape::dynamic(1),
277+
std::size_t offset_in_bytes = 0);
262278
} // namespace ov

src/core/src/preprocess/pre_post_process.cpp

+9-6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp"
1717
#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp"
1818
#include "transformations/common_optimizations/disable_shapeof_constant_folding.hpp"
19+
#include "transformations/common_optimizations/gelu_fusion.hpp"
1920
#include "transformations/common_optimizations/mul_conv_fusion.hpp"
2021
#include "transformations/common_optimizations/ric_fusion.hpp"
2122
#include "transformations/common_optimizations/shared_ops_optimization.hpp"
@@ -89,12 +90,14 @@ void transformation_pipeline(std::shared_ptr<ov::Model>& model) {
8990

9091
// 2. Fusion transformations:
9192
REGISTER_PASS(manager, ConvertDivideWithConstant)
92-
auto multiply_fusions = manager.register_pass<GraphRewrite>();
93-
ADD_MATCHER(multiply_fusions, MultiplyConvolutionFusion)
94-
ADD_MATCHER(multiply_fusions, MultiplyGroupConvolutionFusion)
95-
ADD_MATCHER(multiply_fusions, MultiplyConvolutionBackpropDataFusion)
96-
ADD_MATCHER(multiply_fusions, MultiplyGroupConvolutionBackpropDataFusion)
97-
multiply_fusions->set_name("ov::pass::MultiplyFusions");
93+
auto fusions = manager.register_pass<GraphRewrite>();
94+
// Gelu fusion has to be executed before MulConv fusion because Mul(X, 0.5) might be fused into Conv weights
95+
ADD_MATCHER(fusions, GeluFusion)
96+
ADD_MATCHER(fusions, MultiplyConvolutionFusion)
97+
ADD_MATCHER(fusions, MultiplyGroupConvolutionFusion)
98+
ADD_MATCHER(fusions, MultiplyConvolutionBackpropDataFusion)
99+
ADD_MATCHER(fusions, MultiplyGroupConvolutionBackpropDataFusion)
100+
fusions->set_name("ov::pass::MultiplyFusions");
98101
REGISTER_PASS(manager, ReverseInputChannelsFusion)
99102

100103
// 3. CF call due to detected perf degradations

src/core/src/runtime/tensor.cpp

+76
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111
#include "openvino/core/shape.hpp"
1212
#include "openvino/core/shape_util.hpp"
1313
#include "openvino/core/strides.hpp"
14+
#include "openvino/core/tensor_util.hpp"
15+
#include "openvino/core/type/element_iterator.hpp"
1416
#include "openvino/runtime/itensor.hpp"
1517
#include "openvino/runtime/make_tensor.hpp"
1618
#include "openvino/runtime/remote_tensor.hpp"
19+
#include "openvino/runtime/shared_buffer.hpp"
20+
#include "openvino/util/mmap_object.hpp"
1721

1822
namespace ov {
1923

@@ -108,4 +112,76 @@ bool Tensor::is_continuous() const {
108112
OV_TENSOR_STATEMENT(return _impl->is_continuous());
109113
}
110114

115+
namespace {
116+
ov::Shape calc_static_shape_for_file(const std::filesystem::path& file_name,
117+
const ov::element::Type& element_type,
118+
const ov::PartialShape& partial_shape,
119+
size_t offset) {
120+
auto file_size = std::filesystem::file_size(file_name);
121+
if (partial_shape.is_static()) {
122+
auto static_shape = partial_shape.get_shape();
123+
OPENVINO_ASSERT((ov::shape_size(static_shape)) * element_type.bitwidth() + offset * 8 == file_size * 8,
124+
"Cannot fit file size into requested static PartialShape");
125+
return static_shape;
126+
}
127+
auto partial_shape_copy = partial_shape;
128+
auto rank = partial_shape_copy.rank();
129+
OPENVINO_ASSERT(rank.is_static(), "Rank cannot be dynamic");
130+
std::vector<size_t> dynamic_dimension_numbers;
131+
size_t slice_size = 1;
132+
for (size_t id = 0; id < partial_shape_copy.size(); ++id) {
133+
if (partial_shape_copy[id].is_dynamic()) {
134+
dynamic_dimension_numbers.push_back(id);
135+
} else {
136+
slice_size *= partial_shape_copy[id].get_min_length();
137+
}
138+
}
139+
OPENVINO_ASSERT(dynamic_dimension_numbers.size() == 1,
140+
"Only one dynamic dimension in input shape is supported but got: ",
141+
dynamic_dimension_numbers.size());
142+
auto& dynamic_dimension = partial_shape_copy[dynamic_dimension_numbers[0]];
143+
144+
OPENVINO_ASSERT(file_size > offset, "Offset is bigger than size of file to read.");
145+
auto file_size_to_read = file_size - offset;
146+
147+
OPENVINO_ASSERT((file_size_to_read * 8) % element_type.bitwidth() == 0,
148+
"cannot fit ",
149+
element_type.get_type_name(),
150+
" into ",
151+
file_size_to_read,
152+
" bytes");
153+
auto elements_to_read = file_size_to_read * 8 / element_type.bitwidth();
154+
155+
auto new_dimension_size = elements_to_read / slice_size;
156+
OPENVINO_ASSERT(new_dimension_size * slice_size == elements_to_read,
157+
"Cannot fit file size into requested PartialShape");
158+
159+
OPENVINO_ASSERT(dynamic_dimension.compatible(new_dimension_size),
160+
"Cannot fit file size into requested PartialShape");
161+
162+
dynamic_dimension = Dimension(new_dimension_size);
163+
return partial_shape_copy.get_shape();
164+
}
165+
} // namespace
166+
167+
Tensor read_tensor_data(const std::filesystem::path& file_name,
168+
const ov::element::Type& element_type,
169+
const ov::PartialShape& partial_shape,
170+
size_t offset_in_bytes) {
171+
OPENVINO_ASSERT(element_type != ov::element::string);
172+
auto static_shape = calc_static_shape_for_file(file_name, element_type, partial_shape, offset_in_bytes);
173+
174+
auto mapped_memory = ov::load_mmap_object(file_name);
175+
auto shared_buffer =
176+
std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(mapped_memory->data() + offset_in_bytes,
177+
mapped_memory->size() - offset_in_bytes,
178+
mapped_memory);
179+
180+
auto view_tensor = Tensor(element_type, static_shape, shared_buffer->get_ptr());
181+
auto impl = get_tensor_impl(view_tensor);
182+
impl._so = shared_buffer;
183+
view_tensor = make_tensor(impl);
184+
185+
return view_tensor;
186+
}
111187
} // namespace ov

src/core/tests/preprocess.cpp

+51
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

5+
#define _USE_MATH_DEFINES
6+
7+
#include <math.h>
8+
59
#include "common_test_utils/ov_test_utils.hpp"
610
#include "common_test_utils/test_assertions.hpp"
711
#include "common_test_utils/test_tools.hpp"
@@ -2504,3 +2508,50 @@ TEST_F(TransformationTestsF, preprocessing_conv_decompression) {
25042508
model_ref = std::make_shared<ov::Model>(ResultVector{res}, ParameterVector{input});
25052509
}
25062510
}
2511+
2512+
TEST_F(TransformationTestsF, preprocessing_gelu_fusion) {
2513+
auto in_shape = Shape{1, 3, 32, 32};
2514+
auto in_type = element::f32;
2515+
auto weight_type = element::f32;
2516+
{
2517+
auto data = std::make_shared<ov::op::v0::Parameter>(in_type, in_shape);
2518+
2519+
auto mul_const_sqrt_1_2 = ov::op::v0::Constant::create(in_type, Shape{1}, {M_SQRT1_2});
2520+
auto mul_to_erf = std::make_shared<ov::op::v1::Multiply>(data, mul_const_sqrt_1_2);
2521+
auto erf = std::make_shared<ov::op::v0::Erf>(mul_to_erf);
2522+
2523+
auto add_const = ov::op::v0::Constant::create(in_type, Shape{1}, {1.0});
2524+
auto add = std::make_shared<ov::op::v1::Add>(erf, add_const);
2525+
auto mul_first = std::make_shared<ov::op::v1::Multiply>(data, add);
2526+
2527+
auto mul_const = ov::op::v0::Constant::create(in_type, Shape{1}, {0.5});
2528+
auto mul = std::make_shared<ov::op::v1::Multiply>(mul_first, mul_const);
2529+
2530+
std::shared_ptr<Node> weights = std::make_shared<op::v0::Constant>(weight_type, ov::Shape{1, 3, 3, 3}, 1);
2531+
auto conv = std::make_shared<op::v1::Convolution>(mul,
2532+
weights,
2533+
Strides{},
2534+
CoordinateDiff{},
2535+
CoordinateDiff{},
2536+
Strides{});
2537+
auto res = std::make_shared<op::v0::Result>(conv);
2538+
auto f = std::make_shared<ov::Model>(ov::ResultVector{res}, ov::ParameterVector{data});
2539+
auto p = PrePostProcessor(f);
2540+
model = p.build();
2541+
}
2542+
2543+
{
2544+
auto input = std::make_shared<op::v0::Parameter>(in_type, in_shape);
2545+
2546+
auto gelu = std::make_shared<op::v7::Gelu>(input);
2547+
auto weights = op::v0::Constant::create(weight_type, ov::Shape({1, 3, 3, 3}), {1.f});
2548+
auto conv = std::make_shared<op::v1::Convolution>(gelu,
2549+
weights,
2550+
Strides{},
2551+
CoordinateDiff{},
2552+
CoordinateDiff{},
2553+
Strides{});
2554+
auto res = std::make_shared<op::v0::Result>(conv);
2555+
model_ref = std::make_shared<ov::Model>(ResultVector{res}, ParameterVector{input});
2556+
}
2557+
}

0 commit comments

Comments
 (0)