Remove unintended model copies during compilation (#584)

ericcraw · sfatimar · web-flow · commit bd32f5140eb2 · 2025-03-04T20:20:55.000+05:30
Co-authored-by: sfatimar <sahar.fatima@intel.com>
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -137,14 +137,14 @@ bool IsCILogEnabled() {
 }
 
 std::shared_ptr<const OVNetwork>
-CreateOVModel(const std::string model,
+CreateOVModel(std::string&& model,
               const SessionContext& session_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
   if (IsCILogEnabled()) {
     std::cout << "CreateNgraphFunc" << std::endl;
   }
   try {
-    auto ov_model = OVCore::Get()->ReadModel(model, session_context.onnx_model_path_name.string());
+    auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());
 
     // Check for Constant Folding
     if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
                     size_t batch_slice_idx);
 
 std::shared_ptr<const OVNetwork>
-CreateOVModel(const std::string model,
+CreateOVModel(std::string&& model,
               const SessionContext& session_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
 
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -69,14 +69,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                                                 subgraph_context_.subgraph_name);
       model_stream.reset();  // Delete stream after it is no longer needed
     } else {
-      std::shared_ptr<const OVNetwork> ov_model;
-      {
-        const std::string model = model_proto->SerializeAsString();
-        if (!subgraph_context.has_dynamic_input_shape) {
-          delete model_proto.release();
-        }
-        ov_model = CreateOVModel(model, session_context_, const_outputs_map_);
+      std::string model = model_proto->SerializeAsString();
+      if (!subgraph_context.has_dynamic_input_shape) {
+        model_proto.reset();
       }
+      auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
       LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
       exe_network_ = OVCore::Get()->CompileModel(
           ov_model, remote_context_, subgraph_context_.subgraph_name);
@@ -108,14 +105,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                                                  subgraph_context_.subgraph_name);
     } else {  // For all other types use ov::ov_core read_model() to generate OV IR
               // followed by ov::ov_core compile_model()
-      std::shared_ptr<const OVNetwork> ov_model;
-      {
-        const std::string model = model_proto->SerializeAsString();
-        if (!subgraph_context.has_dynamic_input_shape) {
-          delete model_proto.release();
-        }
-        ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
+      std::string model = model_proto->SerializeAsString();
+      if (!subgraph_context.has_dynamic_input_shape) {
+        model_proto.reset();
       }
+      auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
       exe_network_ = OVCore::Get()->CompileModel(
           ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
     }
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -46,9 +46,9 @@ void printDebugInfo(const ov::CompiledModel& obj) {
 }
 #endif
 
-std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model, const std::string& model_path) {
+std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
   try {
-    std::istringstream modelStringStream(model);
+    std::istringstream modelStringStream(std::move(model));
     std::istream& modelStream = modelStringStream;
     // Try to load with FrontEndManager
     ov::frontend::FrontEndManager manager;
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -67,7 +67,7 @@ struct OVCore : WeakSingleton<OVCore> {
   ov::Core core;
 
   // OV Interface For Reading Model
-  std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream, const std::string& model_path);
+  std::shared_ptr<OVNetwork> ReadModel(std::string&& model_stream, const std::string& model_path);
 
   // OV Interface for Compiling OV Model Type
   OVExeNetwork CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_network,