Skip to content

Commit 98ef583

Browse files
committed
Fix the reading of epctx blob using stream
1 parent ca56234 commit 98ef583

10 files changed

+287
-215
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

+29-8
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,12 @@ BackendManager::BackendManager(SessionContext& session_context,
7676
ptr_stream_t model_stream;
7777
std::unique_ptr<onnx::ModelProto> model_proto;
7878
if (subgraph_context_.is_ep_ctx_graph) {
79-
model_stream = ep_ctx_handle_.GetModelBlobStream(subgraph);
79+
std::cout << " inside is_ep_ctx_graph " << std::endl;
80+
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(session_context_.onnx_model_path_name.filename().string());
81+
auto subgraph_name = model_name + "_" +subgraph_context_.subgraph_name;
82+
model_stream = ep_ctx_handle_.GetModelBlobStream(shared_context_,
83+
subgraph_name,
84+
subgraph);
8085
} else {
8186
model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
8287
}
@@ -96,7 +101,9 @@ BackendManager::BackendManager(SessionContext& session_context,
96101
if (!sw.mapped_weights) {
97102
sw.mapped_weights = std::make_unique<SharedContext::SharedWeights::WeightsFile>(weight_filename);
98103
}
104+
std::cout << " Call createOVTensors in backend_manager.cc" << std::endl;
99105
backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights);
106+
std::cout << " create OVTensors successful " << std::endl;
100107
}
101108
}
102109

@@ -197,6 +204,14 @@ BackendManager::BackendManager(SessionContext& session_context,
197204
}
198205
}
199206

207+
// Returns the part of `filename` that precedes its first '.' character.
// A name containing no '.' is returned unchanged; a name that starts with
// '.' yields an empty string.
std::string BackendManager::stripAfterFirstDot(std::string filename) {
  const auto first_dot = filename.find('.');
  if (first_dot != std::string::npos) {
    // Drop the first dot and everything that follows it.
    filename.erase(first_dot);
  }
  return filename;
}
214+
200215
// Call EPContext model exporter here if the provider option for exporting
201216
// precompiled blob is set. If that's the case:
202217
// By default, create model in embed mode where the blob stream is exported as data within
@@ -210,27 +225,33 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
210225
ORT_THROW(exception_str);
211226
}
212227

228+
std::cout << " inside export compiled model " << std::endl;
229+
213230
// If embed_mode, then pass on the serialized blob
214231
// If not embed_mode, dump the blob here and only pass on the path to the blob
215232
std::string model_blob_str;
216233
auto compiled_model = concrete_backend_->GetOVCompiledModel();
217234
if (session_context_.so_share_ep_contexts){
218-
// std::ostringstream model_blob_stream;
219-
// compiled_model.export_model(model_blob_stream);
235+
std::ostringstream model_blob_stream;
236+
compiled_model.export_model(model_blob_stream);
237+
std::cout << " inside export compiled model - share ep contexts" << std::endl;
220238

221239
// std::ofstream file(metadata_filename, std::ios::app| std::ios::binary);
222240
// std::cout << " write to metadata bin - " << metadata_filename << std::endl;
223241
auto& subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
224-
225-
sw::SubgraphMetadata::Map::key_type key{subgraph_context_.subgraph_name};
242+
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(session_context_.onnx_model_path_name.filename().string());
243+
auto subgraph_name = model_name + "_" +subgraph_context_.subgraph_name;
244+
sw::SubgraphMetadata::Map::key_type key{subgraph_name};
226245
sw::SubgraphMetadata::Map::mapped_type value{};
227246

228247
auto& bin_file = shared_context_.shared_weights.shared_bin_file.bin_file_;
229-
if (bin_file.is_open()) {
248+
std::cout << " subgraph name "<< subgraph_name << "key = " << key.name << " For bin write " << std::endl;
249+
if (!subgraph_metadata.contains(key) && bin_file.is_open()) {
230250
// std::cout << "Current offset before "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl;
231251
value.epctx_offset = bin_file.tellp();
232-
// bin_file << model_blob_stream.str();
233-
compiled_model.export_model(bin_file);
252+
std::cout << " bin file location for writing subgraph = " << bin_file.tellp() << std::endl;
253+
bin_file << model_blob_stream.str();
254+
// compiled_model.export_model(bin_file);
234255
// std::cout << "Current offset after "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl;
235256
value.epctx_length = static_cast<size_t>(static_cast<std::streamoff>(bin_file.tellp()) - value.epctx_offset);
236257
// std::cout << "Key = " << key.name << " Offset = " << value.epctx_offset << " , length = " << value.epctx_length << std::endl;

onnxruntime/core/providers/openvino/backend_manager.h

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class BackendManager {
4747
ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto,
4848
const std::vector<std::vector<int64_t>>& input_shapes);
4949

50+
// Returns the portion of `filename` that precedes its first '.'; a name with no '.' is returned unchanged.
std::string stripAfterFirstDot(std::string filename);
51+
5052
std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_;
5153
std::shared_ptr<IBackend> concrete_backend_;
5254
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;

onnxruntime/core/providers/openvino/backend_utils.cc

+15-3
Original file line numberDiff line numberDiff line change
@@ -302,23 +302,35 @@ ov::element::Type GetOpenVINOElementType(ONNX_NAMESPACE::TensorProto_DataType dt
302302
void CreateOVTensors(const std::string& device_name,
303303
SharedContext::SharedWeights::Metadata::Map& metadata_map,
304304
SharedContext::SharedWeights::WeightsFile& weights) {
305+
305306
for (auto& [key, value] : metadata_map) {
306-
if (value.tensor) continue;
307+
// std::cout << " Key = " << key.name << std::endl;
308+
if (value.tensor) {
309+
// std::cout << " Value already present for key = " << key.name << std::endl;
310+
continue;
311+
}
307312

308313
// Get element data type
314+
// std::cout << " value element type = " << value.element_type << std::endl;
309315
auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
310316

311317
ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type); // Map to OpenVINO data type
312-
318+
// std::cout << "value dimensions = " << std::endl;
319+
// for (auto dim:value.dimensions){
320+
// std::cout << dim << std::endl;
321+
// }
313322
// Create OpenVINO Tensor
314323
if (device_name == "NPU") {
315324
// Use remote tensors
316325
auto npu_context = OVCore::Get().get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
317326
auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
318-
327+
// std::cout << " Remote tensor created " << std::endl;
319328
// Copy data to remote tensor
329+
// std::cout << " value size = " << value.size << std::endl;
320330
weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
321331
value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
332+
// std::cout << " value tensor created " << std::endl;
333+
322334
} else {
323335
// Use vanilla tensors
324336
value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);

onnxruntime/core/providers/openvino/backends/basic_backend.cc

+6
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,12 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
8888
if (subgraph_context_.is_ep_ctx_graph) {
8989
// If the blob is held in an EPContext node, then skip FE+Compile
9090
// and directly move on to creating a backend with the executable blob
91+
std::cout << " before ovcore import model " << std::endl;
9192
exe_network_ = OVCore::ImportModel(*model_stream,
9293
hw_target,
9394
device_config,
9495
subgraph_context_.subgraph_name);
96+
std::cout << " import model is successful " << std::endl;
9597
model_stream.reset(); // Delete stream after it is no longer needed
9698
} else if (!session_context_.has_external_weights &&
9799
!subgraph_context_.has_dynamic_input_shape &&
@@ -120,6 +122,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
120122
ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
121123
}
122124
#endif
125+
std::cout << " loaded model to the plugin " << std::endl;
123126
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
124127
} catch (const char* msg) {
125128
ORT_THROW(msg);
@@ -131,11 +134,14 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
131134
if (session_context_.so_share_ep_contexts) {
132135
initializer = [&metadata](OVInferRequestPtr ir_ptr) {
133136
const auto input_count = ir_ptr->GetNumInputs();
137+
std::cout << " ov ir input count = " << input_count << std::endl;
134138
for (auto i = 0u; i < input_count; i++) {
135139
using Key = SharedContext::SharedWeights::Metadata::Key;
136140
const auto tensor_key = Key{ir_ptr->GetInputTensorName(i)};
137141
if (metadata.contains(tensor_key)) {
138142
auto& value = metadata.at(tensor_key);
143+
// ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
144+
std::cout << " value tensor is set with shape = " << value.tensor->get_byte_size() << " input size from metadata = " << value.size << std::endl;
139145
ir_ptr->SetTensor(tensor_key.name, value.tensor);
140146
}
141147
}

onnxruntime/core/providers/openvino/contexts.h

+1-11
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,6 @@ struct SharedContext {
5151
using Map = std::unordered_map<Key, Value, Hash>;
5252
void writeMetadataToBinaryFile(SharedContext& shared_context, const Metadata::Map& metadata);
5353
void readMetadataFromBinaryFile(SharedContext& shared_context, Metadata::Map& metadata);
54-
// friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata);
55-
// friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata);
5654
};
5755

5856
struct SubgraphMetadata {
@@ -74,8 +72,6 @@ struct SharedContext {
7472
const SubgraphMetadata::Map& subgraph_metadata);
7573
void readSubgraphDataFromBinaryFile(SharedContext& shared_context,
7674
SubgraphMetadata::Map& subgraph_metadata);
77-
// friend std::ostream& operator<<(std::ostream& right, const SubgraphMetadata::Map& subgraph_metadata);
78-
// friend std::istream& operator>>(std::istream& right, SubgraphMetadata::Map& subgraph_metadata);
7975
};
8076

8177
struct WeightsFile {
@@ -105,13 +101,11 @@ struct SharedContext {
105101
void openBinFile(const fs::path shared_bin_filename) {
106102
// Check if the file exists before trying to open
107103
if (!fs::exists(shared_bin_filename)) {
108-
std::cerr << "Error: The file does not exist at path: " << shared_bin_filename << std::endl;
109104
std::ofstream createFile(shared_bin_filename, std::ios::binary); // Create an empty binary file
110105
if (!createFile) {
111106
throw std::runtime_error("Failed to create the file!");
112107
}
113108
createFile.close();
114-
// throw std::runtime_error("Failed to open log file! File does not exist.");
115109
}
116110

117111
// Check if the file is accessible for reading and writing
@@ -125,20 +119,16 @@ struct SharedContext {
125119

126120

127121
if (!bin_file_.is_open()) { // Prevent reopening
128-
std::cout << " Bin file is not open " << std::endl;
129122
bin_file_.open(shared_bin_filename, std::ios::in | std::ios::out | std::ios::binary);
130-
std::cout << " bin file opened " << std::endl;
131123
bin_size_ = bin_file_.seekg(0, std::ios::end).tellg();
132-
133-
std::cout << " bin size = " << bin_size_ << std::endl;
134124
bin_file_.seekg(0, std::ios::beg); // Reset to the beginning of the file
135125

136-
137126
if (!bin_file_) {
138127
throw std::runtime_error("Failed to open log file!");
139128
}
140129
}
141130
}
131+
void readBinFile(SharedContext& shared_context_);
142132
}shared_bin_file;
143133

144134
fs::path external_weight_filename;

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

+49-2
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer,
9999
return Status::OK();
100100
}
101101

102-
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const GraphViewer& graph_viewer) const {
102+
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(SharedContext& shared_context_,
103+
const std::string &subgraph_name,
104+
const GraphViewer& graph_viewer) const {
103105
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
104106
auto node = graph_viewer.GetNode(first_index);
105107
ORT_ENFORCE(node != nullptr);
@@ -117,7 +119,52 @@ std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const GraphViewer
117119
} else {
118120
const auto& blob_filepath = graph_viewer.ModelPath().parent_path() / ep_cache_context;
119121
ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string());
120-
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
122+
std::cout << " blob_filepath " << blob_filepath.filename().string() << std::endl;
123+
std::cout << " shared bin filename = " << shared_context_.shared_weights.shared_bin_file.shared_bin_filename.filename().string() << std::endl;
124+
if (blob_filepath == shared_context_.shared_weights.shared_bin_file.shared_bin_filename) {
125+
126+
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from Shared bin file - " << blob_filepath;
127+
auto &sb = shared_context_.shared_weights.shared_bin_file;
128+
// Check that the bin file is larger than the header, which is written at the beginning
129+
ORT_ENFORCE(sb.bin_size_ > 8, " Bin file is empty. Regenerate the epctx model. Bin file path : ", blob_filepath.string());
130+
auto subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
131+
using Key = SharedContext::SharedWeights::SubgraphMetadata::Key;
132+
std::cout << " subgraph name = " << subgraph_name << std::endl;
133+
const auto subgraph_key = Key{subgraph_name};
134+
auto it = subgraph_metadata.find(subgraph_key);
135+
if (it != subgraph_metadata.end()) {
136+
auto& value = it->second;
137+
std::cout << " value.epctx_offset = " << value.epctx_offset << std::endl;
138+
std::cout << " value.epctx_length = " << value.epctx_length << std::endl;
139+
std::cout << " sb.bin_size_ = " << sb.bin_size_ << std::endl;
140+
141+
if(value.epctx_offset < sb.bin_size_ && value.epctx_length <= sb.bin_size_ &&
142+
(value.epctx_offset <= sb.bin_size_ - value.epctx_length)) {
143+
sb.bin_file_.seekg(value.epctx_offset); // Move to the specified offset
144+
std::string buffer(value.epctx_length, '\0'); // preallocate space
145+
sb.bin_file_.read(&buffer[0], value.epctx_length); // Read the specified length
146+
// Adjust string size in case of a short read
147+
buffer.resize(sb.bin_file_.gcount());
148+
// long end = value.epctx_offset + value.epctx_length ;
149+
// LimitedFileStreambuf limitedBuffer(sb.bin_file_, value.epctx_offset, end);
150+
// std::cout << " string stream read from " << value.epctx_offset << " to " << end << std::endl;
151+
152+
153+
// sb.bin_file_.seekg(value.epctx_offset, std::ios::beg);
154+
// // Read exactly 'length' bytes into a string
155+
// std::string data(value.epctx_length, '\0'); // Allocate a string with 'length' bytes
156+
// sb.bin_file_.read(&data[0], value.epctx_length); // Read directly into the string
157+
// result = std::make_unique<std::stringstream>(std::move(data));
158+
std::cout << " Read epctx into stream " << std::endl;
159+
result.reset((std::istream*)new std::istringstream(buffer));
160+
161+
}
162+
}
163+
ORT_ENFORCE(result!=nullptr, " Epctx blob is not read. Check bin file correctness from Bin path: ",
164+
blob_filepath.string());
165+
} else {
166+
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
167+
}
121168
}
122169
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
123170
return result;

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

+26-1
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
#include <sstream>
77
#include <string>
88
#include <memory>
9+
#include <streambuf>
910

1011
#include "core/providers/shared_library/provider_api.h"
12+
#include "core/providers/openvino/contexts.h"
1113

1214
namespace onnxruntime {
1315
namespace openvino_ep {
@@ -31,7 +33,9 @@ class EPCtxHandler {
3133
const std::string& graph_name,
3234
const bool embed_mode,
3335
std::string&& model_blob_str) const;
34-
std::unique_ptr<std::istream> GetModelBlobStream(const GraphViewer& graph_viewer) const;
36+
std::unique_ptr<std::istream> GetModelBlobStream(SharedContext& shared_context_,
37+
const std::string &subgraph_name,
38+
const GraphViewer& graph_viewer) const;
3539
InlinedVector<const Node*> GetEPCtxNodes() const;
3640

3741
private:
@@ -40,5 +44,26 @@ class EPCtxHandler {
4044
const logging::Logger& logger_;
4145
};
4246

47+
// class LimitedFileStreambuf : public std::streambuf {
48+
// private:
49+
// std::fstream& file; // Reference to the existing file stream
50+
// long start, end; // Start and end positions
51+
52+
// protected:
53+
// int_type underflow() override {
54+
// if (file.tellg() >= end || file.eof())
55+
// return traits_type::eof(); // Stop reading if we reach the limit
56+
57+
// return file.get(); // Read next character directly from the file
58+
// }
59+
60+
// public:
61+
// LimitedFileStreambuf(std::fstream& bin_file_, long start, long end)
62+
// : file(bin_file_), start(start), end(end) {
63+
// file.clear(); // Clear error flags in case of previous reads
64+
// file.seekg(start); // Move file pointer to the start position
65+
// }
66+
// };
67+
4368
} // namespace openvino_ep
4469
} // namespace onnxruntime

0 commit comments

Comments
 (0)