Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 6 additions & 34 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "core/providers/openvino/ov_interface.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/exceptions.h"
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
#include "../../framework/tensorprotoutils.h"

Expand Down Expand Up @@ -158,40 +159,11 @@ BackendManager::BackendManager(SessionContext& session_context,
subgraph_context_.has_dynamic_input_shape = false;

// OV NPU plugin is supported with fallback to OV CPU upon compilation failures.
try {
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
shared_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
std::string exception_str = ex.what();

if (session_context_.device_type.find("NPU") != std::string::npos &&
exception_str.find("intel_npu") != std::string::npos) {
// Handle NPU device related errors
#ifndef NDEBUG
std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
ORT_THROW(exception_str + suffix);
#else
std::string error_message = "UNKNOWN NPU ERROR";
std::string error_code = "code 0x0";
std::regex error_message_pattern(R"(\bZE_\w*\b)");
std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
std::smatch matches;
if (std::regex_search(exception_str, matches, error_message_pattern)) {
error_message = matches[0];
}
if (std::regex_search(exception_str, matches, error_code_pattern)) {
error_code = matches[0];
}
std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
throw std::runtime_error(error_message + ", " + error_code + suffix);
#endif
} else {
ORT_THROW(exception_str);
}
}
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
shared_context_,
model_stream);
}
if (session_context_.so_context_enable &&
(subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) {
Expand Down
82 changes: 82 additions & 0 deletions onnxruntime/core/providers/openvino/exceptions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#pragma once

#include <charconv>
#include <cstdint>
#include <exception>
#include <regex>
#include <string>

#include "core/common/status.h"

namespace onnxruntime {
namespace openvino_ep {

struct ovep_exception : public std::exception {
  // Classifies which OVEP operation produced the underlying OpenVINO failure.
  enum class type {
    compile_model,
    import_model,
    query_prop,
    read_model,
    unknown,
  };

  // Parses the Level Zero error name (ZE_*) and the "code 0x..." hex code out
  // of the OpenVINO exception text so they can later be mapped onto a Status.
  // NOTE: `enum class type` is not a valid elaborated-type-specifier for a
  // parameter in standard C++ (MSVC-only extension), so the ctor takes the
  // nested enum by its plain name.
  ovep_exception(const std::string& message,
                 type exception_type) : message_{message},
                                        type_{exception_type},
                                        error_code_{ze_result_code_from_string(message)},
                                        error_name_{ze_result_name_from_string(message)} {}

  const char* what() const noexcept override {
    return message_.c_str();
  }

  uint32_t get_code() const noexcept { return error_code_; }

  // Converts the exception into an ORT Status so the provider can report a
  // failure across the API boundary instead of propagating the exception.
  operator common::Status() const {
    common::StatusCategory category_ort{common::ONNXRUNTIME};

    if (type_ == type::unknown) {
      return {category_ort, common::FAIL, message_};
    }

    // Newer drivers: importing a cached native blob that no longer matches
    // the installed driver fails with ZE_RESULT_ERROR_INVALID_NATIVE_BINARY.
    // Surface it as INVALID_GRAPH so callers know the model must be recompiled.
    if ((type_ == type::import_model) &&
        (error_code_ == 0x7800000f /* ZE_RESULT_ERROR_INVALID_NATIVE_BINARY */)) {
      std::string message{error_name_ + ", code 0x" + hex_string(error_code_) + "\nModel needs to be recompiled\n"};
      return {category_ort, common::INVALID_GRAPH, message};
    }

    std::string error_message = "Unhandled exception type: " + std::to_string(static_cast<int>(type_));
    return {category_ort, common::FAIL, error_message};
  }

 protected:
  std::string message_;
  type type_{type::unknown};
  uint32_t error_code_{0};
  std::string error_name_;

 private:
  // Renders a 32-bit value as hex digits (no "0x" prefix). std::to_string
  // would print decimal, which contradicts the "0x" prefix in the message.
  static std::string hex_string(uint32_t value) {
    char buffer[9]{};  // 32 bits -> at most 8 hex digits
    const auto result = std::to_chars(buffer, buffer + sizeof(buffer), value, 16);
    return std::string(buffer, result.ptr);
  }

  // Extracts the hexadecimal code following "code 0x" in the exception text;
  // returns 0 when no code is present.
  static uint32_t ze_result_code_from_string(const std::string& ov_exception_string) {
    uint32_t error_code{0};
    std::regex error_code_pattern("code 0x([0-9a-fA-F]+)");
    std::smatch matches;
    if (std::regex_search(ov_exception_string, matches, error_code_pattern)) {
      // Copy the sub-match first: forming a pointer from the sub-range's end
      // iterator (&*matches[1].second) is UB when it equals the string's end.
      const std::string digits = matches[1].str();
      std::from_chars(digits.data(), digits.data() + digits.size(), error_code, 16);
    }
    return error_code;
  }

  // Extracts the ZE_* error identifier from the exception text, or a
  // placeholder when none is found.
  static std::string ze_result_name_from_string(const std::string& ov_exception_string) {
    std::string error_message = "UNKNOWN NPU ERROR";
    std::regex error_message_pattern(R"(\bZE_\w*\b)");
    std::smatch matches;
    if (std::regex_search(ov_exception_string, matches, error_message_pattern)) {
      error_message = matches[0];
    }
    return error_message;
  }
};

} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/exceptions.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "openvino/core/version.hpp"
#ifdef USE_OVEP_NPU_MEMORY
Expand Down Expand Up @@ -102,7 +103,8 @@ common::Status OpenVINOExecutionProvider::Compile(
auto& logger = *GetLogger();
Status status = Status::OK();

bool is_epctx_model = false;
try {
bool is_epctx_model = false;
if (!fused_nodes.empty()) {
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
Expand Down Expand Up @@ -227,6 +229,9 @@ common::Status OpenVINOExecutionProvider::Compile(
shared_context_->clear();
}
}
} catch (const ovep_exception& ex) {
status = ex;
}

return status;
}
Expand Down
43 changes: 29 additions & 14 deletions onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,25 @@
#include <sstream>

#include "core/providers/openvino/backends/basic_backend.h"
#include "core/providers/openvino/ov_stateful_patch_utils.h"
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/exceptions.h"

namespace onnxruntime {
namespace openvino_ep {

template <typename Func, typename... Args>
inline auto OvExceptionBoundary(Func&& func, std::format_string<Args...>&& fmt, Args&&... args) {
template <bool typed, typename Func, typename... Args>
inline auto OvExceptionBoundary(Func&& func, Args&&... args) {
try {
return func();
} catch (const ov::Exception& e) {
ORT_THROW(log_tag + std::vformat(fmt.get(), std::make_format_args(args...)) + ": " + std::string(e.what()));
const auto message = log_tag + (args + ...) + ": " + std::string(e.what());
if constexpr (typed) {
ORT_THROW_EX(ovep_exception, message, ovep_exception::type::import_model);
} else {
ORT_THROW(message);
}
} catch (...) {
ORT_THROW(log_tag + std::vformat(fmt.get(), std::make_format_args(args...)));
const auto message = log_tag + (args + ...);
ORT_THROW(message);
}
}

Expand Down Expand Up @@ -70,7 +77,7 @@ std::optional<bool> queryOVProperty(const std::string& property, const std::stri
}

std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<false>([&]() {
std::istringstream modelStringStream(std::move(model));
std::istream& modelStream = modelStringStream;
// Try to load with FrontEndManager
Expand Down Expand Up @@ -156,7 +163,7 @@ OVExeNetwork OVCore::CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_netwo
ov::AnyMap& device_config,
bool enable_causallm,
const std::string& name) {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<false>([&]() {
OVExeNetwork exe;
if (enable_causallm) {
auto mutable_model = ie_cnn_network->clone();
Expand All @@ -179,7 +186,7 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model,
std::string& hw_target,
ov::AnyMap& device_config,
const std::string& name) {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<false>([&]() {
ov::CompiledModel obj;

obj = core.compile_model(onnx_model, ov::Tensor(), hw_target, device_config);
Expand All @@ -196,7 +203,7 @@ OVExeNetwork OVCore::ImportModel(ModelBlobWrapper& model_blob,
std::string hw_target,
const ov::AnyMap& device_config,
std::string name) {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<true>([&]() {
ov::CompiledModel obj;
#if (OPENVINO_VERSION_MAJOR > 2025 || (OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR >= 3))
if (!model_blob.maybe_native_blob_path_.empty()) {
Expand All @@ -222,7 +229,7 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream,
const ov::AnyMap& device_config,
bool enable_causallm,
std::filesystem::path model_file_path) {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<false>([&]() {
OVExeNetwork exe;

bool isXML = backend_utils::IsModelStreamXML(model_stream);
Expand Down Expand Up @@ -308,8 +315,16 @@ std::vector<std::string> OVCore::GetAvailableDevices(const std::string& device_t
return available_devices;
}

// Enables OpenVINO compiled-blob caching by pointing the core at a directory.
void OVCore::SetCache(const std::string& cache_dir_path) {
  const auto cache_property = ov::cache_dir(cache_dir_path);
  core.set_property(cache_property);
}

void OVCore::SetStreams(const std::string& device_type, int num_streams) {
core.set_property(device_type, {ov::num_streams(num_streams)});
}

std::shared_ptr<OVInferRequest> OVExeNetwork::CreateInferRequest() {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<false>([&]() {
auto infReq = compiled_model_obj.create_infer_request();
std::shared_ptr<OVInferRequest> ovInfReq;
if (is_stateful_causallm) {
Expand All @@ -324,7 +339,7 @@ std::shared_ptr<OVInferRequest> OVExeNetwork::CreateInferRequest() {
}

OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) {
return OvExceptionBoundary([&]() {
return OvExceptionBoundary<false>([&]() {
auto tobj = ovInfReq.get_tensor(input_name);
OVTensorPtr blob = std::make_shared<OVTensor>(tobj);
return blob;
Expand All @@ -333,15 +348,15 @@ OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) {
}

// Returns the first registered name of the index-th compiled-model input.
// The error context no longer uses a std::format "{}" placeholder: formatting
// was replaced by concatenation in OvExceptionBoundary, so the stale "{}"
// would be emitted verbatim; pass the index as a separate argument instead.
std::string OVInferRequest::GetInputTensorName(uint32_t index) {
  return OvExceptionBoundary<false>([&]() -> const std::string& {
    const auto& model = ovInfReq.get_compiled_model();
    return *model.input(index).get_names().begin();
  },
  " Cannot access IE Blob for input number: ", index);
}

void OVInferRequest::SetTensor(const std::string& name, OVTensorPtr& blob) {
OvExceptionBoundary([&]() {
OvExceptionBoundary<false>([&]() {
ovInfReq.set_tensor(name, *(blob.get()));
},
" Cannot set Remote Blob for output: {}", name);
Expand All @@ -352,7 +367,7 @@ uint32_t OVInferRequest::GetNumInputs() {
}

void OVInferRequest::Infer() {
OvExceptionBoundary([&]() {
OvExceptionBoundary<false>([&]() {
ovInfReq.infer();
},
"In Error Couldn't start Inference");
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ struct OVCore : WeakSingleton<OVCore> {

std::vector<std::string> GetAvailableDevices() const;
std::vector<std::string> GetAvailableDevices(const std::string& device_type) const;
void SetCache(const std::string& cache_dir_path);
void SetStreams(const std::string& device_type, int num_streams);
};

class OVExeNetwork {
Expand Down
Loading