Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"EstimateNPULatency": {
"module_path": "olive.passes.onnx.vitis_ai.estimate_npu_latency.EstimateNPULatency",
"supported_providers": [ "*" ],
"supported_accelerators": [ "*" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
Copy link
Contributor

@jambayk jambayk Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you add a "module_dependencies" option like under the autoawqquantizer pass for the package required to run this estimation?

},
"ExtractAdapters": {
"module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
"supported_providers": [ "*" ],
Expand Down
74 changes: 74 additions & 0 deletions olive/passes/onnx/vitis_ai/estimate_npu_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#

import logging

from olive.hardware.accelerator import AcceleratorSpec
from olive.model import ONNXModelHandler
from olive.passes import Pass
from olive.passes.pass_config import BasePassConfig, PassConfigParam

logger = logging.getLogger(__name__)


class EstimateNPULatency(Pass):
    """Estimate NPU inference latency for an ONNX model.

    Runs the external ``estimator`` package's performance estimator against
    the input model and returns the model unchanged. The estimator writes its
    latency report alongside the model file (it is a reporting-only pass;
    no model transformation is performed).
    """

    @classmethod
    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
        """Declare the pass configuration: a single optional target device."""
        return {
            "target_device": PassConfigParam(
                type_=str, required=False, description="Target device type", default_value="stx"
            )
        }

    @classmethod
    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
        """Reject configs whose target device the estimator does not support."""
        if not super().validate_config(config, accelerator_spec):
            return False

        # Only the "stx" (Strix) device is currently supported by the estimator.
        if config.target_device and config.target_device not in ["stx"]:
            logger.warning("Unsupported target device type: %s", config.target_device)
            return False

        return True

    def _run_for_config(
        self, model: ONNXModelHandler, config: BasePassConfig, output_model_path: str
    ) -> ONNXModelHandler:
        """Run the perf estimator on ``model`` and return the model unchanged.

        Raises:
            ImportError: if the ``estimator`` package is not installed. We fail
                hard instead of warning-and-skipping: Olive caches pass runs, so
                a skipped run would be silently reused even after the user
                installs the dependency, forcing them to clean the cache.
            ValueError: if ``model`` is not an ONNXModelHandler.
        """
        try:
            from estimator.config import EstimatorSettings
            from estimator.run import run_perf_estimate
        except ImportError as e:
            raise ImportError(
                "The 'estimator' package is required to run the EstimateNPULatency pass. "
                "Please install it and re-run the workflow."
            ) from e

        if not isinstance(model, ONNXModelHandler):
            raise ValueError("Model must be an instance of ONNXModelHandler")

        input_model_path = model.model_path

        EstimatorSettings.model_path = f"{input_model_path}"

        # Override default parameters if specified
        if config.target_device:
            EstimatorSettings.target_device = config.target_device
        EstimatorSettings.initialized = True

        logger.info(
            "Running perf estimator for model path: %s and target device: %s",
            input_model_path,
            EstimatorSettings.target_device,
        )

        run_perf_estimate(EstimatorSettings)

        logger.info("Finish running perf estimator pass")

        # Return the original model as is
        return model
37 changes: 37 additions & 0 deletions test/passes/onnx/test_estimate_npu_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#
import os
from pathlib import Path

import onnx
import pytest

from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.vitis_ai.estimate_npu_latency import EstimateNPULatency
from test.utils import get_onnx_model


class TestEstimateNPULatency:
    """Test cases for the EstimateNPULatency pass."""

    def test_estimate_latency_basic(self, tmp_path):
        """Run the perf estimator pass end-to-end on a small ONNX model."""
        # The pass requires the optional 'estimator' package; skip cleanly
        # (rather than erroring) when it is not installed in the test env.
        pytest.importorskip("estimator")

        # Setup
        input_model = get_onnx_model()
        config = {}
        p = create_pass_from_dict(EstimateNPULatency, config, disable_search=True)
        output_folder = str(tmp_path / "onnx")

        # Execute
        output_model = p.run(input_model, output_folder)

        # Assert the estimator wrote its latency summary next to the input model.
        estimates_csv = Path(input_model.model_path).parent / "concise_summary"
        assert estimates_csv.exists()

        # The pass is reporting-only: the returned model must be intact.
        assert Path(output_model.model_path).exists()
        # Load the output model and check graph name
        onnx_model = onnx.load_model(output_model.model_path)
        assert onnx_model.graph.name == "main_graph"
Loading