Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"EstimateNPULatency": {
"module_path": "olive.passes.onnx.vitis_ai.estimate_npu_latency.EstimateNPULatency",
"supported_providers": [ "*" ],
"supported_accelerators": [ "*" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
Copy link
Contributor

@jambayk jambayk Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you add a "module_dependencies" option like under the autoawqquantizer pass for the package required to run this estimation?

},
"ExtractAdapters": {
"module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
"supported_providers": [ "*" ],
Expand Down
74 changes: 74 additions & 0 deletions olive/passes/onnx/vitis_ai/estimate_npu_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#

import logging

from olive.hardware.accelerator import AcceleratorSpec
from olive.model import ONNXModelHandler
from olive.passes import Pass
from olive.passes.pass_config import BasePassConfig, PassConfigParam

logger = logging.getLogger(__name__)


class EstimateNPULatency(Pass):
    """Estimate NPU inference latency for an ONNX model.

    Runs the external ``estimator`` package's performance estimator against
    the input model and returns the model unchanged. The estimator writes its
    latency report alongside the model file (it is a reporting-only pass;
    no model transformation is performed).
    """

    @classmethod
    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
        """Declare the pass configuration: a single optional target device."""
        return {
            "target_device": PassConfigParam(
                type_=str, required=False, description="Target device type", default_value="stx"
            )
        }

    @classmethod
    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
        """Reject configs whose target device the estimator does not support."""
        if not super().validate_config(config, accelerator_spec):
            return False

        # Only the "stx" (Strix) device is currently supported by the estimator.
        if config.target_device and config.target_device not in ["stx"]:
            logger.warning("Unsupported target device type: %s", config.target_device)
            return False

        return True

    def _run_for_config(
        self, model: ONNXModelHandler, config: BasePassConfig, output_model_path: str
    ) -> ONNXModelHandler:
        """Run the perf estimator on ``model`` and return the model unchanged.

        Raises:
            ImportError: if the ``estimator`` package is not installed. We fail
                hard instead of warning-and-skipping: Olive caches pass runs, so
                a skipped run would be silently reused even after the user
                installs the dependency, forcing them to clean the cache.
            ValueError: if ``model`` is not an ONNXModelHandler.
        """
        try:
            from estimator.config import EstimatorSettings
            from estimator.run import run_perf_estimate
        except ImportError as e:
            raise ImportError(
                "The 'estimator' package is required to run the EstimateNPULatency pass. "
                "Please install it and re-run the workflow."
            ) from e

        if not isinstance(model, ONNXModelHandler):
            raise ValueError("Model must be an instance of ONNXModelHandler")

        input_model_path = model.model_path

        EstimatorSettings.model_path = f"{input_model_path}"

        # Override default parameters if specified
        if config.target_device:
            EstimatorSettings.target_device = config.target_device
        EstimatorSettings.initialized = True

        logger.info(
            "Running perf estimator for model path: %s and target device: %s",
            input_model_path,
            EstimatorSettings.target_device,
        )

        run_perf_estimate(EstimatorSettings)

        logger.info("Finish running perf estimator pass")

        # Return the original model as is
        return model
37 changes: 37 additions & 0 deletions test/passes/onnx/test_estimate_npu_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#
import os
from pathlib import Path

import onnx
import pytest

from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.vitis_ai.estimate_npu_latency import EstimateNPULatency
from test.utils import get_onnx_model


class TestEstimateNPULatency:
    """Test cases for the EstimateNPULatency pass."""

    def test_estimate_latency_basic(self, tmp_path):
        """Run the perf estimator pass end-to-end on a small ONNX model."""
        # The pass requires the optional 'estimator' package; skip cleanly
        # (rather than erroring) when it is not installed in the test env.
        pytest.importorskip("estimator")

        # Setup
        input_model = get_onnx_model()
        config = {}
        p = create_pass_from_dict(EstimateNPULatency, config, disable_search=True)
        output_folder = str(tmp_path / "onnx")

        # Execute
        output_model = p.run(input_model, output_folder)

        # Assert the estimator wrote its latency summary next to the input model.
        estimates_csv = Path(input_model.model_path).parent / "concise_summary"
        assert estimates_csv.exists()

        # The pass is reporting-only: the returned model must be intact.
        assert Path(output_model.model_path).exists()
        # Load the output model and check graph name
        onnx_model = onnx.load_model(output_model.model_path)
        assert onnx_model.graph.name == "main_graph"
Loading