5 changes: 5 additions & 0 deletions helion/autotuner/__init__.py
@@ -11,17 +11,22 @@
)
from .effort_profile import AutotuneEffortProfile as AutotuneEffortProfile
from .effort_profile import DifferentialEvolutionConfig as DifferentialEvolutionConfig
from .effort_profile import MultiFidelityBOConfig as MultiFidelityBOConfig
from .effort_profile import PatternSearchConfig as PatternSearchConfig
from .effort_profile import RandomSearchConfig as RandomSearchConfig
from .finite_search import FiniteSearch as FiniteSearch
from .local_cache import LocalAutotuneCache as LocalAutotuneCache
from .local_cache import StrictLocalAutotuneCache as StrictLocalAutotuneCache
from .multifidelity_bo_search import (
MultiFidelityBayesianSearch as MultiFidelityBayesianSearch,
)
from .pattern_search import PatternSearch as PatternSearch
from .random_search import RandomSearch as RandomSearch

search_algorithms = {
"DifferentialEvolutionSearch": DifferentialEvolutionSearch,
"FiniteSearch": FiniteSearch,
"MultiFidelityBayesianSearch": MultiFidelityBayesianSearch,
"PatternSearch": PatternSearch,
"RandomSearch": RandomSearch,
}
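
For orientation, a minimal lookup sketch using only the names exported by this __init__; construction details are not shown in this diff and are deliberately omitted:

# Minimal sketch; relies only on names exported by helion.autotuner.
from helion.autotuner import MultiFidelityBayesianSearch, search_algorithms

search_cls = search_algorithms["MultiFidelityBayesianSearch"]
assert search_cls is MultiFidelityBayesianSearch
# Construction and running follow the same pattern as the other registered
# searches (e.g. PatternSearch); the constructor arguments are not part of
# this diff, so they are not shown here.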
115 changes: 115 additions & 0 deletions helion/autotuner/acquisition.py
@@ -0,0 +1,115 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
from scipy.stats import norm

if TYPE_CHECKING:
from numpy.typing import NDArray


def expected_improvement(
mu: NDArray[np.float64],
sigma: NDArray[np.float64],
best_so_far: float,
xi: float = 0.01,
) -> NDArray[np.float64]:
"""
Expected Improvement acquisition function.

Balances exploration (high uncertainty) and exploitation (low predicted value).

Args:
mu: GP mean predictions (N,).
sigma: GP uncertainty (standard deviation) (N,).
best_so_far: Current best (minimum) performance observed.
xi: Exploration parameter (higher = more exploration).

Returns:
Expected improvement scores (higher = more valuable to evaluate).
"""
# Avoid division by zero
sigma = np.maximum(sigma, 1e-9)

# We're minimizing, so improvement is best_so_far - mu
improvement = best_so_far - mu - xi
Z = improvement / sigma

# Closed-form EI for minimization: improvement * Phi(Z) + sigma * phi(Z)
ei = improvement * norm.cdf(Z) + sigma * norm.pdf(Z)

# If sigma is very small, just use the improvement
return np.where(sigma > 1e-9, ei, np.maximum(improvement, 0.0))


def upper_confidence_bound(
mu: NDArray[np.float64],
sigma: NDArray[np.float64],
beta: float = 2.0,
) -> NDArray[np.float64]:
"""
Upper Confidence Bound acquisition function.

For minimization, we use Lower Confidence Bound (LCB).

Args:
mu: GP mean predictions (N,).
sigma: GP uncertainty (standard deviation) (N,).
beta: Exploration parameter (higher = more exploration).

Returns:
Lower confidence bound scores (lower = more valuable to evaluate when minimizing).
"""
# For minimization, we want lower confidence bound
return mu - beta * sigma


def probability_of_improvement(
mu: NDArray[np.float64],
sigma: NDArray[np.float64],
best_so_far: float,
xi: float = 0.01,
) -> NDArray[np.float64]:
"""
Probability of Improvement acquisition function.

Args:
mu: GP mean predictions (N,).
sigma: GP uncertainty (standard deviation) (N,).
best_so_far: Current best (minimum) performance observed.
xi: Exploration parameter.

Returns:
Probability of improvement scores.
"""
sigma = np.maximum(sigma, 1e-9)
improvement = best_so_far - mu - xi
Z = improvement / sigma
return norm.cdf(Z)


def cost_aware_ei(
mu: NDArray[np.float64],
sigma: NDArray[np.float64],
best_so_far: float,
cost: float = 1.0,
xi: float = 0.01,
) -> NDArray[np.float64]:
"""
Cost-aware Expected Improvement.

Divides EI by the square root of the evaluation cost, which is useful for multi-fidelity optimization.

Args:
mu: GP mean predictions (N,).
sigma: GP uncertainty (standard deviation) (N,).
best_so_far: Current best (minimum) performance observed.
cost: Cost of evaluation at this fidelity.
xi: Exploration parameter.

Returns:
Cost-normalized expected improvement scores.
"""
ei = expected_improvement(mu, sigma, best_so_far, xi)
return ei / np.sqrt(cost)
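
As a quick sanity check of the acquisition functions above, a minimal sketch; the numbers are illustrative only, and nothing beyond numpy and the functions defined in this file is assumed:

import numpy as np

from helion.autotuner.acquisition import (
    cost_aware_ei,
    expected_improvement,
    probability_of_improvement,
    upper_confidence_bound,
)

# A toy GP posterior over three candidate configs (values are made up).
mu = np.array([1.2, 0.9, 1.5])       # predicted runtimes in ms
sigma = np.array([0.05, 0.3, 0.01])  # predictive standard deviations
best = 1.0                           # best runtime observed so far

ei = expected_improvement(mu, sigma, best)         # highest where mu < best or sigma is large
lcb = upper_confidence_bound(mu, sigma, beta=2.0)  # lower = more promising for minimization
pi = probability_of_improvement(mu, sigma, best)
cei = cost_aware_ei(mu, sigma, best, cost=4.0)     # EI discounted by sqrt(cost)

# The second candidate (lower mean, higher uncertainty) scores best under all four.
print(ei.argmax(), lcb.argmin(), pi.argmax(), cei.argmax())  # 1 1 1 1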
14 changes: 12 additions & 2 deletions helion/autotuner/base_search.py
@@ -276,14 +276,17 @@ def benchmark(self, config: Config) -> tuple[Callable[..., object], float]:
return fn, self.benchmark_function(config, fn)
return fn, inf

def benchmark_function(self, config: Config, fn: CompiledConfig) -> float:
def benchmark_function(
self, config: Config, fn: CompiledConfig, *, fidelity: int = 50
) -> float:
"""
Benchmark a compiled function. This function is called by the autotuner to measure the
performance of a specific configuration.

Args:
config: The configuration to benchmark.
fn: A precompiled version of config.
fidelity: Number of repetitions for benchmarking (default: 50).

Returns:
The performance of the configuration in ms.
@@ -310,7 +313,7 @@ def benchmark_function(self, config: Config, fn: CompiledConfig) -> float:
functools.partial(fn, *self.args),
return_mode="median",
warmup=1, # we are already warmed up above
rep=50,
rep=fidelity,
)
t2 = time.perf_counter()
assert isinstance(res, float)
@@ -568,18 +571,25 @@ class PopulationMember:
perfs (list[float]): The performance of the configuration, accumulated over multiple benchmarks.
flat_values (FlatConfig): The flat representation of the configuration values.
config (Config): The full configuration object.
fidelities (list[int]): The fidelity levels used for each benchmark.
"""

fn: Callable[..., object]
perfs: list[float]
flat_values: FlatConfig
config: Config
status: Literal["ok", "error", "timeout", "unknown"] = "unknown"
fidelities: list[int] = dataclasses.field(default_factory=list)

@property
def perf(self) -> float:
return self.perfs[-1]

@property
def fidelity(self) -> int:
"""Get the fidelity of the latest benchmark."""
return self.fidelities[-1] if self.fidelities else 50


def performance(member: PopulationMember) -> float:
"""
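
A minimal sketch of how the new fidelity plumbing fits together; `search`, `config`, and `member` are hypothetical placeholders, and only the signatures visible in these hunks are relied on:

# Hypothetical usage; `search` (a search instance from base_search.py),
# `config`, and `member` (a PopulationMember) are placeholders.
fn, _ = search.benchmark(config)  # compile + benchmark at the default fidelity (rep=50)
quick = search.benchmark_function(config, fn, fidelity=10)  # cheaper, noisier estimate
full = search.benchmark_function(config, fn, fidelity=50)   # default-quality estimate

member.perfs.append(quick)
member.fidelities.append(10)
assert member.perf == quick   # `perf` always reflects the most recent benchmark
assert member.fidelity == 10  # `fidelity` mirrors it and defaults to 50 when empty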
144 changes: 144 additions & 0 deletions helion/autotuner/config_encoding.py
@@ -0,0 +1,144 @@
from __future__ import annotations

import math
from typing import TYPE_CHECKING

import numpy as np

from .config_fragment import Category

if TYPE_CHECKING:
from .config_generation import ConfigGeneration
from .config_generation import FlatConfig


class ConfigEncoder:
"""
Encodes Helion configurations into numerical vectors for Gaussian Process models.

Handles various config types:
- Power-of-2 values: log2 encoding
- Integers: direct encoding with normalization
- Booleans: 0/1 encoding
- Enums: one-hot encoding
- Permutations: inversion count encoding
"""

def __init__(self, config_gen: ConfigGeneration) -> None:
"""
Initialize the encoder with a configuration generator.

Args:
config_gen: The configuration generator containing the flat spec.
"""
self.config_gen = config_gen
self.flat_spec = config_gen.flat_spec
self._compute_encoding_metadata()

def _compute_encoding_metadata(self) -> None:
"""Precompute metadata for encoding to determine output dimensionality."""
self.encoded_dim = 0
self.encoding_map: list[tuple[int, int, str]] = [] # (start_idx, end_idx, type)

for spec in self.flat_spec:
category = spec.category()
start_idx = self.encoded_dim

if category in {
Category.BLOCK_SIZE,
Category.NUM_WARPS,
Category.NUM_STAGES,
}:
# Single numerical value
self.encoded_dim += 1
self.encoding_map.append((start_idx, self.encoded_dim, "numerical"))
elif hasattr(spec, "choices"):
# Enum - one-hot encoding
num_choices = len(spec.choices) # type: ignore[no-untyped-call]
self.encoded_dim += num_choices
self.encoding_map.append((start_idx, self.encoded_dim, "enum"))
else:
# Boolean or other single value
self.encoded_dim += 1
self.encoding_map.append((start_idx, self.encoded_dim, "numerical"))

def encode(self, flat_config: FlatConfig) -> np.ndarray:
"""
Convert a flat configuration to a numerical vector.

Args:
flat_config: The flat configuration values.

Returns:
A numpy array suitable for GP training.
"""
encoded = np.zeros(self.encoded_dim, dtype=np.float64)

for flat_idx, spec in enumerate(self.flat_spec):
value = flat_config[flat_idx]
category = spec.category()
enc_start, enc_end, enc_type = self.encoding_map[flat_idx]

if enc_type == "numerical":
if category in {Category.BLOCK_SIZE, Category.NUM_WARPS}:
# Power-of-2: use log2 encoding
if isinstance(value, (int, float)) and value > 0:
encoded[enc_start] = math.log2(float(value))
else:
encoded[enc_start] = 0.0
elif category == Category.NUM_STAGES:
# Integer: direct encoding
encoded[enc_start] = (
float(value) if isinstance(value, (int, float)) else 0.0
)
else:
# Boolean or other: 0/1
encoded[enc_start] = (
float(value) if isinstance(value, (bool, int, float)) else 0.0
)
elif enc_type == "enum":
# One-hot encoding
if hasattr(spec, "choices"):
choices = spec.choices # type: ignore[attr-defined]
try:
choice_idx = choices.index(value)
encoded[enc_start + choice_idx] = 1.0
except (ValueError, IndexError):
# Default to first choice if value not found
encoded[enc_start] = 1.0

return encoded

def get_bounds(self) -> list[tuple[float, float]]:
"""
Get bounds for each encoded dimension.

Returns:
List of (min, max) tuples for each dimension.
"""
bounds: list[tuple[float, float]] = []

for flat_idx, spec in enumerate(self.flat_spec):
category = spec.category()
enc_start, enc_end, enc_type = self.encoding_map[flat_idx]

if enc_type == "numerical":
if category in {Category.BLOCK_SIZE, Category.NUM_WARPS}:
# Power-of-2: log2 bounds
min_val = math.log2(float(spec.min_size)) # type: ignore[attr-defined]
max_val = math.log2(float(spec.max_size)) # type: ignore[attr-defined]
bounds.append((min_val, max_val))
elif category == Category.NUM_STAGES:
# Integer bounds
bounds.append(
(float(spec.min_size), float(spec.max_size)) # type: ignore[attr-defined]
)
else:
# Boolean: 0 or 1
bounds.append((0.0, 1.0))
elif enc_type == "enum":
# One-hot: each dimension is 0 or 1
num_choices = enc_end - enc_start
bounds.extend([(0.0, 1.0)] * num_choices)

return bounds
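
A rough usage sketch for the encoder; `config_gen` (a ConfigGeneration) and `flat_configs` (a list of FlatConfig samples) are assumed to come from elsewhere in the autotuner and are not defined in this diff:

import numpy as np

from helion.autotuner.config_encoding import ConfigEncoder

# `config_gen` and `flat_configs` are hypothetical inputs from the autotuner.
encoder = ConfigEncoder(config_gen)

# Build a design matrix for GP training: one encoded row per flat config.
X = np.stack([encoder.encode(fc) for fc in flat_configs])
assert X.shape == (len(flat_configs), encoder.encoded_dim)

# Bounds line up one-to-one with the encoded dimensions (log2 space for
# power-of-two parameters, 0/1 for booleans and one-hot enum slots).
bounds = encoder.get_bounds()
assert len(bounds) == encoder.encoded_dim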