From b5fcccf088c60c95f43c3fb3f927045e91f569c2 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 18 Jun 2026 14:24:40 +0200 Subject: [PATCH 1/2] Return dataframes instead of indices in discrete recommendation --- baybe/recommenders/naive.py | 11 ++++--- baybe/recommenders/pure/base.py | 11 ++++--- .../pure/bayesian/botorch/core.py | 6 ++-- .../pure/bayesian/botorch/discrete.py | 29 ++++++++++--------- .../pure/nonpredictive/clustering.py | 4 +-- .../pure/nonpredictive/sampling.py | 4 +-- 6 files changed, 32 insertions(+), 33 deletions(-) diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index 5b602d881b..4887e4f0ec 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -112,8 +112,8 @@ def recommend( self.disc_recommender._botorch_acqf = disc_acqf_part - # Call the private function of the discrete recommender and get the indices - disc_rec_idx = self.disc_recommender._recommend_discrete( + # Call the private function of the discrete recommender and get the candidates + disc_rec = self.disc_recommender._recommend_discrete( subspace_discrete=searchspace.discrete, candidates_exp=candidates_exp, batch_size=batch_size, @@ -121,7 +121,7 @@ def recommend( # Get one random discrete point that will be attached when evaluating the # acquisition function in the discrete space. 
- disc_part = searchspace.discrete.comp_rep.loc[disc_rec_idx].sample(1) + disc_part = searchspace.discrete.comp_rep.loc[disc_rec.index].sample(1) disc_part_tensor = to_tensor(disc_part).unsqueeze(-2) # Setup a fresh acquisition function for the continuous recommender @@ -143,9 +143,8 @@ def recommend( ) # Glue the solutions together and return them - rec_disc_exp = searchspace.discrete.exp_rep.loc[disc_rec_idx] - rec_cont.index = rec_disc_exp.index - rec_exp = pd.concat([rec_disc_exp, rec_cont], axis=1) + rec_cont.index = disc_rec.index + rec_exp = pd.concat([disc_rec, rec_cont], axis=1) return rec_exp diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 4c61a4ea4e..0c46b3ee7b 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -144,7 +144,7 @@ def _recommend_discrete( subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, batch_size: int, - ) -> pd.Index: + ) -> pd.DataFrame: """Generate recommendations from a discrete search space. Args: @@ -158,8 +158,8 @@ def _recommend_discrete( NotImplementedError: If the function is not implemented by the child class. Returns: - The dataframe indices of the recommended points in the provided - experimental representation. + A dataframe containing the recommendations as a subset of rows from the + provided experimental representation. """ # If this method is not implemented by a child class, try to resort to hybrid # recommendation (with an empty subspace) instead. 
@@ -168,7 +168,7 @@ def _recommend_discrete( searchspace=SearchSpace(discrete=subspace_discrete), candidates_exp=candidates_exp, batch_size=batch_size, - ).index + ) except NotImplementedError as exc: raise NotImplementedError( """Hybrid recommendation could not be used as fallback when trying to @@ -298,10 +298,9 @@ def _recommend_with_discrete_parts( if is_hybrid_space: rec = self._recommend_hybrid(searchspace, candidates_exp, batch_size) else: - idxs = self._recommend_discrete( + rec = self._recommend_discrete( searchspace.discrete, candidates_exp, batch_size ) - rec = searchspace.discrete.exp_rep.loc[idxs, :] # Return recommendations return rec diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py index 7953d5ca74..d2bd8489b4 100644 --- a/baybe/recommenders/pure/bayesian/botorch/core.py +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -158,7 +158,7 @@ def _recommend_discrete( subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, batch_size: int, - ) -> pd.Index: + ) -> pd.DataFrame: """Generate recommendations from a discrete search space. Dispatches to the appropriate optimization routine depending on whether @@ -172,8 +172,8 @@ def _recommend_discrete( batch_size: The size of the recommendation batch. Returns: - The dataframe indices of the recommended points in the provided - experimental representation. + A dataframe containing the recommendations as a subset of rows from the + provided experimental representation. 
""" if subspace_discrete.n_subsets > 0: return recommend_discrete_with_subsets( diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py index a5f92d04d0..086ccc24aa 100644 --- a/baybe/recommenders/pure/bayesian/botorch/discrete.py +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -23,7 +23,7 @@ def recommend_discrete_with_subsets( subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, batch_size: int, -) -> pd.Index: +) -> pd.DataFrame: """Recommend from a discrete space with subset-generating constraints. Splits the candidate set into subsets according to subset-generating constraints, @@ -39,7 +39,8 @@ def recommend_discrete_with_subsets( batch_size: The size of the recommendation batch. Returns: - The dataframe indices of the recommended points. + A dataframe containing the recommendations as a subset of rows from the + provided experimental representation. """ import torch @@ -55,24 +56,24 @@ def recommend_discrete_with_subsets( def make_callable( mask: np.ndarray, - ) -> Callable[[], tuple[pd.Index, Tensor]]: - def optimize() -> tuple[pd.Index, Tensor]: + ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: + def optimize() -> tuple[pd.DataFrame, Tensor]: subset = candidates_exp.loc[mask] - idxs = recommend_discrete_without_subsets( + rec = recommend_discrete_without_subsets( recommender, subspace_discrete, subset, batch_size ) - comp = subspace_discrete.transform(candidates_exp.loc[idxs]) + comp = subspace_discrete.transform(rec) with torch.no_grad(): acqf_value = recommender._botorch_acqf(to_tensor(comp).unsqueeze(0)) - return idxs, acqf_value + return rec, acqf_value return optimize callables = (make_callable(m) for m in masks) - best_idxs, _ = recommender._optimize_over_subsets(callables) - return best_idxs + best_rec, _ = recommender._optimize_over_subsets(callables) + return best_rec def recommend_discrete_without_subsets( @@ -80,7 +81,7 @@ def 
recommend_discrete_without_subsets( subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, batch_size: int, -) -> pd.Index: +) -> pd.DataFrame: """Generate recommendations from a discrete search space. Args: @@ -96,8 +97,8 @@ def recommend_discrete_without_subsets( function is used with a batch size > 1. Returns: - The dataframe indices of the recommended points in the provided - experimental representation. + A dataframe containing the recommendations as a subset of rows from the + provided experimental representation. """ from baybe.acquisition.acqfs import qThompsonSampling from baybe.exceptions import ( @@ -125,7 +126,7 @@ def recommend_discrete_without_subsets( recommender._botorch_acqf, batch_size, to_tensor(candidates_comp) ) - # retrieve the index of the points from the input dataframe + # retrieve the rows from the input dataframe corresponding to the selected points # IMPROVE: The merging procedure is conceptually similar to what # `SearchSpace._match_measurement_with_searchspace_indices` does, though using # a simpler matching logic. 
When refactoring the SearchSpace class to @@ -139,4 +140,4 @@ def recommend_discrete_without_subsets( )["index"] ) - return idxs + return candidates_exp.loc[idxs] diff --git a/baybe/recommenders/pure/nonpredictive/clustering.py b/baybe/recommenders/pure/nonpredictive/clustering.py index 29ef090286..976aaad656 100644 --- a/baybe/recommenders/pure/nonpredictive/clustering.py +++ b/baybe/recommenders/pure/nonpredictive/clustering.py @@ -103,7 +103,7 @@ def _recommend_discrete( subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, batch_size: int, - ) -> pd.Index: + ) -> pd.DataFrame: # Fit scaler on entire search space from sklearn.preprocessing import StandardScaler @@ -129,7 +129,7 @@ def _recommend_discrete( selection = self._make_selection_default(model, candidates_scaled) # Convert positional indices into DataFrame indices and return result - return candidates_comp.index[selection] + return candidates_exp.iloc[selection] @override def __str__(self) -> str: diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index 0770b59d4f..7320b9bdf0 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -148,7 +148,7 @@ def _recommend_discrete( subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, batch_size: int, - ) -> pd.Index: + ) -> pd.DataFrame: # Fit scaler on entire search space from sklearn.preprocessing import StandardScaler @@ -174,7 +174,7 @@ def _recommend_discrete( initialization=self.initialization.value, random_tie_break=self.random_tie_break, ) - return candidates_comp.index[ilocs] + return candidates_exp.iloc[ilocs] @override def __str__(self) -> str: From 8a6ef3c7deb718bf0e6d92998101c2e75895c45a Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 18 Jun 2026 14:28:15 +0200 Subject: [PATCH 2/2] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index c96495ba4e..8ce13ec602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 no longer requiring users to manually group constraints according to their type - Parameter and constraint validation has been streamlined, using `validate_parameters` and `validate_constraints` as the only remaining public entry points +- `_recommend_discrete` and the `recommend_discrete_*` helper functions now return a + `pd.DataFrame` with the selected candidate rows instead of a `pd.Index` ### Deprecations - `Campaign.n_fits_done` and `Campaign.n_batches_done` attributes