From 7b6f7357d92fc9626f9055c0ac4793cf2bb7e9ba Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Tue, 2 Sep 2025 14:52:03 -0400 Subject: [PATCH 1/9] Add find_best feature for profileset early elimination Implements statistical confidence-based early elimination for profilesets to avoid simulating clearly inferior gear combinations. Uses confidence intervals on the mean to compare candidates against the current best, with configurable safety margins. Key features: - Early elimination based on statistical confidence intervals - Default verbosity 0 (quiet), always shows new best discoveries - Configurable minimum iterations and winner precision thresholds - Thread-safe implementation with mutex protection - Integration with existing profileset workflow Options: - find_best=1 to enable - find_best_metric=NAME (default: the sole profileset metric; required when several are active) - find_best_verbose=N for verbosity control - find_best_min_iterations=N (default 500) - find_best_winner_precision=N (default: half of target_error, or 2.5% when target_error is unset) - find_best_elim_safety_margin=N (default 0.001, as a fraction of the best mean) (cherry picked from commit d4cf4eea35ecbea4a39cd755ebe35abe7bc4e028) --- engine/report/json/report_json.cpp | 11 +++ engine/sim/profileset.cpp | 10 +++ engine/sim/sim.cpp | 138 ++++++++++++++++++++++++++++- engine/sim/sim.hpp | 24 +++++ 4 files changed, 181 insertions(+), 2 deletions(-) diff --git a/engine/report/json/report_json.cpp b/engine/report/json/report_json.cpp index 5a25c85d0d6..7e2124d24c8 100644 --- a/engine/report/json/report_json.cpp +++ b/engine/report/json/report_json.cpp @@ -1022,6 +1022,17 @@ void profileset_json2( const profileset::profilesets_t& profileset, const sim_t& obj[ "iterations" ] = as( result.iterations() ); + // find_best elimination metadata (MVP) + if ( sim.find_best.enabled ) + { + obj[ "find_best_best" ] = ( sim.find_best.best_name == profileset->name() ); + if ( sim.find_best.best_name == profileset->name() ) + { + obj[ "find_best_best_error" ] = sim.find_best.best_error; + } + // We cannot directly know elimination status from parent result; use mean_error 
heuristic not available here. + } + if ( profileset->results() > 1 ) { auto results2 = obj[ "additional_metrics" ].make_array(); diff --git a/engine/sim/profileset.cpp b/engine/sim/profileset.cpp index fa20cab2db4..03d95c0debb 100644 --- a/engine/sim/profileset.cpp +++ b/engine/sim/profileset.cpp @@ -110,6 +110,7 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& // Reset random seed for the profileset sims profile_sim -> seed = 0; profile_sim -> profileset_enabled = true; + profile_sim -> profileset_current_name = set.name(); profile_sim -> report_details = 0; if ( parent -> profileset_work_threads > 0 ) { @@ -137,6 +138,10 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& if ( !ret || profile_sim -> is_canceled() ) { + if ( profile_sim->find_best_eliminated ) + { + fmt::print( stderr, "\nProfileset '{}' early-stopped: {}\n", set.name(), profile_sim->find_best_reason ); + } return; } @@ -174,6 +179,11 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& parent -> event_mgr.total_events_processed += profile_sim -> event_mgr.total_events_processed; set.cleanup_options(); + + if ( profile_sim->find_best_eliminated ) + { + fmt::print( stderr, "\nProfileset '{}' early-stopped: {}\n", set.name(), profile_sim->find_best_reason ); + } } // Figure out if the option defines new actor(s) with their own scope diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index 095c93a8e17..541901301c0 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2112,9 +2112,13 @@ void sim_t::datacollection_end() void sim_t::analyze_error() { if ( thread_index != 0 ) return; - if ( target_error <= 0 ) return; if ( current_iteration < 1 ) return; + // Allow analyze_error to run either for normal target_error handling OR for find_best elimination logic + bool need_precision_handling = target_error > 0; + bool need_find_best = ( parent && parent->find_best.enabled && profileset_enabled ); + if 
( !need_precision_handling && !need_find_best ) return; + work_queue -> lock(); // First iterations of each thread are considered statistically insignificant and not @@ -2210,7 +2214,7 @@ void sim_t::analyze_error() current_error *= 100; - if ( current_error > 0 ) + if ( need_precision_handling && current_error > 0 ) { if ( current_error < target_error ) { @@ -2236,6 +2240,97 @@ void sim_t::analyze_error() } } + // Find-best elimination logic (MVP): only for profileset child sims + if ( need_find_best && current_mean > 0 ) + { + auto &s = parent->find_best; + // Convert relative percent error to absolute half-width + double abs_error = ( current_error / 100.0 ) * current_mean; // half-width (since current_error is relative pct) + + // Guard: ensure enough iterations + if ( work_queue->progress().current_iterations >= s.min_iterations ) + { + AUTO_LOCK( s.mtx ); + // Establish winner_precision threshold default if not set: 0.5*target_error if target_error>0 else 2.5% + if ( s.winner_precision <= 0 ) + { + // Default: need at most half the target_error to start eliminating (faster unlock); fallback 2.5% + if ( parent->target_error > 0 ) s.winner_precision = parent->target_error * 0.5; else s.winner_precision = 2.5; // percent + } + + // If no best yet, promote self + if ( s.best_name.empty() ) + { + s.best_name = profileset_current_name; + s.best_mean = current_mean; + s.best_error = abs_error; + s.best_iterations = work_queue->progress().current_iterations; + s.best_precision_satisfied = ( current_error > 0 && current_error <= s.winner_precision ); + fmt::print( stderr, "\nfind_best: initial best '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={}\n", s.best_name, s.best_mean, s.best_error, current_error, s.best_iterations ); + } + else + { + if ( profileset_current_name == s.best_name ) + { + // Update best uncertainty window if shrunk + if ( abs_error < s.best_error ) + { + s.best_error = abs_error; + s.best_mean = current_mean; // update mean to latest (could 
drift slightly) + s.best_iterations = work_queue->progress().current_iterations; + } + if ( current_error > 0 && current_error <= s.winner_precision ) s.best_precision_satisfied = true; + } + else + { + // Candidate: test elimination vs current best (assumes higher is better) + // Eliminate immediately once candidate interval (with safety) cannot reach best lower bound + double safety = s.elim_safety_margin_frac > 0 ? s.elim_safety_margin_frac * s.best_mean : 0.0; + double candidate_upper = current_mean + abs_error + safety; + double best_lower = s.best_mean - s.best_error; + if ( s.verbose >= 2 ) + { + double candidate_lower = current_mean - abs_error; + double best_upper = s.best_mean + s.best_error; + fmt::print( stderr, + "\nfind_best[v2]: compare cand='{}' [{:.2f},{:.2f}] (+safety -> upper {:.2f}) vs best='{}' [{:.2f},{:.2f}] | safety={:.4f} | cand_upper={:.2f} best_lower={:.2f} | elim_if (cand_upper < best_lower)={} | promote_if (cand_lower>{:.2f})\n", + profileset_current_name, + candidate_lower, current_mean + abs_error, candidate_upper, + s.best_name, + s.best_mean - s.best_error, best_upper, + safety, + candidate_upper, best_lower, + candidate_upper < best_lower ? "YES" : "no", + s.best_mean + s.best_error + ( s.elim_safety_margin_frac > 0 ? s.elim_safety_margin_frac * s.best_mean : 0.0 ) ); + } + if ( candidate_upper < best_lower ) + { + find_best_eliminated = true; + find_best_reason = fmt::format( "find_best: eliminated vs '{}' (cand_upper={:.2f} < best_lower={:.2f})", s.best_name, candidate_upper, best_lower ); + if ( s.verbose >= 1 ) + { + fmt::print( stderr, "\n{}\n", find_best_reason ); + } + work_queue->unlock(); // unlock before interrupt + interrupt(); + return; // early exit + } + // Promotion check if candidate clearly better + double safety2 = s.elim_safety_margin_frac > 0 ? 
s.elim_safety_margin_frac * s.best_mean : 0.0; + if ( ( current_mean - abs_error ) > ( s.best_mean + s.best_error + safety2 ) ) + { + s.best_name = profileset_current_name; + s.best_mean = current_mean; + s.best_error = abs_error; + s.best_iterations = work_queue->progress().current_iterations; + s.best_precision_satisfied = ( current_error > 0 && current_error <= s.winner_precision ); + fmt::print( stderr, "\nfind_best: new best '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={} (prev best lower={:.2f})\n", s.best_name, s.best_mean, s.best_error, current_error, s.best_iterations, (s.best_mean - s.best_error) ); + } + } + } + } + } + work_queue -> unlock(); } @@ -3774,6 +3869,13 @@ void sim_t::create_options() add_option( opt_int( "min_report_iteration_data", min_report_iteration_data ) ); add_option( opt_bool( "average_range", average_range ) ); add_option( opt_bool( "average_gauss", average_gauss ) ); + // Find-Best (profileset early elimination) options (MVP) + add_option( opt_bool( "find_best", find_best.enabled ) ); + add_option( opt_string( "find_best_metric", find_best_metric_str ) ); + add_option( opt_int( "find_best_min_iterations", find_best.min_iterations ) ); + add_option( opt_float( "find_best_winner_precision", find_best.winner_precision ) ); + add_option( opt_float( "find_best_elim_safety_margin", find_best.elim_safety_margin_frac ) ); + add_option( opt_int( "find_best_verbose", find_best.verbose ) ); // Misc add_option( opt_list( "party", party_encoding ) ); add_option( opt_func( "active", parse_active ) ); @@ -4238,6 +4340,38 @@ void sim_t::setup( sim_control_t* c ) if ( player_list.empty() && spell_query == nullptr && !display_bonus_ids && display_build <= 1 ) throw sc_runtime_error( "Nothing to sim!" 
); + // Finalize find_best configuration on parent sim only + if ( !parent && find_best.enabled ) + { + // Determine metric + if ( !find_best_metric_str.empty() ) + { + auto m = util::parse_scale_metric( find_best_metric_str ); + if ( m == SCALE_METRIC_NONE ) + { + error( "find_best: unknown metric '{}' disabling feature", find_best_metric_str ); + find_best.enabled = false; + } + else + { + find_best.metric = m; + } + } + else + { + // If only one profileset metric specified, use it; otherwise require explicit option + if ( profileset_metric.size() == 1 ) + { + find_best.metric = profileset_metric.front(); + } + else + { + error( "find_best: multiple profileset metrics active, specify find_best_metric=... disabling feature" ); + find_best.enabled = false; + } + } + } + range::for_each( player_list, []( player_t* p ) { p->validate_sim_options(); } ); if ( parent || profileset_enabled ) diff --git a/engine/sim/sim.hpp b/engine/sim/sim.hpp index d449957600d..da1c7ff701a 100644 --- a/engine/sim/sim.hpp +++ b/engine/sim/sim.hpp @@ -96,6 +96,30 @@ struct sim_t : private sc_thread_t double current_mean; int analyze_error_interval, analyze_number; + // Find-Best (profileset early elimination) state (shared on parent sim) + struct find_best_state_t { + mutex_t mtx; + bool enabled = false; + // Metric currently only supports primary profileset metric (DPS-family) + scale_metric_e metric = SCALE_METRIC_DPS; + std::string best_name; // profileset name + double best_mean = 0.0; // mean of current best + double best_error = 0.0; // absolute half-width (same units as mean) + unsigned best_iterations = 0; // iterations when last updated + bool best_precision_satisfied = false; // winner precision threshold reached + // Configuration copied from options (stored here for child sims to read) + int min_iterations = 500; // minimum iterations before evaluating elimination + double winner_precision = -1.0; // percent relative error threshold (same unit as target_error/current_error) + 
double elim_safety_margin_frac = 0.001; // fractional safety margin (fraction of best mean) + int verbose = 0; // 0 silent, 1 events, 2 verbose + } find_best; + + // Per-sim (child) flags used for reporting elimination + bool find_best_eliminated = false; // set true if this profileset was early stopped + std::string find_best_reason; // human readable reason + std::string profileset_current_name; // name of the profileset for this sim (child only) + std::string find_best_metric_str; // raw option string parsed during setup (parent only) + sim_control_t* control; sim_t* parent; player_t* target; From 1390b05cf4948c90044cb6462af0385a6bd8fc4e Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Tue, 2 Sep 2025 22:51:14 -0400 Subject: [PATCH 2/9] clean up spurious comments (cherry picked from commit a5113b7f59c0221046ffa0b38d278bc90cf32dc1) --- engine/report/json/report_json.cpp | 2 -- engine/sim/sim.cpp | 15 +++++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/engine/report/json/report_json.cpp b/engine/report/json/report_json.cpp index 7e2124d24c8..48ebf1b5e8b 100644 --- a/engine/report/json/report_json.cpp +++ b/engine/report/json/report_json.cpp @@ -1022,7 +1022,6 @@ void profileset_json2( const profileset::profilesets_t& profileset, const sim_t& obj[ "iterations" ] = as( result.iterations() ); - // find_best elimination metadata (MVP) if ( sim.find_best.enabled ) { obj[ "find_best_best" ] = ( sim.find_best.best_name == profileset->name() ); @@ -1030,7 +1029,6 @@ void profileset_json2( const profileset::profilesets_t& profileset, const sim_t& { obj[ "find_best_best_error" ] = sim.find_best.best_error; } - // We cannot directly know elimination status from parent result; use mean_error heuristic not available here. 
} if ( profileset->results() > 1 ) diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index 541901301c0..1d153456a6d 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2114,7 +2114,7 @@ void sim_t::analyze_error() if ( thread_index != 0 ) return; if ( current_iteration < 1 ) return; - // Allow analyze_error to run either for normal target_error handling OR for find_best elimination logic + // We want analyze_error to run either for normal target_error handling OR for find_best elimination logic bool need_precision_handling = target_error > 0; bool need_find_best = ( parent && parent->find_best.enabled && profileset_enabled ); if ( !need_precision_handling && !need_find_best ) return; @@ -2240,14 +2240,13 @@ void sim_t::analyze_error() } } - // Find-best elimination logic (MVP): only for profileset child sims if ( need_find_best && current_mean > 0 ) { auto &s = parent->find_best; // Convert relative percent error to absolute half-width - double abs_error = ( current_error / 100.0 ) * current_mean; // half-width (since current_error is relative pct) + double abs_error = ( current_error / 100.0 ) * current_mean; - // Guard: ensure enough iterations + // ensure enough iterations if ( work_queue->progress().current_iterations >= s.min_iterations ) { AUTO_LOCK( s.mtx ); @@ -2276,14 +2275,14 @@ void sim_t::analyze_error() if ( abs_error < s.best_error ) { s.best_error = abs_error; - s.best_mean = current_mean; // update mean to latest (could drift slightly) + s.best_mean = current_mean; s.best_iterations = work_queue->progress().current_iterations; } if ( current_error > 0 && current_error <= s.winner_precision ) s.best_precision_satisfied = true; } else { - // Candidate: test elimination vs current best (assumes higher is better) + // Candidate: test elimination vs current best // Eliminate immediately once candidate interval (with safety) cannot reach best lower bound double safety = s.elim_safety_margin_frac > 0 ? 
s.elim_safety_margin_frac * s.best_mean : 0.0; double candidate_upper = current_mean + abs_error + safety; @@ -2311,9 +2310,9 @@ void sim_t::analyze_error() { fmt::print( stderr, "\n{}\n", find_best_reason ); } - work_queue->unlock(); // unlock before interrupt + work_queue->unlock(); interrupt(); - return; // early exit + return; } // Promotion check if candidate clearly better double safety2 = s.elim_safety_margin_frac > 0 ? s.elim_safety_margin_frac * s.best_mean : 0.0; From 630e94625cd7ed5791a0c8d3c0b0e5102ea81cf0 Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Tue, 2 Sep 2025 22:55:38 -0400 Subject: [PATCH 3/9] Changed default (cherry picked from commit 010eead3109828e57c9d88b5f6ba1338b8ce3f9d) --- engine/sim/sim.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index 1d153456a6d..75af337afc8 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2250,11 +2250,11 @@ void sim_t::analyze_error() if ( work_queue->progress().current_iterations >= s.min_iterations ) { AUTO_LOCK( s.mtx ); - // Establish winner_precision threshold default if not set: 0.5*target_error if target_error>0 else 2.5% + // Establish winner_precision threshold default if not set: 0.5*target_error if target_error>0 else 0.05% if ( s.winner_precision <= 0 ) { - // Default: need at most half the target_error to start eliminating (faster unlock); fallback 2.5% - if ( parent->target_error > 0 ) s.winner_precision = parent->target_error * 0.5; else s.winner_precision = 2.5; // percent + // Default: need at most half the target_error to start eliminating (faster unlock); fallback 0.05% + if ( parent->target_error > 0 ) s.winner_precision = parent->target_error / 2.0; else s.winner_precision = 0.05; // percent } // If no best yet, promote self From d6e6ad4f5ac441f610ad429ac5dc00159a0cf852 Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Wed, 10 Sep 2025 15:27:20 -0400 Subject: [PATCH 4/9] good changes for patch 
(cherry picked from commit d1e3e536466fdb0d86de5d4256194d146a423346) --- engine/report/json/report_json.cpp | 49 +++++++++++++++++++++++++++--- engine/report/report_html_sim.cpp | 37 ++++++++++++++++++++++ engine/sim/profileset.cpp | 24 ++++++++++++--- engine/sim/profileset.hpp | 42 +++++++++++++++++++++++++ engine/sim/sim.cpp | 7 +++-- 5 files changed, 148 insertions(+), 11 deletions(-) diff --git a/engine/report/json/report_json.cpp b/engine/report/json/report_json.cpp index 48ebf1b5e8b..a084c341d2b 100644 --- a/engine/report/json/report_json.cpp +++ b/engine/report/json/report_json.cpp @@ -1022,12 +1022,21 @@ void profileset_json2( const profileset::profilesets_t& profileset, const sim_t& obj[ "iterations" ] = as( result.iterations() ); - if ( sim.find_best.enabled ) + if ( sim.profileset_cull.enabled ) { - obj[ "find_best_best" ] = ( sim.find_best.best_name == profileset->name() ); - if ( sim.find_best.best_name == profileset->name() ) + obj[ "profileset_cull_best" ] = ( sim.profileset_cull.best_name == profileset->name() ); + if ( sim.profileset_cull.best_name == profileset->name() ) { - obj[ "find_best_best_error" ] = sim.find_best.best_error; + obj[ "profileset_cull_best_error" ] = sim.profileset_cull.best_error; + } + if ( profileset->culled() ) + { + obj[ "culled" ] = true; + obj[ "culled_reason" ] = profileset->culled_reason(); + obj[ "culled_iterations" ] = profileset->culled_iterations(); + obj[ "culled_mean" ] = profileset->culled_mean(); + obj[ "culled_error" ] = profileset->culled_error(); + obj[ "culled_error_type" ] = profileset->culled_error_type_cstr(); } } @@ -1077,6 +1086,25 @@ void profileset_json3( const profileset::profilesets_t& profilesets, const sim_t obj[ "name" ] = profileset->name(); auto results_obj = obj[ "metrics" ].make_array(); + // Profileset culling metadata at profileset level + if ( sim.profileset_cull.enabled ) + { + obj[ "profileset_cull_best" ] = ( sim.profileset_cull.best_name == profileset->name() ); + if ( 
sim.profileset_cull.best_name == profileset->name() ) + { + obj[ "profileset_cull_best_error" ] = sim.profileset_cull.best_error; + } + if ( profileset->culled() ) + { + obj[ "culled" ] = true; + obj[ "culled_reason" ] = profileset->culled_reason(); + obj[ "culled_iterations" ] = profileset->culled_iterations(); + obj[ "culled_mean" ] = profileset->culled_mean(); + obj[ "culled_error" ] = profileset->culled_error(); + obj[ "culled_error_type" ] = profileset->culled_error_type_cstr(); + } + } + for ( size_t midx = 0; midx < sim.profileset_metric.size(); ++midx ) { const auto& result = profileset->result( sim.profileset_metric[ midx ] ); @@ -1240,6 +1268,19 @@ void to_json( const ::report::json::report_configuration_t& report_configuration options_root[ "profileset_metric" ] = util::scale_metric_type_abbrev( sim.profileset_metric.front() ); options_root[ "profileset_multiactor_base_name" ] = sim.profileset_multiactor_base_name; + if ( sim.profileset_cull.enabled ) + { + auto cull = options_root[ "profileset_cull" ]; + cull[ "enabled" ] = true; + cull[ "method" ] = sim.profileset_cull.method_name(); + cull[ "min_iterations" ] = sim.profileset_cull.min_iterations; + if ( sim.profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ) + cull[ "alpha" ] = sim.profileset_cull.alpha; + else + cull[ "margin" ] = sim.profileset_cull.margin; + cull[ "metric" ] = util::scale_metric_type_abbrev( sim.profileset_cull.metric ); + } + to_json( options_root[ "dbc" ], *sim.dbc ); if ( sim.scaling->calculate_scale_factors ) diff --git a/engine/report/report_html_sim.cpp b/engine/report/report_html_sim.cpp index b1c0bd7520d..e7be5df268b 100644 --- a/engine/report/report_html_sim.cpp +++ b/engine/report/report_html_sim.cpp @@ -1158,6 +1158,43 @@ void print_profilesets( std::ostream& out, const profileset::profilesets_t& prof out << "

Profile sets

\n"; out << "
\n"; + // Profileset culling indicator and culled list + if ( sim.profileset_cull.enabled ) + { + out << "
"; + out << "Profileset culling enabled: method=" + << ( sim.profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ? "t_test" : "ci" ) + << ", min_iters=" << sim.profileset_cull.min_iterations; + if ( sim.profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ) + out << ", alpha=" << sim.profileset_cull.alpha; + else + out << ", margin=" << sim.profileset_cull.margin; + out << "
\n"; + + // List culled profiles if any + bool any_culled = false; + for ( const auto& pset : profilesets.profilesets() ) + { + if ( pset->culled() ) { any_culled = true; break; } + } + if ( any_culled ) + { + out << "
Culled profiles:
    "; + for ( const auto& pset : profilesets.profilesets() ) + { + if ( !pset->culled() ) continue; + out << "
  • " << util::encode_html( pset->name() ) + << ": " << util::encode_html( pset->culled_reason() ) + << " (iters=" << pset->culled_iterations() + << ", mean=" << util::round( pset->culled_mean(), 2 ) + << ", error=" << util::round( pset->culled_error(), 4 ) + << ", type=" << pset->culled_error_type_cstr() << ")"; + out << "
  • "; + } + out << "
\n"; + } + } + print_profilesets_chart( out, sim ); out << "
"; diff --git a/engine/sim/profileset.cpp b/engine/sim/profileset.cpp index 03d95c0debb..4cda23ec954 100644 --- a/engine/sim/profileset.cpp +++ b/engine/sim/profileset.cpp @@ -138,9 +138,9 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& if ( !ret || profile_sim -> is_canceled() ) { - if ( profile_sim->find_best_eliminated ) + if ( profile_sim->culled ) { - fmt::print( stderr, "\nProfileset '{}' early-stopped: {}\n", set.name(), profile_sim->find_best_reason ); + fmt::print( stderr, "\nProfileset '{}' culled: {}\n", set.name(), profile_sim->culled_reason ); } return; } @@ -161,6 +161,22 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& .stddev( data.std_dev ) .mean_stddev( data.mean_std_dev ) .iterations( progress.current_iterations ); + + // If culled, persist snapshot information for JSON/HTML reporting on primary metric only + if ( profile_sim->culled && metric == parent->profileset_metric.front() ) + { + // error to record depends on method: CI mode wants half-width, t-test wants SE + auto etype = ( parent->profileset_cull.prefers_standard_error() ) ? + profileset::profile_set_t::cull_error_type_e::STANDARD_ERROR : + profileset::profile_set_t::cull_error_type_e::CI_HALF_WIDTH; + double err_val = parent->profileset_cull.select_error(data.mean_std_dev * parent->confidence_estimator, data.mean_std_dev / sqrt(parent->iterations) ); + set.set_culled( true, + profile_sim->culled_reason, + progress.current_iterations, + data.mean, + err_val, + etype ); + } } ); if ( ! 
parent -> profileset_output_data.empty() ) @@ -180,9 +196,9 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& set.cleanup_options(); - if ( profile_sim->find_best_eliminated ) + if ( profile_sim->culled ) { - fmt::print( stderr, "\nProfileset '{}' early-stopped: {}\n", set.name(), profile_sim->find_best_reason ); + fmt::print( stderr, "\nProfileset '{}' culled: {}\n", set.name(), profile_sim->culled_reason ); } } diff --git a/engine/sim/profileset.hpp b/engine/sim/profileset.hpp index 70230f0124d..5b33081dc44 100644 --- a/engine/sim/profileset.hpp +++ b/engine/sim/profileset.hpp @@ -384,6 +384,19 @@ class profile_set_t std::vector m_results; std::unique_ptr m_output_data; + // Culled metadata (set when profileset culling terminates a run early) + bool m_culled = false; + std::string m_culled_reason; + uint64_t m_culled_iterations = 0; + double m_culled_mean = 0.0; + double m_culled_error = 0.0; + // CI half-width or standard error depending on cull method + +public: + enum class cull_error_type_e { NONE = 0, CI_HALF_WIDTH, STANDARD_ERROR }; +private: + cull_error_type_e m_culled_error_type = cull_error_type_e::NONE; + public: profile_set_t( std::string name, sim_control_t* opts, bool has_output ); @@ -415,6 +428,35 @@ class profile_set_t return *m_output_data; } + + // Culled metadata accessors + bool culled() const { return m_culled; } + const std::string& culled_reason() const { return m_culled_reason; } + uint64_t culled_iterations() const { return m_culled_iterations; } + double culled_mean() const { return m_culled_mean; } + double culled_error() const { return m_culled_error; } + profile_set_t::cull_error_type_e culled_error_type() const { return m_culled_error_type; } + const char* culled_error_type_cstr() const { + switch ( m_culled_error_type ) { + case profile_set_t::cull_error_type_e::CI_HALF_WIDTH: return "ci_half_width"; + case profile_set_t::cull_error_type_e::STANDARD_ERROR: return "standard_error"; + default: return 
"none"; + } + } + void set_culled( bool culled, + std::string reason, + uint64_t iterations, + double mean, + double error, + profile_set_t::cull_error_type_e etype ) + { + m_culled = culled; + m_culled_reason = std::move( reason ); + m_culled_iterations = iterations; + m_culled_mean = mean; + m_culled_error = error; + m_culled_error_type = etype; + } }; class worker_t diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index 75af337afc8..08d578fb34e 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2114,10 +2114,10 @@ void sim_t::analyze_error() if ( thread_index != 0 ) return; if ( current_iteration < 1 ) return; - // We want analyze_error to run either for normal target_error handling OR for find_best elimination logic + // We want analyze_error to run either for normal target_error handling OR for profileset culling elimination logic bool need_precision_handling = target_error > 0; - bool need_find_best = ( parent && parent->find_best.enabled && profileset_enabled ); - if ( !need_precision_handling && !need_find_best ) return; + bool need_culling = ( parent && parent->profileset_cull.enabled && profileset_enabled ); + if ( !need_precision_handling && !need_culling ) return; work_queue -> lock(); @@ -2141,6 +2141,7 @@ void sim_t::analyze_error() double mean_total=0; int mean_count=0; + double current_standard_error = 0.0; current_error = 0; From 635efcb48401f171fee40207310dcf0be6f72b7f Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Wed, 10 Sep 2025 15:32:20 -0400 Subject: [PATCH 5/9] rest of culling changes (cherry picked from commit 12472f597fdf91cb43d18d8dbec1c53fd9b3eaa8) --- engine/sim/sim.cpp | 332 +++++++++++++++++++++++++++++++++++---------- engine/sim/sim.hpp | 41 ++++-- 2 files changed, 290 insertions(+), 83 deletions(-) diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index 08d578fb34e..50bb22404b3 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2159,7 +2159,9 @@ void sim_t::analyze_error() current_mean = 
cd.target_metric.mean(); if ( current_mean != 0 ) { - current_error = sim_t::distribution_mean_error( *this, cd.target_metric ) / current_mean; + double mean_error = sim_t::distribution_mean_error( *this, cd.target_metric ); + current_error = mean_error / current_mean; + current_standard_error = cd.target_metric.std_dev / sqrt(cd.target_metric.count()); } } } @@ -2241,90 +2243,95 @@ void sim_t::analyze_error() } } - if ( need_find_best && current_mean > 0 ) + if ( need_culling && current_mean > 0 ) { - auto &s = parent->find_best; + auto &s = parent->profileset_cull; // Convert relative percent error to absolute half-width double abs_error = ( current_error / 100.0 ) * current_mean; + // Standard error for current candidate (already computed above) + double std_error = current_standard_error; + + // Snapshot progress once to avoid repeated work_queue calls + sim_progress_t current_progress; + if ( strict_work_queue ) { + // In strict_work_queue mode, read iterations from this child sim's own counters + current_progress.current_iterations = current_iteration; + current_progress.total_iterations = iterations; + } else { + current_progress = work_queue->progress(); + } // ensure enough iterations - if ( work_queue->progress().current_iterations >= s.min_iterations ) + if ( current_progress.current_iterations >= s.min_iterations ) { AUTO_LOCK( s.mtx ); - // Establish winner_precision threshold default if not set: 0.5*target_error if target_error>0 else 0.05% - if ( s.winner_precision <= 0 ) - { - // Default: need at most half the target_error to start eliminating (faster unlock); fallback 0.05% - if ( parent->target_error > 0 ) s.winner_precision = parent->target_error / 2.0; else s.winner_precision = 0.05; // percent - } - - // If no best yet, promote self - if ( s.best_name.empty() ) + // If no best yet, only promote if baseline hasn't been seeded + // (i.e., fallback to old behavior only if baseline seeding fails) + if ( s.best_name.empty() && !s.baseline_seeded ) { 
s.best_name = profileset_current_name; s.best_mean = current_mean; - s.best_error = abs_error; - s.best_iterations = work_queue->progress().current_iterations; - s.best_precision_satisfied = ( current_error > 0 && current_error <= s.winner_precision ); - fmt::print( stderr, "\nfind_best: initial best '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={}\n", s.best_name, s.best_mean, s.best_error, current_error, s.best_iterations ); + s.best_error = s.select_error( abs_error, std_error ); + s.best_iterations = current_progress.current_iterations; + + fmt::print( stderr, "\nprofileset_cull: initial best '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={}\n", s.best_name, s.best_mean, s.best_error, current_error, s.best_iterations ); + } + else if ( s.best_name.empty() && s.baseline_seeded ) + { + // Baseline should have been seeded, but best_name is empty - this shouldn't happen + if ( s.verbose >= 1 ) + { + fmt::print( stderr, "\nprofileset_cull: warning - baseline was seeded but best_name is empty\n" ); + } } else { if ( profileset_current_name == s.best_name ) { // Update best uncertainty window if shrunk - if ( abs_error < s.best_error ) + if ( s.select_error( abs_error, std_error ) < s.best_error ) { - s.best_error = abs_error; + s.best_error = s.select_error( abs_error, std_error ); s.best_mean = current_mean; - s.best_iterations = work_queue->progress().current_iterations; + s.best_iterations = current_progress.current_iterations; } - if ( current_error > 0 && current_error <= s.winner_precision ) s.best_precision_satisfied = true; } else { - // Candidate: test elimination vs current best - // Eliminate immediately once candidate interval (with safety) cannot reach best lower bound - double safety = s.elim_safety_margin_frac > 0 ? 
s.elim_safety_margin_frac * s.best_mean : 0.0; - double candidate_upper = current_mean + abs_error + safety; - double best_lower = s.best_mean - s.best_error; - if ( s.verbose >= 2 ) - { - double candidate_lower = current_mean - abs_error; - double best_upper = s.best_mean + s.best_error; - fmt::print( stderr, - "\nfind_best[v2]: compare cand='{}' [{:.2f},{:.2f}] (+safety -> upper {:.2f}) vs best='{}' [{:.2f},{:.2f}] | safety={:.4f} | cand_upper={:.2f} best_lower={:.2f} | elim_if (cand_upper < best_lower)={} | promote_if (cand_lower>{:.2f})\n", - profileset_current_name, - candidate_lower, current_mean + abs_error, candidate_upper, - s.best_name, - s.best_mean - s.best_error, best_upper, - safety, - candidate_upper, best_lower, - candidate_upper < best_lower ? "YES" : "no", - s.best_mean + s.best_error + ( s.elim_safety_margin_frac > 0 ? s.elim_safety_margin_frac * s.best_mean : 0.0 ) ); - } - if ( candidate_upper < best_lower ) + // Candidate: test elimination vs current best using current method + double error_for_method = s.select_error( abs_error, current_standard_error ); + if ( s.should_cull( current_mean, error_for_method, current_progress.current_iterations, s.best_mean, s.best_error ) ) { - find_best_eliminated = true; - find_best_reason = fmt::format( "find_best: eliminated vs '{}' (cand_upper={:.2f} < best_lower={:.2f})", s.best_name, candidate_upper, best_lower ); + culled = true; + culled_reason = fmt::format( "profileset_cull: eliminated vs '{}' using {}", s.best_name, s.method_name() ); if ( s.verbose >= 1 ) { - fmt::print( stderr, "\n{}\n", find_best_reason ); + fmt::print( stderr, "\n{}\n", culled_reason ); } - work_queue->unlock(); + // Without this unlock the program hangs + work_queue -> unlock(); interrupt(); return; } // Promotion check if candidate clearly better - double safety2 = s.elim_safety_margin_frac > 0 ? 
s.elim_safety_margin_frac * s.best_mean : 0.0; - if ( ( current_mean - abs_error ) > ( s.best_mean + s.best_error + safety2 ) ) + bool promote = s.should_promote( current_mean, + s.select_error( abs_error, std_error ), + current_progress.current_iterations, + s.best_mean, + s.best_error ); + + if ( promote ) { - s.best_name = profileset_current_name; - s.best_mean = current_mean; - s.best_error = abs_error; - s.best_iterations = work_queue->progress().current_iterations; - s.best_precision_satisfied = ( current_error > 0 && current_error <= s.winner_precision ); - fmt::print( stderr, "\nfind_best: new best '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={} (prev best lower={:.2f})\n", s.best_name, s.best_mean, s.best_error, current_error, s.best_iterations, (s.best_mean - s.best_error) ); + s.best_name = profileset_current_name; + s.best_mean = current_mean; + s.best_error = s.select_error( abs_error, std_error ); + s.best_iterations = current_progress.current_iterations; + if ( s.verbose >= 1 ) + { + fmt::print( stderr, + "\nprofileset_cull: new best '{}' mean={:.2f} error={:.4f} iters={}\n", + s.best_name, s.best_mean, s.best_error, s.best_iterations ); + } } } } @@ -3135,6 +3142,172 @@ void sim_t::analyze_iteration_data() range::sort( high_iteration_data, iteration_data_cmp ); } +// --- profileset_cull_state_t helpers (concrete methods, no strategy classes) --- + +double sim_t::profileset_cull_state_t::z_critical_one_sided() const +{ + if ( alpha <= 0.001 ) return 3.09; // z_0.001 + if ( alpha <= 0.005 ) return 2.58; // z_0.005 + if ( alpha <= 0.01 ) return 2.33; // z_0.01 + if ( alpha <= 0.05 ) return 1.64; // z_0.05 + return 1.28; // z_0.10 +} + +bool sim_t::profileset_cull_state_t::ttest_is_significant( double candidate_mean, double candidate_se, + int candidate_iterations, double best_mean_val, + double best_se, ttest_direction dir ) const +{ + // Require enough iterations for normal approximation + if ( candidate_iterations < 30 || best_iterations < 30 ) + 
{ + if ( verbose >= 2 ) + { + fmt::print( stderr, "profileset_cull: TTEST {}=NO | insufficient iterations (cand={}, best={}, need >= 30)\n", + ( dir == ttest_direction::BETTER ? "better" : "worse" ), candidate_iterations, best_iterations ); + } + return false; + } + + const double pooled_se = std::sqrt( candidate_se * candidate_se + best_se * best_se ); + if ( pooled_se <= 0 ) + { + if ( verbose >= 2 ) + { + fmt::print( stderr, "profileset_cull: TTEST {}=NO | pooled_se <= 0 ({:.6f})\n", + ( dir == ttest_direction::BETTER ? "better" : "worse" ), pooled_se ); + } + return false; + } + const double t_stat = ( candidate_mean - best_mean_val ) / pooled_se; + const double tcrit = z_critical_one_sided(); + if ( dir == ttest_direction::WORSE ) + return t_stat < -tcrit; // one-sided lower tail + else + return t_stat > tcrit; // one-sided upper tail +} + +bool sim_t::profileset_cull_state_t::should_cull( double candidate_mean, double candidate_error_ci_or_se, + int candidate_iterations, double best_mean_val, + double best_error_val ) const +{ + if ( candidate_iterations < min_iterations ) + { + if ( verbose >= 2 ) + { + fmt::print( stderr, + "profileset_cull: should_cull=NO (candidate_iterations={}, min_iterations={})\n", + candidate_iterations, min_iterations ); + } + return false; + } + + if ( method == CI_OVERLAP ) + { + double safety = margin > 0 ? margin * best_mean_val : 0.0; + double candidate_upper = candidate_mean + candidate_error_ci_or_se; + double best_lower = best_mean_val - best_error_val; + bool result = candidate_upper + safety < best_lower; + + if ( verbose >= 2 ) + { + fmt::print( stderr, + "profileset_cull: should_cull={} | method=CI_OVERLAP | cand_mean={:.2f} cand_err={:.4f} cand_upper={:.2f} | best_mean={:.2f} best_err={:.4f} best_lower={:.2f} | safety={:.4f} | test: {:.2f} < {:.2f}\n", + result ? 
"YES" : "NO", candidate_mean, candidate_error_ci_or_se, candidate_upper, best_mean_val, best_error_val, + best_lower, safety, candidate_upper + safety, best_lower ); + if ( result ) + { + double candidate_lower = candidate_mean - candidate_error_ci_or_se; + double best_upper = best_mean_val + best_error_val; + fmt::print( stderr, + "\nprofileset_cull: compare CI cand='{}' [{:.2f},{:.2f}] vs best='{}' [{:.2f},{:.2f}] | culling=YES\n", + best_name.empty() ? "candidate" : "candidate", + candidate_lower, candidate_upper, + best_name.empty() ? "best" : best_name, + best_lower, best_upper ); + } + } + + return result; + } + else // T_TEST + { + // candidate_error_ci_or_se is standard error in T_TEST mode + bool result = ttest_is_significant( candidate_mean, candidate_error_ci_or_se, candidate_iterations, best_mean_val, best_error_val, + ttest_direction::WORSE ); + if ( verbose >= 2 && result ) + { + fmt::print( stderr, + "\nprofileset_cull: compare TTEST cand='{}' mean={:.2f} se={:.4f} vs best='{}' mean={:.2f} se={:.4f} | culling=YES\n", + best_name.empty() ? "candidate" : "candidate", + candidate_mean, candidate_error_ci_or_se, + best_name.empty() ? 
"best" : best_name, + best_mean_val, best_error_val ); + } + return result; + } +} + +bool sim_t::profileset_cull_state_t::should_promote( double candidate_mean, double candidate_error_ci_or_se, + int candidate_iterations, double best_mean_val, + double best_error_val ) const +{ + if ( method == CI_OVERLAP ) + { + return ( candidate_mean - candidate_error_ci_or_se ) > ( best_mean_val + best_error_val ); + } + else // T_TEST + { + return ttest_is_significant( candidate_mean, candidate_error_ci_or_se, candidate_iterations, best_mean_val, best_error_val, + ttest_direction::BETTER ); + } +} + +// sim_t::seed_profileset_cull_from_baseline ============================ + +void sim_t::seed_profileset_cull_from_baseline() +{ + // Only seed if culling is enabled and we haven't seeded already + if ( !profileset_cull.enabled || profileset_cull.baseline_seeded ) + return; + + // Only the parent (baseline) sim should seed + if ( parent || profileset_enabled ) + return; + + // Get the baseline player for the chosen metric + if ( player_no_pet_list.empty() || profileset_report_player_index >= player_no_pet_list.size() ) + return; + + const auto baseline_player = player_no_pet_list[ profileset_report_player_index ]; + + // Get baseline statistics for the culling metric + auto baseline_data = profileset::metric_data( baseline_player, profileset_cull.metric ); + + // Calculate absolute error (half-width) from relative error + double relative_error = baseline_data.mean_std_dev / baseline_data.mean; + double absolute_error = relative_error * baseline_data.mean; + + // Get iterations from current simulation progress + auto current_progress = progress(); + + // Seed the culling state with baseline values + { + std::lock_guard lock( profileset_cull.mtx ); + + profileset_cull.best_name = "Baseline"; + profileset_cull.best_mean = baseline_data.mean; + profileset_cull.best_error = profileset_cull.select_error( absolute_error, baseline_data.mean_std_dev ); + profileset_cull.best_iterations = 
current_progress.current_iterations; + profileset_cull.baseline_seeded = true; + + if ( profileset_cull.verbose >= 1 ) + { + fmt::print( stderr, "\nprofileset_cull: baseline seeded '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={}\n", + profileset_cull.best_name, profileset_cull.best_mean, profileset_cull.best_error, relative_error * 100.0, + profileset_cull.best_iterations); + } + } +} // sim_t::iterate =========================================================== @@ -3869,13 +4042,30 @@ void sim_t::create_options() add_option( opt_int( "min_report_iteration_data", min_report_iteration_data ) ); add_option( opt_bool( "average_range", average_range ) ); add_option( opt_bool( "average_gauss", average_gauss ) ); - // Find-Best (profileset early elimination) options (MVP) - add_option( opt_bool( "find_best", find_best.enabled ) ); - add_option( opt_string( "find_best_metric", find_best_metric_str ) ); - add_option( opt_int( "find_best_min_iterations", find_best.min_iterations ) ); - add_option( opt_float( "find_best_winner_precision", find_best.winner_precision ) ); - add_option( opt_float( "find_best_elim_safety_margin", find_best.elim_safety_margin_frac ) ); - add_option( opt_int( "find_best_verbose", find_best.verbose ) ); + // Profileset culling (early elimination) options + add_option( opt_bool( "profileset_cull", profileset_cull.enabled ) ); + add_option( opt_func( "profileset_cull_method", []( sim_t* sim, util::string_view, util::string_view value ) { + if ( util::str_compare_ci( value, "ci" ) ) + sim->profileset_cull.method = sim_t::profileset_cull_state_t::CI_OVERLAP; + else if ( util::str_compare_ci( value, "t_test" ) ) + sim->profileset_cull.method = sim_t::profileset_cull_state_t::T_TEST; + else { + sim->error( "Invalid profileset_cull_method '{}', valid options: ci, t_test", value ); + return false; + } + return true; + } ) ); + add_option( opt_string( "profileset_cull_metric", profileset_cull.cull_metric_str ) ); + add_option( opt_func( 
"profileset_cull_min_iterations", []( sim_t* sim, util::string_view, util::string_view value ) { + unsigned val = std::stoul( std::string( value ) ); + if ( val < 30 && sim->profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ) { + sim->error( "profileset_cull_min_iterations={} is too low for reliable t-test (need >= 30 for normal approximation)", val ); + } + sim->profileset_cull.min_iterations = val; + return true; + } ) ); + add_option( opt_float( "profileset_cull_alpha", profileset_cull.alpha ) ); + add_option( opt_int( "profileset_cull_verbose", profileset_cull.verbose ) ); // Misc add_option( opt_list( "party", party_encoding ) ); add_option( opt_func( "active", parse_active ) ); @@ -4340,21 +4530,21 @@ void sim_t::setup( sim_control_t* c ) if ( player_list.empty() && spell_query == nullptr && !display_bonus_ids && display_build <= 1 ) throw sc_runtime_error( "Nothing to sim!" ); - // Finalize find_best configuration on parent sim only - if ( !parent && find_best.enabled ) + // Finalize profileset_cull configuration on parent sim only + if ( !parent && profileset_cull.enabled ) { // Determine metric - if ( !find_best_metric_str.empty() ) + if ( !profileset_cull.cull_metric_str.empty() ) { - auto m = util::parse_scale_metric( find_best_metric_str ); + auto m = util::parse_scale_metric( profileset_cull.cull_metric_str ); if ( m == SCALE_METRIC_NONE ) { - error( "find_best: unknown metric '{}' disabling feature", find_best_metric_str ); - find_best.enabled = false; + error( "profileset_cull: unknown metric '{}' disabling feature", profileset_cull.cull_metric_str ); + profileset_cull.enabled = false; } else { - find_best.metric = m; + profileset_cull.metric = m; } } else @@ -4362,12 +4552,12 @@ void sim_t::setup( sim_control_t* c ) // If only one profileset metric specified, use it; otherwise require explicit option if ( profileset_metric.size() == 1 ) { - find_best.metric = profileset_metric.front(); + profileset_cull.metric = 
profileset_metric.front(); } else { - error( "find_best: multiple profileset metrics active, specify find_best_metric=... disabling feature" ); - find_best.enabled = false; + error( "profileset_cull: multiple profileset metrics active, specify profileset_cull_metric=... disabling feature" ); + profileset_cull.enabled = false; } } } diff --git a/engine/sim/sim.hpp b/engine/sim/sim.hpp index da1c7ff701a..0acb363f80a 100644 --- a/engine/sim/sim.hpp +++ b/engine/sim/sim.hpp @@ -17,6 +17,7 @@ #include "util/util.hpp" #include "util/vector_with_callback.hpp" +#include #include #include @@ -96,8 +97,8 @@ struct sim_t : private sc_thread_t double current_mean; int analyze_error_interval, analyze_number; - // Find-Best (profileset early elimination) state (shared on parent sim) - struct find_best_state_t { + // Profileset culling (early elimination) state (shared on parent sim) + struct profileset_cull_state_t { mutex_t mtx; bool enabled = false; // Metric currently only supports primary profileset metric (DPS-family) @@ -105,20 +106,35 @@ struct sim_t : private sc_thread_t std::string best_name; // profileset name double best_mean = 0.0; // mean of current best double best_error = 0.0; // absolute half-width (same units as mean) - unsigned best_iterations = 0; // iterations when last updated - bool best_precision_satisfied = false; // winner precision threshold reached - // Configuration copied from options (stored here for child sims to read) - int min_iterations = 500; // minimum iterations before evaluating elimination - double winner_precision = -1.0; // percent relative error threshold (same unit as target_error/current_error) - double elim_safety_margin_frac = 0.001; // fractional safety margin (fraction of best mean) + int best_iterations = 0; // iterations when last updated + bool baseline_seeded = false; // whether baseline has seeded the initial best + enum method_e { CI_OVERLAP, T_TEST } method = T_TEST; + int min_iterations = 100; // minimum iterations before 
evaluating elimination + double margin = 0.001; // fractional safety margin for CI mode (fraction of best mean) + double alpha = 0.01; // alpha level for t-test mode (one-sided) int verbose = 0; // 0 silent, 1 events, 2 verbose - } find_best; + std::string cull_metric_str; // raw option string for metric + + // Encapsulated decision helpers (no virtual dispatch yet) + double z_critical_one_sided() const; + enum class ttest_direction { BETTER, WORSE }; + bool ttest_is_significant(double candidate_mean, double candidate_se, int candidate_iterations, + double best_mean_val, double best_se, ttest_direction dir) const; + bool should_cull(double candidate_mean, double candidate_error_ci_or_se, int candidate_iterations, + double best_mean_val, double best_error_val) const; + bool should_promote(double candidate_mean, double candidate_error_ci_or_se, int candidate_iterations, + double best_mean_val, double best_error_val) const; + const char* method_name() const { return ( method == T_TEST ) ? "t_test" : "ci"; } + bool prefers_standard_error() const { return method == T_TEST; } + double select_error(double candidate_ci_half_width, double candidate_standard_error) const { + return prefers_standard_error() ? 
candidate_standard_error : candidate_ci_half_width; + } + } profileset_cull; // Per-sim (child) flags used for reporting elimination - bool find_best_eliminated = false; // set true if this profileset was early stopped - std::string find_best_reason; // human readable reason + bool culled = false; // set true if this profileset was culled + std::string culled_reason; // human readable reason std::string profileset_current_name; // name of the profileset for this sim (child only) - std::string find_best_metric_str; // raw option string parsed during setup (parent only) sim_control_t* control; sim_t* parent; @@ -740,6 +756,7 @@ struct sim_t : private sc_thread_t bool execute(); void analyze_error(); void analyze_iteration_data(); + void seed_profileset_cull_from_baseline(); void print_options(); void add_option( std::unique_ptr opt ); void create_options(); From 162ff7d2a235e68273c51dd44378115be4d92445 Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Wed, 10 Sep 2025 15:33:18 -0400 Subject: [PATCH 6/9] add baseline (cherry picked from commit 7de1c5a7b1e26a4ea6012274f4fef4e4409f3d0a) --- engine/sc_main.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/engine/sc_main.cpp b/engine/sc_main.cpp index 2e746602a84..dce3337d599 100644 --- a/engine/sc_main.cpp +++ b/engine/sc_main.cpp @@ -362,6 +362,10 @@ int sim_t::main( const std::vector& args ) plot->analyze(); reforge_plot->analyze(); + if ( profileset_cull.enabled ) { + seed_profileset_cull_from_baseline(); + } + if ( canceled == 0 && !profilesets->iterate( this ) ) canceled = true; else From f5d88d0ca3a38851eb352f516240fc58b507aa07 Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Fri, 12 Sep 2025 20:20:09 -0400 Subject: [PATCH 7/9] switch order of interupt / unlock (cherry picked from commit c61fcd4e3d93c97fedee0582af60290554a76f7a) --- engine/sim/sim.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index 50bb22404b3..d98fa6ac810 100644 
--- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2308,9 +2308,8 @@ void sim_t::analyze_error() { fmt::print( stderr, "\n{}\n", culled_reason ); } - // Without this unlock the program hangs - work_queue -> unlock(); interrupt(); + work_queue -> unlock(); return; } // Promotion check if candidate clearly better From 2e4dd144e84efd21f826a292bff83770a608e8d9 Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Thu, 25 Sep 2025 18:34:55 -0400 Subject: [PATCH 8/9] addressed PR comments. Moved HTML down, and collapsed, cleaned up JSON, and simplified iteration counts. Cleaned up method parsing (cherry picked from commit 84d2402bb345925c1d6c46bbfda93d95090b3547) --- engine/report/json/report_json.cpp | 32 ++++++++++------ engine/report/report_html_sim.cpp | 17 ++++++--- engine/sim/profileset.cpp | 4 -- engine/sim/sim.cpp | 61 ++++++++++-------------------- engine/sim/sim.hpp | 24 +++++++++++- 5 files changed, 75 insertions(+), 63 deletions(-) diff --git a/engine/report/json/report_json.cpp b/engine/report/json/report_json.cpp index a084c341d2b..47787909a4f 100644 --- a/engine/report/json/report_json.cpp +++ b/engine/report/json/report_json.cpp @@ -1024,11 +1024,6 @@ void profileset_json2( const profileset::profilesets_t& profileset, const sim_t& if ( sim.profileset_cull.enabled ) { - obj[ "profileset_cull_best" ] = ( sim.profileset_cull.best_name == profileset->name() ); - if ( sim.profileset_cull.best_name == profileset->name() ) - { - obj[ "profileset_cull_best_error" ] = sim.profileset_cull.best_error; - } if ( profileset->culled() ) { obj[ "culled" ] = true; @@ -1089,11 +1084,6 @@ void profileset_json3( const profileset::profilesets_t& profilesets, const sim_t // Profileset culling metadata at profileset level if ( sim.profileset_cull.enabled ) { - obj[ "profileset_cull_best" ] = ( sim.profileset_cull.best_name == profileset->name() ); - if ( sim.profileset_cull.best_name == profileset->name() ) - { - obj[ "profileset_cull_best_error" ] = 
sim.profileset_cull.best_error; - } if ( profileset->culled() ) { obj[ "culled" ] = true; @@ -1274,7 +1264,7 @@ void to_json( const ::report::json::report_configuration_t& report_configuration cull[ "enabled" ] = true; cull[ "method" ] = sim.profileset_cull.method_name(); cull[ "min_iterations" ] = sim.profileset_cull.min_iterations; - if ( sim.profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ) + if ( sim.profileset_cull.uses_alpha() ) cull[ "alpha" ] = sim.profileset_cull.alpha; else cull[ "margin" ] = sim.profileset_cull.margin; @@ -1358,6 +1348,26 @@ void to_json( const ::report::json::report_configuration_t& report_configuration add_non_zero( stats_root, "total_heal", sim.total_heal ); add_non_zero( stats_root, "total_absorb", sim.total_absorb ); + if ( sim.profileset_cull.enabled ) + { + const std::string best_name = sim.profileset_cull.best_name.empty() + ? sim.profileset_multiactor_base_name + : sim.profileset_cull.best_name; + + auto cull_stats = stats_root[ "profileset_cull" ]; + cull_stats[ "method" ] = sim.profileset_cull.method_name(); + cull_stats[ "metric" ] = util::scale_metric_type_abbrev( sim.profileset_cull.metric ); + + add_non_zero( cull_stats, "best_name", best_name ); + + if ( sim.profileset_cull.baseline_seeded ) + { + cull_stats[ "best_error" ] = sim.profileset_cull.best_error; + cull_stats[ "best_iterations" ] = sim.profileset_cull.best_iterations; + cull_stats[ "best_mean" ] = sim.profileset_cull.best_mean; + } + } + if ( sim.report_details != 0 ) { // Targets diff --git a/engine/report/report_html_sim.cpp b/engine/report/report_html_sim.cpp index e7be5df268b..de7079b7685 100644 --- a/engine/report/report_html_sim.cpp +++ b/engine/report/report_html_sim.cpp @@ -1158,14 +1158,19 @@ void print_profilesets( std::ostream& out, const profileset::profilesets_t& prof out << "

Profile sets

\n"; out << "
\n"; - // Profileset culling indicator and culled list + print_profilesets_chart( out, sim ); + + // Profileset culling indicator and culled list (moved below charts) if ( sim.profileset_cull.enabled ) { + out << "

Profileset culling details

\n"; + out << "
\n"; + out << "
"; out << "Profileset culling enabled: method=" - << ( sim.profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ? "t_test" : "ci" ) - << ", min_iters=" << sim.profileset_cull.min_iterations; - if ( sim.profileset_cull.method == sim_t::profileset_cull_state_t::T_TEST ) + << sim.profileset_cull.method_name() + << ", min_iters=" << sim.profileset_cull.min_iterations; + if ( sim.profileset_cull.uses_alpha() ) out << ", alpha=" << sim.profileset_cull.alpha; else out << ", margin=" << sim.profileset_cull.margin; @@ -1193,9 +1198,9 @@ void print_profilesets( std::ostream& out, const profileset::profilesets_t& prof } out << "
\n"; } - } - print_profilesets_chart( out, sim ); + out << "
\n"; // end toggle-content + } out << "
"; out << ""; diff --git a/engine/sim/profileset.cpp b/engine/sim/profileset.cpp index 4cda23ec954..33a4576a27b 100644 --- a/engine/sim/profileset.cpp +++ b/engine/sim/profileset.cpp @@ -138,10 +138,6 @@ void simulate_profileset( sim_t* parent, profileset::profile_set_t& set, sim_t*& if ( !ret || profile_sim -> is_canceled() ) { - if ( profile_sim->culled ) - { - fmt::print( stderr, "\nProfileset '{}' culled: {}\n", set.name(), profile_sim->culled_reason ); - } return; } diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index d98fa6ac810..ce8218d4f6c 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2248,21 +2248,9 @@ void sim_t::analyze_error() auto &s = parent->profileset_cull; // Convert relative percent error to absolute half-width double abs_error = ( current_error / 100.0 ) * current_mean; - // Standard error for current candidate (already computed above) - double std_error = current_standard_error; - - // Snapshot progress once to avoid repeated work_queue calls - sim_progress_t current_progress; - if ( strict_work_queue ) { - // In strict_work_queue mode, read iterations from this child sim's own counters - current_progress.current_iterations = current_iteration; - current_progress.total_iterations = iterations; - } else { - current_progress = work_queue->progress(); - } - + // ensure enough iterations - if ( current_progress.current_iterations >= s.min_iterations ) + if ( n_iterations >= s.min_iterations ) { AUTO_LOCK( s.mtx ); // If no best yet, only promote if baseline hasn't been seeded @@ -2271,39 +2259,33 @@ void sim_t::analyze_error() { s.best_name = profileset_current_name; s.best_mean = current_mean; - s.best_error = s.select_error( abs_error, std_error ); - s.best_iterations = current_progress.current_iterations; + s.best_error = s.select_error( abs_error, current_standard_error ); + s.best_iterations = n_iterations; fmt::print( stderr, "\nprofileset_cull: initial best '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={}\n", 
s.best_name, s.best_mean, s.best_error, current_error, s.best_iterations ); } - else if ( s.best_name.empty() && s.baseline_seeded ) - { - // Baseline should have been seeded, but best_name is empty - this shouldn't happen - if ( s.verbose >= 1 ) - { - fmt::print( stderr, "\nprofileset_cull: warning - baseline was seeded but best_name is empty\n" ); - } - } else { if ( profileset_current_name == s.best_name ) { // Update best uncertainty window if shrunk - if ( s.select_error( abs_error, std_error ) < s.best_error ) + if ( s.select_error( abs_error, current_standard_error ) < s.best_error ) { s.best_error = s.select_error( abs_error, std_error ); s.best_mean = current_mean; - s.best_iterations = current_progress.current_iterations; + s.best_iterations = n_iterations; } } else { // Candidate: test elimination vs current best using current method double error_for_method = s.select_error( abs_error, current_standard_error ); - if ( s.should_cull( current_mean, error_for_method, current_progress.current_iterations, s.best_mean, s.best_error ) ) + if ( s.should_cull( current_mean, error_for_method, n_iterations, s.best_mean, s.best_error ) ) { culled = true; - culled_reason = fmt::format( "profileset_cull: eliminated vs '{}' using {}", s.best_name, s.method_name() ); + // Use a friendly label for baseline if best_name is empty + const std::string& best_label = s.best_name.empty() ? 
parent->profileset_multiactor_base_name : s.best_name; + culled_reason = fmt::format( "profileset_cull: eliminated vs '{}' using {}", best_label, s.method_name() ); if ( s.verbose >= 1 ) { fmt::print( stderr, "\n{}\n", culled_reason ); @@ -2314,8 +2296,8 @@ void sim_t::analyze_error() } // Promotion check if candidate clearly better bool promote = s.should_promote( current_mean, - s.select_error( abs_error, std_error ), - current_progress.current_iterations, + s.select_error( abs_error, current_standard_error ), + n_iterations, s.best_mean, s.best_error ); @@ -2323,8 +2305,8 @@ void sim_t::analyze_error() { s.best_name = profileset_current_name; s.best_mean = current_mean; - s.best_error = s.select_error( abs_error, std_error ); - s.best_iterations = current_progress.current_iterations; + s.best_error = s.select_error( abs_error, current_standard_error ); + s.best_iterations = n_iterations; if ( s.verbose >= 1 ) { fmt::print( stderr, @@ -3293,7 +3275,7 @@ void sim_t::seed_profileset_cull_from_baseline() { std::lock_guard lock( profileset_cull.mtx ); - profileset_cull.best_name = "Baseline"; + profileset_cull.best_name = ""; profileset_cull.best_mean = baseline_data.mean; profileset_cull.best_error = profileset_cull.select_error( absolute_error, baseline_data.mean_std_dev ); profileset_cull.best_iterations = current_progress.current_iterations; @@ -3302,7 +3284,7 @@ void sim_t::seed_profileset_cull_from_baseline() if ( profileset_cull.verbose >= 1 ) { fmt::print( stderr, "\nprofileset_cull: baseline seeded '{}' mean={:.2f} err={:.4f} ({:.3f}% rel) iters={}\n", - profileset_cull.best_name, profileset_cull.best_mean, profileset_cull.best_error, relative_error * 100.0, + profileset_multiactor_base_name, profileset_cull.best_mean, profileset_cull.best_error, relative_error * 100.0, profileset_cull.best_iterations); } } @@ -4044,14 +4026,13 @@ void sim_t::create_options() // Profileset culling (early elimination) options add_option( opt_bool( "profileset_cull", 
profileset_cull.enabled ) ); add_option( opt_func( "profileset_cull_method", []( sim_t* sim, util::string_view, util::string_view value ) { - if ( util::str_compare_ci( value, "ci" ) ) - sim->profileset_cull.method = sim_t::profileset_cull_state_t::CI_OVERLAP; - else if ( util::str_compare_ci( value, "t_test" ) ) - sim->profileset_cull.method = sim_t::profileset_cull_state_t::T_TEST; - else { - sim->error( "Invalid profileset_cull_method '{}', valid options: ci, t_test", value ); + auto m = sim_t::profileset_cull_state_t::parse_method( value ); + if ( m >= sim_t::profileset_cull_state_t::METHOD_MAX ) + { + sim->error( "Invalid profileset_cull_method '{}' , valid options: ci, t_test", value ); return false; } + sim->profileset_cull.method = m; return true; } ) ); add_option( opt_string( "profileset_cull_metric", profileset_cull.cull_metric_str ) ); diff --git a/engine/sim/sim.hpp b/engine/sim/sim.hpp index 0acb363f80a..608d6191e83 100644 --- a/engine/sim/sim.hpp +++ b/engine/sim/sim.hpp @@ -108,7 +108,7 @@ struct sim_t : private sc_thread_t double best_error = 0.0; // absolute half-width (same units as mean) int best_iterations = 0; // iterations when last updated bool baseline_seeded = false; // whether baseline has seeded the initial best - enum method_e { CI_OVERLAP, T_TEST } method = T_TEST; + enum method_e { CI_OVERLAP, T_TEST, METHOD_MAX } method = T_TEST; int min_iterations = 100; // minimum iterations before evaluating elimination double margin = 0.001; // fractional safety margin for CI mode (fraction of best mean) double alpha = 0.01; // alpha level for t-test mode (one-sided) @@ -124,8 +124,28 @@ struct sim_t : private sc_thread_t double best_mean_val, double best_error_val) const; bool should_promote(double candidate_mean, double candidate_error_ci_or_se, int candidate_iterations, double best_mean_val, double best_error_val) const; - const char* method_name() const { return ( method == T_TEST ) ? 
"t_test" : "ci"; } + + static const char* method_to_string( method_e m ) { + switch ( m ) { + case T_TEST: + return "t_test"; + case CI_OVERLAP: + return "ci"; + default: + return "unknown"; + } + } + static method_e parse_method( util::string_view v ) { + for ( int i = static_cast( CI_OVERLAP ); i < static_cast( METHOD_MAX ); ++i ) { + auto e = static_cast( i ); + if ( util::str_compare_ci( v, method_to_string( e ) ) ) + return e; + } + return METHOD_MAX; // invalid + } + const char* method_name() const { return method_to_string( method ); } bool prefers_standard_error() const { return method == T_TEST; } + bool uses_alpha() const { return method == T_TEST; } double select_error(double candidate_ci_half_width, double candidate_standard_error) const { return prefers_standard_error() ? candidate_standard_error : candidate_ci_half_width; } From bd878302bdacc2898cd706d3e5dd663ed6856b5d Mon Sep 17 00:00:00 2001 From: Jordan Christensen Date: Thu, 25 Sep 2025 18:56:44 -0400 Subject: [PATCH 9/9] missed one std_error (cherry picked from commit c3ac34bb71cb1d08cf631de46318859d451eab58) --- engine/sim/sim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/sim/sim.cpp b/engine/sim/sim.cpp index ce8218d4f6c..676f30ddb30 100644 --- a/engine/sim/sim.cpp +++ b/engine/sim/sim.cpp @@ -2271,7 +2271,7 @@ void sim_t::analyze_error() // Update best uncertainty window if shrunk if ( s.select_error( abs_error, current_standard_error ) < s.best_error ) { - s.best_error = s.select_error( abs_error, std_error ); + s.best_error = s.select_error( abs_error, current_standard_error ); s.best_mean = current_mean; s.best_iterations = n_iterations; }