Skip to content

Commit c73bd63

Browse files
Add group sizes to PerformanceResults + report group latency results
1 parent 983a568 commit c73bd63

File tree

8 files changed

+132
-5
lines changed

8 files changed

+132
-5
lines changed

loadgen/bindings/python_api.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ void StartTestWithGroupedQSL(
340340
GroupedQuerySampleLibraryTrampoline* qsl_cast =
341341
reinterpret_cast<GroupedQuerySampleLibraryTrampoline*>(qsl);
342342
LogSettings default_log_settings;
343-
assert(test_settings.use_grouped_qsl);
343+
assert(test_settings.use_grouped_qsl);
344344
mlperf::StartTest(sut_cast, qsl_cast, test_settings, default_log_settings,
345345
audit_config_filename);
346346
}

loadgen/issue_query_controller.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,14 @@ void QueryMetadata::CoalesceQueries(QueryMetadata* queries, size_t first,
107107

108108
void QueryMetadata::Decoalesce() { query_to_send.resize(1); }
109109

110+
std::vector<QuerySampleIndex> QueryMetadata::GetSampleIndices(){
111+
std::vector<QuerySampleIndex> sample_indices;
112+
for (auto s: this->samples_){
113+
sample_indices.push_back(s.sample_index);
114+
}
115+
return sample_indices;
116+
}
117+
110118
/// \brief A base template that should never be used since each scenario has
111119
/// its own specialization.
112120
template <TestScenario scenario>

loadgen/issue_query_controller.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ class QueryMetadata {
110110
/// \brief Set a coalesced query back to its original state.
111111
void Decoalesce();
112112

113+
std::vector<QuerySampleIndex> GetSampleIndices();
114+
113115
public:
114116
std::vector<QuerySample> query_to_send;
115117
const std::chrono::nanoseconds scheduled_delta;

loadgen/loadgen.cc

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ std::vector<QueryMetadata> GenerateQueries(
417417
}
418418
} else if (settings.use_grouped_qsl) {
419419
g = grouped_sample_distribution(sample_rng);
420-
group_size = qsl->GroupSize(qsl->GroupOf(groups_first[g]));
420+
group_size = qsl->GroupSize(qsl->GroupOf(loaded_samples[groups_first[g]]));
421421
} else {
422422
for (auto& s : samples) {
423423
s = loaded_samples[settings.performance_issue_unique
@@ -639,6 +639,19 @@ PerformanceResult IssueQueries(SystemUnderTest* sut,
639639
queries[i].all_samples_done_time);
640640
}
641641
}
642+
std::vector<size_t> group_sizes;
643+
std::vector<QuerySampleIndex> sample_index;
644+
if (settings.use_grouped_qsl){
645+
for (size_t i = 0; i < queries.size(); i++){
646+
for (auto s: queries[i].GetSampleIndices()){
647+
sample_index.push_back(s);
648+
}
649+
}
650+
}
651+
652+
for (size_t i = 0; i < qsl->NumberOfGroups(); i++) {
653+
group_sizes.push_back(qsl->GroupSize(i));
654+
}
642655

643656
return PerformanceResult{
644657
std::move(sample_latencies),
@@ -649,7 +662,10 @@ PerformanceResult IssueQueries(SystemUnderTest* sut,
649662
final_query_issued_time,
650663
final_query_all_samples_done_time,
651664
TokenPerformanceResults{first_token_latencies, time_per_output_token_arr,
652-
tokens_per_sample}};
665+
tokens_per_sample},
666+
std::move(group_sizes),
667+
std::move(sample_index)
668+
};
653669
}
654670

655671
void LoadSamplesToRam(QuerySampleLibrary* qsl,

loadgen/results.cc

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,50 @@ void PerformanceSummary::ProcessTokenLatencies() {
146146
}
147147
}
148148

149+
void PerformanceSummary::ProcessGroupLatencies(){
150+
if (pr.sample_latencies.empty() || pr.group_sizes.empty() || (!settings.use_grouped_qsl) || (group_latencies_processed)) {
151+
return;
152+
}
153+
sample_count = pr.sample_latencies.size();
154+
std::vector<size_t> group_initial_idx;
155+
std::vector<QuerySampleLatency> group_latencies;
156+
size_t acum_group_idx = 0;
157+
158+
for(size_t i = 0; i < pr.group_sizes.size(); i++){
159+
group_initial_idx.push_back(acum_group_idx);
160+
acum_group_idx += pr.group_sizes[i];
161+
}
162+
size_t i = 0;
163+
QuerySampleLatency accumulated_sample_latency = 0;
164+
165+
while (i < pr.sample_index.size()) {
166+
auto sample_index = pr.sample_index[i];
167+
auto low = std::lower_bound (group_initial_idx.begin(), group_initial_idx.end(), sample_index);
168+
size_t idx = low - group_initial_idx.begin();
169+
if (group_initial_idx[idx] == sample_index){
170+
group_count++;
171+
QuerySampleLatency q = 0;
172+
for (size_t j = 0; j < pr.group_sizes[idx]; j++){
173+
q += pr.sample_latencies[i + j];
174+
}
175+
group_latencies.push_back(q);
176+
accumulated_sample_latency += q;
177+
i += pr.group_sizes[idx];
178+
} else {
179+
i = pr.sample_index.size();
180+
}
181+
}
182+
std::sort(group_latencies.begin(), group_latencies.end());
183+
group_latency_min = group_latencies.front();
184+
group_latency_max = group_latencies.back();
185+
group_latency_mean = accumulated_sample_latency / group_count;
186+
187+
for (auto& lp : group_latency_percentiles) {
188+
lp.query_latency = group_latencies[group_count * lp.percentile];
189+
}
190+
group_latencies_processed = true;
191+
};
192+
149193
bool PerformanceSummary::EarlyStopping(
150194
std::string* recommendation, int64_t queries_issued,
151195
std::vector<QuerySampleLatency>* sample_latencies,
@@ -380,6 +424,9 @@ bool PerformanceSummary::PerfConstraintsMet(std::string* recommendation) {
380424
}
381425

382426
void PerformanceSummary::LogSummary(AsyncSummary& summary) {
427+
if (settings.use_grouped_qsl) {
428+
ProcessGroupLatencies();
429+
}
383430
ProcessLatencies();
384431

385432
summary(
@@ -480,6 +527,15 @@ void PerformanceSummary::LogSummary(AsyncSummary& summary) {
480527
}
481528
}
482529

530+
if (settings.use_grouped_qsl) {
531+
double gps_as_completed =
532+
group_count / pr.final_query_all_samples_done_time;
533+
summary("Groups per second: ", group_count / pr.max_latency);
534+
summary("Completed groups per second: ",
535+
DoubleToString(gps_as_completed));
536+
537+
}
538+
483539
std::string min_duration_recommendation;
484540
std::string perf_constraints_recommendation;
485541
std::string early_stopping_recommendation;
@@ -630,6 +686,17 @@ void PerformanceSummary::LogSummary(AsyncSummary& summary) {
630686
}
631687
}
632688

689+
if (settings.use_grouped_qsl) {
690+
summary("Min group latency (ns) : ", group_latency_min);
691+
summary("Max group latency (ns) : ", group_latency_max);
692+
summary("Mean group latency (ns) : ", group_latency_mean);
693+
for (auto& lp : group_latency_percentiles) {
694+
summary(
695+
DoubleToString(lp.percentile * 100) + " group percentile latency (ns) : ",
696+
lp.query_latency);
697+
}
698+
}
699+
633700
summary(
634701
"\n"
635702
"================================================\n"
@@ -640,6 +707,9 @@ void PerformanceSummary::LogSummary(AsyncSummary& summary) {
640707

641708
void PerformanceSummary::LogDetail(AsyncDetail& detail) {
642709
#if USE_NEW_LOGGING_FORMAT
710+
if (settings.use_grouped_qsl) {
711+
ProcessGroupLatencies();
712+
}
643713
ProcessLatencies();
644714

645715
// General validity checking
@@ -848,8 +918,23 @@ void PerformanceSummary::LogDetail(AsyncDetail& detail) {
848918
break;
849919
}
850920
}
851-
#endif
852921
}
922+
923+
if(settings.use_grouped_qsl) {
924+
MLPERF_LOG(detail, "result_group_min_latency_ns",
925+
group_latency_min);
926+
MLPERF_LOG(detail, "result_group_max_latency_ns",
927+
group_latency_max);
928+
MLPERF_LOG(detail, "result_group_mean_latency_ns",
929+
group_latency_mean);
930+
for (auto& lp : group_latency_percentiles) {
931+
MLPERF_LOG(detail,
932+
"result_group_" + DoubleToString(lp.percentile * 100) +
933+
"_percentile_latency_ns",
934+
lp.query_latency);
935+
}
936+
}
937+
#endif
853938
}
854939
} // namespace loadgen
855940
} // namespace mlperf

loadgen/results.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ struct PerformanceResult {
4444
double final_query_issued_time; // seconds from start.
4545
double final_query_all_samples_done_time; // seconds from start.
4646
TokenPerformanceResults token_results;
47+
std::vector<size_t> group_sizes;
48+
std::vector<QuerySampleIndex> sample_index;
4749
};
4850

4951
/// \brief Wraps PerformanceResult with relevant context to change how
@@ -99,6 +101,15 @@ struct PerformanceSummary {
99101
PercentileEntry tpot_percentiles[6] = {{.50}, {.90}, {.95},
100102
{.97}, {.99}, {.999}};
101103

104+
// Set by ProcessGroupLatencies
105+
size_t group_count = 0;
106+
bool group_latencies_processed = false;
107+
QuerySampleLatency group_latency_min = 0;
108+
QuerySampleLatency group_latency_max = 0;
109+
QuerySampleLatency group_latency_mean = 0;
110+
111+
PercentileEntry group_latency_percentiles[6] = {{.50}, {.90}, {.95},
112+
{.97}, {.99}, {.999}};
102113
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
103114
// MSVC complains if there is no explicit constructor.
104115
// (target_latency_percentile above depends on construction with settings)
@@ -109,6 +120,7 @@ struct PerformanceSummary {
109120
#endif
110121
void ProcessLatencies();
111122
void ProcessTokenLatencies();
123+
void ProcessGroupLatencies();
112124

113125
bool MinDurationMet(std::string* recommendation);
114126
bool EarlyStopping(std::string* recommendation, int64_t queries_issued,

loadgen/test_settings_internal.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ void LogRequestedTestSettings(const TestSettings &s) {
342342
s.performance_sample_count_override);
343343
MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
344344
s.sample_concatenate_permutation);
345+
MLPERF_LOG(detail, "requested_server_constant_gen",
346+
s.server_constant_gen);
347+
MLPERF_LOG(detail, "requested_use_grouped_qsl", s.use_grouped_qsl);
345348
// Token latencies specific values
346349
if (s.use_token_latencies) {
347350
MLPERF_LOG(detail, "requested_use_token_latencies",
@@ -458,6 +461,7 @@ void TestSettingsInternal::LogEffectiveSettings() const {
458461
s.sample_concatenate_permutation);
459462
MLPERF_LOG(detail, "effective_server_constant_gen",
460463
s.server_constant_gen);
464+
MLPERF_LOG(detail, "effective_use_grouped_qsl", s.use_grouped_qsl);
461465
#else
462466
detail("");
463467
detail("Effective Settings:");
@@ -531,7 +535,6 @@ void TestSettingsInternal::LogSummary(AsyncSummary &summary) const {
531535
"samples_per_query value");
532536
}
533537
}
534-
535538
} // namespace loadgen
536539

537540
int TestSettings::FromConfig(const std::string &path, const std::string &model,

loadgen/test_settings_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ limitations under the License.
2222

2323
#include "logging.h"
2424
#include "test_settings.h"
25+
#include "query_sample_library.h"
2526

2627
namespace mlperf {
2728

0 commit comments

Comments
 (0)