@@ -146,6 +146,50 @@ void PerformanceSummary::ProcessTokenLatencies() {
   }
 }
 
+void PerformanceSummary::ProcessGroupLatencies() {
+  if (pr.sample_latencies.empty() || pr.group_sizes.empty() ||
+      !settings.use_grouped_qsl || group_latencies_processed) {
+    return;
+  }
+  sample_count = pr.sample_latencies.size();
+  std::vector<size_t> group_initial_idx;
+  std::vector<QuerySampleLatency> group_latencies;
+  size_t acum_group_idx = 0;
+
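+  // Record the starting sample index of each group (running sum of the
+  // group sizes).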
+  for (size_t i = 0; i < pr.group_sizes.size(); i++) {
+    group_initial_idx.push_back(acum_group_idx);
+    acum_group_idx += pr.group_sizes[i];
+  }
+  size_t i = 0;
+  QuerySampleLatency accumulated_sample_latency = 0;
+
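+  // Walk the recorded samples; whenever a sample index falls on a group
+  // boundary, sum that group's per-sample latencies into one group latency.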
+  while (i < pr.sample_index.size()) {
+    auto sample_index = pr.sample_index[i];
+    auto low = std::lower_bound(group_initial_idx.begin(),
+                                group_initial_idx.end(), sample_index);
+    size_t idx = low - group_initial_idx.begin();
+    if (low != group_initial_idx.end() && *low == sample_index) {
+      group_count++;
+      QuerySampleLatency q = 0;
+      for (size_t j = 0; j < pr.group_sizes[idx]; j++) {
+        q += pr.sample_latencies[i + j];
+      }
+      group_latencies.push_back(q);
+      accumulated_sample_latency += q;
+      i += pr.group_sizes[idx];
+    } else {
+      break;
+    }
+  }
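+  // Sort group latencies so min/max and percentiles can be read off directly.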
+  std::sort(group_latencies.begin(), group_latencies.end());
+  group_latency_min = group_latencies.front();
+  group_latency_max = group_latencies.back();
+  group_latency_mean = accumulated_sample_latency / group_count;
+
+  for (auto& lp : group_latency_percentiles) {
+    lp.query_latency = group_latencies[group_count * lp.percentile];
+  }
+  group_latencies_processed = true;
+}
+
 bool PerformanceSummary::EarlyStopping(
     std::string* recommendation, int64_t queries_issued,
     std::vector<QuerySampleLatency>* sample_latencies,
@@ -380,6 +424,9 @@ bool PerformanceSummary::PerfConstraintsMet(std::string* recommendation) {
 }
 
 void PerformanceSummary::LogSummary(AsyncSummary& summary) {
+  if (settings.use_grouped_qsl) {
+    ProcessGroupLatencies();
+  }
   ProcessLatencies();
 
   summary(
@@ -480,6 +527,15 @@ void PerformanceSummary::LogSummary(AsyncSummary& summary) {
     }
   }
 
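+  // Grouped-QSL throughput metrics.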
+  if (settings.use_grouped_qsl) {
+    double gps_as_completed =
+        group_count / pr.final_query_all_samples_done_time;
+    summary("Groups per second: ", group_count / pr.max_latency);
+    summary("Completed groups per second: ",
+            DoubleToString(gps_as_completed));
+  }
+
   std::string min_duration_recommendation;
   std::string perf_constraints_recommendation;
   std::string early_stopping_recommendation;
@@ -630,6 +686,17 @@ void PerformanceSummary::LogSummary(AsyncSummary& summary) {
     }
   }
 
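+  // Grouped-QSL latency statistics.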
+  if (settings.use_grouped_qsl) {
+    summary("Min group latency (ns) : ", group_latency_min);
+    summary("Max group latency (ns) : ", group_latency_max);
+    summary("Mean group latency (ns) : ", group_latency_mean);
+    for (auto& lp : group_latency_percentiles) {
+      summary(DoubleToString(lp.percentile * 100) +
+                  " group percentile latency (ns) : ",
+              lp.query_latency);
+    }
+  }
+
   summary(
       "\n"
       "================================================\n"
@@ -640,6 +707,9 @@ void PerformanceSummary::LogSummary(AsyncSummary& summary) {
 
 void PerformanceSummary::LogDetail(AsyncDetail& detail) {
 #if USE_NEW_LOGGING_FORMAT
+  if (settings.use_grouped_qsl) {
+    ProcessGroupLatencies();
+  }
   ProcessLatencies();
 
   // General validity checking
@@ -848,8 +918,23 @@ void PerformanceSummary::LogDetail(AsyncDetail& detail) {
         break;
       }
     }
-#endif
   }
+
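+  // Emit grouped-latency statistics to the structured detail log.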
+  if (settings.use_grouped_qsl) {
+    MLPERF_LOG(detail, "result_group_min_latency_ns", group_latency_min);
+    MLPERF_LOG(detail, "result_group_max_latency_ns", group_latency_max);
+    MLPERF_LOG(detail, "result_group_mean_latency_ns", group_latency_mean);
+    for (auto& lp : group_latency_percentiles) {
+      MLPERF_LOG(detail,
+                 "result_group_" + DoubleToString(lp.percentile * 100) +
+                     "_percentile_latency_ns",
+                 lp.query_latency);
+    }
+  }
+#endif
 }
 }  // namespace loadgen
 }  // namespace mlperf