Skip to content

Commit d6051c1

Browse files
zzzaries authored and copybara-github committed
Add dvfs time scale multiplier processing logic for roofline model.
PiperOrigin-RevId: 805986876
1 parent 3fa0faa commit d6051c1

File tree

12 files changed

+189
-39
lines changed

12 files changed

+189
-39
lines changed

frontend/app/common/constants/roofline_model_constants.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export const SCATTER_CHART_AXIS = {
1212
minX: 0.00001,
1313
maxX: 100000,
1414
minY: 0.00001,
15-
maxY: 1000000,
15+
maxY: 10000000,
1616
};
1717

1818
/** scatter base options for roofline chart */
@@ -191,6 +191,13 @@ export const DEVICE_INFO = [
191191
unit: 'Flop/byte',
192192
display: false,
193193
},
194+
{
195+
id: 'time_scale_multiplier',
196+
label: 'Time Scale Multiplier',
197+
type: 'number',
198+
unit: '',
199+
display: true,
200+
},
194201
];
195202

196203
/**

frontend/app/components/roofline_model/BUILD

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ xprof_ng_module(
1919
"@npm//@ngrx/store",
2020
"@npm//@types/google.visualization",
2121
"@npm//rxjs",
22+
"@org_xprof//frontend/app/common/angular:angular_material_icon",
23+
"@org_xprof//frontend/app/common/angular:angular_material_progress_bar",
24+
"@org_xprof//frontend/app/common/angular:angular_material_slide_toggle",
25+
"@org_xprof//frontend/app/common/angular:angular_material_tooltip",
2226
"@org_xprof//frontend/app/common/classes",
2327
"@org_xprof//frontend/app/common/constants:roofline_model_constants",
2428
"@org_xprof//frontend/app/common/interfaces",
@@ -39,4 +43,5 @@ sass_binary(
3943
src = "roofline_model.scss",
4044
# stack = True,
4145
sourcemap = False,
46+
deps = ["@org_xprof//frontend/app/styles:common"],
4247
)

frontend/app/components/roofline_model/roofline_model.ng.html

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<div class="section-container">
1+
<div class="section-container" style="position: sticky; top: 0; z-index: 1; background: white;">
22
<div class="block-content">
33
<div class="row">
44
<h2>Device Information</h2>
@@ -10,6 +10,19 @@ <h2>Device Information</h2>
1010
<div *ngFor="let info of deviceInfoArray" style="font-size:16px">
1111
<div *ngIf="info.display"><b>{{info.label}}:</b> <span> {{deviceInfoText(info)}}</span></div>
1212
</div>
13+
<!--tslint:disable-next-line:need higher angular version to deprecate ngIf-->
14+
<div class="control" *ngIf="hasValidTimeScaleMultiplier()">
15+
<div class="control-title">
16+
Apply Scaling Factor
17+
<mat-icon
18+
class="tooltip-icon"
19+
matTooltip="Show flops utilization scaled with the evaluated device capability."
20+
matTooltipPosition="above">
21+
info
22+
</mat-icon>
23+
</div>
24+
<mat-slide-toggle [checked]="applyScalingFactor" (toggleChange)="toggleScalingFactor()"></mat-slide-toggle>
25+
</div>
1326
</div>
1427
</div>
1528

@@ -30,9 +43,13 @@ <h2>Section1: Program-Level Analysis</h2>
3043
(6) "Average" shows the average step information by aggregating the operations in the complete steps only.
3144
</div>
3245
</div>
33-
<div class="block-content">
46+
<div [hidden]="loadingAnalysis" class="block-content">
3447
<program-level-analysis #programLevelAnalysis [rooflineModelData]="dataTableProgram" [viewColumns]="columnsIdxProgram" [rooflineSeriesData]="scatterDataProgram" [scatterChartOptions]="scatterChartOptionsProgram" (filterUpdated)="updateDataTableProgram($event)"></program-level-analysis>
3548
</div>
49+
<div class="progress-container" [hidden]="!loadingAnalysis">
50+
<div class="loading-message">Loading analysis...</div>
51+
<mat-progress-bar color="primary" mode="indeterminate" aria-label="refreshing program level roofline analysis"></mat-progress-bar>
52+
</div>
3653
</div>
3754

3855
<div class="section-container">
@@ -51,7 +68,11 @@ <h2>Section2: Operation-Level Analysis</h2>
5168
(7) Ops with zero FLOP (e.g., data formatting ops like reshape, IDLE, etc.) do not show up in the roofline chart.<br>
5269
</div>
5370
</div>
54-
<div class="block-content">
71+
<div [hidden]="loadingAnalysis" class="block-content">
5572
<operation-level-analysis #opLevelAnalysis [selectedOp]="selectedOpName" [rooflineModelData]="dataTableOp" [viewColumns]="columnsIdxOp" [rooflineSeriesData]="scatterDataOp" [scatterChartOptions]="scatterChartOptionsOp" (filterUpdated)="updateDataTableOp($event)"></operation-level-analysis>
5673
</div>
74+
<div class="progress-container" [hidden]="!loadingAnalysis">
75+
<div class="loading-message">Loading analysis...</div>
76+
<mat-progress-bar color="primary" mode="indeterminate" aria-label="refreshing operation level roofline analysis"></mat-progress-bar>
77+
</div>
5778
</div>

frontend/app/components/roofline_model/roofline_model.scss

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
@import 'frontend/app/styles/common';
2+
3+
:host {
4+
display: block;
5+
}
6+
17
.section-container {
28
margin: 20px 20px 0px;
39
}
@@ -37,3 +43,21 @@
3743
background-color: #ffcccb;
3844
color: red;
3945
}
46+
47+
.tooltip-icon {
48+
transform: scale(0.8);
49+
}
50+
51+
.control-title {
52+
display: flex;
53+
flex-direction: row;
54+
align-items: center;
55+
}
56+
57+
.control {
58+
display: flex;
59+
flex-direction: row;
60+
align-items: center;
61+
justify-content: left;
62+
gap: 10px;
63+
}

frontend/app/components/roofline_model/roofline_model.ts

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import {getGigaflopsReadableString, setLoadingState} from 'org_xprof/frontend/ap
88
import {DATA_SERVICE_INTERFACE_TOKEN, DataServiceV2Interface} from 'org_xprof/frontend/app/services/data_service_v2/data_service_v2_interface';
99
import {setCurrentToolStateAction} from 'org_xprof/frontend/app/store/actions';
1010
import {ReplaySubject} from 'rxjs';
11-
import {takeUntil} from 'rxjs/operators';
11+
import {take, takeUntil} from 'rxjs/operators';
1212

1313
import {OperationLevelAnalysis} from './operation_level_analysis/operation_level_analysis';
1414
import {ProgramLevelAnalysis} from './program_level_analysis/program_level_analysis';
@@ -27,6 +27,7 @@ declare interface DeviceIndicators {
2727
hasCmem: boolean;
2828
hasMegacore: boolean;
2929
isGpu: boolean;
30+
timeScaleMultiplier: number;
3031
}
3132
type ColumnIdxArr = Array<number|google.visualization.ColumnSpec>;
3233

@@ -61,6 +62,9 @@ export class RooflineModel implements OnDestroy {
6162
@ViewChild('opLevelAnalysis') opLevelAnalysis?: OperationLevelAnalysis;
6263

6364
host = '';
65+
applyScalingFactor = false;
66+
loadingAnalysis = false;
67+
6468
// Device Information section data
6569
deviceInfoArray: DeviceInfoData[] = [];
6670
// Some critical indicators
@@ -69,6 +73,7 @@ export class RooflineModel implements OnDestroy {
6973
hasCmem: false,
7074
hasMegacore: false,
7175
isGpu: false,
76+
timeScaleMultiplier: 1.0,
7277
};
7378

7479
// dataTableRaw from the raw roofline model data
@@ -178,6 +183,20 @@ export class RooflineModel implements OnDestroy {
178183
});
179184
}
180185

186+
updateAnalysis() {
187+
this.loadingAnalysis = true;
188+
const params = new Map<string, string|boolean>();
189+
if (this.applyScalingFactor) {
190+
params.set('apply_time_scale_multiplier', this.applyScalingFactor);
191+
}
192+
this.dataService.getData(this.sessionId, this.tool, this.host, params)
193+
.pipe(take(1))
194+
.subscribe((data) => {
195+
this.parseData(data as RooflineModelData[]);
196+
this.loadingAnalysis = false;
197+
});
198+
}
199+
181200
parseData(data?: RooflineModelData[]) {
182201
if (!google?.visualization) {
183202
console.log('gviz lib is not loaded yet.');
@@ -203,6 +222,11 @@ export class RooflineModel implements OnDestroy {
203222
this.processScatterDataOp();
204223
}
205224

225+
hasValidTimeScaleMultiplier(): boolean {
226+
return this.deviceIndicators.timeScaleMultiplier > 0 &&
227+
this.deviceIndicators.timeScaleMultiplier !== 1;
228+
}
229+
206230
/** parse the device information from the original dataset */
207231
parseDeviceInfoData(dataTableRaw: google.visualization.DataTable) {
208232
this.deviceIndicators = {
@@ -211,6 +235,8 @@ export class RooflineModel implements OnDestroy {
211235
hasMegacore: !!Number(dataTableRaw.getTableProperty('megacore')),
212236
isGpu: dataTableRaw.getTableProperty('device_type')
213237
.startsWith(NVIDIA_GPU_TYPE_PREFIX),
238+
timeScaleMultiplier:
239+
Number(dataTableRaw.getTableProperty('time_scale_multiplier')) || 1,
214240
};
215241

216242
this.deviceInfoArray = DEVICE_INFO.reduce(
@@ -250,13 +276,25 @@ export class RooflineModel implements OnDestroy {
250276
curInfo.context +=
251277
'(if yes, the analysis assumes Megacore where an HLO runs on both TensorCores utilizing the full chip\'s resources so that the rooflines are twice higher)';
252278
curInfo.value = this.deviceIndicators.hasMegacore ? 'Yes' : 'No';
279+
} else if (
280+
cur.id === 'time_scale_multiplier' &&
281+
!this.hasValidTimeScaleMultiplier()) {
282+
curInfo.display = false;
253283
}
254284
}
255-
const value = this.dataTableRaw!.getTableProperty(cur.id);
285+
let value = this.dataTableRaw!.getTableProperty(cur.id);
286+
value = cur.type === 'number' ? Number(value) : value;
287+
if ([
288+
'peak_flop_rate', 'peak_vmem_read_bw', 'peak_vmem_write_bw'
289+
].includes(cur.id)) {
290+
curInfo.value = this.applyScalingFactor ?
291+
(value * this.deviceIndicators.timeScaleMultiplier).toFixed(2) :
292+
value;
293+
}
256294
acc.push({
257295
// convert numeric value to numbers, as some ridge numbers will be
258296
// used as axis values in chart
259-
value: cur.type === 'number' ? Number(value) : value,
297+
value,
260298
// put cur at last to overwrite with preprocessed data
261299
...curInfo,
262300
});
@@ -1185,6 +1223,11 @@ export class RooflineModel implements OnDestroy {
11851223
}
11861224
}
11871225

1226+
toggleScalingFactor() {
1227+
this.applyScalingFactor = !this.applyScalingFactor;
1228+
this.updateAnalysis();
1229+
}
1230+
11881231
ngOnDestroy() {
11891232
setLoadingState(false, this.store);
11901233
this.destroyed.next();

frontend/app/components/roofline_model/roofline_model_module.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import {CommonModule} from '@angular/common';
22
import {NgModule} from '@angular/core';
3+
import {MatIconModule} from '@angular/material/icon';
4+
import {MatProgressBarModule} from '@angular/material/progress-bar';
5+
import {MatSlideToggleModule} from '@angular/material/slide-toggle';
6+
import {MatTooltipModule} from '@angular/material/tooltip';
37
import {TableModule} from 'org_xprof/frontend/app/components/chart/table/table_module';
48
import {CategoryFilterModule} from 'org_xprof/frontend/app/components/controls/category_filter/category_filter_module';
59
import {ExportAsCsvModule} from 'org_xprof/frontend/app/components/controls/export_as_csv/export_as_csv_module';
@@ -20,6 +24,10 @@ import {RooflineModel} from './roofline_model';
2024
StringFilterModule,
2125
ProgramLevelAnalysisModule,
2226
OperationLevelAnalysisModule,
27+
MatTooltipModule,
28+
MatSlideToggleModule,
29+
MatIconModule,
30+
MatProgressBarModule,
2331
],
2432
exports: [RooflineModel],
2533
})

frontend/app/services/data_service_v2/data_service_v2.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ export class DataServiceV2 implements DataServiceV2Interface {
103103

104104
private getHTTPParamsForDataQuery(
105105
run: string, tag: string, host: string,
106-
parameters: Map<string, string> = new Map()): HttpParams {
106+
parameters: Map<string, string|boolean> = new Map()): HttpParams {
107107
// Update searchparams with the updated run, tag and host.
108108
// In a Single Page App, we need to update the searchparams with the updated
109109
// run, tag and host on tool change for consistency.
@@ -130,7 +130,7 @@ export class DataServiceV2 implements DataServiceV2Interface {
130130

131131
getData(
132132
sessionId: string, tool: string, host: string,
133-
parameters: Map<string, string> = new Map()):
133+
parameters: Map<string, string|boolean> = new Map()):
134134
Observable<DataTable|DataTable[]|null> {
135135
const params =
136136
this.getHTTPParamsForDataQuery(sessionId, tool, host, parameters);

frontend/app/services/data_service_v2/data_service_v2_interface.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export interface DataServiceV2Interface {
2525
sessionId: string,
2626
tool: string,
2727
host?: string,
28-
parameters?: Map<string, string>,
28+
parameters?: Map<string, string|boolean>,
2929
ignoreError?: boolean,
3030
): Observable<DataTable|DataTable[]|null>;
3131

plugin/xprof/protobuf/roofline_model.proto

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ enum RecordType {
2626
}
2727

2828
// A database of RooflineModel records.
29+
// Next ID: 17
2930
message RooflineModelDatabase {
3031
// The device type.
3132
optional string device_type = 1;
@@ -63,11 +64,16 @@ message RooflineModelDatabase {
6364
// Error and warning messages for diagnosing profiling issues.
6465
optional tensorflow.profiler.Diagnostics diagnostics = 7;
6566

67+
// The weighted average time scale multiplier over all device op metrics.
68+
// It is calculated as the total op time if running on the default pstate
69+
// divided by actual total op time.
70+
optional double time_scale_multiplier = 16;
71+
6672
reserved 3, 4, 6;
6773
}
6874

6975
// There is one RooflineModelRecord for each HLO operation profiled.
70-
// Next ID: 44
76+
// Next ID: 45
7177
message RooflineModelRecord {
7278
// The record type.
7379
optional RecordType record_type = 18;
@@ -187,6 +193,10 @@ message RooflineModelRecord {
187193
// Whether the record is calculated including infeed and outfeed ops.
188194
optional bool include_infeed_outfeed = 26;
189195

196+
// Whether the device metrics (e.g. flops utilization) are calculated by
197+
// applying the time scale multiplier.
198+
optional bool apply_time_scale_multiplier = 44;
199+
190200
// Flops for the record
191201
optional uint64 flops = 36;
192202

xprof/convert/op_metrics_to_record.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ inline double GigaFlopsPerSecondPerCore(const OpMetrics& metrics) {
4040
metrics.flops(), tsl::profiler::PicoToNano(metrics.time_ps()));
4141
}
4242

43+
// Normalized flop rate if running on default pstate.
44+
// Used to compare with default device peak flop rate to get utilization.
45+
inline double GigaFlopsPerSecondPerCoreNormalizedOnDvfs(
46+
const OpMetrics& metrics) {
47+
double time_scale_multiplier = tsl::profiler::SafeDivide(
48+
metrics.normalized_time_ps() * 1.0, metrics.time_ps());
49+
time_scale_multiplier = time_scale_multiplier ? time_scale_multiplier : 1.0;
50+
return GigaFlopsPerSecondPerCore(metrics) * time_scale_multiplier;
51+
}
52+
4353
inline double GigaModelFlopsPerSecondPerCore(const OpMetrics& metrics) {
4454
// flops and time_ps are accumulated across all occurrences on all cores.
4555
// time_ps is used instead of self_time_ps because flops for an op includes
@@ -157,7 +167,8 @@ static inline double GetMemoryPeakBandwidth(const PerfEnv& perf_env,
157167

158168
template <typename Record>
159169
inline void SetRooflineMetrics(const OpMetrics& metrics, const PerfEnv perf_env,
160-
const RunEnvironment& run_env, Record* record) {
170+
const RunEnvironment& run_env, Record* record,
171+
bool apply_time_scale_factor = false) {
161172
using ::tensorflow::profiler::MemorySpace;
162173
using ::tensorflow::profiler::PerformanceInfo;
163174

@@ -203,16 +214,19 @@ inline void SetRooflineMetrics(const OpMetrics& metrics, const PerfEnv perf_env,
203214
// access as HBM access.
204215
hbm_bytes = metrics.bytes_accessed();
205216
}
217+
int64_t device_time_ps = apply_time_scale_factor
218+
? metrics.normalized_time_ps()
219+
: metrics.time_ps();
206220
record->set_hbm_bw(tsl::profiler::GibibytesPerSecond(
207221
hbm_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
208222
record->set_cmem_read_bw(tsl::profiler::GibibytesPerSecond(
209-
cmem_read_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
223+
cmem_read_bytes, tsl::profiler::PicoToNano(device_time_ps)));
210224
record->set_cmem_write_bw(tsl::profiler::GibibytesPerSecond(
211-
cmem_write_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
225+
cmem_write_bytes, tsl::profiler::PicoToNano(device_time_ps)));
212226
record->set_vmem_read_bw(tsl::profiler::GibibytesPerSecond(
213-
vmem_read_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
227+
vmem_read_bytes, tsl::profiler::PicoToNano(device_time_ps)));
214228
record->set_vmem_write_bw(tsl::profiler::GibibytesPerSecond(
215-
vmem_write_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
229+
vmem_write_bytes, tsl::profiler::PicoToNano(device_time_ps)));
216230
record->set_hbm_operational_intensity(
217231
tsl::profiler::SafeDivide(metrics.flops(), hbm_bytes));
218232
record->set_cmem_read_operational_intensity(

0 commit comments

Comments
 (0)