We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 44ef929 commit 1072557 (Copy full SHA for 1072557)
benchmarks/bench_load_latency.py
@@ -270,11 +270,12 @@ def print_run_settings(
270
mm_begin_cpu = mm_begin_timestamp.cpu().numpy()
271
mm_end_cpu = mm_end_timestamp.cpu().numpy()
272
273
- gpu_freq = iris.hip.get_wall_clock_rate(cur_rank) * 1e-3
+ gpu_freq = iris.hip.get_wall_clock_rate(cur_rank)
274
+
275
for destination_rank in range(num_ranks):
276
delta = mm_end_cpu[destination_rank, :] - mm_begin_cpu[destination_rank, :]
277
avg_cc = float(delta.sum() / max(1, delta.size) / max(1, niter))
- local_latency[destination_rank] = avg_cc / gpu_freq
278
+ local_latency[destination_rank] = avg_cc * 1e6 / gpu_freq
279
280
latency_matrix = mpi_allgather(local_latency.cpu())
281
0 commit comments