Skip to content

Commit d203b1a

Browse files
committed
enhance profiler config
Summary: allow users to specify the profiler schedule
1 parent eb13ba2 commit d203b1a

File tree

2 files changed

+10
-4
lines changed

2 files changed

+10
-4
lines changed

torchtitan/config/job_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ class Profiling:
3434
profile_freq: int = 10
3535
"""How often to collect profile traces, in iterations"""
3636

37+
profiler_active: int = 1
38+
"""The number of steps the profiler is active for in each profiling cycle"""
39+
40+
profiler_warmup: int = 3
41+
"""The number of warmup steps before the active step in each profiling cycle"""
42+
3743
enable_memory_snapshot: bool = False
3844
"""Whether to dump memory snapshot"""
3945

torchtitan/tools/profiling.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
from torchtitan.config import Profiling as ProfilingConfig
1515
from torchtitan.tools.logging import logger
1616

17-
# the number of warmup steps before the active step in each profiling cycle
18-
WARMUP = 3
19-
2017
# how many memory allocation/free ops to record in memory snapshots
2118
MEMORY_SNAPSHOT_MAX_ENTRIES = 100000
2219

@@ -58,7 +55,10 @@ def trace_handler(prof):
5855
if not os.path.exists(trace_dir):
5956
os.makedirs(trace_dir, exist_ok=True)
6057

61-
warmup, active = WARMUP, 1
58+
warmup, active = (
59+
profiling_config.profiler_warmup,
60+
profiling_config.profiler_active,
61+
)
6262
wait = profile_freq - (active + warmup)
6363
assert (
6464
wait >= 0

0 commit comments

Comments
 (0)