Skip to content

Commit 134320f

Browse files
committed
address comments
1 parent c424815 commit 134320f

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

torchtitan/experiments/gpt_oss/train_configs/debug_model.toml

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -41,33 +41,30 @@ seq_len = 2048
4141
max_norm = 1.0 # grad norm clipping
4242
steps = 10
4343
dataset = "c4_test" # supported datasets: c4_test (2K), c4 (177M)
44-
seed = 0
45-
deterministic = true
4644

4745
[parallelism]
4846
data_parallel_replicate_degree = 1
4947
data_parallel_shard_degree = -1
5048
fsdp_reshard_after_forward = "default" # default / never / always
51-
tensor_parallel_degree = 2
49+
tensor_parallel_degree = 1
5250
enable_async_tensor_parallel = false
5351
expert_parallel_degree = 1
5452
expert_tensor_parallel_degree = 1
5553

5654
[checkpoint]
57-
enable = true
55+
enable = false
5856
folder = "checkpoint"
5957
interval = 10
6058
last_save_model_only = false
6159
export_dtype = "float32"
6260
async_mode = "disabled" # ["disabled", "async", "async_with_pinned_mem"]
63-
initial_load_path = "/data/users/jianiw/torchtitan/outputs/checkpoint-seed/step-0"
6461

6562
[activation_checkpoint]
6663
mode = "none" # ["none", "selective", "full"]
6764
selective_ac_option = '2' # 'int' = ac every positive int layer or 'op', ac based on ops policy
6865

6966
[compile]
70-
enable=false
67+
enable = false
7168
components = ["model", "loss"]
7269

7370
[quantize.linear.float8]

0 commit comments

Comments
 (0)