Skip to content

Commit 331e6d8

Browse files
committed
address comments
1 parent c424815 commit 331e6d8

File tree

2 files changed

+4
-7
lines changed

2 files changed

+4
-7
lines changed

torchtitan/experiments/gpt_oss/model/args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class GptOssModelArgs(BaseModelArgs):
7272
n_kv_heads: int = 8
7373
sliding_window_size: int = 128
7474
attn_mask_type: str = "causal"
75-
use_flex_attn: bool = True
75+
use_flex_attn: bool = True # NOTE: gpt-oss only supports FlexAttention
7676
# yarn
7777
original_seq_len: int = 4096
7878
rope_theta: float = 150000.0

torchtitan/experiments/gpt_oss/train_configs/debug_model.toml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,33 +41,30 @@ seq_len = 2048
4141
max_norm = 1.0 # grad norm clipping
4242
steps = 10
4343
dataset = "c4_test" # supported datasets: c4_test (2K), c4 (177M)
44-
seed = 0
45-
deterministic = true
4644

4745
[parallelism]
4846
data_parallel_replicate_degree = 1
4947
data_parallel_shard_degree = -1
5048
fsdp_reshard_after_forward = "default" # default / never / always
51-
tensor_parallel_degree = 2
49+
tensor_parallel_degree = 1
5250
enable_async_tensor_parallel = false
5351
expert_parallel_degree = 1
5452
expert_tensor_parallel_degree = 1
5553

5654
[checkpoint]
57-
enable = true
55+
enable = false
5856
folder = "checkpoint"
5957
interval = 10
6058
last_save_model_only = false
6159
export_dtype = "float32"
6260
async_mode = "disabled" # ["disabled", "async", "async_with_pinned_mem"]
63-
initial_load_path = "/data/users/jianiw/torchtitan/outputs/checkpoint-seed/step-0"
6461

6562
[activation_checkpoint]
6663
mode = "none" # ["none", "selective", "full"]
6764
selective_ac_option = '2' # 'int' = ac every positive int layer or 'op', ac based on ops policy
6865

6966
[compile]
70-
enable=false
67+
enable = false
7168
components = ["model", "loss"]
7269

7370
[quantize.linear.float8]

0 commit comments

Comments
 (0)