File tree Expand file tree Collapse file tree 2 files changed +4
-7
lines changed 
torchtitan/experiments/gpt_oss Expand file tree Collapse file tree 2 files changed +4
-7
lines changed Original file line number Diff line number Diff line change @@ -72,7 +72,7 @@ class GptOssModelArgs(BaseModelArgs):
7272    n_kv_heads : int  =  8 
7373    sliding_window_size : int  =  128 
7474    attn_mask_type : str  =  "causal" 
75-     use_flex_attn : bool  =  True 
75+     use_flex_attn : bool  =  True    # NOTE: gpt-oss only supports FlexAttention 
7676    # yarn 
7777    original_seq_len : int  =  4096 
7878    rope_theta : float  =  150000.0 
Original file line number Diff line number Diff line change @@ -41,33 +41,30 @@ seq_len = 2048
4141max_norm  = 1.0   #  grad norm clipping
4242steps  = 10 
4343dataset  = " c4_test" #  supported datasets: c4_test (2K), c4 (177M)
44- seed  = 0 
45- deterministic  = true 
4644
4745[parallelism ]
4846data_parallel_replicate_degree  = 1 
4947data_parallel_shard_degree  = -1 
5048fsdp_reshard_after_forward  = " default" #  default / never / always
51- tensor_parallel_degree  = 2 
49+ tensor_parallel_degree  = 1 
5250enable_async_tensor_parallel  = false 
5351expert_parallel_degree  = 1 
5452expert_tensor_parallel_degree  = 1 
5553
5654[checkpoint ]
57- enable  = true 
55+ enable  = false 
5856folder  = " checkpoint" 
5957interval  = 10 
6058last_save_model_only  = false 
6159export_dtype  = " float32" 
6260async_mode  = " disabled" #  ["disabled", "async", "async_with_pinned_mem"]
63- initial_load_path  = " /data/users/jianiw/torchtitan/outputs/checkpoint-seed/step-0" 
6461
6562[activation_checkpoint ]
6663mode  = " none" #  ["none", "selective", "full"]
6764selective_ac_option  = ' 2' #  'int' = ac every positive int layer or 'op', ac based on ops policy
6865
6966[compile ]
70- enable = false 
67+ enable  =  false 
7168components  = [" model", " loss"]
7269
7370[quantize .linear .float8 ]
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments