-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml
More file actions
166 lines (154 loc) · 3.82 KB
/
config.yaml
File metadata and controls
166 lines (154 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Model settings: the base model to start from and the training data type.
# Each option is declared as a mapping of default / type / help.
model:
  base_model_path:
    default: 'stabilityai/stable-diffusion-3.5-medium'
    type: str
    help: Base model for transfer learning
  data_type:
    default: 'bf16'
    type: str
    help: Data type for training
# Project settings: where run outputs are written.
project_config:
  output_dir:
    default: '/workspace/output-e2e'
    type: str
    help: Output directory
# Dataset settings: location, name, validation/test difficulty split and
# dataloader worker count.
dataset:
  dataset_path:
    default: '/workspace/Trans10k/'
    type: str
    help: Path to dataset
  dataset_name:
    default: 'trans10k'
    type: str
    help: Name of dataset
  # Difficulty options accept one of 'easy', 'hard' or 'mix' (see help text).
  val_difficulty:
    default: 'mix'
    type: str
    help: Difficulty level for validation dataset ['easy', 'hard', 'mix']
  test_difficulty:
    default: 'hard'
    type: str
    help: Difficulty level for test dataset ['easy', 'hard', 'mix']
  dataloader_num_workers:
    default: 4
    type: int
    help: Number of workers for dataloader
# Prompt-embedding settings: path to the prompt embeddings.
prompt_embeds:
  prompt_embeds_path:
    default: './precomputed_prompt_embeddings/'
    type: str
    help: Path to prompt embeds
# Training-loop settings: resume point, run length, batch size, checkpoint
# cadence and in-training validation cadence.
training:
  resume_from_checkpoint:
    default: 'checkpoint-5966'
    type: str
    help: Path to checkpoint to resume from
  epochs:
    default: 43
    type: int
    help: Number of epochs to train
  max_train_steps:
    default: null
    type: int
    help: Maximum number of training steps
  train_batch_size:
    default: 4
    type: int
    help: Batch size for training
  gradient_accumulation_steps:
    default: 2  # Change to 4 for 2 GPUs
    type: int
    help: Gradient accumulation steps
  save_checkpoint_steps:
    default: 200
    type: int
    help: Save checkpoint every n steps
  # Per its help text, this takes precedence over save_checkpoint_steps.
  save_checkpoint_epochs:
    default: 1
    type: int
    help: Save checkpoint every n epochs (overrides save_checkpoint_steps)
  checkpoint_offset_warmup:
    default: true
    type: bool
    help: Offset the checkpoint step by the number of warmup steps
  val_steps:
    default: 157
    type: int
    help: Validate every n steps
  num_vals_train:
    default: null
    type: int
    help: Number of validations per val_steps
# Validation/test settings: which checkpoint to load and how many
# validations to run on the test dataset.
validation:
  load_checkpoint:
    default: 'checkpoint-5966'
    type: str
    help: Path to checkpoint to load for validation
  num_vals_test:
    default: null
    type: int
    help: Number of validations for test dataset
# Learning-rate settings: base rate, floor ratio, scheduler type, cycle
# count and warmup length.
learning_rate_scheduler:
  lr:
    # Written with a decimal point on purpose: YAML 1.1 loaders such as
    # PyYAML resolve a bare `2e-6` as a string rather than a float.
    default: 2.0e-6  # other values noted by the author: 5e-6, 2.5e-5
    type: float
    help: Learning rate for training
  min_lr_ratio:
    default: 0.1
    type: float
    help: Ratio of the minimum lr to the initial lr
  lr_scheduler:
    default: 'cosine'
    type: str
    help: Learning rate scheduler
  lr_cycles:
    default: 3.5
    type: float
    help: Number of cycles for learning rate scheduler
  lr_warmup_steps:
    default: null
    type: int
    help: Number of warmup steps for learning rate scheduler
  # Per its help text, this takes precedence over lr_warmup_steps.
  lr_warmup_epochs:
    default: 1
    type: int
    help: Number of warmup epochs for learning rate scheduler (overrides lr_warmup_steps)
# Adam optimizer hyperparameters.
optimizer:
  adam_beta1:
    default: 0.9
    type: float
    help: Beta1 for Adam optimizer
  adam_beta2:
    default: 0.999
    type: float
    help: Beta2 for Adam optimizer
  adam_weight_decay:
    default: 0.0
    type: float
    help: Weight decay for Adam optimizer
  adam_epsilon:
    # Written with a decimal point on purpose: YAML 1.1 loaders such as
    # PyYAML resolve a bare `1e-15` as a string rather than a float.
    default: 1.0e-15
    type: float
    help: Epsilon for Adam optimizer
# Noise weighting settings: the scheme to use and the parameters that
# belong to individual schemes (see each help text).
noise:
  weighting_scheme:
    default: 'logit_normal'
    type: str
    help: Weighting scheme for noise ("sigma_sqrt", "logit_normal", "mode", "cosmap")
  logit_mean:
    default: 0.0
    type: float
    help: Mean for logit_normal weighting scheme
  logit_std:
    default: 1.0
    type: float
    help: Std for logit_normal weighting scheme
  mode_scale:
    default: 1.29
    type: float
    help: Scale of mode weighting scheme. Only effective when using the `'mode'` as the `weighting_scheme`.
# Gradient clipping. Ignore this if using DeepSpeed.
gradient_clipping:
  max_grad_norm:
    default: 1.0  # other value noted by the author: 0.1
    type: float
    help: Maximum gradient norm