-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
149 lines (120 loc) · 4.14 KB
/
run.py
File metadata and controls
149 lines (120 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import sys
import subprocess
from termcolor import cprint
from optimization import optimization_config
# First-ever training run: leave this True. To resume after stopping the
# process, set it to False and re-run this script.
start_from_scratch = True

# Schedule and model settings are all pulled from the optimization config.
eval_interval = optimization_config.eval_interval
save_interval = optimization_config.save_interval
total_steps = optimization_config.total_steps
pretrain_model = optimization_config.pretrained_model
judge_model = optimization_config.judge_model

# Absolute path of the checkpoint the training loop keeps overwriting.
model = f"{os.path.abspath('')}/optimization/ckpt/{optimization_config.optimized_model_name}"
if not start_from_scratch:
    # Resume from the latest optimized checkpoint rather than the original
    # pretrained weights.
    pretrain_model = model

eval_dataset = optimization_config.eval_dataset
train_dataset = optimization_config.train_dataset
gpu_groups = optimization_config.gpu_groups
gpu_groups_judge = optimization_config.gpu_groups_judge
eval_m_solution = optimization_config.eval_m_solution
eval_n_strategy = optimization_config.eval_n_strategy
eval_scale_tuple_list = optimization_config.eval_scale_tuple_list
eval_no_example = optimization_config.eval_no_example
def begin_with(file_name):
    """Create `file_name` if missing, or truncate it to zero bytes."""
    # Opening in "w" mode already truncates; there is nothing to write.
    with open(file_name, "w"):
        pass
if start_from_scratch:
    # Fresh run: make sure the result directories exist and reset both logs.
    os.makedirs("evaluation/results", exist_ok=True)
    os.makedirs("optimization/results", exist_ok=True)
    # Path separators in the model path would break the log file name.
    model_tag = model.replace("/", ".")
    begin_with(f"evaluation/results/results-eval-{model_tag}-{eval_dataset}.txt")
    begin_with(f"optimization/results/results-rl-{model_tag}-{train_dataset}.txt")
# evaluation
def evaluation(model, eval_dataset, gpu_groups):
    """Shell out to evaluation/eval.py to score `model` on `eval_dataset`.

    NOTE: reads the module-level step counter `i` (for logging) and the
    module-level judge/eval settings. Raises CalledProcessError on failure.
    """
    cprint(f"This is the {i}-th step for evaluation.", color="green")
    parts = [
        "python eval.py",
        f"--pretrained_model {model}",
        f"--judge_model {judge_model}",
        f"--dataset {eval_dataset}",
        "--use_api False",
        "--exe_verbose False",
        "--is_final_eval False",
        "--single_eval False",
        f"--m_solution {eval_m_solution}",
        f"--n_strategy {eval_n_strategy}",
        f'--scale_tuple_list "{repr(eval_scale_tuple_list)}"',
        f"--no_example {eval_no_example}",
        f'--gpu_groups "{repr(gpu_groups)}"',
        f'--gpu_groups_judge "{repr(gpu_groups_judge)}"',
    ]
    subprocess.run(" ".join(parts), shell=True, cwd="evaluation", check=True)
# sample
def sample(model):
    """Run optimization/sample.py to draw rollouts from `model`.

    NOTE: reads the module-level step counter `i` for the progress message.
    """
    cprint(f"This is the {i}-th step for sampling.", color="green")
    subprocess.run(
        f"python sample.py --pretrained_model {model}",
        shell=True,
        cwd="optimization",
        check=True,
    )
# calculate
def calculate(model):
    """Run optimization/calculate.py over the samples produced for `model`.

    NOTE: reads the module-level step counter `i` for the progress message.
    """
    cprint(f"This is the {i}-th step for calculation.", color="green")
    subprocess.run(
        f"python calculate.py --pretrained_model {model}",
        shell=True,
        cwd="optimization",
        check=True,
    )
# assign reward
def assign_reward(model):
    """Run optimization/reward.py to assign rewards to `model`'s samples.

    Raises subprocess.CalledProcessError if reward.py exits non-zero.
    """
    # Progress log added for consistency: sample/calculate/train/evaluation
    # all announce the current step, but this stage previously ran silently.
    cprint(f"This is the {i}-th step for assigning reward.", color = "green")
    subprocess.run(
        f'python reward.py '
        f'--pretrained_model {model} ',
        shell=True,
        cwd='optimization',
        check=True,
    )
# train
def train(model):
    """Run one training pass (optimization/train.py) starting from `model`,
    then remove TensorBoard event files left in the checkpoint directory.

    NOTE: reads the module-level step counter `i` for the progress message.
    """
    cprint(f"This is the {i}-th step for training.", color="green")
    train_cmd = f"python -m train --pretrain {model}"
    subprocess.run(train_cmd, shell=True, cwd="optimization", check=True)
    # Keep the checkpoint dir free of event* logs between iterations.
    subprocess.run("rm -f optimization/ckpt/event*", shell=True, check=True)
# save
def save(model_from, model_to):
    """Snapshot the checkpoint directory `model_from` into `model_to`,
    discarding any previous contents of `model_to`.

    Raises OSError/shutil.Error if the copy fails.
    """
    # shutil replaces the former `rm -rf {p}/*` / `cp -r {p}/*` shell calls:
    # it is safe for paths containing spaces or shell metacharacters, copies
    # dotfiles that the shell glob `*` silently skipped, and does not fail
    # when the source directory is empty.
    if os.path.isdir(model_to):
        shutil.rmtree(model_to)
    shutil.copytree(model_from, model_to)
# Bootstrap step when training from scratch: one full
# sample -> calculate -> reward -> train pass on the pretrained weights.
i = 0
#evaluation(pretrain_model, eval_dataset, gpu_groups)
sample(pretrain_model)
calculate(pretrain_model)
assign_reward(pretrain_model)
train(pretrain_model)
i += 1

# Iterative optimization: every step trains on the latest optimized
# checkpoint, periodically evaluating and snapshotting it.
while True:
    if i > total_steps:
        break
    if i % eval_interval == 0:
        evaluation(model, eval_dataset, gpu_groups)
    if i % save_interval == 0:
        save(model, f"optimization/ckpt/iter{i}")
    if i == total_steps:
        # Last step only evaluates/snapshots; no further training.
        break
    sample(model)
    calculate(model)
    assign_reward(model)
    train(model)
    i += 1