-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.sh
More file actions
62 lines (59 loc) · 2.33 KB
/
train.sh
File metadata and controls
62 lines (59 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
# Run configuration for ./src/train.py: experiment naming, dataset/model
# paths, and the per-experiment log directory.
#
# Strict mode: stop on the first failing command (e.g. mkdir), on use of an
# unset variable, and on a failing pipeline stage.
set -euo pipefail

train_mode=open
top_k=10
experiment_name="Qwen2.5-3B-Instruct-OpenDecoder_nq_hotpotqa_${train_mode}_top${top_k}"

# Exported so the child python process can read it; a plain assignment would
# stay local to this shell and never reach wandb.
export WANDB_PROJECT="${experiment_name}" # project name in wandb

MODEL_PATH=./checkpoints/Qwen2.5-3B-Instruct-OpenDecoder

# NQ inputs.
NQ_DATA_PATH=./datasets/nq/train.jsonl
NQ_RAG_DATA_PATH=./datasets/nq/RAG_train_input_robust.jsonl # ./datasets/nq/RAG_train_input.jsonl for not robust training
NQ_LLM_SCORE_PATH=./datasets/nq/train_LLM_ranking_score.jsonl
NQ_QPP_SCORE_PATH=./datasets/nq/train_QPP_score.jsonl

# HotpotQA inputs.
HOTPOTQA_DATA_PATH=./datasets/hotpotqa/train.jsonl
# NOTE(review): unlike the NQ setting above, this already points at the
# non-robust file (RAG_train_input.jsonl). Confirm whether a "*_robust.jsonl"
# variant was intended here.
HOTPOTQA_RAG_DATA_PATH=./datasets/hotpotqa/RAG_train_input.jsonl
HOTPOTQA_LLM_SCORE_PATH=./datasets/hotpotqa/train_LLM_ranking_score.jsonl
HOTPOTQA_QPP_SCORE_PATH=./datasets/hotpotqa/train_QPP_score.jsonl

RAG_TEXT_PATH=./datasets/wikipedia/pid2psg.pkl
MODEL_PATTERN=qwen_decoder # the path to the model in src/model
src_path=./src

# One log directory per experiment; the log file name carries a
# month-day_hour-minute timestamp.
log_folder="./logs/${experiment_name}"
mkdir -p -- "${log_folder}"
log_name="$(date +"%m-%d_%H-%M").log"
# Command-line arguments forwarded to ./src/train.py. Kept in an array so
# every value is passed as exactly one word (no word-splitting or globbing)
# and no trailing line-continuation is left dangling at the end of the file.
train_args=(
  --model_name_or_path "${MODEL_PATH}"
  --NQ_data_path "${NQ_DATA_PATH}"
  --NQ_RAG_data_path "${NQ_RAG_DATA_PATH}"
  --NQ_LLM_score_path "${NQ_LLM_SCORE_PATH}"
  --NQ_QPP_score_path "${NQ_QPP_SCORE_PATH}"
  --hotpotqa_data_path "${HOTPOTQA_DATA_PATH}"
  --hotpotqa_RAG_data_path "${HOTPOTQA_RAG_DATA_PATH}"
  --hotpotqa_LLM_score_path "${HOTPOTQA_LLM_SCORE_PATH}"
  --hotpotqa_QPP_score_path "${HOTPOTQA_QPP_SCORE_PATH}"
  --RAG_text_path "${RAG_TEXT_PATH}"
  --model_pattern "${MODEL_PATTERN}"
  --train_mode "${train_mode}"
  --add_irrelevant_psg False
  --add_LLM_scores False
  --add_QPP_scores False
  --top_k "${top_k}"
  --normalization_type normal
  --shuffle_RAG False
  --src_path "${src_path}"
  --bf16 True
  --output_dir "./ckpts/${experiment_name}"
  --run_name "${experiment_name}"
  --num_train_epochs 1
  --per_device_train_batch_size 4
  --gradient_accumulation_steps 4
  --save_strategy "steps"
  --save_steps 1000
  --save_safetensors False
  --gradient_checkpointing True
  --save_total_limit 2
  --learning_rate 1e-5
  --weight_decay 0.1
  --warmup_ratio 0.03
  --lr_scheduler_type "cosine"
  --logging_steps 2
  --model_max_length 4096
  --lazy_loading True
)

# With pipefail the pipeline's exit status is train.py's, not tee's, so a
# failed run is still reported as a failure to the caller.
set -o pipefail

# Run on GPU 0. Output is still shown on the console and is additionally
# written to the per-run log file (the log directory and file name are
# prepared earlier in this script but were previously never written to).
# PYTHONUNBUFFERED keeps the console output live now that stdout is a pipe.
CUDA_VISIBLE_DEVICES=0 PYTHONUNBUFFERED=1 \
  python ./src/train.py "${train_args[@]}" 2>&1 \
  | tee "${log_folder}/${log_name}"