-
Notifications
You must be signed in to change notification settings - Fork 276
Expand file tree
/
Copy pathreward_fl_config.yaml
More file actions
70 lines (60 loc) · 1.71 KB
/
reward_fl_config.yaml
File metadata and controls
70 lines (60 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
---
# reward_fl_config.yaml — Hydra config for reward-feedback-learning (reward FL)
# fine-tuning of a Wan2.2 diffusion model with DeepSpeed (ZeRO-2 + CPU offload
# selected via strategy_config below; all ZeRO variants are imported so any can
# be referenced by interpolation).
#
# NOTE(review): indentation was reconstructed from a flattened copy of this
# file — confirm the nesting (esp. model_config_kwargs and device_mapping)
# against the consuming framework's schema.

defaults:
  - ../config/deepspeed_zero@_here_
  - ../config/deepspeed_zero2@_here_
  - ../config/deepspeed_zero2_cpuoffload@_here_
  - ../config/deepspeed_zero3@_here_
  - ../config/deepspeed_zero3_cpuoffload@_here_

hydra:
  run:
    dir: .
  output_subdir: null

exp_name: "reward_fl_zero2_cpuoffload"
seed: 42
logging_dir: ./output/logs
output_dir: ./output

checkpoint_config:
  type: file_system
  output_dir: /data/models/reward_fl/

save_steps: 25
logging_steps: 1
resume_from_checkpoint: false

sequence_length: 1024
train_batch_size: 8
max_grad_norm: 1.0

actor_train:
  model_args:
    model_type: diffusion_module
    dtype: bf16
    model_config_kwargs:
      model_name: wan2_2
      model_paths: ./examples/wan2.2-14B-reward_fl_ds/wan22_paths.json
      reward_model_path: /data/models/antelopev2/
      tokenizer_path: /data/models/Wan-AI/Wan2.1-T2V-1.3B/google/umt5-xxl/
      model_id_with_origin_paths: null
      trainable_models: dit2
      use_gradient_checkpointing_offload: true
      extra_inputs: input_image
      # NOTE(review): presumably the fraction-of-schedule window the trainer
      # samples timesteps from — confirm semantics against the trainer code.
      max_timestep_boundary: 1.0
      min_timestep_boundary: 0.9
      num_inference_steps: 8
      mid_timestep: 4
      final_timestep: 7
      # LoRA adaptation targets the dit2 sub-model only.
      lora_base_model: dit2
      lora_target_modules: q,k,v,o,ffn.0,ffn.2
      lora_rank: 32
  training_args:
    learning_rate: 2.5e-6
    lr_scheduler_type: constant
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 1
    warmup_steps: 10
    num_train_epochs: 1
  data_args:
    file_name: ./data/example_video_dataset/metadata.csv
    preprocessing_num_workers: 2
  strategy_args:
    strategy_name: diffusion_deepspeed_train
    # Interpolation resolves to the ZeRO-2 CPU-offload dict pulled in via the
    # defaults list above; switch the reference to change the ZeRO stage.
    strategy_config: ${deepspeed_zero2_cpuoffload}
  # Evaluated by the framework, not by YAML — kept verbatim.
  device_mapping: list(range(0,8))

system_envs:
  # Quoted so the consumer receives the string "0", not the integer 0.
  RAY_PROFILING: "0"