-
Notifications
You must be signed in to change notification settings - Fork 276
Expand file tree
/
Copy pathreward_fl_config.yaml
More file actions
70 lines (60 loc) · 1.71 KB
/
reward_fl_config.yaml
File metadata and controls
70 lines (60 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
---
# reward_fl_config.yaml — Hydra config for reward-feedback-learning (reward FL)
# fine-tuning of a Wan2.2 diffusion model with DeepSpeed (ZeRO-2 + CPU offload
# selected via strategy_config below; all ZeRO variants are imported so any can
# be referenced by interpolation).
#
# NOTE(review): indentation was reconstructed from a flattened copy of this
# file — confirm the nesting (esp. model_config_kwargs and device_mapping)
# against the consuming framework's schema.

defaults:
  - ../config/deepspeed_zero@_here_
  - ../config/deepspeed_zero2@_here_
  - ../config/deepspeed_zero2_cpuoffload@_here_
  - ../config/deepspeed_zero3@_here_
  - ../config/deepspeed_zero3_cpuoffload@_here_

hydra:
  run:
    dir: .
  output_subdir: null

exp_name: "reward_fl_zero2_cpuoffload"
seed: 42
logging_dir: ./output/logs
output_dir: ./output

checkpoint_config:
  type: file_system
  output_dir: /data/models/reward_fl/

save_steps: 25
logging_steps: 1
resume_from_checkpoint: false

sequence_length: 1024
train_batch_size: 8
max_grad_norm: 1.0

actor_train:
  model_args:
    model_type: diffusion_module
    dtype: bf16
    model_config_kwargs:
      model_name: wan2_2
      model_paths: ./examples/wan2.2-14B-reward_fl_ds/wan22_paths.json
      reward_model_path: /data/models/antelopev2/
      tokenizer_path: /data/models/Wan-AI/Wan2.1-T2V-1.3B/google/umt5-xxl/
      model_id_with_origin_paths: null
      trainable_models: dit2
      use_gradient_checkpointing_offload: true
      extra_inputs: input_image
      # NOTE(review): presumably the fraction-of-schedule window the trainer
      # samples timesteps from — confirm semantics against the trainer code.
      max_timestep_boundary: 1.0
      min_timestep_boundary: 0.9
      num_inference_steps: 8
      mid_timestep: 4
      final_timestep: 7
      # LoRA adaptation targets the dit2 sub-model only.
      lora_base_model: dit2
      lora_target_modules: q,k,v,o,ffn.0,ffn.2
      lora_rank: 32
  training_args:
    learning_rate: 2.5e-6
    lr_scheduler_type: constant
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 1
    warmup_steps: 10
    num_train_epochs: 1
  data_args:
    file_name: ./data/example_video_dataset/metadata.csv
    preprocessing_num_workers: 2
  strategy_args:
    strategy_name: diffusion_deepspeed_train
    # Interpolation resolves to the ZeRO-2 CPU-offload dict pulled in via the
    # defaults list above; switch the reference to change the ZeRO stage.
    strategy_config: ${deepspeed_zero2_cpuoffload}
  # Evaluated by the framework, not by YAML — kept verbatim.
  device_mapping: list(range(0,8))

system_envs:
  # Quoted so the consumer receives the string "0", not the integer 0.
  RAY_PROFILING: "0"