motimalu committed 54b6ea7 (verified, parent dcf0106): Update README.md

Files changed (1): README.md (+47 -47)
Trained with [diffusion-pipe](https://github.com/tdrussell/diffusion-pipe)

### dataset.toml
```
# Resolution settings.
resolutions = [512]

# Aspect ratio bucketing settings
enable_ar_bucket = true
min_ar = 0.5
max_ar = 2.0
num_ar_buckets = 7

# Frame buckets (1 is for images)
frame_buckets = [1]

[[directory]] # IMAGES
# Path to the directory containing images and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/images'
num_repeats = 5
resolutions = [720]
frame_buckets = [1] # Use 1 frame for images.

[[directory]] # VIDEOS
# Path to the directory containing videos and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/videos'
num_repeats = 5
resolutions = [512] # Video resolution for this directory.
frame_buckets = [6, 28, 31, 32, 36, 42, 43, 48, 50, 53]
```
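The aspect-ratio bucket settings group samples by shape instead of forcing a single square crop: each image or clip is assigned to one of `num_ar_buckets` aspect ratios between `min_ar` and `max_ar`, and `frame_buckets` does the same for clip length (1 frame = still image). The sketch below only illustrates the idea; the even log-space bucket spacing and the rounding to multiples of 16 are assumptions, not diffusion-pipe's exact implementation.

```python
import math

def ar_buckets(min_ar=0.5, max_ar=2.0, num_ar_buckets=7):
    # Assumption: bucket aspect ratios spaced evenly in log space between min_ar and max_ar.
    log_min, log_max = math.log(min_ar), math.log(max_ar)
    step = (log_max - log_min) / (num_ar_buckets - 1)
    return [math.exp(log_min + i * step) for i in range(num_ar_buckets)]

def bucket_hw(resolution=512, ar=1.0, multiple=16):
    # Assumption: keep total pixel area near resolution**2 for the requested aspect ratio,
    # snapping each side to a multiple the VAE/patchify stage can accept.
    height = math.sqrt(resolution ** 2 / ar)
    width = height * ar
    snap = lambda v: max(multiple, round(v / multiple) * multiple)
    return snap(width), snap(height)

for ar in ar_buckets():
    w, h = bucket_hw(512, ar)
    print(f"ar={ar:.2f} -> {w}x{h}")
```

Every sample lands in the nearest bucket, so portrait, landscape, and square training data can be batched together without heavy cropping.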

### config.toml

```
# Output directory and dataset config file.
output_dir = '/mnt/d/wan/training_output'
dataset = 'dataset.toml'

# Training settings
epochs = 50
micro_batch_size_per_gpu = 1
pipeline_stages = 1
gradient_accumulation_steps = 4
gradient_clipping = 1.0
warmup_steps = 100

# eval settings
eval_every_n_epochs = 5
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

# misc settings
save_every_n_epochs = 5
checkpoint_every_n_minutes = 30
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
video_clip_mode = 'single_middle'

[model]
type = 'wan'
ckpt_path = '../Wan2.1-T2V-1.3B'
dtype = 'bfloat16'
# You can use fp8 for the transformer when training LoRA.
transformer_dtype = 'float8'
timestep_sample_method = 'logit_normal'

[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'

[optimizer]
type = 'adamw_optimi'
lr = 5e-5
betas = [0.9, 0.99]
weight_decay = 0.02
eps = 1e-8
```
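With `micro_batch_size_per_gpu = 1` and `gradient_accumulation_steps = 4`, each optimizer step accumulates 4 samples per GPU, and `warmup_steps = 100` is counted in those optimizer steps. The snippet below is a small, hypothetical sanity check (it assumes the file is saved as `config.toml` next to the script and a single-GPU run) that loads the config and prints the numbers that matter most:

```python
import tomllib  # Python 3.11+; on older versions use the third-party 'tomli' package

NUM_GPUS = 1  # assumption: single-GPU training run

with open("config.toml", "rb") as f:
    cfg = tomllib.load(f)

# Effective batch size per optimizer step = micro batch * grad accumulation * GPUs.
effective_batch = (cfg["micro_batch_size_per_gpu"]
                   * cfg["gradient_accumulation_steps"]
                   * NUM_GPUS)
print(f"samples per optimizer step: {effective_batch}")                      # 1 * 4 * 1 = 4
print(f"LoRA rank: {cfg['adapter']['rank']}, lr: {cfg['optimizer']['lr']}")  # 32, 5e-05
```

Training itself is launched with diffusion-pipe's `train.py` under DeepSpeed; see the diffusion-pipe README for the exact command line.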