Update README.md

Trained with [diffusion-pipe](https://github.com/tdrussell/diffusion-pipe)
### dataset.toml

```
# Resolution settings.
resolutions = [512]

# Aspect ratio bucketing settings
enable_ar_bucket = true
min_ar = 0.5
max_ar = 2.0
num_ar_buckets = 7

# Frame buckets (1 is for images)
frame_buckets = [1]

[[directory]] # IMAGES
# Path to the directory containing images and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/images'
num_repeats = 5
resolutions = [720]
frame_buckets = [1] # Use 1 frame for images.

[[directory]] # VIDEOS
# Path to the directory containing videos and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/videos'
num_repeats = 5
resolutions = [512] # Set video resolution to 512.
frame_buckets = [6, 28, 31, 32, 36, 42, 43, 48, 50, 53]
```
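The aspect-ratio settings above sort mixed-shape media into `num_ar_buckets` bins between `min_ar` (tall, 1:2) and `max_ar` (wide, 2:1), so batches share a shape without cropping everything square; `frame_buckets` plays the same role along the time axis, with a bucket of 1 holding still images. As a rough sketch of what such bucketing does (for intuition only; diffusion-pipe's internal bucket derivation and assignment may differ in detail):

```python
# A sketch of aspect-ratio bucketing, for intuition only: diffusion-pipe's
# internal bucket derivation and assignment may differ in detail.
import math

min_ar, max_ar, num_ar_buckets = 0.5, 2.0, 7

# Log-spaced aspect ratios between min_ar and max_ar (symmetric around 1.0).
step = (math.log(max_ar) - math.log(min_ar)) / (num_ar_buckets - 1)
ar_buckets = [math.exp(math.log(min_ar) + i * step) for i in range(num_ar_buckets)]

def nearest_bucket(width: int, height: int) -> float:
    """Assign media to the bucket whose aspect ratio is closest in log space."""
    ar = width / height
    return min(ar_buckets, key=lambda b: abs(math.log(b) - math.log(ar)))

print([round(b, 2) for b in ar_buckets])    # [0.5, 0.63, 0.79, 1.0, 1.26, 1.59, 2.0]
print(round(nearest_bucket(1024, 768), 2))  # 4:3 image (ar ~1.33) -> 1.26
```

With the values above, the seven buckets fall near 0.5, 0.63, 0.79, 1.0, 1.26, 1.59, and 2.0.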
### config.toml

```
# Dataset config file.
output_dir = '/mnt/d/wan/training_output'
dataset = 'dataset.toml'

# Training settings
epochs = 50
micro_batch_size_per_gpu = 1
pipeline_stages = 1
gradient_accumulation_steps = 4
gradient_clipping = 1.0
warmup_steps = 100

# eval settings
eval_every_n_epochs = 5
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

# misc settings
save_every_n_epochs = 5
checkpoint_every_n_minutes = 30
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
video_clip_mode = 'single_middle'

[model]
type = 'wan'
ckpt_path = '../Wan2.1-T2V-1.3B'
dtype = 'bfloat16'
# You can use fp8 for the transformer when training LoRA.
transformer_dtype = 'float8'
timestep_sample_method = 'logit_normal'

[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'

[optimizer]
type = 'adamw_optimi'
lr = 5e-5
betas = [0.9, 0.99]
weight_decay = 0.02
eps = 1e-8
```
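Since `config.toml` locates the dataset config through its `dataset` key and both files carry absolute paths, a quick pre-flight check before a long caching run is to parse the two files and confirm the referenced directories exist. A minimal sketch, assuming the two files above are saved as `config.toml` and `dataset.toml` in the working directory (standard library only; `tomllib` requires Python 3.11+):

```python
# Pre-flight check: parse both configs and confirm the paths they reference.
# Assumes config.toml and dataset.toml sit in the working directory.
import tomllib
from pathlib import Path

with open("config.toml", "rb") as f:
    config = tomllib.load(f)

# config.toml names the dataset config via its `dataset` key.
with open(config["dataset"], "rb") as f:
    dataset = tomllib.load(f)

# Samples per optimizer step, per data-parallel GPU:
# micro_batch_size_per_gpu * gradient_accumulation_steps = 1 * 4 = 4.
print("effective batch size per GPU:",
      config["micro_batch_size_per_gpu"] * config["gradient_accumulation_steps"])

for d in dataset["directory"]:
    status = "found" if Path(d["path"]).is_dir() else "MISSING"
    print(f"{d['path']}: {status}")
```

With the settings above this reports an effective batch size of 4 per GPU.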
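Training itself is launched through DeepSpeed, as described in the diffusion-pipe README; the invocation there has the general shape `deepspeed --num_gpus=1 train.py --deepspeed --config config.toml` (check the upstream README for the flags current at the time you run it).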