Update README.md

Trained with [diffusion-pipe](https://github.com/tdrussell/diffusion-pipe)
### dataset.toml

```
# Resolution settings.
resolutions = [512]

# Aspect ratio bucketing settings
enable_ar_bucket = true
min_ar = 0.5
max_ar = 2.0
num_ar_buckets = 7

# Frame buckets (1 is for images)
frame_buckets = [1]

[[directory]] # IMAGES
# Path to the directory containing images and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/images'
num_repeats = 5
resolutions = [720]
frame_buckets = [1] # Use 1 frame for images.

[[directory]] # VIDEOS
# Path to the directory containing videos and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/videos'
num_repeats = 5
resolutions = [512] # Set video resolution to 512.
frame_buckets = [6, 28, 31, 32, 36, 42, 43, 48, 50, 53]
```
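The aspect-ratio settings above sort mixed-shape media into `num_ar_buckets` bins between `min_ar` (tall, 1:2) and `max_ar` (wide, 2:1), so batches share a shape without cropping everything square; `frame_buckets` plays the same role along the time axis, with a bucket of 1 holding still images. As a rough sketch of what such bucketing does (for intuition only; diffusion-pipe's internal bucket derivation and assignment may differ in detail):

```python
# A sketch of aspect-ratio bucketing, for intuition only: diffusion-pipe's
# internal bucket derivation and assignment may differ in detail.
import math

min_ar, max_ar, num_ar_buckets = 0.5, 2.0, 7

# Log-spaced aspect ratios between min_ar and max_ar (symmetric around 1.0).
step = (math.log(max_ar) - math.log(min_ar)) / (num_ar_buckets - 1)
ar_buckets = [math.exp(math.log(min_ar) + i * step) for i in range(num_ar_buckets)]

def nearest_bucket(width: int, height: int) -> float:
    """Assign media to the bucket whose aspect ratio is closest in log space."""
    ar = width / height
    return min(ar_buckets, key=lambda b: abs(math.log(b) - math.log(ar)))

print([round(b, 2) for b in ar_buckets])    # [0.5, 0.63, 0.79, 1.0, 1.26, 1.59, 2.0]
print(round(nearest_bucket(1024, 768), 2))  # 4:3 image (ar ~1.33) -> 1.26
```

With the values above, the seven buckets fall near 0.5, 0.63, 0.79, 1.0, 1.26, 1.59, and 2.0.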
### config.toml

```
# Dataset config file.
output_dir = '/mnt/d/wan/training_output'
dataset = 'dataset.toml'

# Training settings
epochs = 50
micro_batch_size_per_gpu = 1
pipeline_stages = 1
gradient_accumulation_steps = 4
gradient_clipping = 1.0
warmup_steps = 100

# eval settings
eval_every_n_epochs = 5
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

# misc settings
save_every_n_epochs = 5
checkpoint_every_n_minutes = 30
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
video_clip_mode = 'single_middle'

[model]
type = 'wan'
ckpt_path = '../Wan2.1-T2V-1.3B'
dtype = 'bfloat16'
# You can use fp8 for the transformer when training LoRA.
transformer_dtype = 'float8'
timestep_sample_method = 'logit_normal'

[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'

[optimizer]
type = 'adamw_optimi'
lr = 5e-5
betas = [0.9, 0.99]
weight_decay = 0.02
eps = 1e-8
```
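Since `config.toml` locates the dataset config through its `dataset` key and both files carry absolute paths, a quick pre-flight check before a long caching run is to parse the two files and confirm the referenced directories exist. A minimal sketch, assuming the two files above are saved as `config.toml` and `dataset.toml` in the working directory (standard library only; `tomllib` requires Python 3.11+):

```python
# Pre-flight check: parse both configs and confirm the paths they reference.
# Assumes config.toml and dataset.toml sit in the working directory.
import tomllib
from pathlib import Path

with open("config.toml", "rb") as f:
    config = tomllib.load(f)

# config.toml names the dataset config via its `dataset` key.
with open(config["dataset"], "rb") as f:
    dataset = tomllib.load(f)

# Samples per optimizer step, per data-parallel GPU:
# micro_batch_size_per_gpu * gradient_accumulation_steps = 1 * 4 = 4.
print("effective batch size per GPU:",
      config["micro_batch_size_per_gpu"] * config["gradient_accumulation_steps"])

for d in dataset["directory"]:
    status = "found" if Path(d["path"]).is_dir() else "MISSING"
    print(f"{d['path']}: {status}")
```

With the settings above this reports an effective batch size of 4 per GPU.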
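Training itself is launched through DeepSpeed, as described in the diffusion-pipe README; the invocation there has the general shape `deepspeed --num_gpus=1 train.py --deepspeed --config config.toml` (check the upstream README for the flags current at the time you run it).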