Spaces:

weiyi01191
/

DeepOperateAI-Video

Running

App Files Community

weiyi01191 committed on Jun 9

Commit

e2b0fd0

1 Parent(s): e8e4446

Update test_configs/llama2_test_config.yaml

Browse files

Files changed (1) hide show

test_configs/llama2_test_config.yaml +55 -15

test_configs/llama2_test_config.yaml CHANGED Viewed

@@ -2,7 +2,7 @@ model:
   arch: mini_gpt4_llama_v2
   freeze_vit: True
   freeze_qformer: True
-  max_txt_len: 384
   low_resource: True
   image_size: 224
   end_sym: "</s>"
@@ -10,47 +10,87 @@ model:
   ckpt: "checkpoints/video_llama_checkpoint_last.pth"
   use_grad_checkpoint: True
   chat_template: True
-  lora_r: 96
-  lora_alpha: 24
-  length: 50
   use_grad_checkpoint_llm: True
-  max_context_len: 4096
   architectures: [
     "MiniGPT4_Video"
   ]
   device: "cuda"
-  drop_path_rate: 0
   img_size: 224
   model_type: "minigpt4_video"
-  num_query_token: 48
   prompt: ""
   torch_dtype: "float16"
-  transformers_version: "4.42.3"
   vit_precision: "fp16"
   vit_model: "eva_clip_g"
   token_pooling: true
-  lora_target_modules : ["q_proj","v_proj","k_proj","o_proj","gate_proj","up_proj","down_proj"]
-  lora_dropout: 0.08
   remove_template: false
   prompt_path: ""
   minigpt4_gpu_id: 0
   whisper_gpu_id: 0
   answer_module_gpu_id: 0
 datasets:
   video_chatgpt: #99378 row  - 13224 video
-    batch_size: 3
     vis_processor:
       train:
         name: "blip2_image_train"
         image_size: 224
     text_processor:
       train:
         name: "blip_caption"
-    sample_ratio: 200
 run:
   seed: 42
   amp: true

   arch: mini_gpt4_llama_v2
   freeze_vit: True
   freeze_qformer: True
+  max_txt_len: 512
   low_resource: True
   image_size: 224
   end_sym: "</s>"
   ckpt: "checkpoints/video_llama_checkpoint_last.pth"
   use_grad_checkpoint: True
   chat_template: True
+  lora_r: 64
+  lora_alpha: 16
+  length: 45
   use_grad_checkpoint_llm: True
+  max_context_len: 3072
   architectures: [
     "MiniGPT4_Video"
   ]
   device: "cuda"
+  drop_path_rate: 0.1
   img_size: 224
   model_type: "minigpt4_video"
+  num_query_token: 32
   prompt: ""
   torch_dtype: "float16"
+  transformers_version: "4.37.2"
   vit_precision: "fp16"
   vit_model: "eva_clip_g"
   token_pooling: true
+  lora_target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj"]
+  lora_dropout: 0.05
   remove_template: false
   prompt_path: ""
   minigpt4_gpu_id: 0
   whisper_gpu_id: 0
   answer_module_gpu_id: 0
+  gradient_accumulation_steps: 1
+  warmup_steps: 100
+  save_steps: 1000
+  logging_steps: 50
+  eval_steps: 500
+  max_new_tokens: 512
+  temperature: 0.7
+  top_p: 0.9
+  do_sample: true
+  num_beams: 1
 datasets:
   video_chatgpt: #99378 row  - 13224 video
+    batch_size: 2
+    num_workers: 2
     vis_processor:
       train:
         name: "blip2_image_train"
         image_size: 224
+        mean: [0.48145466, 0.4578275, 0.40821073]
+        std: [0.26862954, 0.26130258, 0.27577711]
+      eval:
+        name: "blip2_image_eval"
+        image_size: 224
     text_processor:
       train:
         name: "blip_caption"
+        max_words: 512
+      eval:
+        name: "blip_caption"
+        max_words: 512
+    sample_ratio: 100
 run:
   seed: 42
   amp: true
+  distributed: false
+  gpu_id: 0
+  world_size: 1
+  rank: 0
+  dataloader_num_workers: 2
+  pin_memory: true
+  persistent_workers: true
+  prefetch_factor: 2
+  clip_grad_norm: 1.0
+  weight_decay: 0.01
+  adam_epsilon: 1e-8
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+inference:
+  batch_size: 1
+  max_frames: 45
+  frame_interval: 2
+  subtitle_max_len: 400
+  enable_subtitles: true
+  whisper_model: "base"
+  response_format: "detailed"
+  include_timestamps: false