Spaces:

weiyi01191
/

DeepOperateAI-Video

Sleeping

App Files Community

weiyi01191 committed on Jun 10

Commit

e2e886e

verified ·

1 Parent(s): c8360a7

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -2

app.py CHANGED Viewed

@@ -275,16 +275,52 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
     # 设置随机种子
     setup_seeds(seed)
     try:
         answers = model.generate(
             prepared_images,
             prompt,
-            max_new_tokens=args.max_new_tokens if args else 512,
             do_sample=True,
             lengths=[length],
-            num_beams=1
         )
         return answers[0]
     except Exception as e:
         return f"生成预测时出错: {str(e)}"

     # 设置随机种子
     setup_seeds(seed)
+    # 🔧 GPU内存优化和cuBLAS错误处理
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()  # 清理缓存
+        torch.cuda.synchronize()  # 同步GPU操作
     try:
+        # 🔧 使用更保守的生成参数避免cuBLAS错误
         answers = model.generate(
             prepared_images,
             prompt,
+            max_new_tokens=min(args.max_new_tokens if args else 256, 256),  # 限制最大token数
             do_sample=True,
             lengths=[length],
+            num_beams=1,  # 保持beam=1减少计算
+            temperature=0.8,  # 添加温度参数
+            top_p=0.9,     # 添加top_p参数
+            repetition_penalty=1.1  # 避免重复
         )
         return answers[0]
+    except RuntimeError as e:
+        if "cublasLt" in str(e) or "cuBLAS" in str(e):
+            # 🚨 cuBLAS错误特殊处理
+            print(f"⚠️ 检测到cuBLAS错误，尝试降级处理: {e}")
+            # 强制清理GPU内存
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+                gc.collect()
+            try:
+                # 🔧 使用更小的参数重试
+                answers = model.generate(
+                    prepared_images,
+                    prompt,
+                    max_new_tokens=128,  # 进一步减少token数
+                    do_sample=False,     # 关闭采样减少计算
+                    lengths=[min(length, 16)],  # 减少长度
+                    num_beams=1,
+                    temperature=1.0
+                )
+                return answers[0]
+            except Exception as e2:
+                return f"GPU运算错误，请重试。错误信息: {str(e2)}"
+        else:
+            return f"生成预测时出错: {str(e)}"
     except Exception as e:
         return f"生成预测时出错: {str(e)}"