Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -275,16 +275,52 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
|
|
| 275 |
# 设置随机种子
|
| 276 |
setup_seeds(seed)
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
try:
|
|
|
|
| 279 |
answers = model.generate(
|
| 280 |
prepared_images,
|
| 281 |
prompt,
|
| 282 |
-
max_new_tokens=args.max_new_tokens if args else
|
| 283 |
do_sample=True,
|
| 284 |
lengths=[length],
|
| 285 |
-
num_beams=1
|
|
|
|
|
|
|
|
|
|
| 286 |
)
|
| 287 |
return answers[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
except Exception as e:
|
| 289 |
return f"生成预测时出错: {str(e)}"
|
| 290 |
|
|
|
|
| 275 |
# 设置随机种子
|
| 276 |
setup_seeds(seed)
|
| 277 |
|
| 278 |
+
# 🔧 GPU内存优化和cuBLAS错误处理
|
| 279 |
+
if torch.cuda.is_available():
|
| 280 |
+
torch.cuda.empty_cache() # 清理缓存
|
| 281 |
+
torch.cuda.synchronize() # 同步GPU操作
|
| 282 |
+
|
| 283 |
try:
|
| 284 |
+
# 🔧 使用更保守的生成参数避免cuBLAS错误
|
| 285 |
answers = model.generate(
|
| 286 |
prepared_images,
|
| 287 |
prompt,
|
| 288 |
+
max_new_tokens=min(args.max_new_tokens if args else 256, 256), # 限制最大token数
|
| 289 |
do_sample=True,
|
| 290 |
lengths=[length],
|
| 291 |
+
num_beams=1, # 保持beam=1减少计算
|
| 292 |
+
temperature=0.8, # 添加温度参数
|
| 293 |
+
top_p=0.9, # 添加top_p参数
|
| 294 |
+
repetition_penalty=1.1 # 避免重复
|
| 295 |
)
|
| 296 |
return answers[0]
|
| 297 |
+
except RuntimeError as e:
|
| 298 |
+
if "cublasLt" in str(e) or "cuBLAS" in str(e):
|
| 299 |
+
# 🚨 cuBLAS错误特殊处理
|
| 300 |
+
print(f"⚠️ 检测到cuBLAS错误,尝试降级处理: {e}")
|
| 301 |
+
|
| 302 |
+
# 强制清理GPU内存
|
| 303 |
+
if torch.cuda.is_available():
|
| 304 |
+
torch.cuda.empty_cache()
|
| 305 |
+
torch.cuda.synchronize()
|
| 306 |
+
gc.collect()
|
| 307 |
+
|
| 308 |
+
try:
|
| 309 |
+
# 🔧 使用更小的参数重试
|
| 310 |
+
answers = model.generate(
|
| 311 |
+
prepared_images,
|
| 312 |
+
prompt,
|
| 313 |
+
max_new_tokens=128, # 进一步减少token数
|
| 314 |
+
do_sample=False, # 关闭采样减少计算
|
| 315 |
+
lengths=[min(length, 16)], # 减少长度
|
| 316 |
+
num_beams=1,
|
| 317 |
+
temperature=1.0
|
| 318 |
+
)
|
| 319 |
+
return answers[0]
|
| 320 |
+
except Exception as e2:
|
| 321 |
+
return f"GPU运算错误,请重试。错误信息: {str(e2)}"
|
| 322 |
+
else:
|
| 323 |
+
return f"生成预测时出错: {str(e)}"
|
| 324 |
except Exception as e:
|
| 325 |
return f"生成预测时出错: {str(e)}"
|
| 326 |
|