Tonic committed
Commit 3448aad · 1 parent: 37641fb

removes flash attention from app

Files changed (1): app.py (+1, −1)

app.py
@@ -119,7 +119,7 @@ def load_model():
         "torch_dtype": torch.bfloat16 if DEVICE == "cuda" else torch.float32,  # Use float16 on GPU, float32 on CPU
         "trust_remote_code": True,
         "low_cpu_mem_usage": True,
-        "attn_implementation": "flash_attention_2" if DEVICE == "cuda" else "eager"
+        # "attn_implementation": "flash_attention_2" if DEVICE == "cuda" else "eager"
     }
 
     logger.info(f"Model loading parameters: {model_kwargs}")
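
For context, a minimal sketch of how kwargs like these are typically passed to a Transformers from_pretrained call. The rest of load_model() is not part of this diff, so the model id and the call site below are assumptions; the point is that with "attn_implementation" omitted, Transformers falls back to its default attention backend instead of requiring the flash-attn package.

# Minimal sketch (assumed context, not part of the commit): how a kwargs dict
# like the one above is usually consumed. Model id and call site are placeholders.
import torch
from transformers import AutoModelForCausalLM

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model_kwargs = {
    "torch_dtype": torch.bfloat16 if DEVICE == "cuda" else torch.float32,
    "trust_remote_code": True,
    "low_cpu_mem_usage": True,
    # "attn_implementation" left out: Transformers picks its default attention
    # backend, so the optional flash-attn dependency is no longer needed.
}

model = AutoModelForCausalLM.from_pretrained("gpt2", **model_kwargs)  # placeholder model id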