Tonic committed
Commit fbcc018 · 1 Parent(s): d749fc8

makes modifications for zerogpu spaces

Files changed (2)
  1. app.py +1 -1
  2. requirements.txt +1 -1
app.py CHANGED
@@ -116,7 +116,7 @@ def load_model():
     # Load the full fine-tuned model with optimized settings
     model_kwargs = {
         "device_map": "auto" if DEVICE == "cuda" else "cpu",
-        "torch_dtype": torch.float16 if DEVICE == "cuda" else torch.float32,  # Use float16 on GPU, float32 on CPU
+        "torch_dtype": torch.bfloat16 if DEVICE == "cuda" else torch.float32,  # Use bfloat16 on GPU, float32 on CPU
         "trust_remote_code": True,
         "low_cpu_mem_usage": True,
         "attn_implementation": "flash_attention_2" if DEVICE == "cuda" else "eager"
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu118torch1.12cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 gradio>=5.38.2
 torch>=2.0.0
 transformers>=4.54.0
@@ -9,4 +10,3 @@ pyyaml>=6.0
 psutil>=5.9.0
 tqdm>=4.64.0
 requests>=2.31.0
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu118torch1.12cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
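The commit message refers to ZeroGPU Spaces, but the diff itself only moves the prebuilt FlashAttention wheel to the top of requirements.txt. As a hedged illustration of the pattern such Spaces typically rely on, the sketch below checks whether flash_attn actually installed before requesting the flash_attention_2 backend and shows where a spaces.GPU-decorated function would sit; none of these names appear in this commit.

import importlib.util

import spaces  # Hugging Face helper that backs ZeroGPU hardware allocation
import torch

# The prebuilt wheel above can fail to install on a mismatched CUDA/Python
# runtime, so verify the package is importable before asking transformers
# for the flash_attention_2 backend.
FLASH_ATTN_AVAILABLE = importlib.util.find_spec("flash_attn") is not None
ATTN_IMPL = "flash_attention_2" if (torch.cuda.is_available() and FLASH_ATTN_AVAILABLE) else "eager"

@spaces.GPU(duration=60)  # ZeroGPU attaches a GPU only while this function runs
def describe_runtime() -> str:
    # Real inference (model.generate, etc.) would live here; this stub only
    # shows where GPU-bound work belongs under ZeroGPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"attn_implementation={ATTN_IMPL}, device={device}"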