Spaces: Running on L40S
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,8 +27,8 @@ def _load_agg_stats() -> dict:
|
|
| 27 |
except json.JSONDecodeError:
|
| 28 |
print(f"Warning: {AGG_FILE} is corrupted. Starting with empty stats.")
|
| 29 |
return {"8-bit bnb": {"attempts": 0, "correct": 0}, "4-bit bnb": {"attempts": 0, "correct": 0}}
|
| 30 |
-
return {"8-bit bnb": {"attempts":
|
| 31 |
-
"4-bit bnb": {"attempts":
|
| 32 |
|
| 33 |
def _save_agg_stats(stats: dict) -> None:
|
| 34 |
with InterProcessLock(str(LOCK_FILE)):
|
|
@@ -59,7 +59,8 @@ def load_bf16_pipeline():
|
|
| 59 |
torch_dtype=torch.bfloat16,
|
| 60 |
token=HF_TOKEN
|
| 61 |
)
|
| 62 |
-
pipe.to(DEVICE)
|
|
|
|
| 63 |
end_time = time.time()
|
| 64 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
| 65 |
print(f"BF16 Pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
@@ -80,7 +81,8 @@ def load_bnb_8bit_pipeline():
|
|
| 80 |
MODEL_ID,
|
| 81 |
torch_dtype=torch.bfloat16
|
| 82 |
)
|
| 83 |
-
pipe.to(DEVICE)
|
|
|
|
| 84 |
end_time = time.time()
|
| 85 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
| 86 |
print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
@@ -101,7 +103,8 @@ def load_bnb_4bit_pipeline():
|
|
| 101 |
MODEL_ID,
|
| 102 |
torch_dtype=torch.bfloat16
|
| 103 |
)
|
| 104 |
-
pipe.to(DEVICE)
|
|
|
|
| 105 |
end_time = time.time()
|
| 106 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
| 107 |
print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
|
|
| 27 |
except json.JSONDecodeError:
|
| 28 |
print(f"Warning: {AGG_FILE} is corrupted. Starting with empty stats.")
|
| 29 |
return {"8-bit bnb": {"attempts": 0, "correct": 0}, "4-bit bnb": {"attempts": 0, "correct": 0}}
|
| 30 |
+
return {"8-bit bnb": {"attempts": 157, "correct": 74},
|
| 31 |
+
"4-bit bnb": {"attempts": 159, "correct": 78}}
|
| 32 |
|
| 33 |
def _save_agg_stats(stats: dict) -> None:
|
| 34 |
with InterProcessLock(str(LOCK_FILE)):
|
|
|
|
| 59 |
torch_dtype=torch.bfloat16,
|
| 60 |
token=HF_TOKEN
|
| 61 |
)
|
| 62 |
+
# pipe.to(DEVICE)
|
| 63 |
+
pipe.enable_model_cpu_offload()
|
| 64 |
end_time = time.time()
|
| 65 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
| 66 |
print(f"BF16 Pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
|
|
| 81 |
MODEL_ID,
|
| 82 |
torch_dtype=torch.bfloat16
|
| 83 |
)
|
| 84 |
+
# pipe.to(DEVICE)
|
| 85 |
+
pipe.enable_model_cpu_offload()
|
| 86 |
end_time = time.time()
|
| 87 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
| 88 |
print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
|
|
| 103 |
MODEL_ID,
|
| 104 |
torch_dtype=torch.bfloat16
|
| 105 |
)
|
| 106 |
+
# pipe.to(DEVICE)
|
| 107 |
+
pipe.enable_model_cpu_offload()
|
| 108 |
end_time = time.time()
|
| 109 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
| 110 |
print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|