Spaces: Running on Zero
Update app.py
app.py CHANGED
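
This commit moves the Space to ZeroGPU: it imports the spaces package, loads google/gemma-2b-it in float16 with an HF_TOKEN, pins the device to CUDA (ZeroGPU always provides one inside a decorated call), removes the user() helper, and decorates chat() with @spaces.GPU so a GPU is attached only while a reply is generated.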
@@ -6,22 +6,21 @@ import numpy as np
 from torch.nn import functional as F
 import os
 from threading import Thread
-
+import spaces
 
-
+token = os.environ["HF_TOKEN"]
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
+                                             # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                                             torch_dtype=torch.float16,
+                                             token=token)
 tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",token=token)
 # using CUDA for an optimal experience
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device('cuda')
 model = model.to(device)
 
 
-
-
-def user(message, history):
-    # Append the user's message to the conversation history
-    return "", history + [[message, ""]]
-
-
+@spaces.GPU
 def chat(message, history):
     chat = []
     for item in history:
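
For orientation, the sketch below shows how the pieces in this hunk plausibly fit together. Only the model/tokenizer loading, the device pinning, and the @spaces.GPU decorator are taken from the diff; the streaming body (TextIteratorStreamer, apply_chat_template, max_new_tokens=256) is an assumption inferred from the Thread import, not code shown in this commit.

# A minimal sketch of the ZeroGPU pattern this diff applies, not the Space's
# actual app.py. The generation body below is assumed, as noted above.
import os
from threading import Thread

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

token = os.environ["HF_TOKEN"]  # Gemma is gated, so an HF access token is required
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    torch_dtype=torch.float16,  # fp16 is safe here: ZeroGPU always runs on CUDA
    token=token,
)
tok = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=token)
model = model.to(torch.device("cuda"))


@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def chat(message, history):
    # history arrives as Gradio-style [user, assistant] pairs
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tok.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

    # Generate on a worker thread so tokens can be yielded as they arrive
    Thread(
        target=model.generate,
        kwargs=dict(inputs=input_ids, streamer=streamer, max_new_tokens=256),
    ).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

Decorating chat() rather than keeping a GPU permanently means ZeroGPU holds a device only while a reply streams out, which is what lets the Space report "Running on Zero".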