Spaces: Running on Zero
Update app.py
app.py CHANGED
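
This commit moves the Space to ZeroGPU: it imports the spaces package, loads google/gemma-2b-it in float16 with an HF_TOKEN, pins the device to CUDA (ZeroGPU always provides one inside a decorated call), removes the user() helper, and decorates chat() with @spaces.GPU so a GPU is attached only while a reply is generated.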
@@ -6,22 +6,21 @@ import numpy as np
 from torch.nn import functional as F
 import os
 from threading import Thread
-
+import spaces
 
-
+token = os.environ["HF_TOKEN"]
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
+                                             # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                                             torch_dtype=torch.float16,
+                                             token=token)
 tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",token=token)
 # using CUDA for an optimal experience
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+device = torch.device('cuda')
 model = model.to(device)
 
 
-
-
-def user(message, history):
-    # Append the user's message to the conversation history
-    return "", history + [[message, ""]]
-
-
+@spaces.GPU
 def chat(message, history):
     chat = []
     for item in history:
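
For orientation, the sketch below shows how the pieces in this hunk plausibly fit together. Only the model/tokenizer loading, the device pinning, and the @spaces.GPU decorator are taken from the diff; the streaming body (TextIteratorStreamer, apply_chat_template, max_new_tokens=256) is an assumption inferred from the Thread import, not code shown in this commit.

# A minimal sketch of the ZeroGPU pattern this diff applies, not the Space's
# actual app.py. The generation body below is assumed, as noted above.
import os
from threading import Thread

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

token = os.environ["HF_TOKEN"]  # Gemma is gated, so an HF access token is required
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    torch_dtype=torch.float16,  # fp16 is safe here: ZeroGPU always runs on CUDA
    token=token,
)
tok = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=token)
model = model.to(torch.device("cuda"))


@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def chat(message, history):
    # history arrives as Gradio-style [user, assistant] pairs
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tok.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

    # Generate on a worker thread so tokens can be yielded as they arrive
    Thread(
        target=model.generate,
        kwargs=dict(inputs=input_ids, streamer=streamer, max_new_tokens=256),
    ).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

Decorating chat() rather than keeping a GPU permanently means ZeroGPU holds a device only while a reply streams out, which is what lets the Space report "Running on Zero".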