stable-code-instruct-3b

Runtime error

pvduy committed on Mar 22, 2024

Commit

0a1707e

1 Parent(s): 54fe16b

change device

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ def parse_args():
     return parser.parse_args()
 def predict(message, history, system_prompt, temperature, max_tokens):
-    global model, tokenizer
     instruction = "<|im_start|>system\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n<|im_end|>\n"
     for human, assistant in history:
         instruction += '<|im_start|>user\n' + human + '\n<|im_end|>\n<|im_start|>assistant\n' + assistant
@@ -33,8 +33,8 @@ def predict(message, history, system_prompt, temperature, max_tokens):
     if input_ids.shape[1] > MAX_MAX_NEW_TOKENS:
         input_ids = input_ids[:, -MAX_MAX_NEW_TOKENS:]
-    input_ids = input_ids.cuda()
-    attention_mask = attention_mask.cuda()
     generate_kwargs = dict(
         {"input_ids": input_ids, "attention_mask": attention_mask},
         streamer=streamer,
@@ -59,7 +59,8 @@ if __name__ == "__main__":
     args = parse_args()
     tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-instruct-3b")
     model = AutoModelForCausalLM.from_pretrained("stabilityai/stable-code-instruct-3b")
-    model = model.cuda()
     gr.ChatInterface(
         predict,
         title="Stable Code Instruct Chat - Demo",

     return parser.parse_args()
 def predict(message, history, system_prompt, temperature, max_tokens):
+    global model, tokenizer, device
     instruction = "<|im_start|>system\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n<|im_end|>\n"
     for human, assistant in history:
         instruction += '<|im_start|>user\n' + human + '\n<|im_end|>\n<|im_start|>assistant\n' + assistant
     if input_ids.shape[1] > MAX_MAX_NEW_TOKENS:
         input_ids = input_ids[:, -MAX_MAX_NEW_TOKENS:]
+    input_ids = input_ids.to(device)
+    attention_mask = attention_mask.to(device)
     generate_kwargs = dict(
         {"input_ids": input_ids, "attention_mask": attention_mask},
         streamer=streamer,
     args = parse_args()
     tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-instruct-3b")
     model = AutoModelForCausalLM.from_pretrained("stabilityai/stable-code-instruct-3b")
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = model.to(device)
     gr.ChatInterface(
         predict,
         title="Stable Code Instruct Chat - Demo",