Di Zhang
committed on
Update app.py
app.py CHANGED

@@ -12,7 +12,7 @@ model_path = snapshot_download(
 )
 
 tokenizer = LlamaTokenizer.from_pretrained(model_path)
-model = LlamaForCausalLM.from_pretrained(model_path)
+model = LlamaForCausalLM.from_pretrained(model_path,device_map='auto')
 
 DESCRIPTION = '''
 # SimpleBerry/LLaMA-O1-Supervised-1129 | Duplicate the space and set it to private for faster & personal inference for free.
@@ -35,6 +35,7 @@ def llama_o1_template(data):
     text = template.format(content=data)
     return text
 
+@spaces.GPU
 def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
    input_text = llama_o1_template(message)
    inputs = tokenizer(input_text, return_tensors="pt")
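For context, here is a minimal sketch of how the two changes in this commit fit together in a ZeroGPU Space: device_map='auto' lets transformers place the weights automatically, and the @spaces.GPU decorator requests a GPU only while generation runs. The repo_id passed to snapshot_download and the body of generate_text are illustrative reconstructions, not copied from app.py.

# Sketch only; repo_id and the generation loop are assumptions, not part of the diff.
import spaces
from huggingface_hub import snapshot_download
from transformers import LlamaForCausalLM, LlamaTokenizer

# Assumption: the repo id matches the model named in DESCRIPTION.
model_path = snapshot_download(repo_id="SimpleBerry/LLaMA-O1-Supervised-1129")

tokenizer = LlamaTokenizer.from_pretrained(model_path)
# device_map='auto' (requires the accelerate package) lets transformers pick the
# device for the weights, so the model lands on the GPU when one is attached.
model = LlamaForCausalLM.from_pretrained(model_path, device_map='auto')


@spaces.GPU  # acquires a ZeroGPU slice only for the duration of this call
def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
    # The real app first wraps `message` with llama_o1_template(...), defined
    # earlier in app.py; plain tokenization is used here for brevity.
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)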