Update app.py
app.py CHANGED

@@ -46,8 +46,8 @@ h1 {
 """
 
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Llama-
-model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Llama-
+tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1")
+model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
 
 @spaces.GPU()
 def chat_llama3_8b(message: str,

@@ -81,8 +81,7 @@ def chat_llama3_8b(message: str,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        top_p=0.9
-        repetition_penalty=1.1,
+        top_p=0.9
         )
     # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
     if temperature == 0:
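
For context, the first hunk swaps the checkpoint and loads it in half precision with automatic device placement. Below is a minimal, self-contained sketch of just that loading step, assuming transformers, torch, and accelerate are installed and the checkpoint is accessible; it mirrors the two added lines rather than reproducing the Space's full app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# bfloat16 halves memory relative to float32, and device_map="auto" lets
# accelerate place the 27B parameters across whatever GPUs are visible.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)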
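The unchanged @spaces.GPU() context line marks this as a ZeroGPU Space: the Hugging Face spaces package attaches a GPU to the process only while the decorated function runs. A hedged sketch of that pattern follows; the diff shows only the first parameter of the signature, so the second parameter and the body here are illustrative assumptions.

import spaces  # Hugging Face helper package available on ZeroGPU Spaces

@spaces.GPU()  # a GPU is allocated only for the duration of this call
def chat_llama3_8b(message: str, history: list):
    # Body elided; the real function tokenizes the chat and calls
    # model.generate, as the second hunk suggests.
    ...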
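The second hunk drops repetition_penalty=1.1 from the sampling arguments; note that the removed lines also lacked a comma after top_p=0.9, so the edit doubles as a syntax fix. The temperature == 0 check visible in the context lines exists because model.generate rejects do_sample=True with a zero temperature. Below is a hedged sketch of how such a guard is typically wired up; the generate_reply wrapper, the kwargs dict, and the input_ids parameter are assumptions for illustration, not code shown in the diff.

def generate_reply(model, input_ids, max_new_tokens: int, temperature: float):
    # Keyword arguments mirroring the diff's new sampling configuration.
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,
    )
    # Zero temperature is invalid when sampling, so fall back to greedy
    # decoding, which is what the diff's inline comment describes.
    if temperature == 0:
        generate_kwargs["do_sample"] = False
    return model.generate(**generate_kwargs)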