Update app.py
app.py CHANGED

@@ -46,8 +46,8 @@ h1 {
 """
 
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Llama-
-model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Llama-
+tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1")
+model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
 
 @spaces.GPU()
 def chat_llama3_8b(message: str,

@@ -81,8 +81,7 @@ def chat_llama3_8b(message: str,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        top_p=0.9
-        repetition_penalty=1.1,
+        top_p=0.9
         )
     # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
     if temperature == 0:
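
For context, the first hunk swaps the checkpoint and loads it in half precision with automatic device placement. Below is a minimal, self-contained sketch of just that loading step, assuming transformers, torch, and accelerate are installed and the checkpoint is accessible; it mirrors the two added lines rather than reproducing the Space's full app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# bfloat16 halves memory relative to float32, and device_map="auto" lets
# accelerate place the 27B parameters across whatever GPUs are visible.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)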
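The unchanged @spaces.GPU() context line marks this as a ZeroGPU Space: the Hugging Face spaces package attaches a GPU to the process only while the decorated function runs. A hedged sketch of that pattern follows; the diff shows only the first parameter of the signature, so the second parameter and the body here are illustrative assumptions.

import spaces  # Hugging Face helper package available on ZeroGPU Spaces

@spaces.GPU()  # a GPU is allocated only for the duration of this call
def chat_llama3_8b(message: str, history: list):
    # Body elided; the real function tokenizes the chat and calls
    # model.generate, as the second hunk suggests.
    ...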
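The second hunk drops repetition_penalty=1.1 from the sampling arguments; note that the removed lines also lacked a comma after top_p=0.9, so the edit doubles as a syntax fix. The temperature == 0 check visible in the context lines exists because model.generate rejects do_sample=True with a zero temperature. Below is a hedged sketch of how such a guard is typically wired up; the generate_reply wrapper, the kwargs dict, and the input_ids parameter are assumptions for illustration, not code shown in the diff.

def generate_reply(model, input_ids, max_new_tokens: int, temperature: float):
    # Keyword arguments mirroring the diff's new sampling configuration.
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,
    )
    # Zero temperature is invalid when sampling, so fall back to greedy
    # decoding, which is what the diff's inline comment describes.
    if temperature == 0:
        generate_kwargs["do_sample"] = False
    return model.generate(**generate_kwargs)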