alfredplpl committed (verified)
Commit b9b28aa · Parent: bcd9696

Update app.py

Files changed (1)
  1. app.py +3 -4
app.py CHANGED
@@ -46,8 +46,8 @@ h1 {
 """
 
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1")
-model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained("tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1")
+model = AutoModelForCausalLM.from_pretrained("tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
 
 @spaces.GPU()
 def chat_llama3_8b(message: str,
@@ -81,8 +81,7 @@ def chat_llama3_8b(message: str,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        top_p=0.9,
-        repetition_penalty=1.1,
+        top_p=0.9
     )
     # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
     if temperature == 0:
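
For context, here is a minimal, self-contained sketch of how the pieces touched by this commit plausibly fit together in the Space. Only the model ID, the from_pretrained arguments, the sampling kwargs, and the temperature-zero guard come from the diff; the synchronous generate() wrapper below stands in for the Space's actual @spaces.GPU() handler chat_llama3_8b, and the chat-template prompt construction and default parameter values are illustrative assumptions, not code from the repository.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "tokyotech-llm/Gemma-2-Llama-Swallow-27b-it-v0.1"

# Load the tokenizer and model as the diff does: bfloat16 weights,
# automatically placed across the available GPUs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto"
)

def generate(message: str, temperature: float = 0.7, max_new_tokens: int = 256) -> str:
    # Build a single-turn chat prompt with the model's chat template
    # (hypothetical; the Space's actual prompt handling is not shown in the diff).
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": message}],
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    generate_kwargs = dict(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,  # repetition_penalty=1.1 was dropped in this commit
    )
    # Mirror the guard from the Space: temperature == 0 would crash sampling,
    # so fall back to greedy decoding instead.
    if temperature == 0:
        generate_kwargs["do_sample"] = False

    output_ids = model.generate(input_ids, **generate_kwargs)
    # Decode only the newly generated tokens, skipping the prompt.
    return tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens=True)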