Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -42,10 +42,6 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 42 |
device_map="auto",
|
| 43 |
)
|
| 44 |
tokenizer = AutoTokenizer.from_pretrained(MODELS)
|
| 45 |
-
terminators = [
|
| 46 |
-
tokenizer.eos_token_id,
|
| 47 |
-
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
| 48 |
-
]
|
| 49 |
|
| 50 |
@spaces.GPU
|
| 51 |
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
|
|
@@ -72,7 +68,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
| 72 |
max_new_tokens=max_new_tokens,
|
| 73 |
do_sample=True,
|
| 74 |
temperature=temperature,
|
| 75 |
-
|
| 76 |
)
|
| 77 |
|
| 78 |
thread = Thread(target=model.generate, kwargs=generate_kwargs)
|
|
@@ -109,7 +105,7 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 109 |
minimum=128,
|
| 110 |
maximum=4096,
|
| 111 |
step=1,
|
| 112 |
-
value=
|
| 113 |
label="Max new tokens",
|
| 114 |
render=False,
|
| 115 |
),
|
|
|
|
| 42 |
device_map="auto",
|
| 43 |
)
|
| 44 |
tokenizer = AutoTokenizer.from_pretrained(MODELS)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
@spaces.GPU
|
| 47 |
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
|
|
|
|
| 68 |
max_new_tokens=max_new_tokens,
|
| 69 |
do_sample=True,
|
| 70 |
temperature=temperature,
|
| 71 |
+
eos_token_id = [128001, 128009],
|
| 72 |
)
|
| 73 |
|
| 74 |
thread = Thread(target=model.generate, kwargs=generate_kwargs)
|
|
|
|
| 105 |
minimum=128,
|
| 106 |
maximum=4096,
|
| 107 |
step=1,
|
| 108 |
+
value=1024,
|
| 109 |
label="Max new tokens",
|
| 110 |
render=False,
|
| 111 |
),
|