Di Zhang
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -31,31 +31,19 @@ def llama_o1_template(data):
|
|
| 31 |
text = template.format(content=data)
|
| 32 |
return text
|
| 33 |
|
| 34 |
-
def format_response(response):
    """Convert the model's structured control tokens into readable text.

    Strips the father/local id and rating wrapper tokens emitted by the
    LLaMA-O1 template, turning each step into ``<father_id>π<local_id>, <thought>``
    and collapsing rating tokens to the ``π`` marker.

    Parameters
    ----------
    response : str
        Raw decoded model output containing the special tokens.

    Returns
    -------
    str
        The cleaned, display-ready text.
    """
    response = response.replace('<start_of_father_id>', '')
    response = response.replace('<end_of_father_id><start_of_local_id>', 'π')
    response = response.replace('<end_of_local_id><start_of_thought>', ', ')
    response = response.replace('<end_of_thought><start_of_rating>', '')
    response = response.replace('<end_of_rating>', '')
    response = response.replace('<positive_rating>', 'π')
    response = response.replace('<negative_rating>', 'π')
    # BUG FIX: the original never returned, so callers received None.
    return response
|
| 42 |
-
|
| 43 |
def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
    """Stream model output for *message*, yielding the accumulated text.

    Parameters
    ----------
    message : str
        User prompt; wrapped by ``llama_o1_template`` before tokenization.
    history : list
        Chat history (unused; required by the Gradio chat callback signature).
    max_tokens : int
        Intended cap on generated tokens.
    temperature, top_p : float
        Sampling parameters forwarded to ``model.generate``.

    Yields
    ------
    str
        The text generated so far, growing with each token.
    """
    temp = ""
    input_texts = [llama_o1_template(message)]
    input_texts = [input_text.replace('<|end_of_text|>', '') for input_text in input_texts]
    inputs = model.tokenize(input_texts[0].encode('utf-8'))
    # BUG FIX: the original assigned `texts = format_response(texts)` with
    # `texts` undefined (NameError) and never ran the model. Restore the
    # token-generation loop: sample, detokenize, accumulate, stream.
    for token in model.generate(inputs, top_p=top_p, temp=temperature):
        text = model.detokenize([token])
        temp += text.decode('utf-8')
        yield temp
|
| 57 |
|
| 58 |
-
|
| 59 |
with gr.Blocks() as demo:
|
| 60 |
gr.Markdown(DESCRIPTION)
|
| 61 |
|
|
|
|
| 31 |
text = template.format(content=data)
|
| 32 |
return text
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
    """Stream model output for *message*, yielding the accumulated text.

    Parameters
    ----------
    message : str
        User prompt; wrapped by ``llama_o1_template`` before tokenization.
    history : list
        Chat history (unused; required by the Gradio chat callback signature).
    max_tokens : int
        Upper bound on the number of generated tokens. The original accepted
        this parameter but ignored it, so generation was unbounded.
    temperature, top_p : float
        Sampling parameters forwarded to ``model.generate``.

    Yields
    ------
    str
        The text generated so far, growing with each token.
    """
    import codecs  # local: incremental UTF-8 decoding of the byte stream

    temp = ""
    input_texts = [llama_o1_template(message)]
    input_texts = [input_text.replace('<|end_of_text|>', '') for input_text in input_texts]
    inputs = model.tokenize(input_texts[0].encode('utf-8'))
    # BUG FIX: decoding each token's bytes independently raises
    # UnicodeDecodeError when a multi-byte UTF-8 character spans two tokens
    # (llama.cpp byte-fallback tokens do this). An incremental decoder
    # buffers partial sequences across tokens instead.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='ignore')
    for count, token in enumerate(model.generate(inputs, top_p=top_p, temp=temperature)):
        if count >= max_tokens:
            # BUG FIX: honor max_tokens; the original looped without a cap.
            break
        text = model.detokenize([token])
        temp += decoder.decode(text)
        yield temp
|
| 46 |
|
|
|
|
| 47 |
with gr.Blocks() as demo:
|
| 48 |
gr.Markdown(DESCRIPTION)
|
| 49 |
|