Update app.py
app.py CHANGED
@@ -34,30 +34,26 @@ def create_chat_template_messages(history, prompt):
 
     return messages
 
-# Async function for generating responses using two models
 @spaces.GPU
-async def generate_responses(prompt, history):
+def generate_responses(prompt, history):
     # Create messages array for chat history and apply template
     messages = create_chat_template_messages(history, prompt)
     wrapped_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_special_tokens=True, add_generation_prompt=True)
 
     #already has special tokens
     inputs = tokenizer.encode(wrapped_prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
-
-    standard_task = asyncio.to_thread(
-        model1.generate, inputs, max_length=2048, temperature=1
-    )
+    def standard_task():
+        return model1.generate(**inputs, max_length=2048, temperature=0.7)
 
     # Custom sampler task: loop over generator and collect outputs in a list
     async def custom_sampler_task():
         generated_list = []
-        generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=
+        generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=0.7)
         for token in generator:
             generated_list.append(token)
         return tokenizer.decode(generated_list, skip_special_tokens=True)
-
-
-    standard_output, custom_output = await asyncio.gather(standard_task, custom_sampler_task())
+    standard_output = standard_task()
+    custom_output = asyncio.run(custom_sampler_task())
     # Decode standard output and remove the prompt from the generated response
     standard_response = tokenizer.decode(standard_output[0][len(inputs[0]):], skip_special_tokens=True)
 
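The hunk header shows the enclosing helper, create_chat_template_messages, whose body is outside this diff. A minimal sketch of what such a helper typically looks like, assuming a Gradio-style history of (user, assistant) pairs; the actual implementation in app.py may differ:

# Hypothetical reconstruction of the helper named in the hunk header; the real
# body is not part of this change. Assumes history is a list of (user, assistant) pairs.
def create_chat_template_messages(history, prompt):
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    # The new user turn goes last so apply_chat_template can append the
    # generation prompt for the assistant reply.
    messages.append({"role": "user", "content": prompt})
    return messages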
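The substantive change is in how the two generations are scheduled. The removed code built an awaitable for model1.generate (apparently via asyncio.to_thread; that removed line is partly cut off in the diff view) and ran it concurrently with the async custom sampler through asyncio.gather. The added code calls generate directly and drives the sampler coroutine with asyncio.run, so the two now run one after the other inside the synchronous @spaces.GPU function. A minimal, self-contained sketch of the two patterns with stand-in functions, not the Space's actual models:

import asyncio
import time

def blocking_generate():
    # Stand-in for model1.generate(...): a blocking, GPU/CPU-bound call.
    time.sleep(0.1)
    return "standard output"

async def custom_sampler_coro():
    # Stand-in for the token-by-token creative sampler coroutine.
    await asyncio.sleep(0.1)
    return "custom output"

async def old_pattern():
    # Removed approach: run the blocking call in a worker thread and
    # overlap it with the sampler coroutine.
    standard_task = asyncio.to_thread(blocking_generate)
    return await asyncio.gather(standard_task, custom_sampler_coro())

def new_pattern():
    # Added approach: plain synchronous flow; the coroutine gets its own
    # short-lived event loop via asyncio.run.
    standard_output = blocking_generate()
    custom_output = asyncio.run(custom_sampler_coro())
    return standard_output, custom_output

print(asyncio.run(old_pattern()))  # ['standard output', 'custom output']
print(new_pattern())               # ('standard output', 'custom output')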
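The custom sampler path is unchanged apart from its temperature argument: it iterates a token-id generator, accumulates the ids, and decodes once at the end. A stand-in sketch of that collect-then-decode pattern (the sampler and tokenizer here are dummies, not the Space's objects):

def dummy_sampler_generate(prompt, max_length=2048, temperature=0.7):
    # Stand-in generator yielding token ids one at a time.
    for token_id in (2023, 2003, 102):
        yield token_id

class DummyTokenizer:
    def decode(self, ids, skip_special_tokens=True):
        return " ".join(str(i) for i in ids)

tokenizer = DummyTokenizer()
generated_list = []
for token in dummy_sampler_generate("wrapped prompt"):
    generated_list.append(token)
print(tokenizer.decode(generated_list, skip_special_tokens=True))  # "2023 2003 102"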
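The final, unchanged decode step strips the prompt from the standard output: generate() returns the prompt token ids followed by the newly generated ids, so slicing from len(inputs[0]) keeps only the continuation (the prompt itself was encoded with add_special_tokens=False because apply_chat_template had already inserted the special tokens). A toy illustration of the slicing with plain lists in place of tensors:

# Toy ids only; real values come from tokenizer.encode and model1.generate.
prompt_ids = [101, 7592, 2088]               # stand-in for inputs[0]
output_ids = prompt_ids + [2023, 2003, 102]  # stand-in for standard_output[0]

new_ids = output_ids[len(prompt_ids):]       # drop the echoed prompt
assert new_ids == [2023, 2003, 102]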