Spaces:

allenai
/

BaseChat

Running

App Files Files Community

BaseChat / app.py

yuchenlin

fix the typo of BF16

50c166d over 1 year ago

raw

history blame contribute delete

8.64 kB

	import gradio as gr
	import os
	from typing import List
	import logging
	import urllib.request
	from utils import model_name_mapping, urial_template, openai_base_request, chat_template, openai_chat_request
	from constant import js_code_label, my_css, HEADER_MD, BASE_TO_ALIGNED, MODELS
	from openai import OpenAI
	import datetime
	# add logging info to console
	logging.basicConfig(level=logging.INFO)

	URIAL_VERSION = "inst_1k_v4.help"
	URIAL_URL = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{URIAL_VERSION}.txt"
	urial_prompt = urllib.request.urlopen(URIAL_URL).read().decode('utf-8')
	urial_prompt = urial_prompt.replace("```", '"""') # new version of URIAL uses """ instead of ```
	STOP_STRS = ['"""', '# Query:', '# Answer:']

	addr_limit_counter = {}
	LAST_UPDATE_TIME = datetime.datetime.now()


	models = MODELS


	# mega_hist = {
	# "base": [],
	# "aligned": []
	# }

	def respond(
	message,
	history: list[tuple[str, str]],
	max_tokens,
	temperature,
	top_p,
	rp,
	model_name,
	model_type,
	api_key,
	request:gr.Request
	):
	global STOP_STRS, urial_prompt, LAST_UPDATE_TIME, addr_limit_counter

	assert model_type in ["base", "aligned"]
	# if history:
	# if model_type == "base":
	# mega_hist["base"] = history
	# else:
	# mega_hist["aligned"] = history


	if model_type == "base":
	prompt = urial_template(urial_prompt, history, message)
	else:
	messages = chat_template(history, message)

	# _model_name = "meta-llama/Llama-3-8b-hf"
	_model_name = model_name_mapping(model_name)

	if api_key and len(api_key) == 64:
	api_key = api_key
	else:
	api_key = None

	# headers = request.headers
	# if already 24 hours passed, reset the counter
	if datetime.datetime.now() - LAST_UPDATE_TIME > datetime.timedelta(days=1):
	addr_limit_counter = {}
	LAST_UPDATE_TIME = datetime.datetime.now()
	host_addr = request.client.host
	if host_addr not in addr_limit_counter:
	addr_limit_counter[host_addr] = 0
	if addr_limit_counter[host_addr] > 100:
	return "You have reached the limit of 100 requests for today. Please use your own API key."

	if model_type == "base":
	infer_request = openai_base_request(prompt=prompt, model=_model_name,
	temperature=temperature,
	max_tokens=max_tokens,
	top_p=top_p,
	repetition_penalty=rp,
	stop=STOP_STRS, api_key=api_key)
	else:
	infer_request = openai_chat_request(messages=messages, model=_model_name,
	temperature=temperature,
	max_tokens=max_tokens,
	top_p=top_p,
	repetition_penalty=rp,
	stop=STOP_STRS, api_key=api_key)

	addr_limit_counter[host_addr] += 1
	logging.info(f"Requesting chat completion from OpenAI API with model {_model_name}")
	logging.info(f"addr_limit_counter: {addr_limit_counter}; Last update time: {LAST_UPDATE_TIME};")

	response = ""
	for msg in infer_request:
	# print(msg.choices[0].delta.keys())
	if hasattr(msg.choices[0], "delta"):
	# Note: 'ChoiceDelta' object may or may not be not subscriptable
	if "content" in msg.choices[0].delta:
	token = msg.choices[0].delta["content"]
	else:
	token = msg.choices[0].delta.content
	else:
	token = msg.choices[0].text
	if model_type == "base":
	should_stop = False
	for _stop in STOP_STRS:
	if _stop in response + token:
	should_stop = True
	break
	if should_stop:
	break
	if token is None:
	continue
	response += token
	if model_type == "base":
	if response.endswith('\n"'):
	response = response[:-1]
	elif response.endswith('\n""'):
	response = response[:-2]
	yield history + [(message, response)]
	# mega_hist[model_type].append((message, response))
	# yield mega_hist[model_type]



	def load_models(base_model_name):
	print(f"base_model_name={base_model_name}")
	out_box = [gr.Chatbot(), gr.Chatbot(), gr.Dropdown()]
	out_box[0] = (gr.update(label=f"Chat with Base LLM: {base_model_name}"))
	aligned_model_name = BASE_TO_ALIGNED[base_model_name]
	out_box[1] = (gr.update(label=f"Chat with Aligned LLM: {aligned_model_name}"))
	out_box[2] = (gr.update(value=aligned_model_name, interactive=False))
	return out_box[0], out_box[1], out_box[2]

	def clear_fn():
	# mega_hist["base"] = []
	# mega_hist["aligned"] = []
	return None, None, None


	with gr.Blocks(gr.themes.Soft(), js=js_code_label, css=my_css) as demo:
	api_key = gr.Textbox(label="🔑 APIKey", placeholder="Enter your Together/Hyperbolic API Key. Leave it blank to use our key with limited usage.", type="password", elem_id="api_key", visible=False)

	gr.Markdown(HEADER_MD)

	with gr.Row():
	chat_a = gr.Chatbot(height=500, label="Chat with Base LLMs via URIAL")
	chat_b = gr.Chatbot(height=500, label="Chat with Aligned LLMs")

	with gr.Group():
	with gr.Row():
	with gr.Column(scale=1.5):
	message = gr.Textbox(label="Prompt", placeholder="Enter your message here")
	with gr.Row():
	with gr.Column(scale=2):
	with gr.Row():
	left_model_choice = gr.Dropdown(label="Base Model", choices=models, interactive=True)
	right_model_choice = gr.Textbox(label="Aligned Model", placeholder="xxx", visible=True)
	with gr.Row():
	btn = gr.Button("🚀 Chat")
	# gr.Markdown("---")
	with gr.Row():
	stop_btn = gr.Button("⏸️ Stop")
	clear_btn = gr.Button("🔁 Clear")
	with gr.Row():
	gr.Markdown(">> - We thank for the support of Llama-3.1-405B from [Hyperbolic AI](https://hyperbolic.xyz/). ")
	with gr.Column(scale=1):
	with gr.Accordion("⚙️ Params for Base LLM", open=True):
	with gr.Row():
	max_tokens_1 = gr.Slider(label="Max tokens", value=256, minimum=0, maximum=2048, step=16, interactive=True, visible=True)
	temperature_1 = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
	with gr.Row():
	top_p_1 = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
	rp_1 = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.1)
	with gr.Accordion("⚙️ Params for Aligned LLM", open=True):
	with gr.Row():
	max_tokens_2 = gr.Slider(label="Max tokens", value=256, minimum=0, maximum=2048, step=16, interactive=True, visible=True)
	temperature_2 = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
	with gr.Row():
	top_p_2 = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
	rp_2 = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)

	left_model_choice.value = "Llama-3.1-405B-FP8"
	right_model_choice.value = "Llama-3.1-405B-Instruct-BF16"
	left_model_choice.change(load_models, [left_model_choice], [chat_a, chat_b, right_model_choice])

	model_type_left = gr.Textbox(visible=False, value="base")
	model_type_right = gr.Textbox(visible=False, value="aligned")

	go1 = btn.click(respond, [message, chat_a, max_tokens_1, temperature_1, top_p_1, rp_1, left_model_choice, model_type_left, api_key], chat_a)
	go2 = btn.click(respond, [message, chat_b, max_tokens_2, temperature_2, top_p_2, rp_2, right_model_choice, model_type_right, api_key], chat_b)

	stop_btn.click(None, None, None, cancels=[go1, go2])
	clear_btn.click(clear_fn, None, [message, chat_a, chat_b])

	if __name__ == "__main__":
	demo.launch(show_api=False)