"""Ghaymah GenAI chatbot: a Gradio chat UI over an OpenAI-compatible API."""
import os, sys, time, asyncio
from typing import List, Dict
import gradio as gr
from dotenv import load_dotenv
# On Windows, install the Proactor event-loop policy up front so asyncio-based
# libraries don't emit "event loop is closed" warnings at shutdown.
if sys.platform.startswith("win"):
    try:
        policy = asyncio.WindowsProactorEventLoopPolicy()
        asyncio.set_event_loop_policy(policy)
    except Exception:
        # Best-effort: keep the default policy if anything goes wrong.
        pass
# ── Configuration: read from .env, with sensible defaults ─────────────────────
load_dotenv()
# NOTE(review): env var names are mixed-case ("APP_Name", "APP_Version",
# "Models") — confirm they match the keys in the deployed .env file.
APP_Name = os.getenv("APP_Name", "Ghaymah GenAI chatbots")
APP_Version = os.getenv("APP_Version", "0.1.0")
API_KEY = os.getenv("API_KEY", "")
# Models: comma-separated list from .env; falls back to the hard-coded set
# below when the variable is unset or contains only blanks.
MODELS = [m.strip() for m in os.getenv("Models", "").split(",") if m.strip()] or [
"gemma-3-4b-it",
"QwQ-32B",
"DeepSeek-V3-0324",
"Qwen/Qwen3-32B",
"zai-org/GLM-4.5-Air",
"moonshotai/Kimi-K2-Instruct",
]
# Friendly per-model descriptions shown in the right-hand panel; models not
# listed here simply render with an empty description.
MODEL_INFO = {
"gemma-3-4b-it": "Google Gemma-3 4B Instruct — light, fast, solid reasoning.",
"QwQ-32B": "QwQ-32B — reasoning-focused; strong long-form answers.",
"DeepSeek-V3-0324": "DeepSeek V3 (0324) — versatile, great multi-step reasoning.",
"Qwen/Qwen3-32B": "Qwen3-32B — multilingual, good code & math.",
"zai-org/GLM-4.5-Air": "GLM-4.5-Air — efficient generalist, good latency.",
"moonshotai/Kimi-K2-Instruct": "Kimi K2 Instruct — long-context, helpful writing.",
}
LOGO_PATH = "download.jpeg" # change to your image if different
# ── OpenAI-compatible client ──────────────────────────────────────────────────
from openai import OpenAI
BASE_URL = "https://genai.ghaymah.systems"
# client is None when no API key is configured; safe_chat_complete() surfaces
# that as a user-visible message instead of raising.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL) if API_KEY else None
SYSTEM_SEED = "You are Ghaymah Assistant. Be concise and helpful."

# Retry schedule (seconds) applied when the API reports a rate limit.
BACKOFF = [5, 10, 20]


def safe_chat_complete(model: str, messages: List[Dict], max_tokens: int = 800) -> str:
    """Call the chat-completions endpoint with basic rate-limit retries.

    Returns the assistant's text on success, or a human-readable error
    string (this function never raises) so the UI can render failures inline.
    """
    if not client:
        return "⚠️ Missing API_KEY in .env"
    retries = iter(BACKOFF)
    while True:
        try:
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.3,
                timeout=90,
            )
            return resp.choices[0].message.content or ""
        except Exception as exc:
            text = str(exc)
            # Retry only on rate-limit-looking errors, and only while the
            # backoff schedule has delays left.
            rate_limited = "429" in text or "Rate" in text
            delay = next(retries, None) if rate_limited else None
            if delay is None:
                return f"Request failed for `{model}`: {exc}"
            time.sleep(delay)


def init_state():
    """Return a fresh per-session state seeded with the system prompt."""
    seed = {"role": "system", "content": SYSTEM_SEED}
    return {"messages": [seed]}
# ── Gradio UI ──────────────────────────────────────────────────────────────────
with gr.Blocks(title=APP_Name) as demo:
    # Per-session state: the full message history, including the system seed.
    state = gr.State(init_state())
    gr.Markdown(f"# {APP_Name} \n<span style='opacity:.7'>v{APP_Version}</span>")

    with gr.Row():
        # Left column: chat transcript and input controls.
        with gr.Column(scale=3):
            chat = gr.Chatbot(label="Chat", height=520, type="messages", value=[])
            user_in = gr.Textbox(label="Your message", placeholder="Type here…", lines=2)
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear")
        # Right column: model selector, logo, and model description.
        with gr.Column(scale=1, min_width=320):
            model_choice = gr.Radio(
                choices=MODELS,
                value=MODELS[0],
                label="Models",
                info="Select Your Model Here",
            )
            gr.Image(LOGO_PATH, show_label=False, container=False)
            info_md = gr.Markdown(MODEL_INFO.get(MODELS[0], ""))

    def _update_info(m: str) -> str:
        """Render the selected model's name and description as Markdown."""
        title = f"**{m}**"
        desc = MODEL_INFO.get(m, "")
        return f"{title}\n\n{desc}"

    model_choice.change(_update_info, model_choice, info_md)

    # Step 1: push the user's message into the chat stream and clear the box.
    def on_submit(msg, chat_messages):
        # Fix: strip whitespace so a blank/space-only submission is ignored
        # (previously " " was appended as a user message).
        msg = (msg or "").strip()
        if not msg:
            return "", (chat_messages or [])
        updated = (chat_messages or []) + [{"role": "user", "content": msg}]
        return "", updated

    # Step 2: generate the assistant reply for the newest user message.
    def bot_step(chat_messages, chosen_model, st):
        chat_messages = chat_messages or []
        # Fix: the chained .then() fires even when on_submit rejected an empty
        # input; without this guard the bot produced a spurious reply to a
        # stale (or empty) context. Only respond to a fresh user turn.
        if not chat_messages or chat_messages[-1].get("role") != "user":
            return chat_messages, (st or init_state())
        msgs = [{"role": "system", "content": SYSTEM_SEED}]
        # Only the last 2 visible messages are sent, keeping the prompt small.
        for m in chat_messages[-2:]:
            role, content = m.get("role"), m.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                msgs.append({"role": role, "content": content})
        reply = safe_chat_complete(chosen_model, msgs, max_tokens=800)
        updated = chat_messages + [{"role": "assistant", "content": reply}]
        st = st or init_state()
        st["messages"] = msgs + [{"role": "assistant", "content": reply}]
        return updated, st

    # Clear: reset the transcript and the session state.
    def on_clear():
        return [], init_state()

    # Wire events: Enter in the textbox and the Send button share the same
    # two-step flow (append user message, then generate the reply).
    user_in.submit(on_submit, [user_in, chat], [user_in, chat]) \
        .then(bot_step, [chat, model_choice, state], [chat, state])
    send_btn.click(on_submit, [user_in, chat], [user_in, chat]) \
        .then(bot_step, [chat, model_choice, state], [chat, state])
    clear_btn.click(on_clear, outputs=[chat, state])
if __name__ == "__main__":
    # Enable request queuing, then start the dev server with debug logging.
    demo.queue()
    demo.launch(debug=True)