import gradio as gr
import os
from huggingface_hub import InferenceClient

# --- UPDATED: Best Coding Models on Free Tier (2025/2026) ---
AVAILABLE_MODELS = [
    "Qwen/Qwen2.5-Coder-32B-Instruct",  # SOTA Coding Model (Best overall)
    "Qwen/Qwen2.5-72B-Instruct",  # Larger General Model (Great at Logic)
    "meta-llama/Llama-3.1-8B-Instruct",  # Fast & Reliable
    "mistralai/Mistral-7B-Instruct-v0.2:featherless-ai",  # 7B Model (Very Smart)
    "zai-org/GLM-4.7:zai-org",  # Extremely Fast
    "agentica-org/DeepCoder-14B-Preview:featherless-ai",  # Specialized Code Model
    "agentica-org/DeepCoder-14B-Preview",  # Backup Model
]
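# Note: IDs with a ":provider" suffix (e.g. ":featherless-ai") appear to use the
# Hugging Face router syntax that pins the request to a specific inference
# provider; IDs without a suffix let the router pick a provider automatically.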


def respond(message, history, system_message, temperature, model_id, request: gr.Request):
    # --- 1. Authentication Logic ---
    # Prefer a per-user token attached to the request (if the user signed in),
    # then fall back to the Space-level HF_TOKEN secret.
    token = None
    if request:
        token = getattr(request, "token", None)
    if token is None:
        token = os.getenv("HF_TOKEN")
    if token is None:
        yield "Error: No authentication token found. Please add 'HF_TOKEN' to Space Secrets."
        return

    # --- 2. Setup Client ---
    # The client is created dynamically based on the selected model_id
    client = InferenceClient(model_id, token=token)
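    # (Assumption: InferenceClient takes the model ID as its first positional
    # argument; the token used determines quota and gated-model access.)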

    # --- 3. Build Messages ---
    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append(msg)
    messages.append({"role": "user", "content": message})
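    # With ChatInterface(type="messages"), each history item is already an
    # OpenAI-style {"role": ..., "content": ...} dict, so it can be appended
    # to the payload unchanged.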

    # --- 4. Generate Response ---
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=2048,
            stream=True,
            temperature=temperature,
            top_p=0.9,
        )
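        # Gradio replaces the displayed bot message with each yielded value,
        # so we yield the accumulated text rather than each raw delta.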
        response_text = ""
        for chunk in stream:
            # FIX: Check if choices exist before accessing index [0]
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                response_text += content
                yield response_text
    except Exception as e:
        # Better Error Handling for Model Switching
        error_msg = str(e)
        if "404" in error_msg or "model_not_supported" in error_msg:
            yield f"Error: The model **{model_id}** is currently offline or overloaded. \n\n👉 Please select a different model from the dropdown above."
        else:
            yield f"Error: {error_msg}"


# --- 5. Build UI ---
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# 🤖 Multi-Model Coding Assistant")
        gr.Markdown("Select a model from the **Additional Inputs** menu below to switch between different AI coding experts.")
        gr.LoginButton("Sign in")
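    # Note: gr.LoginButton enables "Sign in with Hugging Face" OAuth; on Spaces
    # this also requires hf_oauth: true in the Space's README metadata.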

    gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            gr.Textbox(
                value="You are an expert software engineer. Provide clean, efficient, and well-commented code.",
                label="System Instruction",
                lines=2,
            ),
            gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.5,
                step=0.1,
                label="Temperature (Lower = More Precise)",
            ),
            # --- Model Selection Dropdown ---
            gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],  # Default to Qwen 2.5 Coder
                label="Select AI Model",
                interactive=True,
            ),
        ],
    )

demo.launch()
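
# Requirements (assumed for this Space): a requirements.txt containing
#   gradio
#   huggingface_hub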