import gradio as gr
import os
from huggingface_hub import InferenceClient

# --- UPDATED: Best Coding Models on Free Tier (2025/2026) ---
AVAILABLE_MODELS = [
    "Qwen/Qwen2.5-Coder-32B-Instruct",  # SOTA Coding Model (Best overall)
    "Qwen/Qwen2.5-72B-Instruct",  # Larger General Model (Great at Logic)
    "meta-llama/Llama-3.1-8B-Instruct",  # Fast & Reliable
    "mistralai/Mistral-7B-Instruct-v0.2:featherless-ai",  # 7B Model (Very Smart)
    "zai-org/GLM-4.7:zai-org",  # Extremely Fast
    "agentica-org/DeepCoder-14B-Preview:featherless-ai",  # Specialized Code Model
    "agentica-org/DeepCoder-14B-Preview",  # Backup Model
]
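# Note: IDs with a ":provider" suffix (e.g. ":featherless-ai") appear to use the
# Hugging Face router syntax that pins the request to a specific inference
# provider; IDs without a suffix let the router pick a provider automatically.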


def respond(message, history, system_message, temperature, model_id, request: gr.Request):
    # --- 1. Authentication Logic ---
    # Prefer a per-user token attached to the request (if the user signed in),
    # then fall back to the Space-level HF_TOKEN secret.
    token = None
    if request:
        token = getattr(request, "token", None)
    if token is None:
        token = os.getenv("HF_TOKEN")
    if token is None:
        yield "Error: No authentication token found. Please add 'HF_TOKEN' to Space Secrets."
        return

    # --- 2. Setup Client ---
    # The client is created dynamically based on the selected model_id
    client = InferenceClient(model_id, token=token)
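    # (Assumption: InferenceClient takes the model ID as its first positional
    # argument; the token used determines quota and gated-model access.)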

    # --- 3. Build Messages ---
    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append(msg)
    messages.append({"role": "user", "content": message})
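    # With ChatInterface(type="messages"), each history item is already an
    # OpenAI-style {"role": ..., "content": ...} dict, so it can be appended
    # to the payload unchanged.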

    # --- 4. Generate Response ---
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=2048,
            stream=True,
            temperature=temperature,
            top_p=0.9,
        )
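        # Gradio replaces the displayed bot message with each yielded value,
        # so we yield the accumulated text rather than each raw delta.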
        response_text = ""
        for chunk in stream:
            # FIX: Check if choices exist before accessing index [0]
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                response_text += content
                yield response_text
    except Exception as e:
        # Better Error Handling for Model Switching
        error_msg = str(e)
        if "404" in error_msg or "model_not_supported" in error_msg:
            yield f"Error: The model **{model_id}** is currently offline or overloaded. \n\n👉 Please select a different model from the dropdown above."
        else:
            yield f"Error: {error_msg}"


# --- 5. Build UI ---
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# 🤖 Multi-Model Coding Assistant")
        gr.Markdown("Select a model from the **Additional Inputs** menu below to switch between different AI coding experts.")
        gr.LoginButton("Sign in")
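    # Note: gr.LoginButton enables "Sign in with Hugging Face" OAuth; on Spaces
    # this also requires hf_oauth: true in the Space's README metadata.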

    gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            gr.Textbox(
                value="You are an expert software engineer. Provide clean, efficient, and well-commented code.",
                label="System Instruction",
                lines=2,
            ),
            gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.5,
                step=0.1,
                label="Temperature (Lower = More Precise)",
            ),
            # --- Model Selection Dropdown ---
            gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],  # Default to Qwen 2.5 Coder
                label="Select AI Model",
                interactive=True,
            ),
        ],
    )

demo.launch()
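
# Requirements (assumed for this Space): a requirements.txt containing
#   gradio
#   huggingface_hub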