import logging
from typing import Generator, List, Optional

from openai import OpenAI

logging.basicConfig(level=logging.INFO)

def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    max_new_tokens: int = 1024,
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Sends a streaming chat request to an OpenAI-compatible backend using the official OpenAI client.
    Buffers output to improve LaTeX rendering.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    messages = [{"role": "system", "content": system_prompt}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_new_tokens,
        "stream": True,
    }

    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    logging.info(f"[Gateway] Request to {api_base} | Model: {model_name}")

    try:
        stream = client.chat.completions.create(**request_args)

        collected = ""
        buffer = ""

        for chunk in stream:
            # Some backends emit keep-alive or usage chunks with an empty
            # choices list; skip them before indexing.
            if not chunk.choices:
                continue
            # Tool-call deltas carry no `content`; only text is streamed here.
            delta = chunk.choices[0].delta.content or ""
            buffer += delta

            if "\n" in buffer or len(buffer) > 150:
                yield buffer
                buffer = ""

        # Flush whatever remains after the stream ends.
        if buffer:
            yield buffer

    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"