import logging
from typing import Generator, List, Optional

from openai import OpenAI

logging.basicConfig(level=logging.INFO)
def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    max_new_tokens: int = 1024,
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
"""
Sends a streaming chat request to an OpenAI-compatible backend using the official OpenAI client.
Buffers output to improve LaTeX rendering.
"""
    client = OpenAI(api_key=api_key, base_url=api_base)

    # Assemble the conversation: system prompt, then prior turns, then the new user message.
    messages = [{"role": "system", "content": system_prompt}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_new_tokens,
        "stream": True,
    }
    # Tool definitions are optional; only forward them when provided.
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    logging.info(f"[Gateway] Request to {api_base} | Model: {model_name}")
    try:
        stream = client.chat.completions.create(**request_args)
        buffer = ""
        for chunk in stream:
            # Some backends emit keep-alive chunks with an empty choices list; skip them.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            buffer += delta
            # Flush on a newline (or once the buffer exceeds 150 characters) so
            # multi-line LaTeX blocks are yielded whole rather than in fragments.
            if "\n" in buffer or len(buffer) > 150:
                yield buffer
                buffer = ""
        # Flush whatever remains after the stream ends.
        if buffer:
            yield buffer
    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"