import logging
from typing import Generator, List, Optional

from openai import OpenAI

logging.basicConfig(level=logging.INFO)


def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    max_new_tokens: int = 1024,
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Send a streaming chat request to an OpenAI-compatible backend using the
    official OpenAI client, buffering output to improve LaTeX rendering.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    # Build the message list: system prompt first, then any prior turns,
    # then the current user message.
    messages = [{"role": "system", "content": system_prompt}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_new_tokens,
        "stream": True,
    }
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    logging.info(f"[Gateway] Request to {api_base} | Model: {model_name}")

    try:
        stream = client.chat.completions.create(**request_args)
        collected = ""  # full response accumulated across all chunks
        buffer = ""
        for chunk in stream:
            # Some backends emit keep-alive or usage chunks with an empty
            # choices list; skip them rather than indexing into nothing.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            collected += delta
            buffer += delta
            # Flush on a newline or roughly every 150 characters so that
            # multi-line LaTeX blocks arrive intact instead of being split
            # mid-expression.
            if "\n" in buffer or len(buffer) > 150:
                yield buffer
                buffer = ""
        # Flush any remaining partial line.
        if buffer:
            yield buffer
    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"