Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import StreamingResponse | |
| from pydantic import BaseModel, Field | |
| from typing import Literal | |
| import os | |
| from functools import lru_cache | |
| from openai import OpenAI | |
# FastAPI application instance; route handlers are registered against it below.
app = FastAPI()

# Closed set of OpenRouter model identifiers this service accepts.
# Using Literal makes FastAPI/pydantic reject any other model_id at
# request-validation time instead of forwarding an unknown model upstream.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it"
]
class QueryModel(BaseModel):
    """Request body for the coding-assistant endpoint."""

    # Free-form coding question from the user; required (Field(...)).
    user_query: str = Field(..., description="User's coding query")
    # Which OpenRouter model should answer; constrained to the ModelID Literal.
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation"
    )

    class Config:
        # NOTE(review): `schema_extra` is the pydantic v1 spelling; pydantic v2
        # ignores it and expects `json_schema_extra` (via model_config).
        # Confirm which pydantic major version is deployed.
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }
def get_api_keys():
    """Build the API-key mapping from the environment.

    The OPENROUTER_API_KEY environment variable is expected to hold the key
    *without* the "sk-or-v1-" prefix; the prefix is prepended here.

    Returns:
        dict: mapping with a single "OPENROUTER_API_KEY" entry holding the
        full key string.

    Raises:
        RuntimeError: if OPENROUTER_API_KEY is unset or empty — a clear,
        actionable message instead of the bare KeyError the original raised
        at import time.
    """
    raw_key = os.environ.get("OPENROUTER_API_KEY")
    if not raw_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY environment variable is not set; it must "
            "contain the OpenRouter key without the 'sk-or-v1-' prefix."
        )
    return {"OPENROUTER_API_KEY": f"sk-or-v1-{raw_key}"}
# Resolve credentials once at import time, then build a single shared OpenAI
# SDK client pointed at OpenRouter's OpenAI-compatible endpoint. All request
# handlers reuse this client.
api_keys = get_api_keys()
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
def chat_with_llama_stream(messages, model, max_output_tokens=4000):
    """Generator that streams completion text from OpenRouter.

    Opens a streaming chat-completion request against the shared client and
    yields each non-empty content delta as it arrives. Any failure — during
    the initial call or mid-stream — is surfaced as an HTTPException(500).

    NOTE(review): if the exception fires after streaming has begun, FastAPI
    cannot convert it into a 500 response; the connection is simply dropped.
    """
    try:
        stream = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for event in stream:
            delta = event.choices[0].delta.content
            if delta is not None:
                yield delta
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
# FIX: the handler carried an endpoint docstring but had no route decorator,
# so FastAPI never registered or served it. Register it as a POST endpoint.
@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Streams the selected model's answer back as a text/event-stream response.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
    # Standard two-message chat payload: fixed system prompt + the user's query.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query}
    ]
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream"
    )