Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import StreamingResponse | |
| from pydantic import BaseModel, Field | |
| from typing import Literal | |
| import os | |
| from functools import lru_cache | |
| from openai import OpenAI | |
# FastAPI application instance; route handlers are registered against it below.
app = FastAPI()

# Closed set of OpenRouter model identifiers this service accepts.
# Using Literal makes FastAPI/pydantic reject any other model_id at
# request-validation time instead of forwarding an unknown model upstream.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it"
]
class QueryModel(BaseModel):
    """Request body for the coding-assistant endpoint."""

    # Free-form coding question from the user; required (Field(...)).
    user_query: str = Field(..., description="User's coding query")
    # Which OpenRouter model should answer; constrained to the ModelID Literal.
    model_id: ModelID = Field(
        default="meta-llama/llama-3-70b-instruct",
        description="ID of the model to use for response generation"
    )

    class Config:
        # NOTE(review): `schema_extra` is the pydantic v1 spelling; pydantic v2
        # ignores it and expects `json_schema_extra` (via model_config).
        # Confirm which pydantic major version is deployed.
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }
def get_api_keys():
    """Build the API-key mapping from the environment.

    The OPENROUTER_API_KEY environment variable is expected to hold the key
    *without* the "sk-or-v1-" prefix; the prefix is prepended here.

    Returns:
        dict: mapping with a single "OPENROUTER_API_KEY" entry holding the
        full key string.

    Raises:
        RuntimeError: if OPENROUTER_API_KEY is unset or empty — a clear,
        actionable message instead of the bare KeyError the original raised
        at import time.
    """
    raw_key = os.environ.get("OPENROUTER_API_KEY")
    if not raw_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY environment variable is not set; it must "
            "contain the OpenRouter key without the 'sk-or-v1-' prefix."
        )
    return {"OPENROUTER_API_KEY": f"sk-or-v1-{raw_key}"}
# Resolve credentials once at import time, then build a single shared OpenAI
# SDK client pointed at OpenRouter's OpenAI-compatible endpoint. All request
# handlers reuse this client.
api_keys = get_api_keys()
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
def chat_with_llama_stream(messages, model, max_output_tokens=4000):
    """Generator that streams completion text from OpenRouter.

    Opens a streaming chat-completion request against the shared client and
    yields each non-empty content delta as it arrives. Any failure — during
    the initial call or mid-stream — is surfaced as an HTTPException(500).

    NOTE(review): if the exception fires after streaming has begun, FastAPI
    cannot convert it into a 500 response; the connection is simply dropped.
    """
    try:
        stream = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for event in stream:
            delta = event.choices[0].delta.content
            if delta is not None:
                yield delta
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
# FIX: the handler carried an endpoint docstring but had no route decorator,
# so FastAPI never registered or served it. Register it as a POST endpoint.
@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.

    Streams the selected model's answer back as a text/event-stream response.

    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
    # Standard two-message chat payload: fixed system prompt + the user's query.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query.user_query}
    ]
    return StreamingResponse(
        chat_with_llama_stream(messages, model=query.model_id),
        media_type="text/event-stream"
    )