from fastapi import FastAPI
from pydantic import BaseModel
from typing import Union, Dict, List, Any
import json
import requests
class LLM:
    """Minimal client for DeepInfra's OpenAI-compatible chat endpoint."""

    def __init__(self, model: str, system_message: str = "You are a Helpful AI."):
        self.model = model
        self.conversation_history = [{"role": "system", "content": system_message}]

    def chat(self, messages: List[Dict[str, str]]) -> Union[str, None]:
        url = "https://api.deepinfra.com/v1/openai/chat/completions"
        # Browser-like headers so the request matches DeepInfra's web-embed traffic.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Origin': 'https://deepinfra.com',
            'Pragma': 'no-cache',
            'Referer': 'https://deepinfra.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'X-Deepinfra-Source': 'web-embed',
            'accept': 'text/event-stream',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"'
        }
        data = json.dumps(
            {
                'model': self.model,
                # Prepend the stored system message so it actually reaches the API.
                'messages': self.conversation_history + messages,
                'temperature': 0.7,
                'max_tokens': 8028,
                'stop': [],
                'stream': False  # don't change: the parsing below expects one JSON body, not SSE
            }, separators=(',', ':')
        )
        try:
            result = requests.post(url=url, data=data, headers=headers)
            return result.json()['choices'][0]['message']['content']
        except Exception:
            # Network failure or unexpected response shape: signal with None.
            return None
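# A minimal direct-usage sketch (an assumption-laden example, not part of the server:
# it presumes outbound network access and that the unauthenticated web-embed
# endpoint above still accepts requests):
#
#     llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct",
#               system_message="You are a Helpful AI.")
#     print(llm.chat([{"role": "user", "content": "Say hello in one sentence."}]))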
app = FastAPI()
class Model(BaseModel):
    id: str
    object: str
    created: int
    owned_by: str

class Message(BaseModel):
    role: str
    content: str

class CompletionRequest(BaseModel):
    model: str
    messages: List[Message]

class CompletionResponse(BaseModel):
    id: str
    object: str
    created: int
    model: str
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]
models = [
    {"id": "meta-llama/Meta-Llama-3-70B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "google/gemma-2-27b-it", "object": "model", "created": 1686935002, "owned_by": "google"},
    {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"},
    {"id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", "object": "model", "created": 1686935002, "owned_by": "cognitivecomputations"},
    {"id": "nvidia/Nemotron-4-340B-Instruct", "object": "model", "created": 1686935002, "owned_by": "nvidia"},
    {"id": "Qwen/Qwen2-72B-Instruct", "object": "model", "created": 1686935002, "owned_by": "qwen"},
    {"id": "openchat/openchat-3.6-8b", "object": "model", "created": 1686935002, "owned_by": "unknown"},
    {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "Qwen/Qwen2-7B-Instruct", "object": "model", "created": 1686935002, "owned_by": "Qwen"},
    {"id": "meta-llama/Meta-Llama-3.1-405B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"}
]
@app.post("/v1/chat/completions")  # OpenAI-style path (assumed; the original registered no routes)
def handle_completions(completion_request: CompletionRequest):
    system_prompt = next((message.content for message in completion_request.messages if message.role == 'system'), None)
    user_query = next((message.content for message in completion_request.messages if message.role == 'user'), None)
    response_text = generative(query=user_query, system_prompt=system_prompt, model=completion_request.model) or ""
    # Token counts are whitespace-word approximations, not real tokenizer counts.
    prompt_tokens = sum(len(message.content.split()) for message in completion_request.messages)
    response = CompletionResponse(
        id="chatcmpl-1",
        object="chat.completion",
        created=1234567890,
        model=completion_request.model,
        choices=[{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
        usage={"prompt_tokens": prompt_tokens, "total_tokens": prompt_tokens + len(response_text.split())}
    )
    return response
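# Example request body this endpoint expects (the shape mirrors the OpenAI chat
# API; the model id should be one of the entries in `models` above):
#
#     {"model": "meta-llama/Meta-Llama-3-8B-Instruct",
#      "messages": [{"role": "system", "content": "You are terse."},
#                   {"role": "user", "content": "What is FastAPI?"}]}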
@app.get("/v1/models")  # OpenAI-style path (assumed)
def get_models():
    return {"object": "list", "data": models}
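# With the server running, `curl http://localhost:8000/v1/models` returns the
# list above wrapped as {"object": "list", "data": [...]}.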
@app.post("/v1/completions")  # OpenAI-style path (assumed)
def create_completion(prompt: str, model: str, best_of: int = 1, echo: bool = False, frequency_penalty: float = 0.0):
    # best_of, echo and frequency_penalty are accepted for API compatibility
    # but are not forwarded upstream.
    response_text = generative(query=prompt, system_prompt="You are a helpful assistant.", model=model)
    response = {
        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
        "object": "text_completion",
        "created": 1589478378,
        "model": model,
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [{"text": response_text, "index": 0, "logprobs": None, "finish_reason": "length"}]
    }
    return response
def generative(query, system_prompt, model):
    # Fall back to a generic system prompt when the request omitted one.
    llm = LLM(model=model, system_message=system_prompt or "You are a Helpful AI.")
    messages = [{"role": "user", "content": query}]
    return llm.chat(messages)
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
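# Client-side sketch (assumes the server is running locally on port 8000; the
# /v1/... paths are the OpenAI-style routes registered above):
#
#     import requests
#     r = requests.post(
#         "http://localhost:8000/v1/chat/completions",
#         json={"model": "meta-llama/Meta-Llama-3-8B-Instruct",
#               "messages": [{"role": "user", "content": "Hello!"}]},
#     )
#     print(r.json()["choices"][0]["message"]["content"])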