web-server

Sleeping

App Files Files Community

pvanand committed on Jul 31, 2024

Commit

768bfff

verified ·

1 Parent(s): 113e85f

Update main.py

Browse files

Files changed (1) hide show

main.py +50 -60

main.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import logging
 from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
 from fastapi.security import APIKeyHeader
 from fastapi.responses import StreamingResponse
@@ -14,15 +13,11 @@ import time
 from datetime import datetime, timedelta
 import asyncio
 import requests
-from prompts import CODING_ASSISTANT_PROMPT, NEWS_ASSISTANT_PROMPT, generate_news_prompt
 from fastapi_cache import FastAPICache
 from fastapi_cache.backends.inmemory import InMemoryBackend
 from fastapi_cache.decorator import cache
-# Set up logging
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
 app = FastAPI()
 API_KEY_NAME = "X-API-Key"
@@ -74,7 +69,6 @@ class NewsQueryModel(BaseModel):
 @lru_cache()
 def get_api_keys():
-    logger.debug("Fetching API keys")
     return {
         "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}",
         "BRAVE_API_KEY": os.environ['BRAVE_API_KEY']
@@ -91,16 +85,12 @@ last_activity: Dict[str, float] = {}
 encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
 def limit_tokens(input_string, token_limit=6000):
-    logger.debug(f"Limiting tokens for input string, token limit: {token_limit}")
     return encoding.decode(encoding.encode(input_string)[:token_limit])
 def calculate_tokens(msgs):
-    token_count = sum(len(encoding.encode(str(m))) for m in msgs)
-    logger.debug(f"Calculated token count: {token_count}")
-    return token_count
 def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
-    logger.info(f"Starting chat with model: {model}")
     while calculate_tokens(messages) > (8000 - max_output_tokens):
         if len(messages) > max_llm_history:
             messages = [messages[0]] + messages[-max_llm_history:]
@@ -108,11 +98,9 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
             max_llm_history -= 1
             if max_llm_history < 2:
                 error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
-                logger.error(error_message)
                 raise HTTPException(status_code=400, detail=error_message)
     try:
-        logger.debug("Sending request to OpenAI")
         response = or_client.chat.completions.create(
             model=model,
             messages=messages,
@@ -127,25 +115,20 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
                 full_response += content
                 yield content
-        logger.debug("Finished streaming response")
         # After streaming, add the full response to the conversation history
         messages.append({"role": "assistant", "content": full_response})
     except Exception as e:
-        logger.error(f"Error in model response: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 async def verify_api_key(api_key: str = Security(api_key_header)):
     if api_key != API_KEY:
-        logger.warning("Invalid API key attempt")
         raise HTTPException(status_code=403, detail="Could not validate credentials")
-    logger.debug("API key verified successfully")
     return api_key
 # SQLite setup
 DB_PATH = '/app/data/conversations.db'
 def init_db():
-    logger.info("Initializing database")
     os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
@@ -158,22 +141,18 @@ def init_db():
                   timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
     conn.commit()
     conn.close()
-    logger.debug("Database initialized")
 init_db()
 def update_db(user_id, conversation_id, message, response):
-    logger.debug(f"Updating database for conversation {conversation_id}")
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
     c.execute('''INSERT INTO conversations (user_id, conversation_id, message, response)
                  VALUES (?, ?, ?, ?)''', (user_id, conversation_id, message, response))
     conn.commit()
     conn.close()
-    logger.debug("Database updated successfully")
 async def clear_inactive_conversations():
-    logger.info("Starting inactive conversation cleanup task")
     while True:
         current_time = time.time()
         inactive_convos = [conv_id for conv_id, last_time in last_activity.items()
@@ -183,18 +162,27 @@ async def clear_inactive_conversations():
                 del conversations[conv_id]
             if conv_id in last_activity:
                 del last_activity[conv_id]
-        logger.debug(f"Cleared {len(inactive_convos)} inactive conversations")
         await asyncio.sleep(60)  # Check every minute
 @app.on_event("startup")
 async def startup_event():
-    logger.info("Starting up FastAPI application")
     FastAPICache.init(InMemoryBackend(), prefix="fastapi-cache")
     asyncio.create_task(clear_inactive_conversations())
 @app.post("/coding-assistant")
 async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
-    logger.info(f"Received coding assistant request for user {query.user_id}")
     if query.conversation_id not in conversations:
         conversations[query.conversation_id] = [
             {"role": "system", "content": "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."}
@@ -211,16 +199,18 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
         for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
             full_response += content
             yield content
-        logger.debug(f"Finished processing response for conversation {query.conversation_id}")
         background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.user_query, full_response)
     return StreamingResponse(process_response(), media_type="text/event-stream")
 # New functions for news assistant
-def fetch_news(query, num_results=20):
-    logger.info(f"Fetching news for query: {query}")
-    url = "https://api.search.brave.com/res/v1/news/search"
     headers = {
         "Accept": "application/json",
         "Accept-Encoding": "gzip",
@@ -230,33 +220,37 @@ def fetch_news(query, num_results=20):
     response = requests.get(url, headers=headers, params=params)
-    if response.status_code == 200:
-        news_data = response.json()
-        logger.debug(f"Fetched {len(news_data['results'])} news items")
-        return [
-            {
-                "title": item["title"],
-                "snippet": item["extra_snippets"][0] if "extra_snippets" in item and item["extra_snippets"] else "",
-                "last_updated": item.get("age", ""),
-            }
-            for item in news_data['results']
-            if "extra_snippets" in item and item["extra_snippets"]
-        ][:num_results]
-    else:
-        logger.error(f"Failed to fetch news. Status code: {response.status_code}")
         return []
 @lru_cache(maxsize=100)
-def cached_fetch_news(query: str):
-    logger.debug(f"Fetching cached news for query: {query}")
-    return fetch_news(query)
 def analyze_news(query):
-    logger.info(f"Analyzing news for query: {query}")
-    news_data = cached_fetch_news(query)
     if not news_data:
-        logger.warning("No news data fetched")
         return "Failed to fetch news data.", []
     # Prepare the prompt for the AI
@@ -268,23 +262,23 @@ def analyze_news(query):
         {"role": "user", "content": prompt}
     ]
-    logger.debug("News analysis prompt prepared")
     return messages
 @app.post("/news-assistant")
 async def news_assistant(query: NewsQueryModel, api_key: str = Depends(verify_api_key)):
-    logger.info(f"Received news assistant request for query: {query.query}")
     messages = analyze_news(query.query)
     if not messages:
-        logger.error("Failed to fetch news data")
         raise HTTPException(status_code=500, detail="Failed to fetch news data")
     def process_response():
         for content in chat_with_llama_stream(messages, model=query.model_id):
             yield content
-    logger.debug("Starting to stream news assistant response")
     return StreamingResponse(process_response(), media_type="text/event-stream")
 class SearchQueryModel(BaseModel):
@@ -305,7 +299,6 @@ def analyze_search_results(query):
     search_data = internet_search(query, type="web")
     if not search_data:
-        logger.error("Failed to fetch search data")
         return "Failed to fetch search data.", []
     # Prepare the prompt for the AI
@@ -332,12 +325,9 @@ async def search_assistant(query: SearchQueryModel, api_key: str = Depends(verif
     def process_response():
         for content in chat_with_llama_stream(messages, model=query.model_id):
             yield content
-    logger.debug("Starting to stream news assistant response")
-    return StreamingResponse(process_response(), media_type="text/event-stream")
 if __name__ == "__main__":
     import uvicorn
-    logger.info("Starting uvicorn server")
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
 from fastapi.security import APIKeyHeader
 from fastapi.responses import StreamingResponse
 from datetime import datetime, timedelta
 import asyncio
 import requests
+from prompts import CODING_ASSISTANT_PROMPT, NEWS_ASSISTANT_PROMPT, generate_news_prompt, SEARCH_ASSISTANT_PROMPT, generate_search_prompt
 from fastapi_cache import FastAPICache
 from fastapi_cache.backends.inmemory import InMemoryBackend
 from fastapi_cache.decorator import cache
 app = FastAPI()
 API_KEY_NAME = "X-API-Key"
 @lru_cache()
 def get_api_keys():
     return {
         "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}",
         "BRAVE_API_KEY": os.environ['BRAVE_API_KEY']
 encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
 def limit_tokens(input_string, token_limit=6000):
     return encoding.decode(encoding.encode(input_string)[:token_limit])
 def calculate_tokens(msgs):
+    return sum(len(encoding.encode(str(m))) for m in msgs)
 def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
     while calculate_tokens(messages) > (8000 - max_output_tokens):
         if len(messages) > max_llm_history:
             messages = [messages[0]] + messages[-max_llm_history:]
             max_llm_history -= 1
             if max_llm_history < 2:
                 error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
                 raise HTTPException(status_code=400, detail=error_message)
     try:
         response = or_client.chat.completions.create(
             model=model,
             messages=messages,
                 full_response += content
                 yield content
         # After streaming, add the full response to the conversation history
         messages.append({"role": "assistant", "content": full_response})
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 async def verify_api_key(api_key: str = Security(api_key_header)):
     if api_key != API_KEY:
         raise HTTPException(status_code=403, detail="Could not validate credentials")
     return api_key
 # SQLite setup
 DB_PATH = '/app/data/conversations.db'
 def init_db():
     os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
                   timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
     conn.commit()
     conn.close()
 init_db()
 def update_db(user_id, conversation_id, message, response):
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
     c.execute('''INSERT INTO conversations (user_id, conversation_id, message, response)
                  VALUES (?, ?, ?, ?)''', (user_id, conversation_id, message, response))
     conn.commit()
     conn.close()
 async def clear_inactive_conversations():
     while True:
         current_time = time.time()
         inactive_convos = [conv_id for conv_id, last_time in last_activity.items()
                 del conversations[conv_id]
             if conv_id in last_activity:
                 del last_activity[conv_id]
         await asyncio.sleep(60)  # Check every minute
 @app.on_event("startup")
 async def startup_event():
     FastAPICache.init(InMemoryBackend(), prefix="fastapi-cache")
     asyncio.create_task(clear_inactive_conversations())
 @app.post("/coding-assistant")
 async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
+    """
+    Coding assistant endpoint that provides programming help based on user queries.
+    Available models:
+    - meta-llama/llama-3-70b-instruct (default)
+    - anthropic/claude-3.5-sonnet
+    - deepseek/deepseek-coder
+    - anthropic/claude-3-haiku
+    - openai/gpt-3.5-turbo-instruct
+    - qwen/qwen-72b-chat
+    - google/gemma-2-27b-it
+    Requires API Key authentication via X-API-Key header.
+    """
     if query.conversation_id not in conversations:
         conversations[query.conversation_id] = [
             {"role": "system", "content": "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."}
         for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
             full_response += content
             yield content
         background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.user_query, full_response)
     return StreamingResponse(process_response(), media_type="text/event-stream")
 # New functions for news assistant
+def internet_search(query, type = "web", num_results=20):
+    if type == "web":
+        url = "https://api.search.brave.com/res/v1/web/search"
+    else:
+        url = "https://api.search.brave.com/res/v1/news/search"
     headers = {
         "Accept": "application/json",
         "Accept-Encoding": "gzip",
     response = requests.get(url, headers=headers, params=params)
+    if response.status_code != 200:
         return []
+    if type == "web":
+        search_data = response.json()["web"]["results"]
+    else:
+        search_data = response.json()["results"]
+    processed_results = []
+    for item in search_data:
+        if not item.get("extra_snippets"):
+            continue
+        result = {
+            "title": item["title"],
+            "snippet": item["extra_snippets"][0],
+            "last_updated": item.get("age", "")
+        }
+        processed_results.append(result)
+    return processed_results[:num_results]
 @lru_cache(maxsize=100)
+def cached_internet_search(query: str):
+    return internet_search(query, type = "news")
 def analyze_news(query):
+    news_data = cached_internet_search(query)
     if not news_data:
         return "Failed to fetch news data.", []
     # Prepare the prompt for the AI
         {"role": "user", "content": prompt}
     ]
     return messages
 @app.post("/news-assistant")
 async def news_assistant(query: NewsQueryModel, api_key: str = Depends(verify_api_key)):
+    """
+    News assistant endpoint that provides summaries and analysis of recent news based on user queries.
+    Requires API Key authentication via X-API-Key header.
+    """
     messages = analyze_news(query.query)
     if not messages:
         raise HTTPException(status_code=500, detail="Failed to fetch news data")
     def process_response():
         for content in chat_with_llama_stream(messages, model=query.model_id):
             yield content
+#meta-llama/llama-3-70b-instruct  google/gemini-pro-1.5
     return StreamingResponse(process_response(), media_type="text/event-stream")
 class SearchQueryModel(BaseModel):
     search_data = internet_search(query, type="web")
     if not search_data:
         return "Failed to fetch search data.", []
     # Prepare the prompt for the AI
     def process_response():
         for content in chat_with_llama_stream(messages, model=query.model_id):
             yield content
+    return StreamingResponse(process_response(), media_type="text/event-stream")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)