# RAG_Library_2.py
# Description: This script contains the main RAG pipeline function and its supporting helper functions.
#
# Import necessary modules and functions
import configparser
import logging
import os
from typing import Dict, Any, List, Optional
# Local Imports
#from App_Function_Libraries.RAG.ChromaDB_Library import process_and_store_content, vector_search, chroma_client
from App_Function_Libraries.Article_Extractor_Lib import scrape_article
from App_Function_Libraries.DB.DB_Manager import add_media_to_database, search_db, get_unprocessed_media, \
    fetch_keywords_for_media
from App_Function_Libraries.Utils.Utils import load_comprehensive_config
#
# 3rd-Party Imports
import openai
#
########################################################################################################################
#
# Functions:

# Initialize OpenAI client (adjust this based on your API key management)
openai.api_key = "your-openai-api-key"
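# A sketch of one alternative: pull the key from the environment instead of
# hardcoding it (OPENAI_API_KEY is an assumed variable name, not part of this project):
# openai.api_key = os.environ.get("OPENAI_API_KEY", "")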
# Get the directory of the current script
current_dir = os.path.dirname(os.path.abspath(__file__))
# Construct the path to the config file
config_path = os.path.join(current_dir, 'Config_Files', 'config.txt')
# Read the configuration file
config = configparser.ConfigParser()
config.read(config_path)

def generate_answer(api_choice: str, context: str, query: str) -> str:
    """Build a context-plus-question prompt and answer it with the selected API backend."""
    logging.debug("Entering generate_answer function")
    config = load_comprehensive_config()
    logging.debug(f"Config sections: {config.sections()}")
    prompt = f"Context: {context}\n\nQuestion: {query}"
    if api_choice == "OpenAI":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai
        return summarize_with_openai(config['API']['openai_api_key'], prompt, "")
    elif api_choice == "Anthropic":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_anthropic
        return summarize_with_anthropic(config['API']['anthropic_api_key'], prompt, "")
    elif api_choice == "Cohere":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_cohere
        return summarize_with_cohere(config['API']['cohere_api_key'], prompt, "")
    elif api_choice == "Groq":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_groq
        return summarize_with_groq(config['API']['groq_api_key'], prompt, "")
    elif api_choice == "OpenRouter":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_openrouter
        return summarize_with_openrouter(config['API']['openrouter_api_key'], prompt, "")
    elif api_choice == "HuggingFace":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_huggingface
        return summarize_with_huggingface(config['API']['huggingface_api_key'], prompt, "")
    elif api_choice == "DeepSeek":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_deepseek
        return summarize_with_deepseek(config['API']['deepseek_api_key'], prompt, "")
    elif api_choice == "Mistral":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_mistral
        return summarize_with_mistral(config['API']['mistral_api_key'], prompt, "")
    elif api_choice == "Local-LLM":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_local_llm
        return summarize_with_local_llm(config['API']['local_llm_path'], prompt, "")
    elif api_choice == "Llama.cpp":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama
        return summarize_with_llama(config['API']['llama_api_key'], prompt, "")
    elif api_choice == "Kobold":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_kobold
        return summarize_with_kobold(config['API']['kobold_api_key'], prompt, "")
    elif api_choice == "Ooba":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_oobabooga
        return summarize_with_oobabooga(config['API']['ooba_api_key'], prompt, "")
    elif api_choice == "TabbyAPI":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_tabbyapi
        return summarize_with_tabbyapi(config['API']['tabby_api_key'], prompt, "")
    elif api_choice == "vLLM":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_vllm
        return summarize_with_vllm(config['API']['vllm_api_key'], prompt, "")
    elif api_choice == "ollama":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_ollama
        return summarize_with_ollama(config['API']['ollama_api_key'], prompt, "")
    else:
        raise ValueError(f"Unsupported API choice: {api_choice}")
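
# The if/elif chain above could also be expressed as a dispatch table. A minimal
# sketch follows; `_SUMMARIZER_DISPATCH` and `generate_answer_via_table` are
# hypothetical names, not part of this library, and the table only lists a few
# backends (the rest would follow the same (module, function, config key) pattern):
_SUMMARIZER_DISPATCH = {
    "OpenAI": ("App_Function_Libraries.Summarization_General_Lib", "summarize_with_openai", "openai_api_key"),
    "Anthropic": ("App_Function_Libraries.Summarization_General_Lib", "summarize_with_anthropic", "anthropic_api_key"),
    "ollama": ("App_Function_Libraries.Local_Summarization_Lib", "summarize_with_ollama", "ollama_api_key"),
    # ... remaining backends omitted for brevity ...
}

def generate_answer_via_table(api_choice: str, context: str, query: str) -> str:
    """Hypothetical sketch: dispatch-table equivalent of generate_answer above."""
    import importlib
    config = load_comprehensive_config()
    try:
        module_path, func_name, key_name = _SUMMARIZER_DISPATCH[api_choice]
    except KeyError:
        raise ValueError(f"Unsupported API choice: {api_choice}")
    # Lazily import the backend module, mirroring the in-branch imports above
    summarize = getattr(importlib.import_module(module_path), func_name)
    prompt = f"Context: {context}\n\nQuestion: {query}"
    return summarize(config['API'][key_name], prompt, "")
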
def perform_full_text_search(query: str, relevant_media_ids: Optional[List[int]] = None) -> List[Dict[str, Any]]:
    """Run a full-text search over content, optionally filtered to a set of media IDs."""
    fts_results = search_db(query, ["content"], "", page=1, results_per_page=5)
    filtered_fts_results = [
        {
            "content": result['content'],
            "metadata": {"media_id": result['id']}
        }
        for result in fts_results
        if relevant_media_ids is None or result['id'] in relevant_media_ids
    ]
    return filtered_fts_results
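
# Example (hypothetical query and IDs): restrict full-text hits to known media.
# hits = perform_full_text_search("climate change", relevant_media_ids=[3, 17])
# for hit in hits:
#     print(hit["metadata"]["media_id"], hit["content"][:80])
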
def fetch_relevant_media_ids(keywords: List[str]) -> List[int]:
    """Collect the IDs of all media items tagged with any of the given keywords."""
    relevant_ids = set()
    try:
        for keyword in keywords:
            media_ids = fetch_keywords_for_media(keyword)
            relevant_ids.update(media_ids)
    except Exception as e:
        logging.error(f"Error fetching relevant media IDs: {str(e)}")
    return list(relevant_ids)
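
# Example (hypothetical keywords): chain the two helpers so the full-text search
# only considers media already tagged with the requested keywords.
# media_ids = fetch_relevant_media_ids(["climate", "energy"])
# results = perform_full_text_search("emission targets", relevant_media_ids=media_ids)
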
# Example usage:
# 1. Initialize the system:
#    create_tables(db)  # Ensure FTS tables are set up
#
# 2. Create the ChromaDB client:
#    chroma_client = ChromaDBClient()
#
# 3. Create embeddings and store them in ChromaDB:
#    preprocess_all_content() or create_embeddings()
#
# 4. Perform a RAG search across all content:
#    result = rag_search("What are the key points about climate change?")
#    print(result['answer'])
#
# 5. (Extra) Perform RAG on a specific URL:
#    result = rag_pipeline("https://example.com/article", "What is the main topic of this article?")
#    print(result['answer'])
#
########################################################################################################################
############################################################################################################
#
# ElasticSearch Retriever
# https://github.com/langchain-ai/langchain/tree/44e3e2391c48bfd0a8e6a20adde0b6567f4f43c3/templates/rag-elasticsearch
#
# https://github.com/langchain-ai/langchain/tree/44e3e2391c48bfd0a8e6a20adde0b6567f4f43c3/templates/rag-self-query
#
# End of RAG_Library_2.py
############################################################################################################