"""Populate the Pinecone database with medical data.

Run this module as a script to load the medical datasets, split them into
chunks, and hand the chunks to ``EmbeddingService`` for upload.
"""
import time

from enhanced_data_loader import load_comprehensive_medical_datasets, chunk_text
from embedding_service import EmbeddingService


def setup_database():
    """Load, chunk, and upload the medical documents.

    Each document returned by ``load_comprehensive_medical_datasets`` is
    split with ``chunk_text``. Every resulting chunk is stored together with
    its parent document's ``source`` and ``metadata`` values, and the full
    list is passed to ``EmbeddingService.upsert_documents``. Progress is
    reported on standard output.
    """
    banner = "=" * 50
    print(banner)
    print("Setting up Medical Chatbot Database")
    print(banner)

    # Pull in the medical data gathered from the configured sources.
    documents = load_comprehensive_medical_datasets()

    # Flatten documents into one record per chunk. Note that all chunks of a
    # document reference the same ``metadata`` object (no copy is made).
    chunked_documents = [
        {
            'text': piece,
            'source': doc['source'],
            'metadata': doc['metadata'],
        }
        for doc in documents
        for piece in chunk_text(doc['text'])
    ]
    print(f"Total chunks: {len(chunked_documents)}")

    # The service is created only after chunking has finished, then used to
    # upload everything in a single call.
    embedding_service = EmbeddingService()
    embedding_service.upsert_documents(chunked_documents)

    print("\n" + banner)
    print("Database setup complete!")
    print(banner)


if __name__ == "__main__":
    setup_database()