Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Script to migrate test_intercom_data from contextdb instance to keshavchhaparia instance. | |
| """ | |
import os
import sys

from loguru import logger
from pymongo import MongoClient
# Connection strings embed a username and password, so they are read from the
# environment instead of being committed to source control.
# NOTE(review): the credentials that were previously hard-coded here should be
# treated as compromised and rotated.
#
# The fallback is "" rather than None on purpose: MongoClient documents
# "localhost" as its default host, and a destructive migration must never
# silently fall back to a default server when a variable is missing.

# Source MongoDB (contextdb instance)
SOURCE_URI = os.environ.get("SOURCE_MONGODB_URI", "")
SOURCE_DB = "second_brain_course"
SOURCE_COLLECTION = "test_intercom_data"

# Target MongoDB (keshavchhaparia instance)
TARGET_URI = os.environ.get("TARGET_MONGODB_URI", "")
TARGET_DB = "second_brain_course"
TARGET_COLLECTION = "test_intercom_data"
def _open_client(uri, label):
    """Open a MongoClient for *uri* and confirm the server answers a ping.

    Args:
        uri: MongoDB connection string. It is never logged because it
            contains credentials.
        label: Human-readable role used in log messages ("source"/"target").

    Returns:
        A connected MongoClient, or None when the connection failed (the
        failure is logged and any partially created client is closed).
    """
    logger.info(f"π‘ Connecting to {label} MongoDB")
    client = None
    try:
        client = MongoClient(uri)
        # MongoClient connects lazily; a ping forces a round trip so that
        # connection problems surface here instead of mid-migration.
        client.admin.command("ping")
    except Exception as e:
        logger.error(f"β Failed to connect to {label} MongoDB: {e}")
        if client is not None:
            client.close()
        return None
    logger.info(f"β Connected to {label} MongoDB")
    return client


def _iter_batches(documents, batch_size):
    """Yield lists of at most *batch_size* items from *documents*, in order."""
    batch = []
    for document in documents:
        batch.append(document)
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if batch:
        yield batch


def migrate_data():
    """Migrate test_intercom_data collection from source to target MongoDB.

    The target collection is dropped and then refilled with every document
    read from the source collection, inserted in batches.

    Returns:
        True when the source is empty or the target document count matches
        the source count after copying; False on any connection error,
        copy error, or count mismatch.
    """
    logger.info("π Starting MongoDB data migration...")

    # Connect to source MongoDB
    source_client = _open_client(SOURCE_URI, "source")
    if source_client is None:
        return False

    # Connect to target MongoDB
    target_client = _open_client(TARGET_URI, "target")
    if target_client is None:
        # Do not leak the already-open source connection.
        source_client.close()
        return False

    try:
        source_collection = source_client[SOURCE_DB][SOURCE_COLLECTION]
        target_collection = target_client[TARGET_DB][TARGET_COLLECTION]

        # Get document count from source
        source_count = source_collection.count_documents({})
        logger.info(f"π Source collection has {source_count} documents")
        if source_count == 0:
            logger.warning("β οΈ Source collection is empty, nothing to migrate")
            return True

        # Delete existing target collection
        # NOTE(review): the drop happens before the copy, so a failure while
        # copying leaves the target empty or partial. Consider copying into a
        # staging collection and renaming it over the target instead.
        logger.info(f"ποΈ Deleting existing target collection: {TARGET_COLLECTION}")
        target_collection.drop()
        logger.info("β Target collection deleted")

        # Copy documents from source to target
        logger.info("π Copying documents from source to target...")

        # Stream one cursor and insert in batches. Paging with skip()/limit()
        # and no sort has no guaranteed order, so pages could overlap or miss
        # documents, and every page would re-scan from the beginning.
        batch_size = 100
        total_copied = 0
        for documents in _iter_batches(source_collection.find(), batch_size):
            target_collection.insert_many(documents)
            total_copied += len(documents)
            logger.info(f"π¦ Copied batch: {len(documents)} documents (Total: {total_copied}/{source_count})")

        # Verify migration
        target_count = target_collection.count_documents({})
        logger.info(f"β Migration completed! Target collection has {target_count} documents")
        if target_count == source_count:
            logger.info("π Migration successful - document counts match!")
            return True
        logger.error(f"β Migration failed - document count mismatch: {target_count} vs {source_count}")
        return False
    except Exception as e:
        logger.error(f"β Migration failed: {e}")
        return False
    finally:
        # Close connections
        source_client.close()
        target_client.close()
        logger.info("π MongoDB connections closed")
def verify_migration():
    """Log a small sample of documents from the target collection.

    Reads up to three documents from the target collection and logs, for
    each one, its conversation ID (from ``metadata.properties``), whether it
    has a ``conversation_analysis`` field, and its ``content_quality_score``.
    Any error is logged rather than raised; the function returns None.
    """
    logger.info("π Verifying migration...")
    try:
        # The context manager closes the client even when reading or logging
        # a document raises.
        with MongoClient(TARGET_URI) as target_client:
            target_collection = target_client[TARGET_DB][TARGET_COLLECTION]

            # Get sample documents
            sample_docs = list(target_collection.find().limit(3))
            logger.info("π Sample documents in target collection:")
            for i, doc in enumerate(sample_docs, 1):
                # `or {}` tolerates documents where the key exists but holds
                # None, which a plain .get(key, {}) would pass through.
                metadata = doc.get('metadata') or {}
                properties = metadata.get('properties') or {}
                conversation_id = properties.get('conversation_id', 'N/A')
                has_analysis = 'conversation_analysis' in doc
                quality_score = doc.get('content_quality_score', 'N/A')
                logger.info(f" {i}. Conversation ID: {conversation_id}, Has Analysis: {has_analysis}, Quality: {quality_score}")
        logger.info("β Verification completed")
    except Exception as e:
        logger.error(f"β Verification failed: {e}")
if __name__ == "__main__":
    # Banner
    separator = "=" * 60
    logger.info(separator)
    logger.info("π MongoDB Data Migration Script")
    logger.info(separator)

    # Run the migration; a failure ends the process with a non-zero status.
    if not migrate_data():
        logger.error("β Migration failed!")
        sys.exit(1)

    # Migration succeeded: sample the target collection, then report success.
    verify_migration()
    logger.info("π Migration completed successfully!")