context-ai / migrate_mongodb_data.py
chinmayjha's picture
Add Conversation Analysis Dashboard for Hugging Face Spaces
422e708
raw
history blame
5.19 kB
#!/usr/bin/env python3
"""
Script to migrate test_intercom_data from contextdb instance to keshavchhaparia instance.
"""
import os
import sys

from loguru import logger
from pymongo import MongoClient
# Source MongoDB (contextdb instance)
SOURCE_URI = "mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/"
SOURCE_DB = "second_brain_course"
SOURCE_COLLECTION = "test_intercom_data"
# Target MongoDB (keshavchhaparia instance)
TARGET_URI = "mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/"
TARGET_DB = "second_brain_course"
TARGET_COLLECTION = "test_intercom_data"
def migrate_data():
"""Migrate test_intercom_data collection from source to target MongoDB."""
logger.info("πŸš€ Starting MongoDB data migration...")
# Connect to source MongoDB
logger.info(f"πŸ“‘ Connecting to source MongoDB: {SOURCE_URI}")
try:
source_client = MongoClient(SOURCE_URI)
source_db = source_client[SOURCE_DB]
source_collection = source_db[SOURCE_COLLECTION]
logger.info("βœ… Connected to source MongoDB")
except Exception as e:
logger.error(f"❌ Failed to connect to source MongoDB: {e}")
return False
# Connect to target MongoDB
logger.info(f"πŸ“‘ Connecting to target MongoDB: {TARGET_URI}")
try:
target_client = MongoClient(TARGET_URI)
target_db = target_client[TARGET_DB]
target_collection = target_db[TARGET_COLLECTION]
logger.info("βœ… Connected to target MongoDB")
except Exception as e:
logger.error(f"❌ Failed to connect to target MongoDB: {e}")
return False
try:
# Get document count from source
source_count = source_collection.count_documents({})
logger.info(f"πŸ“Š Source collection has {source_count} documents")
if source_count == 0:
logger.warning("⚠️ Source collection is empty, nothing to migrate")
return True
# Delete existing target collection
logger.info(f"πŸ—‘οΈ Deleting existing target collection: {TARGET_COLLECTION}")
target_collection.drop()
logger.info("βœ… Target collection deleted")
# Copy documents from source to target
logger.info("πŸ“‹ Copying documents from source to target...")
# Process in batches to avoid memory issues
batch_size = 100
total_copied = 0
for skip in range(0, source_count, batch_size):
# Get batch of documents
documents = list(source_collection.find().skip(skip).limit(batch_size))
if documents:
# Insert batch into target
target_collection.insert_many(documents)
total_copied += len(documents)
logger.info(f"πŸ“¦ Copied batch: {len(documents)} documents (Total: {total_copied}/{source_count})")
# Verify migration
target_count = target_collection.count_documents({})
logger.info(f"βœ… Migration completed! Target collection has {target_count} documents")
if target_count == source_count:
logger.info("πŸŽ‰ Migration successful - document counts match!")
return True
else:
logger.error(f"❌ Migration failed - document count mismatch: {target_count} vs {source_count}")
return False
except Exception as e:
logger.error(f"❌ Migration failed: {e}")
return False
finally:
# Close connections
source_client.close()
target_client.close()
logger.info("πŸ”Œ MongoDB connections closed")
def verify_migration():
"""Verify the migration was successful."""
logger.info("πŸ” Verifying migration...")
try:
# Connect to target MongoDB
target_client = MongoClient(TARGET_URI)
target_db = target_client[TARGET_DB]
target_collection = target_db[TARGET_COLLECTION]
# Get sample documents
sample_docs = list(target_collection.find().limit(3))
logger.info(f"πŸ“‹ Sample documents in target collection:")
for i, doc in enumerate(sample_docs, 1):
conversation_id = doc.get('metadata', {}).get('properties', {}).get('conversation_id', 'N/A')
has_analysis = 'conversation_analysis' in doc
quality_score = doc.get('content_quality_score', 'N/A')
logger.info(f" {i}. Conversation ID: {conversation_id}, Has Analysis: {has_analysis}, Quality: {quality_score}")
target_client.close()
logger.info("βœ… Verification completed")
except Exception as e:
logger.error(f"❌ Verification failed: {e}")
if __name__ == "__main__":
logger.info("=" * 60)
logger.info("πŸ”„ MongoDB Data Migration Script")
logger.info("=" * 60)
# Run migration
success = migrate_data()
if success:
# Verify migration
verify_migration()
logger.info("πŸŽ‰ Migration completed successfully!")
else:
logger.error("❌ Migration failed!")
sys.exit(1)