Spaces:
Sleeping
Sleeping
File size: 5,186 Bytes
422e708 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
#!/usr/bin/env python3
"""
Script to migrate test_intercom_data from contextdb instance to keshavchhaparia instance.
"""
import os
import sys

from loguru import logger
from pymongo import MongoClient
# Source MongoDB (contextdb instance)
SOURCE_URI = "mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/"
SOURCE_DB = "second_brain_course"
SOURCE_COLLECTION = "test_intercom_data"
# Target MongoDB (keshavchhaparia instance)
TARGET_URI = "mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/"
TARGET_DB = "second_brain_course"
TARGET_COLLECTION = "test_intercom_data"
def migrate_data():
"""Migrate test_intercom_data collection from source to target MongoDB."""
logger.info("π Starting MongoDB data migration...")
# Connect to source MongoDB
logger.info(f"π‘ Connecting to source MongoDB: {SOURCE_URI}")
try:
source_client = MongoClient(SOURCE_URI)
source_db = source_client[SOURCE_DB]
source_collection = source_db[SOURCE_COLLECTION]
logger.info("β
Connected to source MongoDB")
except Exception as e:
logger.error(f"β Failed to connect to source MongoDB: {e}")
return False
# Connect to target MongoDB
logger.info(f"π‘ Connecting to target MongoDB: {TARGET_URI}")
try:
target_client = MongoClient(TARGET_URI)
target_db = target_client[TARGET_DB]
target_collection = target_db[TARGET_COLLECTION]
logger.info("β
Connected to target MongoDB")
except Exception as e:
logger.error(f"β Failed to connect to target MongoDB: {e}")
return False
try:
# Get document count from source
source_count = source_collection.count_documents({})
logger.info(f"π Source collection has {source_count} documents")
if source_count == 0:
logger.warning("β οΈ Source collection is empty, nothing to migrate")
return True
# Delete existing target collection
logger.info(f"ποΈ Deleting existing target collection: {TARGET_COLLECTION}")
target_collection.drop()
logger.info("β
Target collection deleted")
# Copy documents from source to target
logger.info("π Copying documents from source to target...")
# Process in batches to avoid memory issues
batch_size = 100
total_copied = 0
for skip in range(0, source_count, batch_size):
# Get batch of documents
documents = list(source_collection.find().skip(skip).limit(batch_size))
if documents:
# Insert batch into target
target_collection.insert_many(documents)
total_copied += len(documents)
logger.info(f"π¦ Copied batch: {len(documents)} documents (Total: {total_copied}/{source_count})")
# Verify migration
target_count = target_collection.count_documents({})
logger.info(f"β
Migration completed! Target collection has {target_count} documents")
if target_count == source_count:
logger.info("π Migration successful - document counts match!")
return True
else:
logger.error(f"β Migration failed - document count mismatch: {target_count} vs {source_count}")
return False
except Exception as e:
logger.error(f"β Migration failed: {e}")
return False
finally:
# Close connections
source_client.close()
target_client.close()
logger.info("π MongoDB connections closed")
def verify_migration():
"""Verify the migration was successful."""
logger.info("π Verifying migration...")
try:
# Connect to target MongoDB
target_client = MongoClient(TARGET_URI)
target_db = target_client[TARGET_DB]
target_collection = target_db[TARGET_COLLECTION]
# Get sample documents
sample_docs = list(target_collection.find().limit(3))
logger.info(f"π Sample documents in target collection:")
for i, doc in enumerate(sample_docs, 1):
conversation_id = doc.get('metadata', {}).get('properties', {}).get('conversation_id', 'N/A')
has_analysis = 'conversation_analysis' in doc
quality_score = doc.get('content_quality_score', 'N/A')
logger.info(f" {i}. Conversation ID: {conversation_id}, Has Analysis: {has_analysis}, Quality: {quality_score}")
target_client.close()
logger.info("β
Verification completed")
except Exception as e:
logger.error(f"β Verification failed: {e}")
if __name__ == "__main__":
logger.info("=" * 60)
logger.info("π MongoDB Data Migration Script")
logger.info("=" * 60)
# Run migration
success = migrate_data()
if success:
# Verify migration
verify_migration()
logger.info("π Migration completed successfully!")
else:
logger.error("β Migration failed!")
sys.exit(1)
|