Spaces:

JustTheStatsHuman
/

Togmal-demo

Configuration error

Togmal-demo / expand_vector_db.py

HeTalksInMaths

Fix all MCP tool bugs reported by Claude Code

99bdd87 23 days ago

4.73 kB

	#!/usr/bin/env python3
	"""
	Expand Vector Database with Comprehensive Data
	==============================================

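	This script loads data from multiple sources to create a comprehensive
	vector database with better domain coverage. The full plan covers:

	1. Full MMLU dataset (all domains, no sampling)
	2. MMLU-Pro (harder questions)
	3. GPQA Diamond (graduate-level questions)
	4. MATH dataset (competition mathematics)

	Note: expand_database() currently loads only sources 2-4 (MMLU-Pro capped
	at 5,000 samples, MATH capped at 2,000 samples); the full MMLU dataset is
	not loaded by this script.

	Target: 20,000+ questions across 20+ domains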
	"""

	from pathlib import Path
	from benchmark_vector_db import BenchmarkVectorDB
	import logging

	# Configure the root logger once at import time: INFO and above, each record
	# rendered as "<timestamp> - <level> - <message>".
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	# Module-scoped logger used by every function in this script.
	logger = logging.getLogger(__name__)

	def expand_database():
	    """Build the expanded benchmark vector database and log its statistics.

	    Creates a ``BenchmarkVectorDB`` at ``./data/benchmark_vector_db_expanded``
	    with the ``all-MiniLM-L6-v2`` embedding model, loads MMLU-Pro, GPQA
	    Diamond and MATH questions through it, indexes whatever was loaded, and
	    logs the statistics reported by ``db.get_statistics()``.

	    Returns:
	        A ``(db, stats)`` tuple: the database instance and the value returned
	        by ``db.get_statistics()`` (read here as a dict).
	    """
	    logger.info("=" * 60)
	    logger.info("Expanding Vector Database with Comprehensive Data")
	    logger.info("=" * 60)

	    # Initialize new database
	    db = BenchmarkVectorDB(
	        db_path=Path("./data/benchmark_vector_db_expanded"),
	        embedding_model="all-MiniLM-L6-v2",
	    )

	    all_questions = _load_benchmark_questions(db)
	    logger.info(f"\nTotal questions to index: {len(all_questions)}")

	    # Index into vector database
	    if all_questions:
	        logger.info("\nIndexing questions into vector database...")
	        logger.info("This may take several minutes...")
	        db.index_questions(all_questions)
	    else:
	        # Surface the skipped step so an empty run is not mistaken for success.
	        logger.warning("No questions were loaded; skipping indexing.")

	    # Get final statistics
	    logger.info("\n" + "=" * 60)
	    logger.info("Database Statistics")
	    logger.info("=" * 60)

	    stats = db.get_statistics()
	    _log_statistics(stats)

	    logger.info("\n" + "=" * 60)
	    logger.info("✅ Database expansion complete!")
	    logger.info("=" * 60)

	    return db, stats


	def _load_benchmark_questions(db):
	    """Load MMLU-Pro, GPQA Diamond and MATH questions via *db*, concatenated in that order."""
	    logger.info("\nPhase 1: Loading MMLU-Pro (harder subset)")
	    logger.info("-" * 40)
	    mmlu_pro_questions = db.load_mmlu_pro_dataset(max_samples=5000)
	    logger.info(f"Loaded {len(mmlu_pro_questions)} MMLU-Pro questions")

	    logger.info("\nPhase 2: Loading GPQA Diamond (graduate-level)")
	    logger.info("-" * 40)
	    gpqa_questions = db.load_gpqa_dataset(fetch_real_scores=False)
	    logger.info(f"Loaded {len(gpqa_questions)} GPQA questions")

	    logger.info("\nPhase 3: Loading MATH dataset (competition math)")
	    logger.info("-" * 40)
	    math_questions = db.load_math_dataset(max_samples=2000)
	    logger.info(f"Loaded {len(math_questions)} MATH questions")

	    # Combine all questions
	    return mmlu_pro_questions + gpqa_questions + math_questions


	def _log_statistics(stats):
	    """Log the total, per-source, top-20 per-domain and per-difficulty counts in *stats*."""
	    logger.info(f"\nTotal Questions: {stats['total_questions']}")

	    logger.info("\nSources:")
	    for source, count in stats.get('sources', {}).items():
	        logger.info(f" {source}: {count}")

	    # Only the 20 most populated domains are shown, largest first.
	    logger.info("\nDomains:")
	    domains_by_size = sorted(
	        stats.get('domains', {}).items(),
	        key=lambda item: item[1],
	        reverse=True,
	    )
	    for domain, count in domains_by_size[:20]:
	        logger.info(f" {domain}: {count}")

	    logger.info("\nDifficulty Levels:")
	    for level, count in stats.get('difficulty_levels', {}).items():
	        logger.info(f" {level}: {count}")


	def test_expanded_database(db):
	    """Query the expanded database with example prompts and log the results.

	    Each prompt is passed to ``db.query_similar_questions(prompt, k=3)``; the
	    ``risk_level``, ``weighted_success_rate`` and ``recommendation`` entries
	    of the result are logged.

	    Args:
	        db: Object exposing ``query_similar_questions(prompt, k=...)`` whose
	            result supports lookup of the three keys named above.
	    """
	    logger.info("\n" + "=" * 60)
	    logger.info("Testing Expanded Database")
	    logger.info("=" * 60)

	    test_prompts = [
	        # Hard prompts
	        ("Graduate-level physics", "Calculate the quantum correction to the partition function for a 3D harmonic oscillator"),
	        ("Abstract mathematics", "Prove that every field is also a ring"),
	        ("Competition math", "Find all zeros of the polynomial x^3 + 2x + 2 in Z_7"),

	        # Easy prompts
	        ("Basic arithmetic", "What is 2 + 2?"),
	        ("General knowledge", "What is the capital of France?"),

	        # Domain-specific
	        ("Medical reasoning", "Diagnose a patient with acute chest pain"),
	        ("Legal knowledge", "Explain the doctrine of precedent in common law"),
	        ("Computer science", "Implement a binary search tree"),
	    ]

	    max_preview_chars = 50
	    for category, prompt in test_prompts:
	        # Append the ellipsis only when the prompt was actually cut, so short
	        # prompts are not shown as if they had been truncated.
	        if len(prompt) > max_preview_chars:
	            preview = prompt[:max_preview_chars] + "..."
	        else:
	            preview = prompt
	        logger.info(f"\n{category}: '{preview}'")
	        result = db.query_similar_questions(prompt, k=3)
	        logger.info(f" Risk Level: {result['risk_level']}")
	        logger.info(f" Success Rate: {result['weighted_success_rate']:.1%}")
	        logger.info(f" Recommendation: {result['recommendation']}")


	if __name__ == "__main__":
	    # Script entry point: build the expanded database, then run the example
	    # queries against it. The returned statistics are not used again here.
	    database, _stats = expand_database()
	    test_expanded_database(database)

	    # Closing hints on how to point the demo files at the new database.
	    for closing_line in (
	        "\n🎉 All done! You can now use the expanded database.",
	        "To switch to the expanded database, update your demo files:",
	        " db_path=Path('./data/benchmark_vector_db_expanded')",
	    ):
	        logger.info(closing_line)