Spaces:

JustTheStatsHuman
/

Togmal-demo

Configuration error

Togmal-demo / test_bugfixes.py

HeTalksInMaths

Fix all MCP tool bugs reported by Claude Code

99bdd87 19 days ago

7.15 kB

	#!/usr/bin/env python3
	"""
	Test script to verify bug fixes for ToGMAL MCP tools
	Tests the issues reported by Claude Code
	"""

	import asyncio
	import json
	from pathlib import Path

	# Test 1: regression check for the division-by-zero bug in context_analyzer.
	# The banner is emitted with a single call: rule, title, rule, one per line.
	print(
	    "=" * 60,
	    "TEST 1: Context Analyzer - Division by Zero Bug",
	    "=" * 60,
	    sep="\n",
	)

	# Imported only after the banner has been printed, so an import failure
	# shows up underneath the section heading it belongs to.
	from togmal.context_analyzer import analyze_conversation_context

	async def test_context_analyzer():
	    """Exercise analyze_conversation_context with three conversation shapes.

	    The cases are an empty history, a history with no keyword matches, and a
	    history that mentions a maths conjecture. Each case awaits the analyzer
	    and prints a success line containing the result. If any exception is
	    raised, a failure line and the full traceback are printed instead; the
	    exception is not re-raised, so the remaining cases still run.
	    """
	    # Function-scope import, matching how the other test functions in this
	    # script bring in traceback.
	    import traceback

	    # Test case 1: Empty conversation (should not crash)
	    print("\n1. Testing empty conversation...")
	    try:
	        result = await analyze_conversation_context(
	            conversation_history=[],
	            user_context=None
	        )
	        print(f"✅ Empty conversation: {result}")
	    except Exception as e:
	        print(f"❌ FAILED: {e}")
	        # str(e) alone hides the exception type and the failing line.
	        traceback.print_exc()

	    # Test case 2: Conversation with no keyword matches (should not crash)
	    print("\n2. Testing conversation with no keyword matches...")
	    try:
	        result = await analyze_conversation_context(
	            conversation_history=[
	                {"role": "user", "content": "Hello there!"},
	                {"role": "assistant", "content": "Hi!"}
	            ],
	            user_context=None
	        )
	        print(f"✅ No keyword matches: {result}")
	    except Exception as e:
	        print(f"❌ FAILED: {e}")
	        traceback.print_exc()

	    # Test case 3: Normal conversation (should work)
	    print("\n3. Testing normal conversation with keywords...")
	    try:
	        result = await analyze_conversation_context(
	            conversation_history=[
	                {"role": "user", "content": "I want you to help me solve the Isaacs-Seitz conjecture"}
	            ],
	            user_context=None
	        )
	        print(f"✅ Normal conversation: {result}")
	    except Exception as e:
	        print(f"❌ FAILED: {e}")
	        traceback.print_exc()

	# Drive the Test 1 coroutine to completion before moving on.
	asyncio.run(test_context_analyzer())

	# Test 2: togmal_list_tools_dynamic
	# Section banner; the leading "\n" separates it from the previous output.
	print(
	    "\n" + "=" * 60,
	    "TEST 2: List Tools Dynamic",
	    "=" * 60,
	    sep="\n",
	)

	from togmal_mcp import togmal_list_tools_dynamic

	async def test_list_tools_dynamic():
	    """Call togmal_list_tools_dynamic for a maths conversation and an empty one.

	    For each scenario the awaited return value is parsed with json.loads and
	    pretty-printed. Any exception (from the call or from JSON parsing) is
	    reported with a failure line and a traceback, then the next scenario runs.
	    """
	    import traceback

	    math_history = [
	        {"role": "user", "content": "I want you to help me solve the Isaacs-Seitz conjecture in finite group representation theory"}
	    ]
	    # (heading printed before the call, conversation_history passed to it)
	    scenarios = (
	        ("\n1. Testing with math conversation...", math_history),
	        ("\n2. Testing with empty conversation...", []),
	    )

	    for heading, history in scenarios:
	        print(heading)
	        try:
	            result = await togmal_list_tools_dynamic(
	                conversation_history=history
	            )
	            parsed = json.loads(result)
	            print(f"✅ Result:\n{json.dumps(parsed, indent=2)}")
	        except Exception as e:
	            print(f"❌ FAILED: {e}")
	            traceback.print_exc()

	# Drive the Test 2 coroutine to completion before moving on.
	asyncio.run(test_list_tools_dynamic())

	# Test 3: togmal_check_prompt_difficulty
	# Section banner; the leading "\n" separates it from the previous output.
	print(
	    "\n" + "=" * 60,
	    "TEST 3: Check Prompt Difficulty",
	    "=" * 60,
	    sep="\n",
	)

	from togmal_mcp import togmal_check_prompt_difficulty

	async def test_check_prompt_difficulty():
	    """Exercise togmal_check_prompt_difficulty with valid and invalid inputs.

	    Three cases run in order: a real prompt with k=5, an empty prompt with
	    k=5, and the prompt "test" with k=100. Each awaited result is parsed as
	    JSON and checked for an "error" key:

	    * Case 1 prints the risk summary, or a warning if an error came back.
	    * Cases 2 and 3 expect an error and print a failure line otherwise.

	    Exceptions are reported with a failure line and a traceback and are not
	    re-raised, so later cases still run.
	    """
	    # Function-scope import shared by all three failure handlers below.
	    import traceback

	    print("\n1. Testing with valid prompt...")
	    try:
	        result = await togmal_check_prompt_difficulty(
	            prompt="I want you to help me solve the Isaacs-Seitz conjecture in finite group representation theory",
	            k=5
	        )
	        parsed = json.loads(result)
	        if "error" in parsed:
	            print(f"⚠️ Error (may be expected if DB not loaded): {parsed['error']}")
	            print(f" Message: {parsed.get('message', 'N/A')}")
	        else:
	            print("✅ Result:")
	            print(f" Risk Level: {parsed.get('risk_level', 'N/A')}")
	            print(f" Success Rate: {parsed.get('weighted_success_rate', 0) * 100:.1f}%")
	            print(f" Similar Questions: {len(parsed.get('similar_questions', []))}")
	    except Exception as e:
	        print(f"❌ FAILED: {e}")
	        traceback.print_exc()

	    print("\n2. Testing with empty prompt...")
	    try:
	        result = await togmal_check_prompt_difficulty(
	            prompt="",
	            k=5
	        )
	        parsed = json.loads(result)
	        if "error" in parsed:
	            # .get() so an error payload without a "message" key is still
	            # reported as a correct rejection instead of raising KeyError.
	            print(f"✅ Correctly rejected empty prompt: {parsed.get('message', 'N/A')}")
	        else:
	            print("❌ Should have rejected empty prompt")
	    except Exception as e:
	        print(f"❌ FAILED: {e}")
	        traceback.print_exc()

	    print("\n3. Testing with invalid k value...")
	    try:
	        result = await togmal_check_prompt_difficulty(
	            prompt="test",
	            k=100 # Too large
	        )
	        parsed = json.loads(result)
	        if "error" in parsed:
	            # Same KeyError guard as in case 2.
	            print(f"✅ Correctly rejected invalid k: {parsed.get('message', 'N/A')}")
	        else:
	            print("❌ Should have rejected invalid k")
	    except Exception as e:
	        print(f"❌ FAILED: {e}")
	        traceback.print_exc()

	# Drive the Test 3 coroutine to completion before moving on.
	asyncio.run(test_check_prompt_difficulty())

	# Test 4: togmal_get_recommended_checks
	# Section banner; the leading "\n" separates it from the previous output.
	print(
	    "\n" + "=" * 60,
	    "TEST 4: Get Recommended Checks",
	    "=" * 60,
	    sep="\n",
	)

	from togmal_mcp import get_recommended_checks

	async def test_get_recommended_checks():
	    """Call get_recommended_checks for a medical conversation and an empty one.

	    For each scenario the awaited return value is parsed with json.loads and
	    pretty-printed. Any exception (from the call or from JSON parsing) is
	    reported with a failure line and a traceback, then the next scenario runs.
	    """
	    import traceback

	    medical_history = [
	        {"role": "user", "content": "Help me with medical diagnosis"}
	    ]
	    # (heading printed before the call, conversation_history passed to it)
	    scenarios = (
	        ("\n1. Testing with valid conversation...", medical_history),
	        ("\n2. Testing with empty conversation...", []),
	    )

	    for heading, history in scenarios:
	        print(heading)
	        try:
	            result = await get_recommended_checks(
	                conversation_history=history
	            )
	            parsed = json.loads(result)
	            print(f"✅ Result:\n{json.dumps(parsed, indent=2)}")
	        except Exception as e:
	            print(f"❌ FAILED: {e}")
	            traceback.print_exc()

	# Drive the Test 4 coroutine to completion before moving on.
	asyncio.run(test_get_recommended_checks())

	# Test 5: submit_evidence (structure only, not full submission)
	print(
	    "\n" + "=" * 60,
	    "TEST 5: Submit Evidence Tool Structure",
	    "=" * 60,
	    sep="\n",
	)

	from togmal_mcp import SubmitEvidenceInput, CategoryType, RiskLevel, SubmissionReason

	# Only constructs the input model; nothing is submitted anywhere.
	print("\n1. Testing input validation...")
	try:
	    test_input = SubmitEvidenceInput(
	        category=CategoryType.MATH_PHYSICS_SPECULATION,
	        prompt="test prompt",
	        response="test response",
	        description="This is a test description for validation",
	        severity=RiskLevel.LOW,
	        reason=SubmissionReason.NEW_PATTERN
	    )
	    print(f"✅ Input validation passed")
	except Exception as e:
	    print(f"❌ FAILED: {e}")

	# Closing banner.
	print(
	    "\n" + "=" * 60,
	    "ALL TESTS COMPLETED",
	    "=" * 60,
	    sep="\n",
	)

	# NOTE: the summary below is fixed text. It is printed regardless of the
	# ✅/❌ lines emitted above and is not computed from the test outcomes.
	print(
	    "\nSummary:",
	    "- Context analyzer division by zero: FIXED",
	    "- List tools dynamic: SHOULD WORK",
	    "- Check prompt difficulty: IMPROVED ERROR HANDLING",
	    "- Get recommended checks: SHOULD WORK",
	    "- Submit evidence: MADE OPTIONAL CONFIRMATION",
	    sep="\n",
	)