Spaces:

JustTheStatsHuman
/

Togmal-demo

Configuration error

File size: 7,151 Bytes

99bdd87

#!/usr/bin/env python3
"""
Test script to verify bug fixes for ToGMAL MCP tools
Tests the issues reported by Claude Code
"""

import asyncio
import json
from pathlib import Path

# Test 1: Division by zero bug in context_analyzer.
# The banner is printed before the import below, so the section heading is
# already on screen if the import itself raises.
print("=" * 60, "TEST 1: Context Analyzer - Division by Zero Bug", "=" * 60, sep="\n")

from togmal.context_analyzer import analyze_conversation_context

async def test_context_analyzer():
    """Run analyze_conversation_context against three conversation shapes.

    Each scenario is awaited in turn with ``user_context=None``. The returned
    value is printed on success; any exception is caught and its message is
    printed instead, so a failing scenario never stops the remaining ones.
    """
    # (heading printed before the call, label used in the success line,
    #  conversation history passed to the analyzer)
    scenarios = (
        # Empty conversation (should not crash)
        (
            "\n1. Testing empty conversation...",
            "Empty conversation",
            [],
        ),
        # Conversation with no keyword matches (should not crash)
        (
            "\n2. Testing conversation with no keyword matches...",
            "No keyword matches",
            [
                {"role": "user", "content": "Hello there!"},
                {"role": "assistant", "content": "Hi!"},
            ],
        ),
        # Normal conversation (should work)
        (
            "\n3. Testing normal conversation with keywords...",
            "Normal conversation",
            [
                {"role": "user", "content": "I want you to help me solve the Isaacs-Seitz conjecture"},
            ],
        ),
    )

    for heading, label, history in scenarios:
        print(heading)
        try:
            outcome = await analyze_conversation_context(
                conversation_history=history,
                user_context=None,
            )
            print(f"✅ {label}: {outcome}")
        except Exception as exc:
            print(f"❌ FAILED: {exc}")

# Execute the Test 1 scenarios to completion before moving on.
asyncio.run(test_context_analyzer())

# Test 2: togmal_list_tools_dynamic
# Banner first, import second: the heading is shown even if the import fails.
print("\n" + "=" * 60, "TEST 2: List Tools Dynamic", "=" * 60, sep="\n")

from togmal_mcp import togmal_list_tools_dynamic

async def test_list_tools_dynamic():
    """Call togmal_list_tools_dynamic with a math conversation and an empty one.

    The tool's return value is parsed as JSON and pretty-printed. Any exception
    (from the call or from JSON decoding) is reported with its message and a
    full traceback, and the next case still runs.
    """
    import traceback

    # (heading printed before the call, conversation history passed to the tool)
    cases = (
        (
            "\n1. Testing with math conversation...",
            [
                {"role": "user", "content": "I want you to help me solve the Isaacs-Seitz conjecture in finite group representation theory"},
            ],
        ),
        (
            "\n2. Testing with empty conversation...",
            [],
        ),
    )

    for heading, history in cases:
        print(heading)
        try:
            raw = await togmal_list_tools_dynamic(conversation_history=history)
            decoded = json.loads(raw)
            print(f"✅ Result:\n{json.dumps(decoded, indent=2)}")
        except Exception as exc:
            print(f"❌ FAILED: {exc}")
            traceback.print_exc()

# Execute the Test 2 cases to completion before moving on.
asyncio.run(test_list_tools_dynamic())

# Test 3: togmal_check_prompt_difficulty
# Banner first, import second: the heading is shown even if the import fails.
print("\n" + "=" * 60, "TEST 3: Check Prompt Difficulty", "=" * 60, sep="\n")

from togmal_mcp import togmal_check_prompt_difficulty

async def test_check_prompt_difficulty():
    """Exercise togmal_check_prompt_difficulty with valid and invalid requests.

    Three cases are run in order:

    1. A valid prompt with ``k=5``. An ``"error"`` key in the parsed JSON is
       reported as a warning (it may be expected if the DB is not loaded);
       otherwise the risk level, success rate and number of similar questions
       are printed.
    2. An empty prompt, which is expected to come back with an ``"error"`` key.
    3. ``k=100``, which is expected to come back with an ``"error"`` key.

    Nothing is raised: every exception is caught, printed with its traceback,
    and the next case still runs.
    """
    import traceback

    print("\n1. Testing with valid prompt...")
    try:
        result = await togmal_check_prompt_difficulty(
            prompt="I want you to help me solve the Isaacs-Seitz conjecture in finite group representation theory",
            k=5,
        )
        parsed = json.loads(result)
        if "error" in parsed:
            print(f"⚠️  Error (may be expected if DB not loaded): {parsed['error']}")
            print(f"   Message: {parsed.get('message', 'N/A')}")
        else:
            # `or` also covers keys that are present but null in the JSON,
            # which would otherwise make `None * 100` / `len(None)` raise.
            success_rate = parsed.get('weighted_success_rate') or 0
            similar = parsed.get('similar_questions') or []
            print("✅ Result:")
            print(f"   Risk Level: {parsed.get('risk_level', 'N/A')}")
            print(f"   Success Rate: {success_rate * 100:.1f}%")
            print(f"   Similar Questions: {len(similar)}")
    except Exception as e:
        print(f"❌ FAILED: {e}")
        traceback.print_exc()

    # Requests that the tool is expected to reject with an "error" payload:
    # (heading, short description used in the result line, call arguments)
    rejection_cases = (
        ("\n2. Testing with empty prompt...", "empty prompt", {"prompt": "", "k": 5}),
        # k=100 is deliberately too large
        ("\n3. Testing with invalid k value...", "invalid k", {"prompt": "test", "k": 100}),
    )
    for heading, what, call_args in rejection_cases:
        print(heading)
        try:
            result = await togmal_check_prompt_difficulty(**call_args)
            parsed = json.loads(result)
            if "error" in parsed:
                # Use .get(): an error payload without a "message" key is still
                # a correct rejection and must not be reported as a failure
                # via KeyError.
                print(f"✅ Correctly rejected {what}: {parsed.get('message', 'N/A')}")
            else:
                print(f"❌ Should have rejected {what}")
        except Exception as e:
            print(f"❌ FAILED: {e}")
            # Same diagnostics as the first case.
            traceback.print_exc()

# Execute the Test 3 cases to completion before moving on.
asyncio.run(test_check_prompt_difficulty())

# Test 4: togmal_get_recommended_checks
# Banner first, import second: the heading is shown even if the import fails.
print("\n" + "=" * 60, "TEST 4: Get Recommended Checks", "=" * 60, sep="\n")

from togmal_mcp import get_recommended_checks

async def test_get_recommended_checks():
    """Call get_recommended_checks with a one-message conversation and an empty one.

    The tool's return value is parsed as JSON and pretty-printed. Any exception
    (from the call or from JSON decoding) is reported with its message and a
    full traceback, and the next case still runs.
    """
    import traceback

    # (heading printed before the call, conversation history passed to the tool)
    cases = (
        (
            "\n1. Testing with valid conversation...",
            [
                {"role": "user", "content": "Help me with medical diagnosis"},
            ],
        ),
        (
            "\n2. Testing with empty conversation...",
            [],
        ),
    )

    for heading, history in cases:
        print(heading)
        try:
            raw = await get_recommended_checks(conversation_history=history)
            decoded = json.loads(raw)
            print(f"✅ Result:\n{json.dumps(decoded, indent=2)}")
        except Exception as exc:
            print(f"❌ FAILED: {exc}")
            traceback.print_exc()

# Execute the Test 4 cases to completion before moving on.
asyncio.run(test_get_recommended_checks())

# Test 5: submit_evidence (structure only, not full submission)
# Banner first, import second: the heading is shown even if the import fails.
print("\n" + "=" * 60, "TEST 5: Submit Evidence Tool Structure", "=" * 60, sep="\n")

from togmal_mcp import SubmitEvidenceInput, CategoryType, RiskLevel, SubmissionReason

print("\n1. Testing input validation...")
try:
    # Only the input model is constructed here; no submission call is made.
    test_input = SubmitEvidenceInput(
        category=CategoryType.MATH_PHYSICS_SPECULATION,
        prompt="test prompt",
        response="test response",
        description="This is a test description for validation",
        severity=RiskLevel.LOW,
        reason=SubmissionReason.NEW_PATTERN,
    )
    print("✅ Input validation passed")
except Exception as exc:
    print(f"❌ FAILED: {exc}")

print("\n" + "=" * 60, "ALL TESTS COMPLETED", "=" * 60, sep="\n")

# NOTE(review): the summary lines below are fixed text; they are printed
# unconditionally and do not reflect whether the checks above passed.
print(
    "\nSummary:",
    "- Context analyzer division by zero: FIXED",
    "- List tools dynamic: SHOULD WORK",
    "- Check prompt difficulty: IMPROVED ERROR HANDLING",
    "- Get recommended checks: SHOULD WORK",
    "- Submit evidence: MADE OPTIONAL CONFIRMATION",
    sep="\n",
)