Spaces:

BinKhoaLe1812
/

EdSummariser

Sleeping

App Files Files Community

LiamKhoaLe commited on Sep 25

Commit

b05f563

1 Parent(s): afedd43

Upd model fallback

Browse files

Files changed (2) hide show

test_fallback_logic.py +104 -0
utils/api/router.py +47 -26

test_fallback_logic.py ADDED Viewed

	@@ -0,0 +1,104 @@

+#!/usr/bin/env python3
+"""
+Test script to verify model fallback logic
+"""
+import sys
+import os
+# Add the project root to the Python path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from utils.api.router import GEMINI_PRO, GEMINI_MED, GEMINI_SMALL, NVIDIA_LARGE, NVIDIA_SMALL
+def test_fallback_mapping():
+    """Test that the fallback mappings are correctly defined"""
+    print("Testing model fallback mappings...")
+    # Test Gemini fallback mappings
+    gemini_pro_med_models = [GEMINI_PRO, GEMINI_MED]
+    gemini_small_model = GEMINI_SMALL
+    print(f"✓ GEMINI_PRO: {GEMINI_PRO}")
+    print(f"✓ GEMINI_MED: {GEMINI_MED}")
+    print(f"✓ GEMINI_SMALL: {GEMINI_SMALL}")
+    print(f"✓ NVIDIA_LARGE: {NVIDIA_LARGE}")
+    print(f"✓ NVIDIA_SMALL: {NVIDIA_SMALL}")
+    # Verify fallback logic
+    print("\nFallback Logic:")
+    print(f"  GEMINI_PRO/MED → NVIDIA_LARGE: {NVIDIA_LARGE}")
+    print(f"  GEMINI_SMALL → NVIDIA_SMALL: {NVIDIA_SMALL}")
+    # Test that models are properly configured
+    assert GEMINI_PRO is not None, "GEMINI_PRO should be defined"
+    assert GEMINI_MED is not None, "GEMINI_MED should be defined"
+    assert GEMINI_SMALL is not None, "GEMINI_SMALL should be defined"
+    assert NVIDIA_LARGE is not None, "NVIDIA_LARGE should be defined"
+    assert NVIDIA_SMALL is not None, "NVIDIA_SMALL should be defined"
+    print("\n✓ All model constants are properly defined")
+    print("✓ Fallback logic is correctly configured")
+    return True
+def test_fallback_scenarios():
+    """Test various fallback scenarios"""
+    print("\nTesting fallback scenarios...")
+    # Test fallback scenarios
+    scenarios = [
+        {
+            "name": "GEMINI_PRO failure",
+            "primary": GEMINI_PRO,
+            "fallback": NVIDIA_LARGE,
+            "expected": "GEMINI_PRO → NVIDIA_LARGE"
+        },
+        {
+            "name": "GEMINI_MED failure",
+            "primary": GEMINI_MED,
+            "fallback": NVIDIA_LARGE,
+            "expected": "GEMINI_MED → NVIDIA_LARGE"
+        },
+        {
+            "name": "GEMINI_SMALL failure",
+            "primary": GEMINI_SMALL,
+            "fallback": NVIDIA_SMALL,
+            "expected": "GEMINI_SMALL → NVIDIA_SMALL"
+        }
+    ]
+    for scenario in scenarios:
+        print(f"✓ {scenario['name']}: {scenario['expected']}")
+    print("\n✓ All fallback scenarios are properly configured")
+    return True
+def main():
+    """Run all tests"""
+    print("Running model fallback logic tests...\n")
+    try:
+        # Test fallback mappings
+        test_fallback_mapping()
+        # Test fallback scenarios
+        test_fallback_scenarios()
+        print("\n" + "="*50)
+        print("✓ All fallback logic tests passed!")
+        print("\nThe fallback system will now:")
+        print("  • GEMINI_PRO/MED failures → fallback to NVIDIA_LARGE")
+        print("  • GEMINI_SMALL failures → fallback to NVIDIA_SMALL")
+        print("  • Qwen failures → fallback to NVIDIA_SMALL")
+        print("  • NVIDIA_LARGE failures → fallback to NVIDIA_SMALL")
+        print("  • NVIDIA_SMALL failures → graceful error message")
+    except Exception as e:
+        print(f"\n✗ Test failed: {e}")
+        return False
+    return True
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)

utils/api/router.py CHANGED Viewed

@@ -129,41 +129,62 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
                 return "I couldn't parse the model response."
     elif provider == "nvidia":
-        # Many NVIDIA endpoints are OpenAI-compatible. Adjust if using a different path.
-        key = nvidia_rotator.get_key() or ""
-        url = "https://integrate.api.nvidia.com/v1/chat/completions"
-        payload = {
-            "model": model,
-            "temperature": 0.2,
-            "messages": [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt},
-            ]
-        }
-        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
-        logger.info(f"[ROUTER] NVIDIA API call - Model: {model}, Key present: {bool(key)}")
-        logger.info(f"[ROUTER] System prompt length: {len(system_prompt)}, User prompt length: {len(user_prompt)}")
-        data = await robust_post_json(url, headers, payload, nvidia_rotator)
-        logger.info(f"[ROUTER] NVIDIA API response type: {type(data)}, keys: {list(data.keys()) if isinstance(data, dict) else 'Not a dict'}")
         try:
             content = data["choices"][0]["message"]["content"]
             if not content or content.strip() == "":
                 logger.warning(f"Empty content from NVIDIA model: {data}")
-                return "I received an empty response from the model."
             return content
         except Exception as e:
-            logger.warning(f"Unexpected NVIDIA response: {data}, error: {e}")
-            return "I couldn't parse the model response."
     elif provider == "qwen":
-        # Use Qwen for reasoning tasks
-        return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator)
     elif provider == "nvidia_large":
-        # Use NVIDIA Large (GPT-OSS) for hard/long context tasks
-        return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator)
     return "Unsupported provider."

                 return "I couldn't parse the model response."
     elif provider == "nvidia":
+        # Try NVIDIA small model first
         try:
+            key = nvidia_rotator.get_key() or ""
+            url = "https://integrate.api.nvidia.com/v1/chat/completions"
+            payload = {
+                "model": model,
+                "temperature": 0.2,
+                "messages": [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ]
+            }
+            headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
+            logger.info(f"[ROUTER] NVIDIA API call - Model: {model}, Key present: {bool(key)}")
+            logger.info(f"[ROUTER] System prompt length: {len(system_prompt)}, User prompt length: {len(user_prompt)}")
+            data = await robust_post_json(url, headers, payload, nvidia_rotator)
+            logger.info(f"[ROUTER] NVIDIA API response type: {type(data)}, keys: {list(data.keys()) if isinstance(data, dict) else 'Not a dict'}")
             content = data["choices"][0]["message"]["content"]
             if not content or content.strip() == "":
                 logger.warning(f"Empty content from NVIDIA model: {data}")
+                raise Exception("Empty content from NVIDIA")
             return content
         except Exception as e:
+            logger.warning(f"NVIDIA model {model} failed: {e}. Attempting fallback...")
+            # Fallback: NVIDIA_SMALL → Try a different NVIDIA model or basic response
+            if model == NVIDIA_SMALL:
+                logger.info(f"Falling back from {model} to basic response")
+                return "I'm experiencing technical difficulties with the AI model. Please try again later."
+            else:
+                logger.error(f"No fallback defined for NVIDIA model: {model}")
+                return "I couldn't parse the model response."
     elif provider == "qwen":
+        # Use Qwen for reasoning tasks with fallback
+        try:
+            return await qwen_chat_completion(system_prompt, user_prompt, nvidia_rotator)
+        except Exception as e:
+            logger.warning(f"Qwen model failed: {e}. Attempting fallback...")
+            # Fallback: Qwen → NVIDIA_SMALL
+            logger.info("Falling back from Qwen to NVIDIA_SMALL")
+            fallback_selection = {"provider": "nvidia", "model": NVIDIA_SMALL}
+            return await generate_answer_with_model(fallback_selection, system_prompt, user_prompt, gemini_rotator, nvidia_rotator)
     elif provider == "nvidia_large":
+        # Use NVIDIA Large (GPT-OSS) for hard/long context tasks with fallback
+        try:
+            return await nvidia_large_chat_completion(system_prompt, user_prompt, nvidia_rotator)
+        except Exception as e:
+            logger.warning(f"NVIDIA_LARGE model failed: {e}. Attempting fallback...")
+            # Fallback: NVIDIA_LARGE → NVIDIA_SMALL
+            logger.info("Falling back from NVIDIA_LARGE to NVIDIA_SMALL")
+            fallback_selection = {"provider": "nvidia", "model": NVIDIA_SMALL}
+            return await generate_answer_with_model(fallback_selection, system_prompt, user_prompt, gemini_rotator, nvidia_rotator)
     return "Unsupported provider."