# ────────────────────────────── memo/nvidia.py ──────────────────────────────
"""
NVIDIA Integration
Functions for interacting with NVIDIA's API for summarization and analysis.
"""
import os
import json
from typing import List, Dict, Any

from utils.logger import get_logger
from utils.api.rotator import robust_post_json
from utils.api.router import qwen_chat_completion

logger = get_logger("NVIDIA_INTEGRATION", __name__)

NVIDIA_SMALL = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
NVIDIA_MEDIUM = os.getenv("NVIDIA_MEDIUM", "qwen/qwen3-next-80b-a3b-thinking")


async def nvidia_chat(system_prompt: str, user_prompt: str, nvidia_key: str, rotator) -> str:
    """
    Minimal NVIDIA chat-completion call that enforces concise, commentary-free
    output. Returns the assistant message content, or "" on any error.
    """
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    payload = {
        "model": NVIDIA_SMALL,
        "temperature": 0.0,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    }
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {nvidia_key or ''}"}
    data = None
    try:
        data = await robust_post_json(url, headers, payload, rotator)
        return data["choices"][0]["message"]["content"]
    except Exception as e:
        logger.warning(f"NVIDIA chat error: {e} • response: {data}")
        return ""


async def qwen_chat(system_prompt: str, user_prompt: str, rotator) -> str:
    """
    Qwen chat call for medium-complexity tasks with thinking mode.
    Returns "" on any error.
    """
    try:
        return await qwen_chat_completion(system_prompt, user_prompt, rotator)
    except Exception as e:
        logger.warning(f"Qwen chat error: {e}")
        return ""


def safe_json(s: str) -> Any:
    """Parse a JSON string, falling back to the first {...} object embedded in the text."""
    try:
        return json.loads(s)
    except Exception:
        # Try to extract a JSON object from the surrounding text
        start = s.find("{")
        end = s.rfind("}")
        if start != -1 and end != -1 and end > start:
            try:
                return json.loads(s[start:end + 1])
            except Exception:
                return {}
        return {}
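

# Illustration of the fallback above (hypothetical inputs): safe_json recovers
# the embedded object even when the model wraps it in prose.
#
#   safe_json('{"relevance": []}')                       -> {"relevance": []}
#   safe_json('Sure! Here it is: {"relevance": []} ok')  -> {"relevance": []}
#   safe_json('no json here at all')                     -> {}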


async def summarize_qa(question: str, answer: str, rotator) -> str:
    """
    Return a two-line block:
        q: <concise question summary>
        a: <concise answer summary>
    with no extra commentary, falling back to simple truncation if the model
    returns anything else.
    """
    sys = (
        "You are a terse summarizer. Output exactly two lines:\n"
        "q: <short question summary>\n"
        "a: <short answer summary>\n"
        "No extra text."
    )
    user = f"Question:\n{question}\n\nAnswer:\n{answer}"
    key = rotator.get_key()
    out = await nvidia_chat(sys, user, key, rotator)
    # Guard against the model returning extra prose around the two lines
    lines = [ln.strip() for ln in out.splitlines() if ln.strip()]
    ql = next((l for l in lines if l.lower().startswith("q:")), None)
    al = next((l for l in lines if l.lower().startswith("a:")), None)
    if not ql or not al:
        # Fallback: truncate the raw question/answer
        ql = "q: " + (question.strip()[:160] + ("…" if len(question.strip()) > 160 else ""))
        al = "a: " + (answer.strip()[:220] + ("…" if len(answer.strip()) > 220 else ""))
    return f"{ql}\n{al}"


async def files_relevance(question: str, file_summaries: List[Dict[str, str]], rotator) -> Dict[str, bool]:
    """
    Ask the Qwen model to mark each file as relevant (true) or not (false) for the question.
    Returns {filename: bool}.
    """
    sys = (
        "You classify file relevance. Return STRICT JSON only with shape "
        '{"relevance":[{"filename":"...","relevant":true|false}]}.'
    )
    items = [{"filename": f["filename"], "summary": f.get("summary", "")} for f in file_summaries]
    user = f"Question: {question}\n\nFiles:\n{json.dumps(items, ensure_ascii=False)}\n\nReturn JSON only."
    # Use Qwen for better JSON parsing and reasoning
    out = await qwen_chat(sys, user, rotator)
    data = safe_json(out) or {}
    rels = {}
    for row in data.get("relevance", []):
        fn = row.get("filename")
        rv = row.get("relevant")
        if isinstance(fn, str) and isinstance(rv, bool):
            rels[fn] = rv
    # If parsing failed, default to considering all files possibly relevant
    if not rels and file_summaries:
        rels = {f["filename"]: True for f in file_summaries}
    return rels
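

# Example (hypothetical data): given
#   file_summaries = [{"filename": "auth.py", "summary": "login flow"},
#                     {"filename": "notes.md", "summary": "meeting notes"}]
# and a question about logging in, a well-formed model reply of
#   {"relevance":[{"filename":"auth.py","relevant":true},{"filename":"notes.md","relevant":false}]}
# parses to {"auth.py": True, "notes.md": False}.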


async def related_recent_context(question: str, recent_memories: List[str], rotator) -> str:
    """
    Use Qwen to select the recent-memory items that relate to the new question,
    improving context recall across turns. Returns the selected items verbatim,
    or "" when none relate.
    """
    if not recent_memories:
        return ""
    sys = (
        "Pick only items that directly relate to the new question. Output the "
        "selected items verbatim, no commentary. If none, output nothing."
    )
    numbered = [{"id": i + 1, "text": s} for i, s in enumerate(recent_memories)]
    user = (
        f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\n"
        "Select any related items and output ONLY their 'text' lines concatenated."
    )
    try:
        # Use Qwen for better reasoning and context selection
        out = await qwen_chat(sys, user, rotator)
        return out.strip()
    except Exception as e:
        logger.warning(f"Recent-related Qwen error: {e}")
        return ""