Spaces:

Nymbo
/

Tools

Running

App Files Community

Nymbo committed on Oct 11

Commit

37dcc6f

verified ·

1 Parent(s): bb22cbf

Deep_Researcher now knows the current date

Browse files

Files changed (1) hide show

Modules/Deep_Research.py +47 -36

Modules/Deep_Research.py CHANGED Viewed

@@ -6,6 +6,7 @@ import tempfile
 import time
 from collections import deque
 from concurrent.futures import Future, ThreadPoolExecutor, as_completed
 from typing import Annotated, Dict, List, Tuple
 from urllib.parse import urlparse
@@ -23,11 +24,45 @@ HF_TEXTGEN_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
 # Single source of truth for the LLM-facing tool description
 TOOL_SUMMARY = (
-    "Run multiple DuckDuckGo searches (up to 50 max results), fetch pages, and produce a comprehensive research report with sources; "
     "returns (Markdown report, newline-separated source links, downloadable report path). "
     "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report](URL)`"
 )
 class SlowHost(Exception):
     pass
@@ -106,38 +141,6 @@ def _truncate_join(parts: List[str], max_chars: int) -> Tuple[str, bool]:
 def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
-    researcher_instructions = (
-        "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
-        "You have been provided with User Question, Search Queries, and numerous webpages that the searches yielded.\n\n"
-        "<report_format>\n"
-        "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
-        "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
-        "</report_format>\n\n"
-        "<document_structure>\n"
-        "- Always begin with a clear title using a single # header\n"
-        "- Organize content into major sections using ## headers\n"
-        "- Further divide into subsections using ### headers\n"
-        "- Use #### headers sparingly for special subsections\n"
-        "- Never skip header levels\n"
-        "- Write multiple paragraphs per section or subsection\n"
-        "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
-        "- Never use lists, instead always use text or tables\n\n"
-        "Mandatory Section Flow:\n"
-        "1. Title (# level)\n   - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
-        "2. Main Body Sections (## level)\n   - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n   - Use ### subsections for detailed analysis\n   - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n   - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
-        "3. Conclusion (## level)\n   - Synthesis of findings\n   - Potential recommendations or next steps\n"
-        "</document_structure>\n\n"
-        "<planning_rules>\n"
-        "- Always break it down into multiple steps\n"
-        "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
-        "- Create the best report that weighs all the evidence from the sources\n"
-        "- Remember that the current date is: Wednesday, April 23, 2025, 11:50 AM EDT\n"
-        "- Make sure that your final report addresses all parts of the query\n"
-        "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
-        "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
-        "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
-        "</planning_rules>\n\n"
-    )
     sources_blocks: List[str] = []
     indexed_urls: List[str] = []
     for idx, url in enumerate(url_list, start=1):
@@ -147,7 +150,7 @@ def _build_research_prompt(summary: str, queries: List[str], url_list: List[str]
         indexed_urls.append(f"[{idx}] {url}")
         sources_blocks.append(f"[Source {idx}] URL: {url}\n\n{text}")
     sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
-    prompt_parts = [researcher_instructions]
     prompt_parts.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
     populated = [q for q in queries if q and q.strip()]
     if populated:
@@ -337,8 +340,15 @@ def Deep_Research(
                             pass
                     schedule_next(executor)
     prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
     messages = [
-        {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
         {"role": "user", "content": prompt},
     ]
     try:
@@ -371,7 +381,8 @@ def Deep_Research(
                 pages_map={key: pages[key] for key in list(pages.keys())[:30]},
             )
             messages = [
-                {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
                 {"role": "user", "content": prompt2},
             ]
             print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)

 import time
 from collections import deque
 from concurrent.futures import Future, ThreadPoolExecutor, as_completed
+from datetime import datetime
 from typing import Annotated, Dict, List, Tuple
 from urllib.parse import urlparse
 # Single source of truth for the LLM-facing tool description
 TOOL_SUMMARY = (
+    "Write a summary of what the user wants to research, and "
+    "run multiple DuckDuckGo searches (up to 50 max results between all queries), fetch pages, and a Research agent will produce a comprehensive research report with sources; "
     "returns (Markdown report, newline-separated source links, downloadable report path). "
     "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report](URL)`"
 )
+RESEARCHER_SYSTEM_PROMPT = (
+    "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
+    "You will receive a summary of the user question, the search queries used, and the fetched webpages. Follow the guidance below when writing the report.\n\n"
+    "<report_format>\n"
+    "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
+    "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
+    "</report_format>\n\n"
+    "<document_structure>\n"
+    "- Always begin with a clear title using a single # header\n"
+    "- Organize content into major sections using ## headers\n"
+    "- Further divide into subsections using ### headers\n"
+    "- Use #### headers sparingly for special subsections\n"
+    "- Never skip header levels\n"
+    "- Write multiple paragraphs per section or subsection\n"
+    "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
+    "- Never use lists, instead always use text or tables\n\n"
+    "Mandatory Section Flow:\n"
+    "1. Title (# level)\n   - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
+    "2. Main Body Sections (## level)\n   - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n   - Use ### subsections for detailed analysis\n   - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n   - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
+    "3. Conclusion (## level)\n   - Synthesis of findings\n   - Potential recommendations or next steps\n"
+    "</document_structure>\n\n"
+    "<planning_rules>\n"
+    "- Always break it down into multiple steps\n"
+    "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
+    "- Create the best report that weighs all the evidence from the sources\n"
+    "- Use the current date supplied in the first user message to contextualize findings\n"
+    "- Make sure that your final report addresses all parts of the query\n"
+    "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
+    "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
+    "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
+    "</planning_rules>\n\n"
+)
 class SlowHost(Exception):
     pass
 def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
     sources_blocks: List[str] = []
     indexed_urls: List[str] = []
     for idx, url in enumerate(url_list, start=1):
         indexed_urls.append(f"[{idx}] {url}")
         sources_blocks.append(f"[Source {idx}] URL: {url}\n\n{text}")
     sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
+    prompt_parts: List[str] = []
     prompt_parts.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
     populated = [q for q in queries if q and q.strip()]
     if populated:
                             pass
                     schedule_next(executor)
     prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
+    now = datetime.now().astimezone()
+    date_str = now.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
+    if not date_str:
+        date_str = now.isoformat()
+    system_message = {"role": "system", "content": RESEARCHER_SYSTEM_PROMPT}
+    date_message = {"role": "user", "content": f"The current date is {date_str}. Return only the research report."}
     messages = [
+        system_message,
+        date_message,
         {"role": "user", "content": prompt},
     ]
     try:
                 pages_map={key: pages[key] for key in list(pages.keys())[:30]},
             )
             messages = [
+                system_message,
+                date_message,
                 {"role": "user", "content": prompt2},
             ]
             print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)