Nymbo committed on
Commit
37dcc6f
·
verified ·
1 Parent(s): bb22cbf

Deep_Researcher now knows the current date

Browse files
Files changed (1) hide show
  1. Modules/Deep_Research.py +47 -36
Modules/Deep_Research.py CHANGED
@@ -6,6 +6,7 @@ import tempfile
6
  import time
7
  from collections import deque
8
  from concurrent.futures import Future, ThreadPoolExecutor, as_completed
 
9
  from typing import Annotated, Dict, List, Tuple
10
  from urllib.parse import urlparse
11
 
@@ -23,11 +24,45 @@ HF_TEXTGEN_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
23
 
24
  # Single source of truth for the LLM-facing tool description
25
  TOOL_SUMMARY = (
26
- "Run multiple DuckDuckGo searches (up to 50 max results), fetch pages, and produce a comprehensive research report with sources; "
 
27
  "returns (Markdown report, newline-separated source links, downloadable report path). "
28
  "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report](URL)`"
29
  )
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  class SlowHost(Exception):
33
  pass
@@ -106,38 +141,6 @@ def _truncate_join(parts: List[str], max_chars: int) -> Tuple[str, bool]:
106
 
107
 
108
  def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
109
- researcher_instructions = (
110
- "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
111
- "You have been provided with User Question, Search Queries, and numerous webpages that the searches yielded.\n\n"
112
- "<report_format>\n"
113
- "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
114
- "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
115
- "</report_format>\n\n"
116
- "<document_structure>\n"
117
- "- Always begin with a clear title using a single # header\n"
118
- "- Organize content into major sections using ## headers\n"
119
- "- Further divide into subsections using ### headers\n"
120
- "- Use #### headers sparingly for special subsections\n"
121
- "- Never skip header levels\n"
122
- "- Write multiple paragraphs per section or subsection\n"
123
- "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
124
- "- Never use lists, instead always use text or tables\n\n"
125
- "Mandatory Section Flow:\n"
126
- "1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
127
- "2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
128
- "3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
129
- "</document_structure>\n\n"
130
- "<planning_rules>\n"
131
- "- Always break it down into multiple steps\n"
132
- "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
133
- "- Create the best report that weighs all the evidence from the sources\n"
134
- "- Remember that the current date is: Wednesday, April 23, 2025, 11:50 AM EDT\n"
135
- "- Make sure that your final report addresses all parts of the query\n"
136
- "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
137
- "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
138
- "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
139
- "</planning_rules>\n\n"
140
- )
141
  sources_blocks: List[str] = []
142
  indexed_urls: List[str] = []
143
  for idx, url in enumerate(url_list, start=1):
@@ -147,7 +150,7 @@ def _build_research_prompt(summary: str, queries: List[str], url_list: List[str]
147
  indexed_urls.append(f"[{idx}] {url}")
148
  sources_blocks.append(f"[Source {idx}] URL: {url}\n\n{text}")
149
  sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
150
- prompt_parts = [researcher_instructions]
151
  prompt_parts.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
152
  populated = [q for q in queries if q and q.strip()]
153
  if populated:
@@ -337,8 +340,15 @@ def Deep_Research(
337
  pass
338
  schedule_next(executor)
339
  prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
 
 
 
 
 
 
340
  messages = [
341
- {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
 
342
  {"role": "user", "content": prompt},
343
  ]
344
  try:
@@ -371,7 +381,8 @@ def Deep_Research(
371
  pages_map={key: pages[key] for key in list(pages.keys())[:30]},
372
  )
373
  messages = [
374
- {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
 
375
  {"role": "user", "content": prompt2},
376
  ]
377
  print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
 
6
  import time
7
  from collections import deque
8
  from concurrent.futures import Future, ThreadPoolExecutor, as_completed
9
+ from datetime import datetime
10
  from typing import Annotated, Dict, List, Tuple
11
  from urllib.parse import urlparse
12
 
 
24
 
25
  # Single source of truth for the LLM-facing tool description
26
  TOOL_SUMMARY = (
27
+ "Write a summary of what the user wants to research, and "
28
+ "run multiple DuckDuckGo searches (up to 50 max results between all queries), fetch pages, and a Research agent will produce a comprehensive research report with sources; "
29
  "returns (Markdown report, newline-separated source links, downloadable report path). "
30
  "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report](URL)`"
31
  )
32
 
33
+ RESEARCHER_SYSTEM_PROMPT = (
34
+ "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
35
+ "You will receive a summary of the user question, the search queries used, and the fetched webpages. Follow the guidance below when writing the report.\n\n"
36
+ "<report_format>\n"
37
+ "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
38
+ "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
39
+ "</report_format>\n\n"
40
+ "<document_structure>\n"
41
+ "- Always begin with a clear title using a single # header\n"
42
+ "- Organize content into major sections using ## headers\n"
43
+ "- Further divide into subsections using ### headers\n"
44
+ "- Use #### headers sparingly for special subsections\n"
45
+ "- Never skip header levels\n"
46
+ "- Write multiple paragraphs per section or subsection\n"
47
+ "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
48
+ "- Never use lists, instead always use text or tables\n\n"
49
+ "Mandatory Section Flow:\n"
50
+ "1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
51
+ "2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
52
+ "3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
53
+ "</document_structure>\n\n"
54
+ "<planning_rules>\n"
55
+ "- Always break it down into multiple steps\n"
56
+ "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
57
+ "- Create the best report that weighs all the evidence from the sources\n"
58
+ "- Use the current date supplied in the first user message to contextualize findings\n"
59
+ "- Make sure that your final report addresses all parts of the query\n"
60
+ "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
61
+ "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
62
+ "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
63
+ "</planning_rules>\n\n"
64
+ )
65
+
66
 
67
  class SlowHost(Exception):
68
  pass
 
141
 
142
 
143
  def _build_research_prompt(summary: str, queries: List[str], url_list: List[str], pages_map: Dict[str, str]) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  sources_blocks: List[str] = []
145
  indexed_urls: List[str] = []
146
  for idx, url in enumerate(url_list, start=1):
 
150
  indexed_urls.append(f"[{idx}] {url}")
151
  sources_blocks.append(f"[Source {idx}] URL: {url}\n\n{text}")
152
  sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
153
+ prompt_parts: List[str] = []
154
  prompt_parts.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
155
  populated = [q for q in queries if q and q.strip()]
156
  if populated:
 
340
  pass
341
  schedule_next(executor)
342
  prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
343
+ now = datetime.now().astimezone()
344
+ date_str = now.strftime("%A, %B %d, %Y %I:%M %p %Z").strip()
345
+ if not date_str:
346
+ date_str = now.isoformat()
347
+ system_message = {"role": "system", "content": RESEARCHER_SYSTEM_PROMPT}
348
+ date_message = {"role": "user", "content": f"The current date is {date_str}. Return only the research report."}
349
  messages = [
350
+ system_message,
351
+ date_message,
352
  {"role": "user", "content": prompt},
353
  ]
354
  try:
 
381
  pages_map={key: pages[key] for key in list(pages.keys())[:30]},
382
  )
383
  messages = [
384
+ system_message,
385
+ date_message,
386
  {"role": "user", "content": prompt2},
387
  ]
388
  print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)