remove google scholar
- app.py +5 -54
- consilium_mcp +1 -0
- enhanced_search_functions.py +2 -8
- requirements.txt +1 -2
- research_tools/__init__.py +0 -2
- research_tools/base_tool.py +0 -1
- research_tools/research_agent.py +3 -5
- research_tools/scholar_search.py +0 -248
- test_research_tools.py +1 -4
app.py
CHANGED
@@ -392,33 +392,10 @@ class VisualConsensusEngine:
             self.update_research_progress(f"Wikipedia search complete - found {len(result)} characters")
 
         elif function_name == "search_academic":
-            source = arguments.get("source", "both")
-
-            if source == "arxiv":
-                self.update_research_progress("Connecting to arXiv preprint server...")
-                self.update_research_progress("Searching academic papers on arXiv...")
-                result = self.search_agent.tools['arxiv'].search(arguments["query"])
-                self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
-
-            elif source == "scholar":
-                self.update_research_progress("Connecting to Google Scholar...")
-                self.update_research_progress("Searching peer-reviewed research...")
-                result = self.search_agent.tools['scholar'].search(arguments["query"])
-                self.update_research_progress(f"Google Scholar search complete - found {len(result)} characters")
-
-            else:  # both sources
-                self.update_research_progress("Connecting to arXiv preprint server...")
-                self.update_research_progress("Searching academic papers on arXiv...")
-                arxiv_result = self.search_agent.tools['arxiv'].search(arguments["query"])
-                self.update_research_progress(f"arXiv complete ({len(arxiv_result)} chars) - now searching Google Scholar...")
-
-                self.update_research_progress("Connecting to Google Scholar...")
-                self.update_research_progress("Searching peer-reviewed research...")
-                scholar_result = self.search_agent.tools['scholar'].search(arguments["query"])
-                self.update_research_progress("Combining arXiv and Google Scholar results...")
-
-                result = f"{arxiv_result}\n\n{scholar_result}"
-                self.update_research_progress(f"Academic search complete - combined {len(result)} characters")
+            self.update_research_progress("Connecting to arXiv preprint server...")
+            self.update_research_progress("Searching academic papers on arXiv...")
+            result = self.search_agent.tools['arxiv'].search(arguments["query"])
+            self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
 
         elif function_name == "search_technology_trends":
             self.update_research_progress("Connecting to GitHub API...")

@@ -1477,7 +1454,7 @@ with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme
     * Visual roundtable of the AI models, including speech bubbles to see the discussion in real time.
     * MCP mode enabled to also use it directly in, for example, Claude Desktop (without the visual table).
     * Includes Mistral (**mistral-large-latest**) via their API and the Models **DeepSeek-R1**, **Meta-Llama-3.3-70B-Instruct** and **QwQ-32B** via the SambaNova API.
-    * Research Agent with 6 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR**, **Google Scholar**) for comprehensive live research.
+    * Research Agent with 6 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR**) for comprehensive live research.
     * Assign different roles to the models, the protocol they should follow, and decide the communication strategy.
     * Pick one model as the lead analyst (had the best results when picking Mistral).
     * Configure the amount of discussion rounds.

@@ -1704,32 +1681,6 @@ with gr.Blocks(title="🎭 Consilium: Multi-AI Expert Consensus Platform", theme
         """)
 
     with gr.Tab("📚 Documentation"):
-        gr.Markdown("""
-        ## 🔬 **Research Capabilities**
-
-        ### **🌐 Multi-Source Research**
-        - **DuckDuckGo Web Search**: Current events, news, real-time information
-        - **Wikipedia**: Authoritative background and encyclopedic data
-        - **arXiv**: Academic papers and scientific research preprints
-        - **Google Scholar**: Peer-reviewed research and citation analysis
-        - **GitHub**: Technology trends, adoption patterns, developer activity
-        - **SEC EDGAR**: Public company financial data and regulatory filings
-
-        ### **🎯 Smart Research Routing**
-        The system automatically routes queries to the most appropriate sources:
-        - **Academic queries** → arXiv + Google Scholar
-        - **Technology questions** → GitHub + Web Search
-        - **Company research** → SEC filings + Web Search
-        - **Current events** → Web Search + Wikipedia
-        - **Deep research** → Multi-source synthesis with quality scoring
-
-        ### **📊 Research Quality Scoring**
-        Each research result is scored on:
-        - **Recency** (0-1): How current is the information
-        - **Authority** (0-1): Source credibility and reliability
-        - **Specificity** (0-1): Quantitative data and specific details
-        - **Relevance** (0-1): How well it matches the query
-        """)
         gr.Markdown("""
         ## 🎭 **Expert Role Assignments**
 
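After this commit, the `search_academic` handler no longer branches on a `source` argument; every academic query goes straight to the arXiv tool. Below is a minimal runnable sketch of the simplified dispatch path — `EngineSketch` and `ArxivStub` are hypothetical stand-ins introduced for illustration; only the method and tool names mirror the diff.

```python
# Minimal sketch of the simplified "search_academic" dispatch after this
# commit. EngineSketch and ArxivStub are hypothetical stand-ins; only the
# method and tool names mirror the diff.
class ArxivStub:
    def search(self, query: str) -> str:
        # Stand-in for the real ArxivSearchTool.search()
        return f"**arXiv results for: {query}**"


class EngineSketch:
    def __init__(self) -> None:
        self.tools = {"arxiv": ArxivStub()}

    def update_research_progress(self, message: str) -> None:
        print(message)

    def handle_function_call(self, function_name: str, arguments: dict) -> str:
        if function_name == "search_academic":
            # Single code path now: no "source" argument, no Scholar branch.
            self.update_research_progress("Connecting to arXiv preprint server...")
            self.update_research_progress("Searching academic papers on arXiv...")
            result = self.tools["arxiv"].search(arguments["query"])
            self.update_research_progress(
                f"arXiv search complete - found {len(result)} characters"
            )
            return result
        raise ValueError(f"Unhandled function: {function_name}")


if __name__ == "__main__":
    EngineSketch().handle_function_call(
        "search_academic", {"query": "transformer scaling laws"}
    )
```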
consilium_mcp
ADDED
@@ -0,0 +1 @@
+Subproject commit 883815f94aa0a2cba5d9bf5ea89db12fd75a1676
enhanced_search_functions.py
CHANGED
@@ -48,19 +48,13 @@ ENHANCED_SEARCH_FUNCTIONS = [
         "type": "function",
         "function": {
             "name": "search_academic",
-            "description": "Search academic papers and research on arXiv and Google Scholar for scientific evidence",
+            "description": "Search academic papers and research on arXiv for scientific evidence",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "query": {
                         "type": "string",
                         "description": "Academic research query to find peer-reviewed papers and scientific studies"
-                    },
-                    "source": {
-                        "type": "string",
-                        "enum": ["arxiv", "scholar", "both"],
-                        "description": "Academic source to search - arXiv for preprints, Scholar for citations, both for comprehensive",
-                        "default": "both"
                     }
                 },
                 "required": ["query"]

@@ -117,7 +111,7 @@ ENHANCED_SEARCH_FUNCTIONS = [
                     "type": "array",
                     "items": {
                         "type": "string",
-                        "enum": ["web", "wikipedia", "arxiv", "scholar", "github", "sec"]
+                        "enum": ["web", "wikipedia", "arxiv", "github", "sec"]
                     },
                     "description": "Priority list of sources to focus on for this research",
                     "default": []
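The resulting `search_academic` entry collapses to a single required `query` parameter. The sketch below reassembles the post-commit schema from the hunks above so a tool call can be sanity-checked in isolation; the standalone constant name `SEARCH_ACADEMIC` is introduced here for illustration, since in the file the dict sits inside `ENHANCED_SEARCH_FUNCTIONS`.

```python
# The search_academic schema after this commit, reassembled from the diff.
# SEARCH_ACADEMIC is a hypothetical name for this sketch; in the file the
# dict is an element of ENHANCED_SEARCH_FUNCTIONS.
SEARCH_ACADEMIC = {
    "type": "function",
    "function": {
        "name": "search_academic",
        "description": "Search academic papers and research on arXiv for scientific evidence",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Academic research query to find peer-reviewed papers and scientific studies",
                }
            },
            "required": ["query"],
        },
    },
}

# A call now only needs a query; there is no "source" field left to validate.
assert SEARCH_ACADEMIC["function"]["parameters"]["required"] == ["query"]
assert "source" not in SEARCH_ACADEMIC["function"]["parameters"]["properties"]
```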
requirements.txt
CHANGED
@@ -7,5 +7,4 @@ python-dotenv
 duckduckgo-search
 wikipedia
 gradio-consilium-roundtable
-openai
-scholarly
+openai
research_tools/__init__.py
CHANGED
@@ -5,7 +5,6 @@ from .wikipedia_search import WikipediaSearchTool
 from .arxiv_search import ArxivSearchTool
 from .github_search import GitHubSearchTool
 from .sec_search import SECSearchTool
-from .scholar_search import GoogleScholarTool
 from .research_agent import EnhancedResearchAgent
 
 __all__ = [

@@ -15,6 +14,5 @@ __all__ = [
     'ArxivSearchTool',
     'GitHubSearchTool',
     'SECSearchTool',
-    'GoogleScholarTool',
     'EnhancedResearchAgent'
 ]
research_tools/base_tool.py
CHANGED
@@ -65,7 +65,6 @@ class BaseTool(ABC):
         """Check source authority and credibility indicators"""
         authority_indicators = {
             'arxiv': 0.9,
-            'scholar': 0.9,
             'sec': 0.95,
             'github': 0.7,
             'wikipedia': 0.8,
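With the `'scholar'` entry gone, the authority table covers only the sources visible in this window of the file (the trailing comma after `'wikipedia'` suggests more entries follow). A small sketch of how such a table is typically consulted; the `0.5` fallback for unknown sources is an assumption, not something the diff shows.

```python
# Authority weights for the sources visible in the diff window; the real
# dict may contain further entries. The 0.5 fallback is an assumption
# made for this sketch.
AUTHORITY_INDICATORS = {
    "arxiv": 0.9,
    "sec": 0.95,
    "github": 0.7,
    "wikipedia": 0.8,
}


def authority_score(source: str, default: float = 0.5) -> float:
    """Return the credibility weight for a source, defaulting when unknown."""
    return AUTHORITY_INDICATORS.get(source, default)


assert authority_score("sec") == 0.95
assert authority_score("unknown-source") == 0.5
```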
research_tools/research_agent.py
CHANGED
@@ -11,7 +11,6 @@ from .wikipedia_search import WikipediaSearchTool
 from .arxiv_search import ArxivSearchTool
 from .github_search import GitHubSearchTool
 from .sec_search import SECSearchTool
-from .scholar_search import GoogleScholarTool
 
 
 class EnhancedResearchAgent:

@@ -24,8 +23,7 @@ class EnhancedResearchAgent:
             'wikipedia': WikipediaSearchTool(),
             'arxiv': ArxivSearchTool(),
             'github': GitHubSearchTool(),
-            'sec': SECSearchTool(),
-            'scholar': GoogleScholarTool()
+            'sec': SECSearchTool()
         }
 
         # Tool availability status

@@ -92,7 +90,7 @@
         for tool_name, tool in self.tools.items():
             if tool.should_use_for_query(query):
                 # Return first matching tool based on priority order
-                priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
+                priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
                 if tool_name in priority_order[:3]:  # High-priority specialized tools
                     return tool_name
 

@@ -123,7 +121,7 @@
         # Ensure we don't overwhelm with too many sources
         if len(relevant_tools) > 4:
             # Prioritize specialized tools
             priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
-            priority_order = ['arxiv', 'sec', 'github', 'scholar', 'wikipedia', 'web']
+            priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
             relevant_tools = [tool for tool in priority_order if tool in relevant_tools][:4]
 
         return relevant_tools
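Both `priority_order` lists now agree on the same five sources. The capping logic from the last hunk, extracted into a standalone runnable sketch — the function name and module-level constant are illustrative, while the list literal and the trimming expression are taken from the diff:

```python
# Source-capping step from the diff: when more than four tools match a
# query, keep at most four, preferring specialized sources first.
PRIORITY_ORDER = ["arxiv", "sec", "github", "wikipedia", "web"]


def cap_relevant_tools(relevant_tools: list, limit: int = 4) -> list:
    if len(relevant_tools) > limit:
        # Prioritize specialized tools, in the diff's priority order
        relevant_tools = [tool for tool in PRIORITY_ORDER if tool in relevant_tools][:limit]
    return relevant_tools


# With all five sources matching, the lowest-priority one ('web') is dropped:
print(cap_relevant_tools(["web", "wikipedia", "arxiv", "github", "sec"]))
# ['arxiv', 'sec', 'github', 'wikipedia']
```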
research_tools/scholar_search.py
DELETED
@@ -1,248 +0,0 @@
-"""
-Google Scholar Search Tool for academic research
-"""
-from .base_tool import BaseTool
-from typing import List, Dict, Optional
-
-try:
-    from scholarly import scholarly
-    SCHOLARLY_AVAILABLE = True
-except ImportError:
-    SCHOLARLY_AVAILABLE = False
-
-
-class GoogleScholarTool(BaseTool):
-    """Search Google Scholar for academic research papers"""
-
-    def __init__(self):
-        super().__init__("Google Scholar", "Search Google Scholar for academic research papers and citations")
-        self.available = SCHOLARLY_AVAILABLE
-        self.rate_limit_delay = 3.0  # Be very respectful to Google Scholar
-
-    def search(self, query: str, max_results: int = 4, **kwargs) -> str:
-        """Search Google Scholar for research papers"""
-        if not self.available:
-            return self._unavailable_response(query)
-
-        self.rate_limit()
-
-        try:
-            # Search for publications with timeout handling
-            search_query = scholarly.search_pubs(query)
-
-            papers = []
-            for i, paper in enumerate(search_query):
-                if i >= max_results:
-                    break
-                papers.append(paper)
-
-            if papers:
-                result = f"**Google Scholar Research for: {query}**\n\n"
-                result += self._format_scholar_results(papers)
-                result += self._analyze_research_quality(papers)
-                return result
-            else:
-                return f"**Google Scholar Research for: {query}**\n\nNo relevant academic papers found."
-
-        except Exception as e:
-            error_msg = str(e)
-            if "blocked" in error_msg.lower() or "captcha" in error_msg.lower():
-                return f"**Google Scholar Research for: {query}**\n\nGoogle Scholar is temporarily blocking automated requests. This is normal behavior. Academic research is available through other sources like arXiv."
-            elif "timeout" in error_msg.lower():
-                return f"**Google Scholar Research for: {query}**\n\nRequest timeout - Google Scholar may be experiencing high load. Academic research available but slower than expected."
-            else:
-                return self.format_error_response(query, str(e))
-
-    def _unavailable_response(self, query: str) -> str:
-        """Response when scholarly library is not available"""
-        result = f"**Google Scholar Research for: {query}**\n\n"
-        result += "**Library Not Available**\n"
-        result += "Google Scholar integration requires the 'scholarly' library.\n\n"
-        result += "**Installation Instructions:**\n"
-        result += "```bash\n"
-        result += "pip install scholarly\n"
-        result += "```\n\n"
-        result += "**Alternative Academic Sources:**\n"
-        result += "• arXiv (for preprints and technical papers)\n"
-        result += "• PubMed (for medical and life sciences)\n"
-        result += "• IEEE Xplore (for engineering and computer science)\n"
-        result += "• JSTOR (for humanities and social sciences)\n\n"
-        result += "**Research Recommendation:**\n"
-        result += f"For the query '{query}', consider searching:\n"
-        result += "• Recent academic publications\n"
-        result += "• Peer-reviewed research articles\n"
-        result += "• Citation networks and impact metrics\n\n"
-
-        return result
-
-    def _format_scholar_results(self, papers: List[Dict]) -> str:
-        """Format Google Scholar search results"""
-        result = ""
-
-        for i, paper in enumerate(papers, 1):
-            # Extract paper information safely with better handling
-            title = paper.get('title', paper.get('bib', {}).get('title', 'Unknown Title'))
-
-            # Handle authors more robustly
-            authors = self._format_authors(paper.get('author', paper.get('bib', {}).get('author', [])))
-
-            # Get year from multiple possible locations
-            year = (paper.get('year') or
-                    paper.get('bib', {}).get('pub_year') or
-                    paper.get('bib', {}).get('year') or
-                    'Unknown Year')
-
-            # Get venue from multiple possible locations
-            venue = (paper.get('venue') or
-                    paper.get('bib', {}).get('venue') or
-                    paper.get('bib', {}).get('journal') or
-                    paper.get('bib', {}).get('booktitle') or
-                    'Unknown Venue')
-
-            citations = paper.get('num_citations', paper.get('citedby', 0))
-
-            result += f"**Paper {i}: {title}**\n"
-            result += f"Authors: {authors}\n"
-            result += f"Year: {year} | Venue: {venue}\n"
-            result += f"Citations: {citations:,}\n"
-
-            # Add abstract if available
-            abstract = (paper.get('abstract') or
-                        paper.get('bib', {}).get('abstract') or
-                        paper.get('summary'))
-
-            if abstract and len(str(abstract).strip()) > 10:
-                abstract_text = str(abstract)
-                if len(abstract_text) > 300:
-                    abstract_text = abstract_text[:300] + "..."
-                result += f"Abstract: {abstract_text}\n"
-
-            # Add URL if available
-            url = (paper.get('url') or
-                   paper.get('pub_url') or
-                   paper.get('eprint_url'))
-
-            if url:
-                result += f"URL: {url}\n"
-
-            result += "\n"
-
-        return result
-
-    def _format_authors(self, authors) -> str:
-        """Format author list safely with improved handling"""
-        if not authors:
-            return "Unknown Authors"
-
-        if isinstance(authors, str):
-            return authors
-        elif isinstance(authors, list):
-            # Handle list of author dictionaries or strings
-            author_names = []
-            for author in authors[:5]:  # Limit to first 5 authors
-                if isinstance(author, dict):
-                    # Try different possible name fields
-                    name = (author.get('name') or
-                            author.get('full_name') or
-                            author.get('firstname', '') + ' ' + author.get('lastname', '') or
-                            str(author))
-                    name = name.strip()
-                else:
-                    name = str(author).strip()
-
-                if name and name != 'Unknown Authors':
-                    author_names.append(name)
-
-            if not author_names:
-                return "Unknown Authors"
-
-            if len(authors) > 5:
-                author_names.append("et al.")
-
-            return ", ".join(author_names)
-        else:
-            return str(authors) if authors else "Unknown Authors"
-
-    def _analyze_research_quality(self, papers: List[Dict]) -> str:
-        """Analyze the quality and impact of research results"""
-        if not papers:
-            return ""
-
-        # Calculate citation metrics
-        citations = [paper.get('num_citations', 0) for paper in papers]
-        total_citations = sum(citations)
-        avg_citations = total_citations / len(papers) if papers else 0
-        high_impact_papers = sum(1 for c in citations if c > 100)
-
-        # Analyze publication years
-        years = [paper.get('year') for paper in papers if paper.get('year')]
-        recent_papers = sum(1 for year in years if isinstance(year, (int, str)) and str(year) in ['2023', '2024', '2025'])
-
-        # Analyze venues
-        venues = [paper.get('venue', '') for paper in papers]
-        unique_venues = len(set(v for v in venues if v and v != 'Unknown Venue'))
-
-        result = f"**Research Quality Analysis:**\n"
-        result += f"• Papers analyzed: {len(papers)}\n"
-        result += f"• Total citations: {total_citations:,}\n"
-        result += f"• Average citations per paper: {avg_citations:.1f}\n"
-        result += f"• High-impact papers (>100 citations): {high_impact_papers}\n"
-        result += f"• Recent publications (2023-2025): {recent_papers}\n"
-        result += f"• Venue diversity: {unique_venues} different publication venues\n"
-
-        # Research quality assessment
-        if avg_citations > 50:
-            quality_level = "High Impact"
-        elif avg_citations > 20:
-            quality_level = "Moderate Impact"
-        elif avg_citations > 5:
-            quality_level = "Emerging Research"
-        else:
-            quality_level = "Early Stage"
-
-        result += f"• Research maturity: {quality_level}\n"
-
-        # Authority assessment
-        if high_impact_papers > 0 and recent_papers > 0:
-            authority = "High - Established field with recent developments"
-        elif high_impact_papers > 0:
-            authority = "Moderate - Established field, may need recent updates"
-        elif recent_papers > 0:
-            authority = "Emerging - New research area with growing interest"
-        else:
-            authority = "Limited - Sparse academic coverage"
-
-        result += f"• Academic authority: {authority}\n\n"
-
-        return result
-
-    def should_use_for_query(self, query: str) -> bool:
-        """Google Scholar is good for academic research, citations, and scholarly articles"""
-        academic_indicators = [
-            'research', 'study', 'academic', 'paper', 'journal', 'peer-reviewed',
-            'citation', 'scholar', 'university', 'professor', 'phd', 'thesis',
-            'methodology', 'experiment', 'analysis', 'theory', 'empirical',
-            'literature review', 'meta-analysis', 'systematic review',
-            'conference', 'publication', 'scholarly'
-        ]
-
-        query_lower = query.lower()
-        return any(indicator in query_lower for indicator in academic_indicators)
-
-    def extract_key_info(self, text: str) -> dict:
-        """Extract key information from Scholar results"""
-        base_info = super().extract_key_info(text)
-
-        if text:
-            # Look for Scholar-specific patterns
-            base_info.update({
-                'has_citations': 'Citations:' in text,
-                'has_abstracts': 'Abstract:' in text,
-                'has_venues': 'Venue:' in text,
-                'has_recent_papers': any(year in text for year in ['2023', '2024', '2025']),
-                'has_high_impact': any(citation in text for citation in ['100', '200', '500', '1000']),
-                'is_available': 'Library Not Available' not in text,
-                'paper_count': text.count('**Paper')
-            })
-
-        return base_info
test_research_tools.py
CHANGED
@@ -35,7 +35,6 @@ def test_tool_imports():
     from research_tools.arxiv_search import ArxivSearchTool
     from research_tools.github_search import GitHubSearchTool
     from research_tools.sec_search import SECSearchTool
-    from research_tools.scholar_search import GoogleScholarTool
 
     print("✅ All tool imports successful")
     return True

@@ -108,8 +107,7 @@
         'wikipedia': ('artificial intelligence', {}),
         'arxiv': ('machine learning', {}),
         'github': ('python', {}),
-        'sec': ('Apple', {}),
-        'scholar': ('deep learning', {})
+        'sec': ('Apple', {})
     }
 
     for tool_name, (query, kwargs) in test_queries.items():

@@ -262,7 +260,6 @@ def test_dependency_check():
        'requests': 'HTTP requests',
        'xml.etree.ElementTree': 'XML parsing (built-in)',
        'wikipedia': 'Wikipedia search',
-       'scholarly': 'Google Scholar (optional)',
        'smolagents': 'Web search agents'
    }
 