"""
Research Tracker MCP Server
A Gradio-based MCP server that provides research inference utilities.
Delegates inference logic to the research-tracker-backend for consistency.
"""
import os
import requests
import gradio as gr
from typing import List, Dict, Any
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Configuration
BACKEND_URL = "https://dylanebert-research-tracker-backend.hf.space"
HF_TOKEN = os.environ.get("HF_TOKEN")
REQUEST_TIMEOUT = 30
if not HF_TOKEN:
logger.warning("HF_TOKEN not found in environment variables")
def validate_input(input_data: str, input_name: str = "input") -> str:
"""
Validate and sanitize input data.
Args:
input_data: The input string to validate
input_name: Name of the input for error messages
Returns:
Cleaned input string
Raises:
ValueError: If input is invalid
"""
if not input_data:
raise ValueError(f"{input_name} cannot be empty or None")
cleaned = input_data.strip()
if not cleaned:
raise ValueError(f"{input_name} cannot be empty after trimming")
# Basic URL validation if it looks like a URL
if cleaned.startswith(("http://", "https://")):
if len(cleaned) > 2000:
raise ValueError(f"{input_name} URL is too long (max 2000 characters)")
        # Reject http(s) URLs that embed other schemes (e.g. javascript:), a common injection vector
suspicious_patterns = ["javascript:", "data:", "file:", "ftp:"]
if any(pattern in cleaned.lower() for pattern in suspicious_patterns):
raise ValueError(f"{input_name} contains invalid URL scheme")
return cleaned
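# Illustrative sketch (comments only, not executed at import time): how
# validate_input behaves on typical inputs. The URLs are placeholders.
#
#     validate_input("  https://arxiv.org/abs/2010.11929  ")
#     # -> "https://arxiv.org/abs/2010.11929" (whitespace trimmed)
#     validate_input("Vision Transformer", "paper_title")
#     # -> "Vision Transformer" (non-URL inputs pass through unchanged)
#     validate_input("javascript:alert(1)")
#     # -> returned as-is: not an http(s) URL, so the scheme checks are skipped
#     validate_input("https://example.com/?next=javascript:alert(1)")
#     # -> raises ValueError (embedded scheme matches a suspicious pattern)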
def make_backend_request(endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
"""
Make a request to the research-tracker-backend with comprehensive error handling.
Args:
endpoint: The backend endpoint to call (e.g., 'infer-authors')
data: The data to send in the request body
Returns:
The response data from the backend
Raises:
Exception: If the request fails or returns an error
"""
if not HF_TOKEN:
logger.warning("HF_TOKEN not available - backend requests may fail")
url = f"{BACKEND_URL}/{endpoint}"
    headers = {"Content-Type": "application/json"}
    # Attach the Authorization header only when a token is available,
    # rather than sending an empty Bearer value
    if HF_TOKEN:
        headers["Authorization"] = f"Bearer {HF_TOKEN}"
try:
logger.debug(f"Making request to {endpoint} with data: {data}")
response = requests.post(url, json=data, headers=headers, timeout=REQUEST_TIMEOUT)
if response.status_code == 401:
raise Exception("Authentication failed - please check HF_TOKEN")
elif response.status_code == 403:
raise Exception("Access forbidden - insufficient permissions")
elif response.status_code == 404:
raise Exception(f"Backend endpoint {endpoint} not found")
elif response.status_code == 422:
raise Exception("Invalid request data format")
elif response.status_code >= 500:
raise Exception(f"Backend server error (status {response.status_code})")
response.raise_for_status()
result = response.json()
logger.debug(f"Backend response: {result}")
return result
except requests.exceptions.Timeout:
raise Exception(f"Backend request to {endpoint} timed out after {REQUEST_TIMEOUT}s")
except requests.exceptions.ConnectionError:
raise Exception(f"Failed to connect to backend - service may be unavailable")
except requests.exceptions.RequestException as e:
raise Exception(f"Backend request to {endpoint} failed: {str(e)}")
except ValueError as e:
raise Exception(f"Invalid JSON response from backend: {str(e)}")
def create_row_data(input_data: str) -> Dict[str, Any]:
"""
Create standardized row data structure for backend requests.
Args:
input_data: The input string to analyze
Returns:
Dictionary with appropriate field populated
"""
row_data = {
"Name": None,
"Authors": [],
"Paper": None,
"Code": None,
"Project": None,
"Space": None,
"Model": None,
"Dataset": None,
}
# Classify input based on URL patterns
if input_data.startswith(("http://", "https://")):
if "arxiv.org" in input_data or "huggingface.co/papers" in input_data:
row_data["Paper"] = input_data
elif "github.com" in input_data:
row_data["Code"] = input_data
elif "github.io" in input_data:
row_data["Project"] = input_data
elif "huggingface.co/spaces" in input_data:
row_data["Space"] = input_data
elif "huggingface.co/datasets" in input_data:
row_data["Dataset"] = input_data
elif "huggingface.co/" in input_data:
row_data["Model"] = input_data
else:
row_data["Paper"] = input_data
else:
row_data["Name"] = input_data
return row_data
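# Illustrative sketch: how inputs map onto row fields.
#
#     create_row_data("https://github.com/google-research/vision_transformer")
#     # -> {"Code": "https://github.com/...", "Name": None, "Authors": [], ...}
#     create_row_data("Vision Transformer")
#     # -> {"Name": "Vision Transformer", "Paper": None, ...}
#     create_row_data("https://example.com/post")   # unrecognized URL
#     # -> falls through to the paper field: {"Paper": "https://example.com/post", ...}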
def infer_authors(input_data: str) -> List[str]:
"""
Infer authors from research paper or project information.
This function attempts to extract author names from various inputs like
paper URLs (arXiv, Hugging Face papers), project pages, or repository links.
It uses the research-tracker-backend inference engine with sophisticated
author extraction from paper metadata and repository contributor information.
Args:
input_data: A URL, paper title, or other research-related input.
Supports arXiv URLs, GitHub repositories, HuggingFace resources,
project pages, and natural language paper titles.
Returns:
A list of author names as strings, or empty list if no authors found.
Authors are returned in the order they appear in the original source.
Examples:
>>> infer_authors("https://arxiv.org/abs/2010.11929")
["Alexey Dosovitskiy", "Lucas Beyer", "Alexander Kolesnikov", ...]
>>> infer_authors("https://github.com/google-research/vision_transformer")
["Alexey Dosovitskiy", "Lucas Beyer", ...]
>>> infer_authors("Vision Transformer")
["Alexey Dosovitskiy", "Lucas Beyer", ...]
Raises:
        No exceptions are raised; errors are logged and an empty list is returned.
"""
try:
# Validate and clean input
cleaned_input = validate_input(input_data, "input_data")
# Create structured data for backend
row_data = create_row_data(cleaned_input)
# Call the backend
result = make_backend_request("infer-authors", row_data)
# Extract and validate authors from response
authors = result.get("authors", [])
if isinstance(authors, str):
# Handle comma-separated string format
authors = [author.strip() for author in authors.split(",") if author.strip()]
elif not isinstance(authors, list):
logger.warning(f"Unexpected authors format: {type(authors)}")
authors = []
# Filter out empty or invalid author names
valid_authors = []
for author in authors:
if isinstance(author, str) and len(author.strip()) > 0:
cleaned_author = author.strip()
# Basic validation - authors should have reasonable length
if 2 <= len(cleaned_author) <= 100:
valid_authors.append(cleaned_author)
logger.info(f"Successfully inferred {len(valid_authors)} authors from input")
return valid_authors
except ValueError as e:
logger.error(f"Input validation error: {e}")
return []
except Exception as e:
logger.error(f"Error inferring authors: {e}")
return []
def infer_paper_url(input_data: str) -> str:
"""
Infer the paper URL from various research-related inputs.
Args:
input_data: A URL, repository link, or other research-related input
Returns:
The paper URL (typically arXiv or Hugging Face papers), or empty string if not found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-paper", row_data)
return result.get("paper", "")
except Exception as e:
logger.error(f"Error inferring paper: {e}")
return ""
def infer_code_repository(input_data: str) -> str:
"""
Infer the code repository URL from research-related inputs.
Args:
input_data: A URL, paper link, or other research-related input
Returns:
The code repository URL (typically GitHub), or empty string if not found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-code", row_data)
return result.get("code", "")
except Exception as e:
logger.error(f"Error inferring code: {e}")
return ""
def infer_research_name(input_data: str) -> str:
"""
Infer the research paper or project name from various inputs.
Args:
input_data: A URL, repository link, or other research-related input
Returns:
The research name/title, or empty string if not found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-name", row_data)
return result.get("name", "")
except Exception as e:
logger.error(f"Error inferring name: {e}")
return ""
def classify_research_url(url: str) -> str:
"""
Classify the type of research-related URL or input.
This function determines what type of research resource a given URL
or input represents (paper, code, model, dataset, etc.).
Args:
url: The URL or input to classify
Returns:
The field type: "Paper", "Code", "Space", "Model", "Dataset", "Project", or "Unknown"
Examples:
>>> classify_research_url("https://arxiv.org/abs/2010.11929")
"Paper"
>>> classify_research_url("https://github.com/google-research/vision_transformer")
"Code"
>>> classify_research_url("https://huggingface.co/google/vit-base-patch16-224")
"Model"
"""
if not url or not url.strip():
return "Unknown"
try:
# Call the backend
result = make_backend_request("infer-field", {"value": url})
# Extract field from response
field = result.get("field", "Unknown")
return field if field else "Unknown"
except Exception as e:
logger.error(f"Error classifying URL: {e}")
return "Unknown"
def infer_organizations(input_data: str) -> List[str]:
"""
Infer affiliated organizations from research paper or project information.
Args:
input_data: A URL, paper title, or other research-related input
Returns:
A list of organization names, or empty list if no organizations found
"""
if not input_data or not input_data.strip():
return []
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-orgs", row_data)
orgs = result.get("orgs", [])
if isinstance(orgs, str):
orgs = [org.strip() for org in orgs.split(",") if org.strip()]
elif not isinstance(orgs, list):
orgs = []
return orgs
except Exception as e:
logger.error(f"Error inferring organizations: {e}")
return []
def infer_publication_date(input_data: str) -> str:
"""
Infer publication date from research paper or project information.
Args:
input_data: A URL, paper title, or other research-related input
Returns:
Publication date as string (YYYY-MM-DD format), or empty string if not found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-date", row_data)
return result.get("date", "")
except Exception as e:
logger.error(f"Error inferring publication date: {e}")
return ""
def infer_model(input_data: str) -> str:
"""
Infer associated HuggingFace model from research paper or project information.
Args:
input_data: A URL, paper title, or other research-related input
Returns:
HuggingFace model URL, or empty string if no model found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-model", row_data)
return result.get("model", "")
except Exception as e:
logger.error(f"Error inferring model: {e}")
return ""
def infer_dataset(input_data: str) -> str:
"""
Infer associated HuggingFace dataset from research paper or project information.
Args:
input_data: A URL, paper title, or other research-related input
Returns:
HuggingFace dataset URL, or empty string if no dataset found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-dataset", row_data)
return result.get("dataset", "")
except Exception as e:
logger.error(f"Error inferring dataset: {e}")
return ""
def infer_space(input_data: str) -> str:
"""
Infer associated HuggingFace space from research paper or project information.
Args:
input_data: A URL, paper title, or other research-related input
Returns:
HuggingFace space URL, or empty string if no space found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-space", row_data)
return result.get("space", "")
except Exception as e:
logger.error(f"Error inferring space: {e}")
return ""
def infer_license(input_data: str) -> str:
"""
Infer license information from research repository or project.
Args:
input_data: A URL, repository link, or other research-related input
Returns:
License name/type, or empty string if no license found
"""
if not input_data or not input_data.strip():
return ""
try:
row_data = create_row_data(input_data.strip())
result = make_backend_request("infer-license", row_data)
return result.get("license", "")
except Exception as e:
logger.error(f"Error inferring license: {e}")
return ""
def batch_infer_research(input_list: List[str], inference_type: str = "authors") -> List[Dict[str, Any]]:
"""
    Perform batch inference on multiple research items at scale.
This function processes multiple research URLs or titles simultaneously,
applying the specified inference type to each item. Useful for analyzing
large research datasets, comparing multiple papers, or building research
knowledge graphs.
Args:
input_list: List of URLs, paper titles, or research-related inputs to process
inference_type: Type of inference to perform on each item.
Options: "authors", "paper", "code", "name", "organizations",
"date", "model", "dataset", "space", "license", "classify"
Returns:
List of dictionaries, each containing:
- "input": The original input string
- "result": The inference result (format depends on inference_type)
- "success": Boolean indicating if inference succeeded
- "error": Error message if inference failed
Examples:
>>> papers = [
... "https://arxiv.org/abs/2010.11929",
... "https://arxiv.org/abs/1706.03762",
... "https://github.com/openai/gpt-2"
... ]
>>> results = batch_infer_research(papers, "authors")
>>> for result in results:
... print(f"{result['input']}: {len(result['result'])} authors")
>>> urls = ["https://huggingface.co/bert-base-uncased", "https://github.com/pytorch/pytorch"]
>>> classifications = batch_infer_research(urls, "classify")
Notes:
- Processing is done sequentially to avoid overwhelming the backend
- Failed inferences return empty results rather than raising exceptions
- Large batches may take significant time - consider chunking for very large datasets
"""
if not input_list:
return []
# Map inference types to their corresponding functions
inference_functions = {
"authors": infer_authors,
"paper": infer_paper_url,
"code": infer_code_repository,
"name": infer_research_name,
"organizations": infer_organizations,
"date": infer_publication_date,
"model": infer_model,
"dataset": infer_dataset,
"space": infer_space,
"license": infer_license,
"classify": classify_research_url,
}
if inference_type not in inference_functions:
logger.error(f"Invalid inference type: {inference_type}")
return []
inference_func = inference_functions[inference_type]
results = []
logger.info(f"Starting batch inference of type '{inference_type}' on {len(input_list)} items")
for i, input_item in enumerate(input_list):
try:
if not input_item or not isinstance(input_item, str):
results.append({
"input": str(input_item),
"result": None,
"success": False,
"error": "Invalid input: must be non-empty string"
})
continue
# Perform inference
result = inference_func(input_item)
results.append({
"input": input_item,
"result": result,
"success": True,
"error": None
})
logger.debug(f"Batch item {i+1}/{len(input_list)} completed successfully")
except Exception as e:
logger.error(f"Batch inference failed for item {i+1}: {e}")
results.append({
"input": input_item,
"result": None,
"success": False,
"error": str(e)
})
successful_count = sum(1 for r in results if r["success"])
logger.info(f"Batch inference completed: {successful_count}/{len(input_list)} successful")
return results
def find_research_relationships(input_data: str) -> Dict[str, Any]:
"""
Find ALL related research resources across platforms for comprehensive analysis.
This function performs a comprehensive analysis of a research item to find
all related resources including papers, code repositories, models, datasets,
spaces, and metadata. It's designed for building research knowledge graphs
and understanding the complete ecosystem around a research topic.
Args:
input_data: A URL, paper title, or other research-related input
Returns:
Dictionary containing all discovered related resources:
{
"paper": str | None, # Associated research paper
"code": str | None, # Code repository URL
"name": str | None, # Research/project name
"authors": List[str], # Author names
"organizations": List[str], # Affiliated organizations
"date": str | None, # Publication date
"model": str | None, # HuggingFace model URL
"dataset": str | None, # HuggingFace dataset URL
"space": str | None, # HuggingFace space URL
"license": str | None, # License information
"field_type": str | None, # Classification of input type
"success_count": int, # Number of successful inferences
"total_inferences": int # Total inferences attempted
}
Examples:
>>> relationships = find_research_relationships("https://arxiv.org/abs/2010.11929")
>>> print(f"Found {relationships['success_count']} related resources")
>>> print(f"Authors: {relationships['authors']}")
>>> print(f"Code: {relationships['code']}")
>>> print(f"Model: {relationships['model']}")
>>> ecosystem = find_research_relationships("Vision Transformer")
>>> if ecosystem['paper']:
... print(f"Paper: {ecosystem['paper']}")
>>> if ecosystem['code']:
... print(f"Implementation: {ecosystem['code']}")
"""
try:
# Validate input
cleaned_input = validate_input(input_data, "input_data")
# Initialize result structure
relationships = {
"paper": None,
"code": None,
"name": None,
"authors": [],
"organizations": [],
"date": None,
"model": None,
"dataset": None,
"space": None,
"license": None,
"field_type": None,
"success_count": 0,
"total_inferences": 11 # Number of inference types we'll attempt
}
# Define inference operations
inferences = [
("paper", infer_paper_url),
("code", infer_code_repository),
("name", infer_research_name),
("authors", infer_authors),
("organizations", infer_organizations),
("date", infer_publication_date),
("model", infer_model),
("dataset", infer_dataset),
("space", infer_space),
("license", infer_license),
("field_type", classify_research_url)
]
logger.info(f"Finding research relationships for: {cleaned_input}")
# Perform all inferences
for field_name, inference_func in inferences:
try:
result = inference_func(cleaned_input)
# Handle different return types
if isinstance(result, list) and result:
relationships[field_name] = result
relationships["success_count"] += 1
elif isinstance(result, str) and result.strip():
relationships[field_name] = result.strip()
relationships["success_count"] += 1
                # else: leave the default (None, or [] for list fields) to mark an unsuccessful inference
except Exception as e:
logger.warning(f"Failed to infer {field_name}: {e}")
# Continue with other inferences
logger.info(f"Research relationship analysis completed: {relationships['success_count']}/{relationships['total_inferences']} successful")
return relationships
except ValueError as e:
logger.error(f"Input validation error: {e}")
return {"error": str(e), "success_count": 0, "total_inferences": 0}
except Exception as e:
logger.error(f"Error finding research relationships: {e}")
return {"error": str(e), "success_count": 0, "total_inferences": 0}
def validate_research_urls(urls: List[str]) -> List[Dict[str, Any]]:
"""
Validate accessibility and format of research URLs at scale.
This function checks multiple research URLs for accessibility, format
validity, and basic content analysis. Useful for data cleaning,
link validation, and quality assurance of research datasets.
Args:
urls: List of URLs to validate
Returns:
List of validation results, each containing:
- "url": The original URL
- "accessible": Boolean indicating if URL is reachable
- "status_code": HTTP status code (if applicable)
- "format_valid": Boolean indicating if URL format is valid
- "platform": Detected platform (arxiv, github, huggingface, etc.)
- "error": Error message if validation failed
Examples:
>>> urls = [
... "https://arxiv.org/abs/2010.11929",
... "https://github.com/google-research/vision_transformer",
... "https://invalid-url-example"
... ]
>>> validation_results = validate_research_urls(urls)
>>> accessible_urls = [r for r in validation_results if r["accessible"]]
>>> print(f"{len(accessible_urls)}/{len(urls)} URLs are accessible")
"""
if not urls:
return []
results = []
logger.info(f"Validating {len(urls)} research URLs")
for url in urls:
result = {
"url": url,
"accessible": False,
"status_code": None,
"format_valid": False,
"platform": "unknown",
"error": None
}
try:
# Basic format validation
if not isinstance(url, str) or not url.strip():
result["error"] = "Invalid URL format: empty or non-string"
results.append(result)
continue
cleaned_url = url.strip()
# URL format validation
if not cleaned_url.startswith(("http://", "https://")):
result["error"] = "Invalid URL format: must start with http:// or https://"
results.append(result)
continue
result["format_valid"] = True
# Platform detection
if "arxiv.org" in cleaned_url:
result["platform"] = "arxiv"
elif "github.com" in cleaned_url:
result["platform"] = "github"
elif "huggingface.co" in cleaned_url:
result["platform"] = "huggingface"
elif "github.io" in cleaned_url:
result["platform"] = "github_pages"
# Accessibility check
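            # Note: some servers reject HEAD requests (e.g. 405) even when the
            # URL is fine; a GET fallback would be more thorough, but HEAD is
            # kept as a cheap probe that avoids downloading response bodies.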
try:
response = requests.head(cleaned_url, timeout=10, allow_redirects=True)
result["status_code"] = response.status_code
result["accessible"] = 200 <= response.status_code < 400
except requests.exceptions.Timeout:
result["error"] = "Timeout: URL not accessible within 10 seconds"
except requests.exceptions.ConnectionError:
result["error"] = "Connection error: Unable to reach URL"
except requests.exceptions.RequestException as e:
result["error"] = f"Request failed: {str(e)}"
except Exception as e:
result["error"] = f"Validation error: {str(e)}"
results.append(result)
accessible_count = sum(1 for r in results if r["accessible"])
logger.info(f"URL validation completed: {accessible_count}/{len(urls)} accessible")
return results
# Create Gradio interface
def create_demo():
"""Create the Gradio demo interface for testing."""
with gr.Blocks(title="Research Tracker MCP Server") as demo:
gr.Markdown("# Research Tracker MCP Server")
gr.Markdown("Test the comprehensive research inference utilities available through MCP. This server provides cross-platform research analysis, batch processing, and relationship discovery.")
# Core inference functions
with gr.TabItem("Core Inference"):
with gr.Tab("Authors"):
with gr.Row():
author_input = gr.Textbox(
label="Input (URL, paper title, etc.)",
placeholder="https://arxiv.org/abs/2010.11929",
lines=1
)
author_output = gr.JSON(label="Authors")
author_btn = gr.Button("Infer Authors")
author_btn.click(infer_authors, inputs=author_input, outputs=author_output)
with gr.Tab("Paper"):
with gr.Row():
paper_input = gr.Textbox(
label="Input (GitHub repo, project name, etc.)",
placeholder="https://github.com/google-research/vision_transformer",
lines=1
)
paper_output = gr.Textbox(label="Paper URL")
paper_btn = gr.Button("Infer Paper")
paper_btn.click(infer_paper_url, inputs=paper_input, outputs=paper_output)
with gr.Tab("Code"):
with gr.Row():
code_input = gr.Textbox(
label="Input (paper URL, project name, etc.)",
placeholder="https://arxiv.org/abs/2010.11929",
lines=1
)
code_output = gr.Textbox(label="Code Repository URL")
code_btn = gr.Button("Infer Code")
code_btn.click(infer_code_repository, inputs=code_input, outputs=code_output)
with gr.Tab("Name"):
with gr.Row():
name_input = gr.Textbox(
label="Input (URL, repo, etc.)",
placeholder="https://github.com/google-research/vision_transformer",
lines=1
)
name_output = gr.Textbox(label="Research Name/Title")
name_btn = gr.Button("Infer Name")
name_btn.click(infer_research_name, inputs=name_input, outputs=name_output)
with gr.Tab("Classify"):
with gr.Row():
classify_input = gr.Textbox(
label="URL to classify",
placeholder="https://huggingface.co/google/vit-base-patch16-224",
lines=1
)
classify_output = gr.Textbox(label="URL Type")
classify_btn = gr.Button("Classify URL")
classify_btn.click(classify_research_url, inputs=classify_input, outputs=classify_output)
# Extended inference functions
with gr.TabItem("Extended Inference"):
with gr.Tab("Organizations"):
with gr.Row():
orgs_input = gr.Textbox(
label="Input (paper URL, repo, etc.)",
placeholder="https://arxiv.org/abs/2010.11929",
lines=1
)
orgs_output = gr.JSON(label="Organizations")
orgs_btn = gr.Button("Infer Organizations")
orgs_btn.click(infer_organizations, inputs=orgs_input, outputs=orgs_output)
with gr.Tab("Publication Date"):
with gr.Row():
date_input = gr.Textbox(
label="Input (paper URL, repo, etc.)",
placeholder="https://arxiv.org/abs/2010.11929",
lines=1
)
date_output = gr.Textbox(label="Publication Date")
date_btn = gr.Button("Infer Date")
date_btn.click(infer_publication_date, inputs=date_input, outputs=date_output)
with gr.Tab("Model"):
with gr.Row():
model_input = gr.Textbox(
label="Input (paper URL, project name, etc.)",
placeholder="https://arxiv.org/abs/2010.11929",
lines=1
)
model_output = gr.Textbox(label="HuggingFace Model URL")
model_btn = gr.Button("Infer Model")
model_btn.click(infer_model, inputs=model_input, outputs=model_output)
with gr.Tab("Dataset"):
with gr.Row():
dataset_input = gr.Textbox(
label="Input (paper URL, project name, etc.)",
placeholder="https://arxiv.org/abs/1706.03762",
lines=1
)
dataset_output = gr.Textbox(label="HuggingFace Dataset URL")
dataset_btn = gr.Button("Infer Dataset")
dataset_btn.click(infer_dataset, inputs=dataset_input, outputs=dataset_output)
with gr.Tab("Space"):
with gr.Row():
space_input = gr.Textbox(
label="Input (model URL, paper, etc.)",
placeholder="https://huggingface.co/google/vit-base-patch16-224",
lines=1
)
space_output = gr.Textbox(label="HuggingFace Space URL")
space_btn = gr.Button("Infer Space")
space_btn.click(infer_space, inputs=space_input, outputs=space_output)
with gr.Tab("License"):
with gr.Row():
license_input = gr.Textbox(
label="Input (repository URL, project, etc.)",
placeholder="https://github.com/google-research/vision_transformer",
lines=1
)
license_output = gr.Textbox(label="License Information")
license_btn = gr.Button("Infer License")
license_btn.click(infer_license, inputs=license_input, outputs=license_output)
# Research intelligence functions
with gr.TabItem("Research Intelligence"):
with gr.Tab("Research Relationships"):
gr.Markdown("Find ALL related resources for comprehensive research analysis")
with gr.Row():
relationships_input = gr.Textbox(
label="Input (URL, paper title, etc.)",
placeholder="https://arxiv.org/abs/2010.11929",
lines=1
)
relationships_output = gr.JSON(label="Related Resources")
relationships_btn = gr.Button("Find Research Relationships")
relationships_btn.click(find_research_relationships, inputs=relationships_input, outputs=relationships_output)
with gr.Tab("Batch Processing"):
gr.Markdown("Process multiple research items simultaneously")
with gr.Row():
with gr.Column():
batch_input = gr.Textbox(
label="Input URLs/Titles (one per line)",
placeholder="https://arxiv.org/abs/2010.11929\nhttps://github.com/openai/gpt-2\nVision Transformer",
lines=5
)
batch_type = gr.Dropdown(
choices=["authors", "paper", "code", "name", "organizations", "date", "model", "dataset", "space", "license", "classify"],
value="authors",
label="Inference Type"
)
batch_output = gr.JSON(label="Batch Results")
def process_batch(input_text, inference_type):
if not input_text.strip():
return []
input_list = [line.strip() for line in input_text.strip().split('\n') if line.strip()]
return batch_infer_research(input_list, inference_type)
batch_btn = gr.Button("Process Batch")
batch_btn.click(process_batch, inputs=[batch_input, batch_type], outputs=batch_output)
with gr.Tab("URL Validation"):
gr.Markdown("Validate accessibility and format of research URLs")
with gr.Row():
with gr.Column():
url_input = gr.Textbox(
label="URLs to validate (one per line)",
placeholder="https://arxiv.org/abs/2010.11929\nhttps://github.com/google-research/vision_transformer\nhttps://huggingface.co/google/vit-base-patch16-224",
lines=5
)
url_output = gr.JSON(label="Validation Results")
def validate_urls(input_text):
if not input_text.strip():
return []
url_list = [line.strip() for line in input_text.strip().split('\n') if line.strip()]
return validate_research_urls(url_list)
url_btn = gr.Button("Validate URLs")
url_btn.click(validate_urls, inputs=url_input, outputs=url_output)
return demo
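# Launch note: mcp_server=True exposes the functions above as MCP tools in
# addition to the web UI; this requires Gradio's MCP extra
# (pip install "gradio[mcp]"). MCP clients typically connect via the Space's
# /gradio_api/mcp/sse endpoint -- the exact path depends on the Gradio
# version, so treat it as an assumption to verify against the Gradio docs.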
if __name__ == "__main__":
demo = create_demo()
demo.launch(mcp_server=True, share=False)