|
|
""" |
|
|
Research Tracker MCP Server |
|
|
|
|
|
A Gradio-based MCP server that provides research inference utilities. |
|
|
Delegates inference logic to the research-tracker-backend for consistency. |
|
|
""" |
|
|
|
|
|
import os |
|
|
import requests |
|
|
import gradio as gr |
|
|
from typing import List, Dict, Any, Optional |
|
|
import logging |
|
|
|
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
BACKEND_URL = "https://dylanebert-research-tracker-backend.hf.space" |
|
|
HF_TOKEN = os.environ.get("HF_TOKEN") |
|
|
REQUEST_TIMEOUT = 30 |
|
|
|
|
|
if not HF_TOKEN: |
|
|
logger.warning("HF_TOKEN not found in environment variables") |
|
|
|
|
|
|
|
|
def validate_input(input_data: str, input_name: str = "input") -> str:
    """
    Validate and sanitize input data.

    Args:
        input_data: The input string to validate
        input_name: Name of the input for error messages

    Returns:
        Cleaned input string

    Raises:
        ValueError: If input is invalid
    """
    if not input_data:
        raise ValueError(f"{input_name} cannot be empty or None")

    cleaned = input_data.strip()
    if not cleaned:
        raise ValueError(f"{input_name} cannot be empty after trimming")

    # Basic URL safety checks
    if cleaned.startswith(("http://", "https://")):
        if len(cleaned) > 2000:
            raise ValueError(f"{input_name} URL is too long (max 2000 characters)")

        suspicious_patterns = ["javascript:", "data:", "file:", "ftp:"]
        if any(pattern in cleaned.lower() for pattern in suspicious_patterns):
            raise ValueError(f"{input_name} contains invalid URL scheme")

    return cleaned
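# Illustrative behavior (hypothetical inputs, not executed at import time):
#   validate_input("  https://arxiv.org/abs/2010.11929 ")  -> "https://arxiv.org/abs/2010.11929"
#   validate_input("")                                      -> raises ValueError
#   validate_input("https://example.org/?q=javascript:x")   -> raises ValueError (embedded scheme)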
|
|
|
|
|
|
|
|
def make_backend_request(endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Make a request to the research-tracker-backend with comprehensive error handling.

    Args:
        endpoint: The backend endpoint to call (e.g., 'infer-authors')
        data: The data to send in the request body

    Returns:
        The response data from the backend

    Raises:
        Exception: If the request fails or returns an error
    """
    if not HF_TOKEN:
        logger.warning("HF_TOKEN not available - backend requests may fail")

    url = f"{BACKEND_URL}/{endpoint}"
    headers = {"Content-Type": "application/json"}
    if HF_TOKEN:
        headers["Authorization"] = f"Bearer {HF_TOKEN}"

    try:
        logger.debug(f"Making request to {endpoint} with data: {data}")
        response = requests.post(url, json=data, headers=headers, timeout=REQUEST_TIMEOUT)

        # Map common HTTP errors to actionable messages
        if response.status_code == 401:
            raise Exception("Authentication failed - please check HF_TOKEN")
        elif response.status_code == 403:
            raise Exception("Access forbidden - insufficient permissions")
        elif response.status_code == 404:
            raise Exception(f"Backend endpoint {endpoint} not found")
        elif response.status_code == 422:
            raise Exception("Invalid request data format")
        elif response.status_code >= 500:
            raise Exception(f"Backend server error (status {response.status_code})")

        response.raise_for_status()
        result = response.json()
        logger.debug(f"Backend response: {result}")
        return result

    except requests.exceptions.Timeout:
        raise Exception(f"Backend request to {endpoint} timed out after {REQUEST_TIMEOUT}s")
    except requests.exceptions.ConnectionError:
        raise Exception("Failed to connect to backend - service may be unavailable")
    except requests.exceptions.RequestException as e:
        raise Exception(f"Backend request to {endpoint} failed: {str(e)}") from e
    except ValueError as e:
        raise Exception(f"Invalid JSON response from backend: {str(e)}") from e
|
|
|
|
|
|
|
|
def create_row_data(input_data: str) -> Dict[str, Any]:
    """
    Create standardized row data structure for backend requests.

    Args:
        input_data: The input string to analyze

    Returns:
        Dictionary with appropriate field populated
    """
    row_data = {
        "Name": None,
        "Authors": [],
        "Paper": None,
        "Code": None,
        "Project": None,
        "Space": None,
        "Model": None,
        "Dataset": None,
    }

    # Route URLs to the appropriate field based on their platform;
    # non-URL inputs are treated as research names.
    if input_data.startswith(("http://", "https://")):
        if "arxiv.org" in input_data or "huggingface.co/papers" in input_data:
            row_data["Paper"] = input_data
        elif "github.com" in input_data:
            row_data["Code"] = input_data
        elif "github.io" in input_data:
            row_data["Project"] = input_data
        elif "huggingface.co/spaces" in input_data:
            row_data["Space"] = input_data
        elif "huggingface.co/datasets" in input_data:
            row_data["Dataset"] = input_data
        elif "huggingface.co/" in input_data:
            row_data["Model"] = input_data
        else:
            # Unrecognized URLs default to the Paper field
            row_data["Paper"] = input_data
    else:
        row_data["Name"] = input_data

    return row_data
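# Illustrative routing (keys with None values omitted for brevity):
#   create_row_data("https://github.com/google-research/vision_transformer")
#       -> {"Code": "https://github.com/google-research/vision_transformer", ...}
#   create_row_data("https://huggingface.co/datasets/squad")
#       -> {"Dataset": "https://huggingface.co/datasets/squad", ...}
#   create_row_data("Vision Transformer")
#       -> {"Name": "Vision Transformer", ...}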
|
|
|
|
|
|
|
|
def infer_authors(input_data: str) -> List[str]:
    """
    Infer authors from research paper or project information.

    This function attempts to extract author names from various inputs like
    paper URLs (arXiv, Hugging Face papers), project pages, or repository links.
    It uses the research-tracker-backend inference engine, which extracts
    authors from paper metadata and repository contributor information.

    Args:
        input_data: A URL, paper title, or other research-related input.
            Supports arXiv URLs, GitHub repositories, HuggingFace resources,
            project pages, and natural language paper titles.

    Returns:
        A list of author names as strings, or an empty list if no authors are
        found. Authors are returned in the order they appear in the original
        source.

    Examples:
        >>> infer_authors("https://arxiv.org/abs/2010.11929")
        ["Alexey Dosovitskiy", "Lucas Beyer", "Alexander Kolesnikov", ...]

        >>> infer_authors("https://github.com/google-research/vision_transformer")
        ["Alexey Dosovitskiy", "Lucas Beyer", ...]

        >>> infer_authors("Vision Transformer")
        ["Alexey Dosovitskiy", "Lucas Beyer", ...]

    Raises:
        Nothing - errors are logged and an empty list is returned.
    """
    try:
        cleaned_input = validate_input(input_data, "input_data")
        row_data = create_row_data(cleaned_input)
        result = make_backend_request("infer-authors", row_data)

        # Normalize the authors field to a list of strings
        authors = result.get("authors", [])
        if isinstance(authors, str):
            authors = [author.strip() for author in authors.split(",") if author.strip()]
        elif not isinstance(authors, list):
            logger.warning(f"Unexpected authors format: {type(authors)}")
            authors = []

        # Keep only plausible author names
        valid_authors = []
        for author in authors:
            if isinstance(author, str) and len(author.strip()) > 0:
                cleaned_author = author.strip()
                if 2 <= len(cleaned_author) <= 100:
                    valid_authors.append(cleaned_author)

        logger.info(f"Successfully inferred {len(valid_authors)} authors from input")
        return valid_authors

    except ValueError as e:
        logger.error(f"Input validation error: {e}")
        return []
    except Exception as e:
        logger.error(f"Error inferring authors: {e}")
        return []


def infer_paper_url(input_data: str) -> str:
    """
    Infer the paper URL from various research-related inputs.

    Args:
        input_data: A URL, repository link, or other research-related input

    Returns:
        The paper URL (typically arXiv or Hugging Face papers), or empty string if not found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-paper", row_data)
        return result.get("paper", "")

    except Exception as e:
        logger.error(f"Error inferring paper: {e}")
        return ""


def infer_code_repository(input_data: str) -> str:
    """
    Infer the code repository URL from research-related inputs.

    Args:
        input_data: A URL, paper link, or other research-related input

    Returns:
        The code repository URL (typically GitHub), or empty string if not found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-code", row_data)
        return result.get("code", "")

    except Exception as e:
        logger.error(f"Error inferring code: {e}")
        return ""


def infer_research_name(input_data: str) -> str:
    """
    Infer the research paper or project name from various inputs.

    Args:
        input_data: A URL, repository link, or other research-related input

    Returns:
        The research name/title, or empty string if not found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-name", row_data)
        return result.get("name", "")

    except Exception as e:
        logger.error(f"Error inferring name: {e}")
        return ""


def classify_research_url(url: str) -> str:
    """
    Classify the type of research-related URL or input.

    This function determines what type of research resource a given URL
    or input represents (paper, code, model, dataset, etc.).

    Args:
        url: The URL or input to classify

    Returns:
        The field type: "Paper", "Code", "Space", "Model", "Dataset", "Project", or "Unknown"

    Examples:
        >>> classify_research_url("https://arxiv.org/abs/2010.11929")
        "Paper"

        >>> classify_research_url("https://github.com/google-research/vision_transformer")
        "Code"

        >>> classify_research_url("https://huggingface.co/google/vit-base-patch16-224")
        "Model"
    """
    if not url or not url.strip():
        return "Unknown"

    try:
        # Unlike the other endpoints, infer-field takes a raw value rather
        # than a row data structure.
        result = make_backend_request("infer-field", {"value": url})

        field = result.get("field", "Unknown")
        return field if field else "Unknown"

    except Exception as e:
        logger.error(f"Error classifying URL: {e}")
        return "Unknown"


def infer_organizations(input_data: str) -> List[str]:
    """
    Infer affiliated organizations from research paper or project information.

    Args:
        input_data: A URL, paper title, or other research-related input

    Returns:
        A list of organization names, or empty list if no organizations found
    """
    if not input_data or not input_data.strip():
        return []

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-orgs", row_data)

        orgs = result.get("orgs", [])
        if isinstance(orgs, str):
            orgs = [org.strip() for org in orgs.split(",") if org.strip()]
        elif not isinstance(orgs, list):
            orgs = []

        return orgs

    except Exception as e:
        logger.error(f"Error inferring organizations: {e}")
        return []


def infer_publication_date(input_data: str) -> str:
    """
    Infer publication date from research paper or project information.

    Args:
        input_data: A URL, paper title, or other research-related input

    Returns:
        Publication date as string (YYYY-MM-DD format), or empty string if not found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-date", row_data)
        return result.get("date", "")

    except Exception as e:
        logger.error(f"Error inferring publication date: {e}")
        return ""


def infer_model(input_data: str) -> str:
    """
    Infer associated HuggingFace model from research paper or project information.

    Args:
        input_data: A URL, paper title, or other research-related input

    Returns:
        HuggingFace model URL, or empty string if no model found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-model", row_data)
        return result.get("model", "")

    except Exception as e:
        logger.error(f"Error inferring model: {e}")
        return ""


def infer_dataset(input_data: str) -> str:
    """
    Infer associated HuggingFace dataset from research paper or project information.

    Args:
        input_data: A URL, paper title, or other research-related input

    Returns:
        HuggingFace dataset URL, or empty string if no dataset found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-dataset", row_data)
        return result.get("dataset", "")

    except Exception as e:
        logger.error(f"Error inferring dataset: {e}")
        return ""


def infer_space(input_data: str) -> str:
    """
    Infer associated HuggingFace space from research paper or project information.

    Args:
        input_data: A URL, paper title, or other research-related input

    Returns:
        HuggingFace space URL, or empty string if no space found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-space", row_data)
        return result.get("space", "")

    except Exception as e:
        logger.error(f"Error inferring space: {e}")
        return ""


def infer_license(input_data: str) -> str:
    """
    Infer license information from research repository or project.

    Args:
        input_data: A URL, repository link, or other research-related input

    Returns:
        License name/type, or empty string if no license found
    """
    if not input_data or not input_data.strip():
        return ""

    try:
        row_data = create_row_data(input_data.strip())
        result = make_backend_request("infer-license", row_data)
        return result.get("license", "")

    except Exception as e:
        logger.error(f"Error inferring license: {e}")
        return ""
|
|
|
|
|
|
|
|
def batch_infer_research(input_list: List[str], inference_type: str = "authors") -> List[Dict[str, Any]]:
    """
    Perform batch inference on multiple research items for scale analysis.

    This function processes multiple research URLs or titles in one call,
    applying the specified inference type to each item sequentially. Useful
    for analyzing large research datasets, comparing multiple papers, or
    building research knowledge graphs.

    Args:
        input_list: List of URLs, paper titles, or research-related inputs to process
        inference_type: Type of inference to perform on each item.
            Options: "authors", "paper", "code", "name", "organizations",
            "date", "model", "dataset", "space", "license", "classify"

    Returns:
        List of dictionaries, each containing:
        - "input": The original input string
        - "result": The inference result (format depends on inference_type)
        - "success": Boolean indicating if inference succeeded
        - "error": Error message if inference failed

    Examples:
        >>> papers = [
        ...     "https://arxiv.org/abs/2010.11929",
        ...     "https://arxiv.org/abs/1706.03762",
        ...     "https://github.com/openai/gpt-2"
        ... ]
        >>> results = batch_infer_research(papers, "authors")
        >>> for result in results:
        ...     print(f"{result['input']}: {len(result['result'])} authors")

        >>> urls = ["https://huggingface.co/bert-base-uncased", "https://github.com/pytorch/pytorch"]
        >>> classifications = batch_infer_research(urls, "classify")

    Notes:
        - Processing is done sequentially to avoid overwhelming the backend
        - Failed inferences return empty results rather than raising exceptions
        - Large batches may take significant time - consider chunking for very large datasets
    """
    if not input_list:
        return []

    # Map each inference type to its implementing function
    inference_functions = {
        "authors": infer_authors,
        "paper": infer_paper_url,
        "code": infer_code_repository,
        "name": infer_research_name,
        "organizations": infer_organizations,
        "date": infer_publication_date,
        "model": infer_model,
        "dataset": infer_dataset,
        "space": infer_space,
        "license": infer_license,
        "classify": classify_research_url,
    }

    if inference_type not in inference_functions:
        logger.error(f"Invalid inference type: {inference_type}")
        return []

    inference_func = inference_functions[inference_type]
    results = []

    logger.info(f"Starting batch inference of type '{inference_type}' on {len(input_list)} items")

    for i, input_item in enumerate(input_list):
        try:
            if not input_item or not isinstance(input_item, str):
                results.append({
                    "input": str(input_item),
                    "result": None,
                    "success": False,
                    "error": "Invalid input: must be non-empty string"
                })
                continue

            result = inference_func(input_item)

            results.append({
                "input": input_item,
                "result": result,
                "success": True,
                "error": None
            })

            logger.debug(f"Batch item {i+1}/{len(input_list)} completed successfully")

        except Exception as e:
            logger.error(f"Batch inference failed for item {i+1}: {e}")
            results.append({
                "input": input_item,
                "result": None,
                "success": False,
                "error": str(e)
            })

    successful_count = sum(1 for r in results if r["success"])
    logger.info(f"Batch inference completed: {successful_count}/{len(input_list)} successful")

    return results


def find_research_relationships(input_data: str) -> Dict[str, Any]:
    """
    Find ALL related research resources across platforms for comprehensive analysis.

    This function performs a comprehensive analysis of a research item to find
    all related resources including papers, code repositories, models, datasets,
    spaces, and metadata. It's designed for building research knowledge graphs
    and understanding the complete ecosystem around a research topic.

    Args:
        input_data: A URL, paper title, or other research-related input

    Returns:
        Dictionary containing all discovered related resources:
        {
            "paper": str | None,          # Associated research paper
            "code": str | None,           # Code repository URL
            "name": str | None,           # Research/project name
            "authors": List[str],         # Author names
            "organizations": List[str],   # Affiliated organizations
            "date": str | None,           # Publication date
            "model": str | None,          # HuggingFace model URL
            "dataset": str | None,        # HuggingFace dataset URL
            "space": str | None,          # HuggingFace space URL
            "license": str | None,        # License information
            "field_type": str | None,     # Classification of input type
            "success_count": int,         # Number of successful inferences
            "total_inferences": int       # Total inferences attempted
        }

    Examples:
        >>> relationships = find_research_relationships("https://arxiv.org/abs/2010.11929")
        >>> print(f"Found {relationships['success_count']} related resources")
        >>> print(f"Authors: {relationships['authors']}")
        >>> print(f"Code: {relationships['code']}")
        >>> print(f"Model: {relationships['model']}")

        >>> ecosystem = find_research_relationships("Vision Transformer")
        >>> if ecosystem['paper']:
        ...     print(f"Paper: {ecosystem['paper']}")
        >>> if ecosystem['code']:
        ...     print(f"Implementation: {ecosystem['code']}")
    """
    try:
        cleaned_input = validate_input(input_data, "input_data")

        relationships = {
            "paper": None,
            "code": None,
            "name": None,
            "authors": [],
            "organizations": [],
            "date": None,
            "model": None,
            "dataset": None,
            "space": None,
            "license": None,
            "field_type": None,
            "success_count": 0,
            "total_inferences": 11
        }

        # Each inference runs independently so one failure cannot abort the rest
        inferences = [
            ("paper", infer_paper_url),
            ("code", infer_code_repository),
            ("name", infer_research_name),
            ("authors", infer_authors),
            ("organizations", infer_organizations),
            ("date", infer_publication_date),
            ("model", infer_model),
            ("dataset", infer_dataset),
            ("space", infer_space),
            ("license", infer_license),
            ("field_type", classify_research_url)
        ]

        logger.info(f"Finding research relationships for: {cleaned_input}")

        for field_name, inference_func in inferences:
            try:
                result = inference_func(cleaned_input)

                # Count only non-empty results as successes
                if isinstance(result, list) and result:
                    relationships[field_name] = result
                    relationships["success_count"] += 1
                elif isinstance(result, str) and result.strip():
                    relationships[field_name] = result.strip()
                    relationships["success_count"] += 1

            except Exception as e:
                logger.warning(f"Failed to infer {field_name}: {e}")

        logger.info(f"Research relationship analysis completed: {relationships['success_count']}/{relationships['total_inferences']} successful")
        return relationships

    except ValueError as e:
        logger.error(f"Input validation error: {e}")
        return {"error": str(e), "success_count": 0, "total_inferences": 0}
    except Exception as e:
        logger.error(f"Error finding research relationships: {e}")
        return {"error": str(e), "success_count": 0, "total_inferences": 0}


def validate_research_urls(urls: List[str]) -> List[Dict[str, Any]]:
    """
    Validate accessibility and format of research URLs at scale.

    This function checks multiple research URLs for accessibility, format
    validity, and basic content analysis. Useful for data cleaning,
    link validation, and quality assurance of research datasets.

    Args:
        urls: List of URLs to validate

    Returns:
        List of validation results, each containing:
        - "url": The original URL
        - "accessible": Boolean indicating if URL is reachable
        - "status_code": HTTP status code (if applicable)
        - "format_valid": Boolean indicating if URL format is valid
        - "platform": Detected platform (arxiv, github, huggingface, etc.)
        - "error": Error message if validation failed

    Examples:
        >>> urls = [
        ...     "https://arxiv.org/abs/2010.11929",
        ...     "https://github.com/google-research/vision_transformer",
        ...     "https://invalid-url-example"
        ... ]
        >>> validation_results = validate_research_urls(urls)
        >>> accessible_urls = [r for r in validation_results if r["accessible"]]
        >>> print(f"{len(accessible_urls)}/{len(urls)} URLs are accessible")
    """
    if not urls:
        return []

    results = []
    logger.info(f"Validating {len(urls)} research URLs")

    for url in urls:
        result = {
            "url": url,
            "accessible": False,
            "status_code": None,
            "format_valid": False,
            "platform": "unknown",
            "error": None
        }

        try:
            # Basic format validation
            if not isinstance(url, str) or not url.strip():
                result["error"] = "Invalid URL format: empty or non-string"
                results.append(result)
                continue

            cleaned_url = url.strip()

            if not cleaned_url.startswith(("http://", "https://")):
                result["error"] = "Invalid URL format: must start with http:// or https://"
                results.append(result)
                continue

            result["format_valid"] = True

            # Detect the hosting platform from the URL
            if "arxiv.org" in cleaned_url:
                result["platform"] = "arxiv"
            elif "github.com" in cleaned_url:
                result["platform"] = "github"
            elif "huggingface.co" in cleaned_url:
                result["platform"] = "huggingface"
            elif "github.io" in cleaned_url:
                result["platform"] = "github_pages"

            # Check accessibility with a lightweight HEAD request
            try:
                response = requests.head(cleaned_url, timeout=10, allow_redirects=True)
                result["status_code"] = response.status_code
                result["accessible"] = 200 <= response.status_code < 400

            except requests.exceptions.Timeout:
                result["error"] = "Timeout: URL not accessible within 10 seconds"
            except requests.exceptions.ConnectionError:
                result["error"] = "Connection error: Unable to reach URL"
            except requests.exceptions.RequestException as e:
                result["error"] = f"Request failed: {str(e)}"

        except Exception as e:
            result["error"] = f"Validation error: {str(e)}"

        results.append(result)

    accessible_count = sum(1 for r in results if r["accessible"])
    logger.info(f"URL validation completed: {accessible_count}/{len(urls)} accessible")

    return results


def create_demo():
    """Create the Gradio demo interface for testing."""
    with gr.Blocks(title="Research Tracker MCP Server") as demo:
        gr.Markdown("# Research Tracker MCP Server")
        gr.Markdown(
            "Test the comprehensive research inference utilities available through MCP. "
            "This server provides cross-platform research analysis, batch processing, "
            "and relationship discovery."
        )

        with gr.Tab("Core Inference"):
            with gr.Tab("Authors"):
                with gr.Row():
                    author_input = gr.Textbox(
                        label="Input (URL, paper title, etc.)",
                        placeholder="https://arxiv.org/abs/2010.11929",
                        lines=1
                    )
                author_output = gr.JSON(label="Authors")
                author_btn = gr.Button("Infer Authors")
                author_btn.click(infer_authors, inputs=author_input, outputs=author_output)

            with gr.Tab("Paper"):
                with gr.Row():
                    paper_input = gr.Textbox(
                        label="Input (GitHub repo, project name, etc.)",
                        placeholder="https://github.com/google-research/vision_transformer",
                        lines=1
                    )
                paper_output = gr.Textbox(label="Paper URL")
                paper_btn = gr.Button("Infer Paper")
                paper_btn.click(infer_paper_url, inputs=paper_input, outputs=paper_output)

            with gr.Tab("Code"):
                with gr.Row():
                    code_input = gr.Textbox(
                        label="Input (paper URL, project name, etc.)",
                        placeholder="https://arxiv.org/abs/2010.11929",
                        lines=1
                    )
                code_output = gr.Textbox(label="Code Repository URL")
                code_btn = gr.Button("Infer Code")
                code_btn.click(infer_code_repository, inputs=code_input, outputs=code_output)

            with gr.Tab("Name"):
                with gr.Row():
                    name_input = gr.Textbox(
                        label="Input (URL, repo, etc.)",
                        placeholder="https://github.com/google-research/vision_transformer",
                        lines=1
                    )
                name_output = gr.Textbox(label="Research Name/Title")
                name_btn = gr.Button("Infer Name")
                name_btn.click(infer_research_name, inputs=name_input, outputs=name_output)

            with gr.Tab("Classify"):
                with gr.Row():
                    classify_input = gr.Textbox(
                        label="URL to classify",
                        placeholder="https://huggingface.co/google/vit-base-patch16-224",
                        lines=1
                    )
                classify_output = gr.Textbox(label="URL Type")
                classify_btn = gr.Button("Classify URL")
                classify_btn.click(classify_research_url, inputs=classify_input, outputs=classify_output)

        with gr.Tab("Extended Inference"):
            with gr.Tab("Organizations"):
                with gr.Row():
                    orgs_input = gr.Textbox(
                        label="Input (paper URL, repo, etc.)",
                        placeholder="https://arxiv.org/abs/2010.11929",
                        lines=1
                    )
                orgs_output = gr.JSON(label="Organizations")
                orgs_btn = gr.Button("Infer Organizations")
                orgs_btn.click(infer_organizations, inputs=orgs_input, outputs=orgs_output)

            with gr.Tab("Publication Date"):
                with gr.Row():
                    date_input = gr.Textbox(
                        label="Input (paper URL, repo, etc.)",
                        placeholder="https://arxiv.org/abs/2010.11929",
                        lines=1
                    )
                date_output = gr.Textbox(label="Publication Date")
                date_btn = gr.Button("Infer Date")
                date_btn.click(infer_publication_date, inputs=date_input, outputs=date_output)

            with gr.Tab("Model"):
                with gr.Row():
                    model_input = gr.Textbox(
                        label="Input (paper URL, project name, etc.)",
                        placeholder="https://arxiv.org/abs/2010.11929",
                        lines=1
                    )
                model_output = gr.Textbox(label="HuggingFace Model URL")
                model_btn = gr.Button("Infer Model")
                model_btn.click(infer_model, inputs=model_input, outputs=model_output)

            with gr.Tab("Dataset"):
                with gr.Row():
                    dataset_input = gr.Textbox(
                        label="Input (paper URL, project name, etc.)",
                        placeholder="https://arxiv.org/abs/1706.03762",
                        lines=1
                    )
                dataset_output = gr.Textbox(label="HuggingFace Dataset URL")
                dataset_btn = gr.Button("Infer Dataset")
                dataset_btn.click(infer_dataset, inputs=dataset_input, outputs=dataset_output)

            with gr.Tab("Space"):
                with gr.Row():
                    space_input = gr.Textbox(
                        label="Input (model URL, paper, etc.)",
                        placeholder="https://huggingface.co/google/vit-base-patch16-224",
                        lines=1
                    )
                space_output = gr.Textbox(label="HuggingFace Space URL")
                space_btn = gr.Button("Infer Space")
                space_btn.click(infer_space, inputs=space_input, outputs=space_output)

            with gr.Tab("License"):
                with gr.Row():
                    license_input = gr.Textbox(
                        label="Input (repository URL, project, etc.)",
                        placeholder="https://github.com/google-research/vision_transformer",
                        lines=1
                    )
                license_output = gr.Textbox(label="License Information")
                license_btn = gr.Button("Infer License")
                license_btn.click(infer_license, inputs=license_input, outputs=license_output)

        with gr.Tab("Research Intelligence"):
            with gr.Tab("Research Relationships"):
                gr.Markdown("Find ALL related resources for comprehensive research analysis")
                with gr.Row():
                    relationships_input = gr.Textbox(
                        label="Input (URL, paper title, etc.)",
                        placeholder="https://arxiv.org/abs/2010.11929",
                        lines=1
                    )
                relationships_output = gr.JSON(label="Related Resources")
                relationships_btn = gr.Button("Find Research Relationships")
                relationships_btn.click(find_research_relationships, inputs=relationships_input, outputs=relationships_output)

            with gr.Tab("Batch Processing"):
                gr.Markdown("Process multiple research items in a single batch")
                with gr.Row():
                    with gr.Column():
                        batch_input = gr.Textbox(
                            label="Input URLs/Titles (one per line)",
                            placeholder="https://arxiv.org/abs/2010.11929\nhttps://github.com/openai/gpt-2\nVision Transformer",
                            lines=5
                        )
                        batch_type = gr.Dropdown(
                            choices=["authors", "paper", "code", "name", "organizations", "date", "model", "dataset", "space", "license", "classify"],
                            value="authors",
                            label="Inference Type"
                        )
                batch_output = gr.JSON(label="Batch Results")

                def process_batch(input_text, inference_type):
                    if not input_text.strip():
                        return []
                    input_list = [line.strip() for line in input_text.strip().split('\n') if line.strip()]
                    return batch_infer_research(input_list, inference_type)

                batch_btn = gr.Button("Process Batch")
                batch_btn.click(process_batch, inputs=[batch_input, batch_type], outputs=batch_output)

            with gr.Tab("URL Validation"):
                gr.Markdown("Validate accessibility and format of research URLs")
                with gr.Row():
                    with gr.Column():
                        url_input = gr.Textbox(
                            label="URLs to validate (one per line)",
                            placeholder="https://arxiv.org/abs/2010.11929\nhttps://github.com/google-research/vision_transformer\nhttps://huggingface.co/google/vit-base-patch16-224",
                            lines=5
                        )
                url_output = gr.JSON(label="Validation Results")

                def validate_urls(input_text):
                    if not input_text.strip():
                        return []
                    url_list = [line.strip() for line in input_text.strip().split('\n') if line.strip()]
                    return validate_research_urls(url_list)

                url_btn = gr.Button("Validate URLs")
                url_btn.click(validate_urls, inputs=url_input, outputs=url_output)

    return demo
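# With mcp_server=True below, Gradio exposes the inference functions wired into
# this demo as MCP tools. A client would connect to the running app's MCP
# endpoint; for Gradio this is typically served under /gradio_api/mcp/sse (an
# assumption based on Gradio's MCP integration, not configured in this module).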
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
demo = create_demo() |
|
|
demo.launch(mcp_server=True, share=False) |