| import gradio as gr | |
| import google.generativeai as genai | |
| import os | |
| from dotenv import load_dotenv | |
| from github import Github, RateLimitExceededException, GithubException | |
| import json | |
| from pathlib import Path | |
| from datetime import datetime, timedelta | |
| from collections import defaultdict | |
| import base64 | |
| from typing import Dict, List, Any, Optional, Tuple | |
| import tempfile | |
| from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type | |
| import asyncio | |
| import aiohttp | |
| import re | |
| import ast | |
| from concurrent.futures import ThreadPoolExecutor | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from packaging import version | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import networkx as nx | |
| import math | |
| import logging | |
| import time | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| # Load environment variables (consider handling missing .env) | |
| load_dotenv() | |
| # --- Constants and Global Variables --- | |
| # Store API tokens globally | |
| GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") # GitHub token from the environment | |
| GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") # Gemini API key from the environment | |
| # Constants for rate limiting - make them configurable if needed | |
| MIN_RATE_LIMIT_BUFFER = 50 # Keep a buffer to avoid hitting the limit | |
| INITIAL_BACKOFF = 60 # Initial backoff time in seconds | |
| # Enhanced relevant file extensions | |
| RELEVANT_EXTENSIONS = { | |
| ".py": "Python", | |
| ".js": "JavaScript", | |
| ".ts": "TypeScript", | |
| ".jsx": "React", | |
| ".tsx": "React TypeScript", | |
| ".java": "Java", | |
| ".cpp": "C++", | |
| ".c": "C", | |
| ".h": "C Header", | |
| ".hpp": "C++ Header", | |
| ".rb": "Ruby", | |
| ".php": "PHP", | |
| ".go": "Go", | |
| ".rs": "Rust", | |
| ".swift": "Swift", | |
| ".kt": "Kotlin", | |
| ".cs": "C#", | |
| ".scala": "Scala", | |
| ".r": "R", | |
| ".dart": "Dart", | |
| ".lua": "Lua", | |
| ".sql": "SQL", | |
| ".sh": "Shell", | |
| ".md": "Markdown", # Include Markdown for documentation analysis | |
| ".txt": "Text", | |
| ".json": "JSON", | |
| ".yml": "YAML", | |
| ".yaml": "YAML", | |
| ".xml": "XML", | |
| ".html": "HTML", | |
| ".css": "CSS" | |
| } | |
| # --- Initialization and Validation --- | |
| def validate_github_token(token: str) -> Tuple[bool, str]: | |
| """ | |
| Validate GitHub token before proceeding with analysis. | |
| Returns (is_valid: bool, message: str) | |
| """ | |
| if not token: | |
| return False, "GitHub token is missing." # Check for missing | |
| try: | |
| gh = Github(token) | |
| user = gh.get_user() | |
| username = user.login # Accessing a property forces the authentication to actually be exercised | |
| rate_limit = gh.get_rate_limit() | |
| remaining = rate_limit.core.remaining | |
| if remaining == 0: # No requests left in the current window | |
| reset_time = rate_limit.core.reset.strftime("%Y-%m-%d %H:%M:%S UTC") | |
| return False, f"Rate limit exceeded. Resets at {reset_time}" | |
| return True, f"Token validated successfully (authenticated as {username})" | |
| except GithubException as e: | |
| if e.status == 401: | |
| return False, "Invalid token - authentication failed" | |
| elif e.status == 403: | |
| return False, "Token lacks required permissions or rate limit exceeded" #more specific 403 message | |
| elif e.status == 404: | |
| return False, "Invalid token or API endpoint not found" # More specific 404 message | |
| else: | |
| return False, f"GitHub error (status {e.status}): {e.data.get('message', str(e))}" | |
| except Exception as e: # General exception handling as a fallback. | |
| return False, f"Error validating token: {str(e)}" | |
| def initialize_tokens(github_token: str, gemini_key: str) -> str: | |
| """Initialize API tokens globally with enhanced validation (using env vars now).""" | |
| global GITHUB_TOKEN, GEMINI_API_KEY | |
| if not github_token or not gemini_key: | |
| return "β Both GitHub and Gemini API keys are required." | |
| is_valid, message = validate_github_token(github_token) | |
| if not is_valid: | |
| return f"β GitHub token validation failed: {message}" | |
| try: | |
| genai.configure(api_key=gemini_key) | |
| model = genai.GenerativeModel('gemini-1.0-pro') | |
| response = model.generate_content("Test") | |
| if response.text is None: # No text back means the key did not produce a usable response | |
| return "❌ Invalid Gemini API key (no response)" | |
| except Exception as e: | |
| return f"❌ Gemini API key validation failed: {str(e)}" | |
| GITHUB_TOKEN = github_token # Overwrite with validated tokens | |
| GEMINI_API_KEY = gemini_key | |
| return "✅ All tokens validated and initialized successfully!" | |
| # --- Classes --- | |
| class GitHubAPIHandler: | |
| """Enhanced GitHub API handler with minimal authentication checks and robust error handling.""" | |
| def __init__(self, token: Optional[str] = None): | |
| self.logger = logging.getLogger(__name__) | |
| self.token = token | |
| self._min_rate_limit_buffer = MIN_RATE_LIMIT_BUFFER | |
| self._initial_backoff = INITIAL_BACKOFF | |
| if not self.token: | |
| raise ValueError("GitHub token not provided") | |
| # Create the GitHub client *within* the class | |
| self.gh = self._create_github_client() | |
| def _create_github_client(self) -> Github: | |
| """Create GitHub client with enhanced error handling""" | |
| try: | |
| # Create Github instance with basic configuration | |
| gh = Github( | |
| self.token, | |
| retry=3, # Number of retries for failed requests | |
| timeout=30, # Timeout in seconds | |
| per_page=100 # Maximum items per page | |
| ) | |
| # Verify authentication | |
| try: | |
| user = gh.get_user() | |
| self.logger.info(f"Authenticated as: {user.login}") | |
| except GithubException as e: | |
| if e.status == 401: | |
| raise ValueError("Invalid GitHub token - authentication failed") | |
| elif e.status == 403: | |
| raise ValueError("GitHub token lacks required permissions or rate limit exceeded") | |
| else: | |
| raise ValueError(f"GitHub initialization failed: {str(e)}") | |
| return gh # Return the authenticated client | |
| except Exception as e: | |
| raise ValueError(f"Failed to initialize GitHub client: {str(e)}") # More informative error | |
| def get_repository(self, repo_url: str) -> Any: | |
| """Get repository object using PyGithub, with error handling and validation.""" | |
| try: | |
| parts = repo_url.rstrip('/').split('/') | |
| if len(parts) < 2: | |
| raise ValueError(f"Invalid repository URL format: {repo_url}") | |
| owner = parts[-2] | |
| repo_name = parts[-1] | |
| # Using PyGithub's get_repo method | |
| repo = self.gh.get_repo(f"{owner}/{repo_name}") | |
| return repo # Return the repo object | |
| except GithubException as e: # Specifically handle Github exceptions | |
| if e.status == 404: | |
| raise ValueError(f"Repository not found: {owner}/{repo_name}") | |
| elif e.status == 403: | |
| self._handle_forbidden_error() # Handle forbidden access (rate limits, etc.) | |
| raise #Re raise the exception so program doesn't continue | |
| else: | |
| raise ValueError(f"Failed to access repository: {str(e)}") | |
| except Exception as e: #catch all other exception. | |
| raise ValueError(f"Failed to access repository(An unexpected error occurred):{str(e)}") | |
| def _check_rate_limits(self): | |
| """Enhanced rate limit checking with predictive waiting.""" | |
| try: | |
| rate_limit = self.gh.get_rate_limit() | |
| remaining = rate_limit.core.remaining | |
| reset_time = rate_limit.core.reset.timestamp() | |
| self.logger.info(f"Rate limit - Remaining: {remaining}, Reset: {datetime.fromtimestamp(reset_time)}") | |
| if remaining < self._min_rate_limit_buffer: | |
| wait_time = self._get_rate_limit_wait_time() | |
| if wait_time > 0: # Only log if there's a wait. | |
| self.logger.warning(f"Approaching rate limit. Waiting {wait_time:.2f} seconds.") | |
| time.sleep(wait_time) # Wait before hitting the limit | |
| except GithubException as e: # Be specific about the exceptions you handle | |
| self.logger.error(f"Error checking rate limits: {str(e)}") | |
| time.sleep(60) # Wait a reasonable amount of time even if you cannot check | |
| except Exception as e: # Always have general exception to handle | |
| self.logger.error(f"Unexpected Error: {str(e)}") #General unexpected Error handle. | |
| time.sleep(60) | |
| def _get_rate_limit_wait_time(self) -> float: | |
| """Calculate the time to wait until the rate limit resets.""" | |
| try: | |
| rate_limit = self.gh.get_rate_limit() | |
| reset_time = rate_limit.core.reset.timestamp() | |
| current_time = time.time() | |
| return max(0, reset_time - current_time + 1) # Add 1 second buffer | |
| except Exception: | |
| return self._initial_backoff # Fallback on any error in getting rate limits | |
| def _handle_forbidden_error(self): | |
| """Handle a 403 Forbidden error from the GitHub API.""" | |
| try: | |
| # Check if it's a rate limit issue. | |
| rate_limit = self.gh.get_rate_limit() | |
| if rate_limit.core.remaining == 0: | |
| wait_time = self._get_rate_limit_wait_time() | |
| self.logger.warning(f"Rate limit exceeded. Waiting {wait_time:.2f} seconds.") | |
| time.sleep(wait_time) | |
| else: | |
| # If not rate limited, then likely a permissions issue | |
| self.logger.error("Access forbidden. Token may lack required permissions.") | |
| except Exception as e: #handling other errors. | |
| self.logger.error(f"Error handling forbidden response: {str(e)}") | |
| def get_file_content(self, repo: Any, path: str) -> Optional[Any]: | |
| """Get a file's ContentFile object, with rate limit checks and error handling.""" | |
| try: | |
| self._check_rate_limits() # Check rate limits *before* each attempt. | |
| content = repo.get_contents(path) | |
| return content | |
| except GithubException as e: | |
| if e.status == 404: | |
| self.logger.warning(f"File not found: {path}") # 404 is not critical. | |
| return None # explicitly return None | |
| elif e.status == 403: # Explicitly handle forbidden | |
| self._handle_forbidden_error() # Rate limiting or other access problem | |
| raise # Raise after handling (waiting, logging). | |
| # Any other GitHub error is an issue - log and re-raise | |
| self.logger.error(f"Error getting file content: {str(e)}") | |
| raise # Re-raise after logging | |
| except Exception as e: # General exception for unexpected issues | |
| self.logger.error(f"Unexpected error: {str(e)}") | |
| raise | |
| class CodeMetricsAnalyzer: | |
| """Handles detailed code metrics analysis with proper error handling.""" | |
| def __init__(self): | |
| self.logger = logging.getLogger(__name__) | |
| self.size_metrics_cache = {} # Consider if needed with parallelization | |
| def calculate_halstead_metrics(self, content: str, language: str = "Unknown") -> Dict[str, float]: | |
| """ | |
| Calculate Halstead complexity metrics for code. | |
| """ | |
| try: | |
| # Define language-specific operators (more comprehensive) | |
| operators = { | |
| "Python": set([ | |
| '+', '-', '*', '/', '//', '**', '%', '==', '!=', '>', '<', '>=', '<=', | |
| 'and', 'or', 'not', 'is', 'in', '+=', '-=', '*=', '/=', '=', | |
| 'if', 'elif', 'else', 'for', 'while', 'def', 'class', 'return', | |
| 'yield', 'raise', 'break', 'continue', 'pass', 'assert', | |
| 'import', 'from', 'as', 'try', 'except', 'finally', 'with', 'async', 'await' | |
| ]), | |
| "JavaScript": set([ | |
| '+', '-', '*', '/', '%', '**', '==', '===', '!=', '!==', '>', '<', | |
| '>=', '<=', '&&', '||', '!', '=', '+=', '-=', '*=', '/=', | |
| 'if', 'else', 'for', 'while', 'function', 'return', 'class', | |
| 'new', 'delete', 'typeof', 'instanceof', 'void', 'try', 'catch', | |
| 'finally', 'throw', 'break', 'continue', 'default', 'case', 'async', 'await' | |
| ]), | |
| "Java": set([ # Added Java operators | |
| '+', '-', '*', '/', '%', '++', '--', '==', '!=', '>', '<', '>=', '<=', | |
| '&&', '||', '!', '=', '+=', '-=', '*=', '/=', '%=', | |
| 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default', | |
| 'break', 'continue', 'return', 'try', 'catch', 'finally', 'throw', 'throws', | |
| 'class', 'interface', 'extends', 'implements', 'new', 'instanceof', 'this', 'super' | |
| ]), | |
| }.get(language, set(['+', '-', '*', '/', '=', '==', '>', '<', '>=', '<='])) | |
| unique_operators = set() | |
| unique_operands = set() | |
| total_operators = 0 | |
| total_operands = 0 | |
| lines = content.splitlines() | |
| for line in lines: | |
| line = line.strip() | |
| if line.startswith(('#', '//', '/*', '*')): # Handle comments | |
| continue | |
| for operator in operators: | |
| if operator in line: | |
| unique_operators.add(operator) | |
| total_operators += line.count(operator) | |
| # Improved operand counting (numbers, strings, identifiers) | |
| numbers = re.findall(r'\b\d+(?:\.\d+)?\b', line) | |
| unique_operands.update(numbers) | |
| total_operands += len(numbers) | |
| strings = re.findall(r'["\'][^"\']*["\']', line) | |
| unique_operands.update(strings) | |
| total_operands += len(strings) | |
| identifiers = re.findall(r'\b[a-zA-Z_]\w*\b', line) | |
| for ident in identifiers: | |
| if ident not in operators: | |
| unique_operands.add(ident) | |
| total_operands += 1 | |
| n1 = len(unique_operators) | |
| n2 = len(unique_operands) | |
| N1 = total_operators | |
| N2 = total_operands | |
| # Handle edge cases to avoid division by zero | |
| if n1 > 0 and n2 > 0: | |
| program_length = N1 + N2 | |
| vocabulary = n1 + n2 | |
| volume = program_length * (math.log2(vocabulary) if vocabulary > 0 else 0) | |
| difficulty = (n1 * N2) / (2 * n2) if n2 > 0 else 0 | |
| effort = volume * difficulty | |
| time = effort / 18 # Standard Halstead time estimation | |
| else: | |
| program_length = vocabulary = volume = difficulty = effort = time = 0 | |
| return { | |
| "halstead_unique_operators": n1, | |
| "halstead_unique_operands": n2, | |
| "halstead_total_operators": N1, | |
| "halstead_total_operands": N2, | |
| "halstead_program_length": program_length, | |
| "halstead_vocabulary": vocabulary, | |
| "halstead_volume": volume, | |
| "halstead_difficulty": difficulty, | |
| "halstead_effort": effort, | |
| "halstead_time": time | |
| } | |
| except Exception as e: | |
| self.logger.error(f"Error calculating Halstead metrics: {str(e)}") | |
| # Return default 0 values for all metrics on error | |
| return {metric: 0 for metric in [ | |
| "halstead_unique_operators", "halstead_unique_operands", | |
| "halstead_total_operators", "halstead_total_operands", | |
| "halstead_program_length", "halstead_vocabulary", | |
| "halstead_volume", "halstead_difficulty", "halstead_effort", "halstead_time" | |
| ]} | |
| def calculate_comment_density(self, content: str, language: str = "Unknown") -> Dict[str, Any]: | |
| try: | |
| metrics = { | |
| "comment_lines": 0, | |
| "code_lines": 0, | |
| "blank_lines": 0, | |
| "comment_density": 0.0, | |
| "docstring_lines": 0, # Docstrings (Python) | |
| "total_lines": 0, #Total no of line. | |
| "inline_comments": 0 | |
| } | |
| patterns = { | |
| "Python": { | |
| "single_line": ["#"], | |
| "multi_start": ['"""', "'''"], | |
| "multi_end": ['"""', "'''"], | |
| "inline_start": "#" | |
| }, | |
| "JavaScript": { | |
| "single_line": ["//"], | |
| "multi_start": ["/*"], | |
| "multi_end": ["*/"], | |
| "inline_start": "//" | |
| }, | |
| "Java": { # Added Java comment patterns | |
| "single_line": ["//"], | |
| "multi_start": ["/*"], | |
| "multi_end": ["*/"], | |
| "inline_start": "//" | |
| } | |
| }.get(language, { | |
| "single_line": ["//", "#"], | |
| "multi_start": ["/*", '"""', "'''"], | |
| "multi_end": ["*/", '"""', "'''"], | |
| "inline_start": ["//", "#"] | |
| }) | |
| lines = content.splitlines() | |
| in_multiline_comment = False | |
| current_multiline_delimiter = None | |
| for line in lines: | |
| stripped = line.strip() | |
| metrics["total_lines"] += 1 | |
| if not stripped: | |
| metrics["blank_lines"] += 1 | |
| continue | |
| if not in_multiline_comment: | |
| for delimiter in patterns["multi_start"]: | |
| if stripped.startswith(delimiter): | |
| in_multiline_comment = True | |
| current_multiline_delimiter = delimiter | |
| metrics["comment_lines"] += 1 | |
| if delimiter in ['"""', "'''"]: | |
| metrics["docstring_lines"] += 1 | |
| break | |
| elif delimiter in stripped: # Handle same-line multi-line comments | |
| end_delimiter = "*/" if delimiter == "/*" else delimiter | |
| if end_delimiter in stripped[stripped.index(delimiter) + len(delimiter):]: | |
| metrics["comment_lines"] += 1 | |
| if delimiter in ['"""', "'''"]: | |
| metrics["docstring_lines"] += 1 | |
| break | |
| if not in_multiline_comment: | |
| is_comment = False | |
| for prefix in patterns["single_line"]: | |
| if stripped.startswith(prefix): | |
| metrics["comment_lines"] += 1 | |
| is_comment = True | |
| break | |
| elif prefix in stripped: # Count inline comments | |
| metrics["inline_comments"] += 1 | |
| break | |
| if not is_comment: | |
| metrics["code_lines"] += 1 | |
| else: | |
| metrics["comment_lines"] += 1 | |
| if current_multiline_delimiter in ['"""', "'''"]: | |
| metrics["docstring_lines"] += 1 | |
| # Check whether the current multi-line delimiter closes on this line | |
| if current_multiline_delimiter in stripped: | |
| # Handle triple quotes properly | |
| if current_multiline_delimiter in ['"""', "'''"] and \ | |
| stripped.count(current_multiline_delimiter) == 1: | |
| continue # | |
| in_multiline_comment = False | |
| current_multiline_delimiter = None | |
| non_blank_lines = metrics["total_lines"] - metrics["blank_lines"] # Lines that are not blank | |
| if non_blank_lines > 0: | |
| metrics["comment_density"] = (metrics["comment_lines"] + metrics["inline_comments"]) / non_blank_lines * 100 | |
| metrics["docstring_density"] = metrics["docstring_lines"] / non_blank_lines * 100 | |
| if language == "Python": | |
| # Check for module-level docstring | |
| if len(lines) > 0 and (lines[0].strip().startswith('"""') or lines[0].strip().startswith("'''")): | |
| metrics["has_module_docstring"] = True | |
| metrics["module_docstring_lines"] = sum(1 for line in lines | |
| if '"""' not in line and "'''" not in line | |
| and bool(line.strip()))#counts the number of lines within a module-level docstring that are not the delimiters themselves and contain actual text | |
| else: | |
| metrics["has_module_docstring"] = False | |
| metrics["module_docstring_lines"] = 0 | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error calculating comment density: {str(e)}") | |
| # Return 0s for all density metrics on error | |
| return { | |
| "comment_lines": 0, | |
| "code_lines": 0, | |
| "blank_lines": 0, | |
| "comment_density": 0.0, | |
| "docstring_lines": 0, | |
| "total_lines": 0, | |
| "inline_comments": 0, | |
| "error": str(e) # Include the error message | |
| } | |
| def calculate_cyclomatic_complexity(self, content: str, language: str = "Unknown") -> Dict[str, Any]: | |
| """Calculate cyclomatic complexity metrics for code with language-specific handling.""" | |
| metrics = { | |
| "complexity": 1, # Base complexity (always start at 1) | |
| "cognitive_complexity": 0, | |
| "max_nesting_depth": 0 | |
| } | |
| try: | |
| lines = content.splitlines() | |
| current_depth = 0 | |
| # Language-specific complexity indicators (expanded) | |
| complexity_keywords = { | |
| "Python": { | |
| "if", "else", "elif", "for", "while", "try", "except", "with", | |
| "async for", "async with", "break", "continue" | |
| }, | |
| "JavaScript": { | |
| "if", "else", "for", "while", "try", "catch", "switch", "case", | |
| "break", "continue", "&&", "||", "?", "async", "await" # Add async/await | |
| }, | |
| "Java": { # Added Java keywords | |
| "if", "else", "for", "while", "do", "switch", "case", "default", | |
| "break", "continue", "try", "catch", "finally" | |
| } | |
| # Add more language-specific keywords as needed | |
| }.get(language, { | |
| # Default keywords for unknown languages | |
| "if", "else", "elif", "for", "while", "try", "catch", "case", "switch", | |
| "&&", "||", "?", "except", "finally", "with" | |
| }) | |
| for line in lines: | |
| # Calculate nesting depth | |
| opens = line.count('{') - line.count('}') | |
| current_depth += opens | |
| metrics["max_nesting_depth"] = max(metrics["max_nesting_depth"], current_depth) | |
| # Increment complexity for control structures | |
| stripped_line = line.strip() | |
| for keyword in complexity_keywords: | |
| if keyword in stripped_line and not stripped_line.startswith(("//", "#", "/*", "*")): # Exclude comments | |
| metrics["complexity"] += 1 | |
| metrics["cognitive_complexity"] += (1 + current_depth) # Cognitive complexity increase | |
| if language == "Python": | |
| # Add complexity for list/dict comprehensions | |
| if "for" in stripped_line and ("[" in stripped_line or "{" in stripped_line): | |
| metrics["complexity"] += 1 | |
| metrics["cognitive_complexity"] += 1 # Also add to cognitive | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error calculating complexity: {str(e)}") | |
| # Return defaults, not just an error string, but also include 1 as base. | |
| return { | |
| "complexity": 1, # Ensure baseline complexity | |
| "cognitive_complexity": 0, | |
| "max_nesting_depth": 0 | |
| } | |
| def detect_code_duplication(self, content: str, min_lines: int = 6) -> Dict[str, Any]: | |
| """Detect code duplication within the content""" | |
| try: | |
| metrics = { | |
| "duplicate_blocks": 0, | |
| "duplicate_lines": 0, | |
| "duplication_percentage": 0.0 | |
| } | |
| lines = content.splitlines() | |
| total_lines = len(lines) | |
| # Return early if there are not enough lines | |
| if total_lines < min_lines: | |
| return metrics | |
| blocks = {} | |
| for i in range(total_lines - min_lines + 1): | |
| block = '\n'.join(lines[i:i + min_lines]) | |
| normalized_block = self._normalize_code_block(block) | |
| if normalized_block.strip(): # Ignore all-whitespace blocks | |
| if normalized_block in blocks: | |
| blocks[normalized_block].append(i) | |
| else: | |
| blocks[normalized_block] = [i] | |
| duplicate_line_set = set() # Track duplicate line indices using a *set* | |
| for block, positions in blocks.items(): | |
| if len(positions) > 1: | |
| metrics["duplicate_blocks"] += 1 # Count duplicate blocks | |
| for pos in positions: | |
| for i in range(pos, pos + min_lines): # Add all lines in duplicate block | |
| duplicate_line_set.add(i) | |
| metrics["duplicate_lines"] = len(duplicate_line_set) # Total count of duplicated lines | |
| if total_lines > 0: | |
| metrics["duplication_percentage"] = (metrics["duplicate_lines"] / total_lines) * 100 # Duplication metrics calcutation. | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error detecting code duplication: {str(e)}") | |
| # Return 0 for all duplication metrics in case of error | |
| return { | |
| "duplicate_blocks": 0, | |
| "duplicate_lines": 0, | |
| "duplication_percentage": 0.0 | |
| } | |
| def _normalize_code_block(self, block: str) -> str: | |
| """Normalize a block of code for comparison by removing comments, whitespace, etc.""" | |
| lines = [] | |
| for line in block.splitlines(): | |
| # Remove comments (handle both Python and JavaScript/Java comments) | |
| line = re.sub(r'#.*$', '', line) # Python comments | |
| line = re.sub(r'//.*$', '', line) # JavaScript comments | |
| line = re.sub(r'/\*.*?\*/', '', line) # Multi-line comments | |
| # Normalize whitespace | |
| line = re.sub(r'\s+', ' ', line.strip()) | |
| if line: # Add non-empty lines | |
| lines.append(line) | |
| return '\n'.join(lines) | |
| def calculate_size_metrics(self, content: str, language: str = "Unknown") -> Dict[str, Any]: | |
| try: | |
| metrics = { | |
| "size_bytes": len(content), | |
| "total_lines": 0, | |
| "code_lines": 0, | |
| "blank_lines": 0, | |
| "comment_lines": 0, | |
| "avg_line_length": 0, | |
| "max_line_length": 0, | |
| "file_entropy": 0, # Added file entropy. | |
| } | |
| comments = { # handling diff comments. | |
| "Python": { | |
| "line_comment": "#", | |
| "block_start": ['"""', "'''"], | |
| "block_end": ['"""', "'''"] | |
| }, | |
| "JavaScript": { | |
| "line_comment": "//", | |
| "block_start": ["/*"], | |
| "block_end": ["*/"] | |
| }, | |
| "Java": { # Added Java comment definitions | |
| "line_comment": "//", | |
| "block_start": ["/*"], | |
| "block_end": ["*/"] | |
| } | |
| }.get(language, { | |
| "line_comment": "#", | |
| "block_start": ["/*", '"""', "'''"], | |
| "block_end": ["*/", '"""', "'''"] | |
| }) | |
| lines = content.splitlines() | |
| total_length = 0 # Track the total character count of all lines | |
| char_counts = {} # Character frequencies, used for the entropy calculation | |
| in_block_comment = False | |
| for line in lines: | |
| metrics["total_lines"] += 1 | |
| line_length = len(line) #length of lines | |
| total_length += line_length | |
| metrics["max_line_length"] = max(metrics["max_line_length"], line_length) | |
| for char in line: | |
| char_counts[char] = char_counts.get(char, 0) + 1 | |
| stripped = line.strip() | |
| if not stripped: | |
| metrics["blank_lines"] += 1 | |
| continue | |
| if not in_block_comment: | |
| is_comment = False | |
| for start in comments["block_start"]: | |
| if stripped.startswith(start): # Use startswith on the stripped line. | |
| in_block_comment = True | |
| metrics["comment_lines"] += 1 | |
| is_comment = True | |
| break # Stop at the first matching block delimiter | |
| if not is_comment: # Not the start of a block comment | |
| if stripped.startswith(comments["line_comment"]): # Line comment vs. code | |
| metrics["comment_lines"] += 1 | |
| else: | |
| metrics["code_lines"] += 1 | |
| else: | |
| metrics["comment_lines"] += 1 #comment lines | |
| for end in comments["block_end"]: # Block end condition. | |
| if end in stripped: # check comment block ends | |
| in_block_comment = False # | |
| break # | |
| if metrics["total_lines"] > 0: | |
| metrics["avg_line_length"] = total_length / metrics["total_lines"] | |
| # Calculate entropy. | |
| total_chars = sum(char_counts.values()) | |
| if total_chars > 0: | |
| entropy = 0 | |
| for count in char_counts.values(): | |
| prob = count / total_chars | |
| entropy -= prob * math.log2(prob) | |
| metrics["file_entropy"] = entropy | |
| # These aren't always in 'comment_density', so calculate here. | |
| metrics["source_lines"] = metrics["code_lines"] + metrics["comment_lines"] | |
| metrics["comment_ratio"] = (metrics["comment_lines"] / metrics["source_lines"] * 100 | |
| if metrics["source_lines"] > 0 else 0) # Handle potential division by zero. | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error calculating size metrics: {str(e)}") | |
| # Return 0s and basic size info on error. Still provide content length | |
| return { | |
| "size_bytes": len(content) if content else 0, # File Size is valuable,even in error. | |
| "total_lines": 0, | |
| "code_lines": 0, | |
| "blank_lines": 0, | |
| "comment_lines": 0, | |
| "avg_line_length": 0, | |
| "max_line_length": 0, | |
| "file_entropy": 0, # file_entropy added to default values. | |
| "source_lines": 0, # return metrics initialized 0 for other metrices. | |
| "comment_ratio": 0 #Return default values on errors | |
| } | |
| def analyze_function_metrics(self, content: str, language: str = "Unknown") -> Dict[str, Any]: | |
| try: | |
| metrics = { | |
| "total_functions": 0, | |
| "avg_function_length": 0, | |
| "max_function_length": 0, | |
| "avg_function_complexity": 0, | |
| "max_function_complexity": 0, | |
| "documented_functions": 0, | |
| "function_lengths": [], # Collect all lengths | |
| "function_complexities": [], # Collect all complexities | |
| "function_details": [] # Store details of each function | |
| } | |
| # Language-specific function patterns | |
| patterns = { | |
| "Python": r"(?:async\s+)?def\s+(\w+)\s*\([^)]*\)\s*(?:->.*?)?:", | |
| "JavaScript": r"(?:async\s+)?function\s+(\w+)\s*\([^)]*\)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>", | |
| "TypeScript": r"(?:async\s+)?function\s+(\w+)\s*\([^)]*\)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>", | |
| "Java": r"(?:public|private|protected|\s)\s+(?:static\s+)?[a-zA-Z_<>[\]]+\s+(\w+)\s*\([^)]*\)\s*(?:throws\s+[^{]+)?\s*\{", | |
| "C#": r"(?:public|private|protected|\s)\s+(?:static\s+)?[a-zA-Z_<>[\]]+\s+(\w+)\s*\([^)]*\)\s*(?:where\s+[^{]+)?\s*\{", | |
| }.get(language, r"function\s+(\w+)\s*\([^)]*\)") | |
| lines = content.splitlines() | |
| current_function = None | |
| function_start = 0 | |
| in_function = False | |
| function_content = [] | |
| brace_count = 0 #for count braces. | |
| for i, line in enumerate(lines): | |
| stripped = line.strip() | |
| if not stripped or stripped.startswith(('/', '#')): # Skip blank lines and comment lines | |
| continue | |
| if re.search(patterns, line): | |
| current_function = { | |
| "name": re.search(patterns, line).group(1), # Extract function name | |
| "start_line": i + 1, # 1-based line numbers | |
| "has_docstring": False, | |
| "complexity": 1, #base complexity is one. | |
| "nested_depth": 0, | |
| "parameters": len(re.findall(r',', line)) + 1 if '(' in line else 0 # Count parameters | |
| } | |
| function_start = i #starting function line number. | |
| in_function = True | |
| function_content = [line] # Start collecting content | |
| continue | |
| if in_function: | |
| function_content.append(line) #add the functions to function content. | |
| brace_count += line.count('{') - line.count('}') | |
| if language == "Python" and i == function_start + 1: # Check for docstring right after def | |
| if stripped.startswith('"""') or stripped.startswith("'''"): | |
| current_function["has_docstring"] = True | |
| # More robust function end detection | |
| if (language in ["Python"] and brace_count == 0 and not line.startswith(' ')) or \ | |
| (language not in ["Python"] and brace_count == 0 and line.rstrip().endswith('}')): #Robust function end check | |
| func_content = '\n'.join(function_content) # Join the collected lines for analysis | |
| current_function["length"] = len(function_content) # Function length in lines | |
| complexity_metrics = self.calculate_cyclomatic_complexity(func_content, language) | |
| current_function["complexity"] = complexity_metrics["complexity"] # Cyclomatic complexity | |
| metrics["total_functions"] += 1 | |
| metrics["function_lengths"].append(current_function["length"]) | |
| metrics["function_complexities"].append(current_function["complexity"]) | |
| metrics["max_function_length"] = max(metrics["max_function_length"], current_function["length"]) # Keep the larger value | |
| metrics["max_function_complexity"] = max(metrics["max_function_complexity"], | |
| current_function["complexity"]) | |
| if current_function["has_docstring"]: | |
| metrics["documented_functions"] += 1 # Count documented functions | |
| metrics["function_details"].append(current_function) | |
| in_function = False | |
| current_function = None | |
| function_content = [] # Reset the collected state | |
| if metrics["total_functions"] > 0: | |
| metrics["avg_function_length"] = sum(metrics["function_lengths"]) / metrics["total_functions"] | |
| metrics["avg_function_complexity"] = sum(metrics["function_complexities"]) / metrics["total_functions"] | |
| metrics["documentation_ratio"] = metrics["documented_functions"] / metrics["total_functions"] | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing function metrics: {str(e)}") | |
| # Return default values for all metrics in case of error. | |
| return { | |
| "total_functions": 0, | |
| "avg_function_length": 0, | |
| "max_function_length": 0, | |
| "avg_function_complexity": 0, | |
| "max_function_complexity": 0, | |
| "documented_functions": 0, | |
| "function_lengths": [], | |
| "function_complexities": [], | |
| "function_details": [], | |
| "error": str(e) # Include the error for debugging. | |
| } | |
| def _analyze_file_metrics(self, file_content) -> Optional[Dict[str, Any]]: | |
| """Analyze metrics for a single file with proper error handling.""" | |
| try: | |
| # Decode the file content (assuming it's base64 encoded) | |
| content = base64.b64decode(file_content.content).decode('utf-8') | |
| language = RELEVANT_EXTENSIONS.get(Path(file_content.path).suffix.lower(), "Unknown") | |
| metrics = { | |
| "path": file_content.path, | |
| "metrics": {} | |
| } | |
| # Size metrics (always calculated) | |
| try: | |
| size_metrics = self.calculate_size_metrics(content, language) | |
| metrics["metrics"].update(size_metrics) # Store results, handling None. | |
| except Exception as e: | |
| self.logger.error(f"Error calculating size metrics for {file_content.path}: {str(e)}") | |
| # Provide default values even if there is error | |
| metrics["metrics"].update({ | |
| "size_bytes": len(content), #we have this data even in errors. | |
| "total_lines": len(content.splitlines()), | |
| "code_lines": 0, | |
| "blank_lines": 0, | |
| "comment_lines": 0 | |
| }) | |
| # Complexity metrics (only for supported languages) | |
| if language != "Unknown": | |
| try: | |
| complexity = self.calculate_cyclomatic_complexity(content, language) | |
| metrics["metrics"]["complexity"] = complexity.get("complexity", 0) | |
| metrics["metrics"]["cognitive_complexity"] = complexity.get("cognitive_complexity", 0) # Store cognitive. | |
| except Exception as e: | |
| self.logger.error(f"Error calculating complexity for {file_content.path}: {str(e)}") | |
| metrics["metrics"].update({ | |
| "complexity": 0, | |
| "cognitive_complexity": 0 # Default to 0 if error. | |
| }) | |
| # Halstead metrics (for supported languages) | |
| if language in ["Python", "JavaScript", "Java"]: # Check if language is supported | |
| try: | |
| halstead = self.calculate_halstead_metrics(content, language) | |
| metrics["metrics"].update(halstead) # Add the results to file data. | |
| except Exception as e: | |
| self.logger.error(f"Error calculating Halstead metrics for {file_content.path}: {str(e)}") | |
| # No defaults needed, halstead already returns 0s. | |
| # Duplication metrics (always calculate) | |
| try: | |
| duplication = self.detect_code_duplication(content) | |
| # detect_code_duplication returns counts, so store the duplicate block count directly | |
| metrics["metrics"]["duplicate_segments"] = duplication.get("duplicate_blocks", 0) | |
| except Exception as e: | |
| self.logger.error(f"Error detecting duplication for {file_content.path}: {str(e)}") | |
| metrics["metrics"]["duplicate_segments"] = 0 # Set to 0 on error | |
| # Function-level metrics (for supported languages). | |
| if language != "Unknown": | |
| try: | |
| function_metrics = self.analyze_function_metrics(content, language) | |
| if function_metrics and "error" not in function_metrics: # Check for None AND no error | |
| metrics["metrics"].update(function_metrics) # | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing functions for {file_content.path}: {str(e)}") | |
| # no default to add as function metrics handles defaults. | |
| # Comment density (always calculated). | |
| try: | |
| comment_metrics = self.calculate_comment_density(content, language) | |
| metrics["metrics"].update(comment_metrics) # Merge | |
| except Exception as e: | |
| self.logger.error(f"Error calculating comment density for {file_content.path}: {str(e)}") | |
| metrics["metrics"].update({ | |
| "comment_density": 0, # Defaults on error | |
| "docstring_lines": 0 # Add other relevant metrics | |
| }) | |
| return metrics #Returns calculated data | |
| except Exception as e: # General Exception to prevent crash. | |
| self.logger.error(f"Error analyzing file {file_content.path}: {str(e)}") | |
| # Return minimal error metrics (important) | |
| return { | |
| "path": file_content.path, | |
| "metrics": { | |
| "size_bytes": 0, # Important basic metric, try to preserve. | |
| "total_lines": 0, # and total lines | |
| "error": str(e) | |
| } | |
| } | |
| class DependencyAnalyzer: | |
| """Handles dependency analysis with improved error handling.""" | |
| def __init__(self, repo): | |
| self.repo = repo | |
| self.logger = logging.getLogger(__name__) | |
| self.dependency_files = { | |
| "python": ["requirements.txt", "setup.py", "Pipfile", "pyproject.toml"], | |
| "javascript": ["package.json", "yarn.lock", "package-lock.json"], | |
| "java": ["pom.xml", "build.gradle"], | |
| "ruby": ["Gemfile"], | |
| "php": ["composer.json"], | |
| "go": ["go.mod"], | |
| "rust": ["Cargo.toml"], | |
| "dotnet": ["*.csproj", "*.fsproj", "*.vbproj"] # .NET project files | |
| } | |
| async def analyze_dependencies(self) -> Dict[str, Any]: | |
| """Analyze project dependencies (async for aiohttp).""" | |
| results = { | |
| "dependency_files": [], # Files that specify the dependencies. | |
| "dependencies": defaultdict(list), # Parsed dependencies. | |
| "dependency_graph": defaultdict(list), # Relationship b/w Dependencies. | |
| "outdated_dependencies": [], # | |
| "security_alerts": [] # Placeholder for future security checks | |
| } | |
| try: | |
| contents = self.repo.get_contents("") | |
| while contents: | |
| file_content = contents.pop(0) | |
| if file_content.type == "dir": | |
| contents.extend(self.repo.get_contents(file_content.path)) | |
| else: | |
| for lang, patterns in self.dependency_files.items(): | |
| if any(self._matches_pattern(file_content.path, pattern) for pattern in patterns): | |
| try: | |
| file_text = base64.b64decode(file_content.content).decode('utf-8') | |
| deps = await self._parse_dependency_file(file_content.path, file_text) # Parse the file for its dependencies | |
| if deps: # Only record files that yielded dependencies | |
| results["dependencies"][file_content.path] = deps | |
| results["dependency_files"].append(file_content.path) # add current file to list of dependency files. | |
| except Exception as e: | |
| self.logger.error(f"Error parsing {file_content.path}: {str(e)}") | |
| results["outdated_dependencies"] = await self._check_outdated_dependencies(results["dependencies"])# | |
| results["dependency_graph"] = self._build_dependency_graph(results["dependencies"]) | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing dependencies: {str(e)}") | |
| # No need to return default values here, as the initialized 'results' dict is sufficient | |
| return results | |
| def _matches_pattern(self, filename: str, pattern: str) -> bool: | |
| """Check if a filename matches a given pattern (supports wildcards).""" | |
| if pattern.startswith("*"): | |
| return filename.endswith(pattern[1:]) # Simple wildcard match | |
| return filename.endswith(pattern) | |
| async def _parse_dependency_file(self, filepath: str, content: str) -> List[Dict[str, str]]: | |
| """Parse different dependency file formats and extract dependencies.""" | |
| deps = [] # Initialize an empty list to hold dependencies | |
| try: | |
| if filepath.endswith(('requirements.txt', 'Pipfile')): # requirements.txt or Pipfile pins | |
| for line in content.split('\n'): | |
| if '==' in line: | |
| name, ver = line.strip().split('==', 1) # Avoid shadowing the imported packaging 'version' module | |
| deps.append({"name": name.strip(), "version": ver.strip(), "type": "python"}) | |
| elif filepath.endswith('package.json'): #package.json | |
| data = json.loads(content) | |
| for dep_type in ['dependencies', 'devDependencies']: # Check both dependencies and devDependencies | |
| if dep_type in data: | |
| for name, version in data[dep_type].items(): | |
| # Remove semver characters like ^ and ~ for accurate comparisons | |
| deps.append({ | |
| "name": name, | |
| "version": version.replace('^', '').replace('~', ''), # Remove ^ and ~ | |
| "type": "npm" | |
| }) | |
| # Add more file type parsing as needed (e.g., pom.xml for Java, Gemfile for Ruby) | |
| except Exception as e: | |
| self.logger.error(f"Error parsing {filepath}: {str(e)}") | |
| # Don't add any dependencies if parsing fails | |
| return deps # Always return the list, even if empty | |
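| # Illustrative example of the parser output above: a requirements.txt line | |
| #   requests==2.31.0 | |
| # becomes {"name": "requests", "version": "2.31.0", "type": "python"}, and a | |
| # package.json entry "lodash": "^4.17.21" becomes | |
| #   {"name": "lodash", "version": "4.17.21", "type": "npm"} | |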
| async def _check_outdated_dependencies(self, dependencies: Dict[str, List[Dict[str, str]]]) -> List[Dict[str, Any]]: | |
| """Check for outdated dependencies using respective package registries (async).""" | |
| outdated = [] | |
| async with aiohttp.ClientSession() as session: # Use aiohttp for concurrent HTTP requests | |
| for filepath, deps in dependencies.items(): | |
| for dep in deps: | |
| try: | |
| if dep["type"] == "python": | |
| async with session.get(f"https://pypi.org/pypi/{dep['name']}/json") as response: | |
| if response.status == 200: | |
| data = await response.json() | |
| latest_version = data["info"]["version"] | |
| # Use packaging.version for robust version comparison | |
| if version.parse(latest_version) > version.parse(dep["version"]): | |
| outdated.append({ | |
| "name": dep["name"], | |
| "current_version": dep["version"], | |
| "latest_version": latest_version, | |
| "type": "python" | |
| }) | |
| elif dep["type"] == "npm": | |
| # Use npm registry API | |
| async with session.get(f"https://registry.npmjs.org/{dep['name']}") as response: | |
| if response.status == 200: | |
| data = await response.json() | |
| latest_version = data["dist-tags"]["latest"] | |
| if version.parse(latest_version) > version.parse(dep['version']): | |
| outdated.append({ | |
| "name": dep['name'], | |
| "current_version": dep["version"], | |
| "latest_version": latest_version, | |
| "type": "npm" | |
| }) | |
| # Add checks for other package types (Java, Ruby, etc.) | |
| except Exception as e: | |
| self.logger.error(f"Error checking version for {dep['name']}: {str(e)}") | |
| # Continue checking other dependencies even if one fails | |
| return outdated # Return the list, even if empty | |
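| # Illustrative note on why packaging.version is used above: plain string comparison | |
| # would rank "2.9.1" above "2.10.0", whereas | |
| #   version.parse("2.10.0") > version.parse("2.9.1")   # True | |
| # compares release segments numerically, so 2.10.0 is correctly treated as newer. | |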
| def _build_dependency_graph(self, dependencies: Dict[str, List[Dict[str, str]]]) -> Dict[str, List[str]]: | |
| """Build a dependency graph to visualize relationships (using networkx).""" | |
| graph = nx.DiGraph() # directed graph. | |
| try: | |
| for dep_file, deps in dependencies.items(): | |
| for dep in deps: | |
| # Add edges to represent dependencies | |
| graph.add_edge(dep_file, dep["name"]) # Dep file depends on individual libraries. | |
| # Convert to a dictionary of lists for easier handling | |
| return nx.to_dict_of_lists(graph) | |
| except Exception as e: | |
| self.logger.error(f"Error building dependency graph: {str(e)}") | |
| return defaultdict(list) # Return an empty graph in case of error | |
| class TestAnalyzer: | |
| """Handles test analysis.""" | |
| def __init__(self, repo): | |
| self.repo = repo | |
| self.logger = logging.getLogger(__name__) # Add logger | |
| self.test_patterns = { | |
| "python": ["test_*.py", "*_test.py", "tests/*.py"], | |
| "javascript": ["*.test.js", "*.spec.js", "__tests__/*.js"], | |
| "java": ["*Test.java", "*Tests.java"], | |
| "ruby": ["*_test.rb", "*_spec.rb"], | |
| "go": ["*_test.go"] | |
| } | |
| def analyze_tests(self) -> Dict[str, Any]: | |
| """Analyze test files, test counts, and (if possible) coverage information.""" | |
| results = { | |
| "test_files": [], | |
| "test_count": 0, | |
| "coverage_data": {}, # Dictionary to hold any parsed coverage information. | |
| "test_patterns": defaultdict(list) # Store the information about diff. testing pattern. | |
| } | |
| try: | |
| contents = self.repo.get_contents("") | |
| while contents: | |
| content = contents.pop(0) | |
| if content.type == "dir": | |
| contents.extend(self.repo.get_contents(content.path)) | |
| elif self._is_test_file(content.path): | |
| results["test_files"].append(content.path) | |
| test_metrics = self._analyze_test_file(content) #metrics of single files. | |
| results["test_patterns"][content.path] = test_metrics # Store results. | |
| results["test_count"] += test_metrics.get("test_count", 0) # Safely get test_count | |
| results["coverage_data"] = self._find_coverage_data() # Get any coverage. | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing tests: {str(e)}") # Use logger | |
| return results # Always return results | |
| def _is_test_file(self, filepath: str) -> bool: | |
| """Check if a file is likely to be a test file, based on common patterns.""" | |
| for patterns in self.test_patterns.values(): | |
| for pattern in patterns: | |
| if Path(filepath).match(pattern): # Use Path.match for wildcard matching | |
| return True | |
| return False | |
| def _analyze_test_file(self, file_content) -> Dict[str, Any]: | |
| """Analyze an individual test file to count tests, assertions, etc.""" | |
| try: | |
| content = base64.b64decode(file_content.content).decode('utf-8') | |
| metrics = { | |
| "test_count": 0, | |
| "assertions": 0, | |
| "test_classes": 0 # If using class-based tests | |
| } | |
| # Count test cases (using regex for common patterns) | |
| metrics["test_count"] += len(re.findall(r'def test_', content)) # Python | |
| metrics["test_count"] += len(re.findall(r'it\s*\([\'""]', content)) # JavaScript (Jest/Mocha) | |
| metrics["assertions"] += len(re.findall(r'assert', content)) # General assertions | |
| metrics["test_classes"] += len(re.findall(r'class\s+\w+Test', content)) # test class patterns. | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing test file: {str(e)}") # Use logger | |
| return {} # Return empty dict on error | |
| def _find_coverage_data(self) -> Dict[str, Any]: | |
| """Try to find coverage information (if available, e.g., from coverage reports).""" | |
| coverage_data = { | |
| "total_coverage": None, | |
| "file_coverage": {}, # If file-level data available. | |
| "coverage_report_found": False # for indicating we find coverage files. | |
| } | |
| try: | |
| # Look for common coverage report files | |
| coverage_files = [ | |
| ".coverage", # Python coverage.py | |
| "coverage.xml", # Cobertura (Python, Java) | |
| "coverage.json", # Jest, other JavaScript | |
| "coverage/lcov.info", # LCOV (C/C++, others) | |
| "coverage/coverage-final.json" # Istanbul (JavaScript) | |
| ] | |
| contents = self.repo.get_contents("") | |
| while contents: | |
| content = contents.pop(0) | |
| if content.type == "dir": | |
| contents.extend(self.repo.get_contents(content.path)) | |
| elif any(content.path.endswith(f) for f in coverage_files): | |
| coverage_data["coverage_report_found"] = True # set covarage to True, Indicate report present. | |
| parsed_coverage = self._parse_coverage_file(content) # Try to parse. | |
| if parsed_coverage: #check parse_coverage is present | |
| coverage_data.update(parsed_coverage) # Merge into result | |
| except Exception as e: | |
| self.logger.error(f"Error finding coverage data: {str(e)}") | |
| return coverage_data | |
| def _parse_coverage_file(self, file_content) -> Dict[str, Any]: | |
| """Parse a coverage report file (handles multiple formats).""" | |
| try: | |
| content = base64.b64decode(file_content.content).decode('utf-8') | |
| if file_content.path.endswith('.json'): | |
| data = json.loads(content) | |
| # Handle different JSON formats (e.g., coverage.py, Istanbul) | |
| if 'total' in data: # coverage.py format | |
| return { | |
| 'total_coverage': data['total'].get('lines', {}).get('percent', 0), | |
| 'file_coverage': { | |
| file: stats.get('lines', {}).get('percent', 0) | |
| for file, stats in data.get('files', {}).items() | |
| } | |
| } | |
| # Add handling for other JSON formats (e.g., Istanbul) as needed | |
| elif file_content.path.endswith('.xml'): | |
| # Parse XML (Cobertura format) | |
| from xml.etree import ElementTree #for parse XML format | |
| root = ElementTree.fromstring(content) | |
| total = float(root.get('line-rate', 0)) * 100 # Overall coverage | |
| file_coverage = {} | |
| # Extract coverage per class/file | |
| for class_elem in root.findall('.//class'): | |
| filename = class_elem.get('filename', '') | |
| line_rate = float(class_elem.get('line-rate', 0)) * 100 | |
| file_coverage[filename] = line_rate | |
| return { | |
| 'total_coverage': total, | |
| 'file_coverage': file_coverage | |
| } | |
| elif file_content.path.endswith('lcov.info'): | |
| # Parse LCOV format | |
| total_lines = 0 | |
| covered_lines = 0 | |
| current_file = None | |
| file_coverage = {} | |
| for line in content.split('\n'): | |
| if line.startswith('SF:'): # Source file | |
| current_file = line[3:].strip() | |
| elif line.startswith('LH:'): # Lines hit | |
| covered = int(line[3:]) | |
| covered_lines += covered | |
| elif line.startswith('LF:'): # Lines found | |
| total = int(line[3:]) | |
| total_lines += total | |
| if current_file and total > 0: # calculate coverage. | |
| file_coverage[current_file] = (covered / total) * 100 | |
| return { | |
| 'total_coverage': (covered_lines / total_lines * 100) if total_lines > 0 else 0, # handle Total lines may be 0 | |
| 'file_coverage': file_coverage | |
| } | |
| except Exception as e: | |
| self.logger.error(f"Error parsing coverage file: {str(e)}") | |
| return {} # Return empty dict on error | |
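| # Illustrative fragment of the LCOV format handled above (only SF/LF/LH are used here): | |
| #   SF:src/app.py     <- source file path | |
| #   LF:120            <- lines found (instrumented) | |
| #   LH:96             <- lines hit | |
| # giving 96 / 120 * 100 = 80.0 percent coverage for that file. | |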
| def analyze_test_quality(self, content: str) -> Dict[str, Any]: | |
| """ | |
| Analyze the quality of the tests themselves. | |
| """ | |
| try: | |
| metrics = { | |
| "assertion_density": 0, # Assertions per line of test code | |
| "test_setup_complexity": 0, # How complex is the test setup? | |
| "mock_usage": 0, # How frequently are mocks used? | |
| "test_patterns": [], # List of identified test patterns and best practices. | |
| "anti_patterns": [] # list of identified Anti patterns | |
| } | |
| lines = content.splitlines() | |
| assertion_count = sum(1 for line in lines if 'assert' in line) # check assertion present. | |
| metrics["assertion_density"] = assertion_count / len(lines) if lines else 0 | |
| setup_lines = [] | |
| in_setup = False | |
| for line in lines: | |
| if 'def setUp' in line or 'def setup' in line: | |
| in_setup = True | |
| elif in_setup and line.strip() and not line.startswith(' '): # A non-indented, non-blank line ends the setUp block | |
| in_setup = False | |
| if in_setup: | |
| setup_lines.append(line) | |
| metrics["test_setup_complexity"] = len(setup_lines) | |
| mock_count = sum(1 for line in lines if 'mock' in line.lower()) # count mock if present | |
| metrics["mock_usage"] = mock_count | |
| # Detect common test patterns | |
| if any('parameterized' in line for line in lines): | |
| metrics["test_patterns"].append("parameterized_tests") | |
| if any('fixture' in line for line in lines): | |
| metrics["test_patterns"].append("fixture_usage") | |
| # Identify potential anti-patterns | |
| if any('time.sleep' in line for line in lines): | |
| metrics["anti_patterns"].append("sleep_in_tests") | |
| if any('test' not in line.lower() for line in lines if line.strip().startswith('def')): # Flag def statements that do not look like tests | |
| metrics["anti_patterns"].append("non_test_methods") | |
| return metrics | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing test quality: {str(e)}") | |
| return { # Return default 0 values on error. | |
| "assertion_density": 0, | |
| "test_setup_complexity": 0, | |
| "mock_usage": 0, | |
| "test_patterns": [], | |
| "anti_patterns": [] | |
| } | |
| class DocumentationAnalyzer: | |
| """Handles documentation analysis.""" | |
| def __init__(self, repo): | |
| self.repo = repo | |
| self.logger = logging.getLogger(__name__) # Add logger | |
| self.doc_patterns = [ | |
| "README.md", | |
| "CONTRIBUTING.md", | |
| "CHANGELOG.md", | |
| "LICENSE", | |
| "docs/", # Common documentation directories | |
| "documentation/", | |
| "wiki/" # Consider wiki as documentation | |
| ] | |
| def analyze_documentation(self) -> Dict[str, Any]: | |
| """Analyze repository documentation (README, CONTRIBUTING, API docs, etc.).""" | |
| results = { | |
| "readme_analysis": None, | |
| "contributing_guidelines": None, | |
| "api_documentation": None, # Placeholder - can be expanded | |
| "documentation_files": [], # All documantation. | |
| "wiki_pages": [], # If the repo has a wiki | |
| "documentation_coverage": 0.0 # Overall score | |
| } | |
| try: | |
| # Analyze README | |
| readme = self._get_file_content("README.md") | |
| if readme: | |
| results["readme_analysis"] = self._analyze_readme(readme) | |
| # Check contributing guidelines | |
| contributing = self._get_file_content("CONTRIBUTING.md") | |
| if contributing: | |
| results["contributing_guidelines"] = self._analyze_contributing(contributing) | |
| contents = self.repo.get_contents("") | |
| while contents: | |
| content = contents.pop(0) | |
| if content.type == "dir": | |
| # Check for dedicated documentation directories | |
| if content.path.lower() in ["docs", "documentation"]: | |
| results["documentation_files"].extend(self._analyze_doc_directory(content.path)) | |
| contents.extend(self.repo.get_contents(content.path)) | |
| # Check for specific documentation files | |
| elif any(content.path.endswith(pattern) for pattern in self.doc_patterns): | |
| results["documentation_files"].append(content.path) | |
| results["documentation_coverage"] = self._calculate_doc_coverage() | |
| # Get wiki pages if available | |
| try: | |
| wiki_pages = self.repo.get_wiki_pages() # Requires PyGithub 2.x | |
| results["wiki_pages"] = [page.title for page in wiki_pages] | |
| except: # GitHub API might raise an exception if no wiki | |
| pass | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing documentation: {str(e)}") # Use logger | |
| return results # Always return results | |
| def _get_file_content(self, filepath: str) -> Optional[str]: | |
| """Helper to get the content of a specific file (handles not found).""" | |
| try: | |
| content = self.repo.get_contents(filepath) | |
| return base64.b64decode(content.content).decode('utf-8') | |
| except Exception: | |
| return None # File not found or unreadable | |
| def _analyze_readme(self, content: str) -> Dict[str, Any]: | |
| """Analyze the README content for completeness and key information.""" | |
| analysis = { | |
| "sections": [], # List of identified sections (e.g., from headings) | |
| "has_quickstart": False, # Quick start guide | |
| "has_installation": False, # Installation instructions | |
| "has_usage": False, # Basic usage examples | |
| "has_api_docs": False, # Link to API docs? | |
| "has_examples": False, # Code examples | |
| "word_count": len(content.split()), | |
| "completeness_score": 0.0 | |
| } | |
| # Extract sections (using regex for headings) | |
| sections = re.findall(r'^#+\s+(.+)$', content, re.MULTILINE) # match and return the content. | |
| analysis["sections"] = sections | |
| # Check for key components (using regex for robustness) | |
| analysis["has_quickstart"] = bool(re.search(r'quick\s*start', content, re.I)) # Case-insensitive | |
| analysis["has_installation"] = bool(re.search(r'install|setup', content, re.I)) | |
| analysis["has_usage"] = bool(re.search(r'usage|how\s+to\s+use', content, re.I)) # More flexible matching. | |
| analysis["has_api_docs"] = bool(re.search(r'api|documentation', content, re.I)) | |
| analysis["has_examples"] = bool(re.search(r'example|demo', content, re.I)) # Broader example terms | |
| # Calculate a simple completeness score | |
| key_elements = [ | |
| analysis["has_quickstart"], | |
| analysis["has_installation"], | |
| analysis["has_usage"], | |
| analysis["has_api_docs"], | |
| analysis["has_examples"] | |
| ] | |
| analysis["completeness_score"] = sum(key_elements) / len(key_elements) * 100 | |
| return analysis | |
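| # Illustrative example of the completeness score above: a README with installation, | |
| # usage and examples sections but no quick start or API-docs reference scores | |
| #   3 / 5 * 100 = 60.0 | |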
| def _analyze_contributing(self, content: str) -> Dict[str, Any]: | |
| """Analyze CONTRIBUTING.md for guidelines.""" | |
| analysis = { | |
| "has_code_style": False, # Code Style Guide | |
| "has_pr_process": False, # How to make PR | |
| "has_issue_guidelines": False, #Guidelines for reporting issue. | |
| "has_setup_instructions": False, # setup environment Instructions. | |
| "completeness_score": 0.0 | |
| } | |
| analysis["has_code_style"] = bool(re.search(r'code\s+style|coding\s+standards', content, re.I)) | |
| analysis["has_pr_process"] = bool(re.search(r'pull\s+request|PR', content, re.I)) # checking pull request | |
| analysis["has_issue_guidelines"] = bool(re.search(r'issue|bug\s+report', content, re.I)) #issue and bug report. | |
| analysis["has_setup_instructions"] = bool(re.search(r'setup|getting\s+started', content, re.I))# Setup. | |
| key_elements = [ #key components present or not. | |
| analysis["has_code_style"], | |
| analysis["has_pr_process"], | |
| analysis["has_issue_guidelines"], | |
| analysis["has_setup_instructions"] | |
| ] | |
| analysis["completeness_score"] = sum(key_elements) / len(key_elements) * 100 # calculate | |
| return analysis | |
| def _analyze_doc_directory(self, directory: str) -> List[str]: | |
| """Analyze a dedicated documentation directory (if present).""" | |
| doc_files = [] | |
| try: | |
| contents = self.repo.get_contents(directory) | |
| for content in contents: | |
| if content.type == "file": | |
| doc_files.append(content.path) | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing doc directory: {str(e)}") # Use logger | |
| return doc_files | |
| def _calculate_doc_coverage(self, doc_files: List[str]) -> float: | |
| """Calculate an overall documentation coverage score (heuristic).""" | |
| # This is a simplified scoring system and should be customized | |
| score = 0.0 | |
| total_points = 0 | |
| # Check README presence and quality | |
| readme = self._get_file_content("README.md") | |
| if readme: | |
| readme_analysis = self._analyze_readme(readme) | |
| score += readme_analysis["completeness_score"] / 100 * 40 # README is worth 40% | |
| total_points += 40 | |
| # Check contributing guidelines | |
| contributing = self._get_file_content("CONTRIBUTING.md") | |
| if contributing: | |
| contributing_analysis = self._analyze_contributing(contributing) | |
| score += contributing_analysis["completeness_score"] / 100 * 20 # Contributing is worth 20% | |
| total_points += 20 | |
| # Check API documentation (basic presence check) | |
| if any(f.endswith(('.md', '.rst')) for f in doc_files): | |
| score += 20 # API docs are worth 20% | |
| total_points += 20 | |
| # Check for examples (this is simplified - could be improved) | |
| if any('example' in f.lower() for f in doc_files): # Case-insensitive check | |
| score += 20 # Examples are worth 20% | |
| total_points += 20 | |
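| # Illustrative weighting (assumed numbers): README at 75% -> 30/40 pts, CONTRIBUTING at 50% -> 10/20 pts, | |
| # API docs present -> 20/20 pts, no examples found -> that section is skipped; score = (30 + 10 + 20) / (40 + 20 + 20) * 100 = 75.0 | |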
| return (score / total_points * 100) if total_points > 0 else 0.0 # Avoid division by 0 | |
| class CommunityAnalyzer: | |
| """Handles community metrics analysis.""" | |
| def __init__(self, repo): | |
| self.repo = repo | |
| self.logger = logging.getLogger(__name__) # Add logger | |
| async def analyze_community(self) -> Dict[str, Any]: | |
| """Analyze community engagement, health, and contribution patterns.""" | |
| results = { | |
| "engagement_metrics": await self._get_engagement_metrics(), # Await async calls | |
| "issue_metrics": await self._analyze_issues(), # Await for analysis | |
| "pr_metrics": await self._analyze_pull_requests(), # Await for PR | |
| "contributor_metrics": self._analyze_contributors(), | |
| "discussion_metrics": await self._analyze_discussions() # If discussions are enabled | |
| } | |
| return results # Returns Calculated community metrics. | |
| async def _get_engagement_metrics(self) -> Dict[str, Any]: | |
| """Get basic repository engagement metrics (stars, forks, watchers).""" | |
| metrics = { | |
| "stars": self.repo.stargazers_count, | |
| "forks": self.repo.forks_count, | |
| "watchers": self.repo.subscribers_count, | |
| "star_history": [], # Historical star data | |
| "fork_history": [] # Historical fork data | |
| } | |
| try: | |
| # Get star history (iterates all stargazers with dates; can be slow for heavily starred repos) | |
| stargazers = self.repo.get_stargazers_with_dates() | |
| metrics["star_history"] = [ | |
| {"date": star.starred_at.isoformat(), "count": i + 1} # count: i+1 to show progression. | |
| for i, star in enumerate(stargazers) | |
| ] | |
| # Get fork history | |
| forks = self.repo.get_forks() # get_forks() has no dated variant; fork.created_at is used below | |
| metrics["fork_history"] = [ | |
| {"date": fork.created_at.isoformat(), "count": i + 1} | |
| for i, fork in enumerate(forks) | |
| ] | |
| except Exception as e: | |
| self.logger.error(f"Error getting engagement metrics: {str(e)}") # Use logger | |
| return metrics # Return calculated metrics data. | |
| async def _analyze_issues(self) -> Dict[str, Any]: | |
| """Analyze repository issues (open, closed, response times, labels).""" | |
| metrics = { | |
| "total_issues": 0, | |
| "open_issues": 0, | |
| "closed_issues": 0, | |
| "avg_time_to_close": None, # Average time to close an issue | |
| "issue_categories": defaultdict(int), # Categorize issues by label | |
| "response_times": [] # List of response times | |
| } | |
| try: | |
| issues = self.repo.get_issues(state='all') # Get all issues (open and closed) | |
| for issue in issues: | |
| metrics["total_issues"] += 1 | |
| if issue.state == 'open': | |
| metrics["open_issues"] += 1 | |
| else: | |
| metrics["closed_issues"] += 1 | |
| # Calculate time to close (if closed_at is available) | |
| if issue.closed_at and issue.created_at: # Time to close, only for closed issues | |
| time_to_close = (issue.closed_at - issue.created_at).total_seconds() | |
| metrics["response_times"].append(time_to_close) | |
| # Categorize issues by labels | |
| for label in issue.labels: | |
| metrics["issue_categories"][label.name] += 1 | |
| # Calculate average response time | |
| if metrics["response_times"]: # Calculate Avg_response only if any time available. | |
| metrics["avg_time_to_close"] = sum(metrics["response_times"]) / len(metrics["response_times"]) #avg = tot / no. | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing issues: {str(e)}") # Use logger | |
| return metrics | |
| async def _analyze_pull_requests(self) -> Dict[str, Any]: | |
| """Analyze pull requests (open, closed, merged, review times, sizes).""" | |
| metrics = { | |
| "total_prs": 0, | |
| "open_prs": 0, | |
| "merged_prs": 0, | |
| "closed_prs": 0, | |
| "avg_time_to_merge": None, # Average time to merge a PR | |
| "pr_sizes": defaultdict(int), # Categorize PRs by size (lines of code) | |
| "review_times": [] # List of review times | |
| } | |
| try: | |
| pulls = self.repo.get_pulls(state='all') # Get all PRs (open, closed, merged) | |
| for pr in pulls: | |
| metrics["total_prs"] += 1 | |
| if pr.state == 'open': | |
| metrics["open_prs"] += 1 | |
| elif pr.merged: | |
| metrics["merged_prs"] += 1 | |
| # Calculate time to merge | |
| if pr.merged_at and pr.created_at: | |
| time_to_merge = (pr.merged_at - pr.created_at).total_seconds() | |
| metrics["review_times"].append(time_to_merge) #store calculated value | |
| else: | |
| metrics["closed_prs"] += 1 # | |
| # Categorize PR sizes (simplified, based on additions + deletions) | |
| if pr.additions + pr.deletions < 10: | |
| metrics["pr_sizes"]["xs"] += 1 # Extra small | |
| elif pr.additions + pr.deletions < 50: | |
| metrics["pr_sizes"]["s"] += 1 # Small | |
| elif pr.additions + pr.deletions < 250: | |
| metrics["pr_sizes"]["m"] += 1 # Medium | |
| elif pr.additions + pr.deletions < 1000: | |
| metrics["pr_sizes"]["l"] += 1 # Large | |
| else: | |
| metrics["pr_sizes"]["xl"] += 1 # Extra large | |
| # Calculate average review time | |
| if metrics["review_times"]: #calculate Avg_time to merge if review times available. | |
| metrics["avg_time_to_merge"] = sum(metrics["review_times"]) / len(metrics["review_times"]) #calculate Average. | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing pull requests: {str(e)}") # Use logger | |
| return metrics # Return calculated PR metrics | |
| def _analyze_contributors(self) -> Dict[str, Any]: | |
| """Analyze contributor patterns and engagement.""" | |
| metrics = { | |
| "total_contributors": 0, | |
| "active_contributors": 0, # Contributors active in the last 90 days | |
| "contributor_types": defaultdict(int), # User, Organization, Bot | |
| "contribution_frequency": defaultdict(int), # High, medium, low | |
| "core_contributors": [] # List of core contributors (e.g., top 10%) | |
| } | |
| try: | |
| contributors = self.repo.get_contributors() | |
| for contributor in contributors: | |
| metrics["total_contributors"] += 1 | |
| # Check for recent activity (last 90 days) | |
| recent_commits = self.repo.get_commits(author=contributor.login, since=datetime.now() - timedelta(days=90)) # Commits by this contributor in the last 90 days | |
| if recent_commits.totalCount > 0: | |
| metrics["active_contributors"] += 1 | |
| # Categorize contributor types | |
| metrics["contributor_types"][contributor.type] += 1 # increment by type. | |
| # Analyze contribution frequency (simplified) | |
| if contributor.contributions > 100: # More than 100 contributions -> high frequency | |
| metrics["contribution_frequency"]["high"] += 1 | |
| # Consider contributors with >100 contributions as "core" | |
| metrics["core_contributors"].append({ | |
| "login": contributor.login, | |
| "contributions": contributor.contributions, # store | |
| "type": contributor.type #Store. | |
| }) | |
| elif contributor.contributions > 20: | |
| metrics["contribution_frequency"]["medium"] += 1 # store in medium if condition satisfy. | |
| else: | |
| metrics["contribution_frequency"]["low"] += 1# | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing contributors: {str(e)}") # Use logger | |
| return metrics # Return calculated contributor metrics | |
| async def _analyze_discussions(self) -> Dict[str, Any]: | |
| """Analyze repository discussions (if enabled).""" | |
| metrics = { | |
| "total_discussions": 0, | |
| "active_discussions": 0, # Discussions with recent activity | |
| "categories": defaultdict(int), # Discussion categories | |
| "avg_responses": 0, # Average number of responses per discussion | |
| "response_times": [] # List of response times | |
| } | |
| try: | |
| # Check if discussions are enabled | |
| if self.repo.has_discussions: # Only proceed if discussions are enabled | |
| discussions = self.repo.get_discussions() # May require GraphQL / a recent PyGithub; failures are caught by the except above | |
| total_responses = 0 | |
| for discussion in discussions: | |
| metrics["total_discussions"] += 1 | |
| # Check for active discussions (simplified: any comments = active) | |
| if discussion.comments > 0: | |
| metrics["active_discussions"] += 1 | |
| total_responses += discussion.comments # Running total of comments across discussions | |
| # Categorize discussions | |
| metrics["categories"][discussion.category.name] += 1 | |
| # Calculate response times (time to first response) | |
| if discussion.comments > 0: | |
| first_response = discussion.get_comments()[0] # Earliest comment | |
| response_time = (first_response.created_at - discussion.created_at).total_seconds() # Time to first response, in seconds | |
| metrics["response_times"].append(response_time) | |
| # Calculate average responses per discussion | |
| if metrics["active_discussions"] > 0: # Calculate only if value present. | |
| metrics["avg_responses"] = total_responses / metrics["active_discussions"] | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing discussions: {str(e)}") # Use logger | |
| return metrics | |
| class RepositoryAnalyzer: | |
| """Main class to analyze a GitHub repository.""" | |
| def __init__(self, repo_url: str, github_token: str): | |
| self.logger = logging.getLogger(__name__) | |
| self.gh = Github(github_token) # Keep for some top-level calls | |
| self.gh_handler = GitHubAPIHandler(github_token) # Use the handler | |
| self.code_metrics = CodeMetricsAnalyzer() | |
| parts = repo_url.rstrip('/').split('/') | |
| if len(parts) < 2: | |
| raise ValueError("Invalid repository URL format") | |
| self.repo_name = parts[-1] | |
| self.owner = parts[-2] | |
| self.analysis_data = { # Initialize data here | |
| "basic_info": {}, | |
| "structure": {}, | |
| "code_metrics": {}, | |
| "dependencies": {}, | |
| "tests": {}, | |
| "documentation": {}, | |
| "community": {}, | |
| "visualizations": {} | |
| } | |
| try: | |
| self.repo = self.gh_handler.get_repository(repo_url) # Use handler | |
| # Initialize other analyzers *after* successfully getting the repo | |
| self.dependency_analyzer = DependencyAnalyzer(self.repo) | |
| self.test_analyzer = TestAnalyzer(self.repo) | |
| self.doc_analyzer = DocumentationAnalyzer(self.repo) | |
| self.community_analyzer = CommunityAnalyzer(self.repo) | |
| except Exception as e: | |
| self.logger.error(f"Failed to initialize repository analyzer: {str(e)}") | |
| raise | |
| async def analyze(self) -> Dict[str, Any]: | |
| """Perform the full repository analysis.""" | |
| try: | |
| # Basic repository information | |
| self.analysis_data["basic_info"] = { | |
| "name": self.repo.name, | |
| "owner": self.repo.owner.login, | |
| "description": self.repo.description or "No description available", # Handle None | |
| "stars": self.repo.stargazers_count, | |
| "forks": self.repo.forks_count, | |
| "created_at": self.repo.created_at.isoformat(), # Use isoformat() | |
| "last_updated": self.repo.updated_at.isoformat(), | |
| "primary_language": self.repo.language or "Not specified", | |
| } | |
| # Analyze repository structure with sampling | |
| self.analysis_data["structure"] = await self._analyze_structure() | |
| # Analyze code patterns and metrics | |
| self.analysis_data["code_metrics"] = await self._analyze_code_metrics() | |
| # Analyze dependencies | |
| self.analysis_data["dependencies"] = await self.dependency_analyzer.analyze_dependencies() | |
| # Analyze tests and coverage | |
| self.analysis_data["tests"] = self.test_analyzer.analyze_tests() | |
| # Analyze documentation | |
| self.analysis_data["documentation"] = self.doc_analyzer.analyze_documentation() | |
| # Analyze community health | |
| self.analysis_data["community"] = await self.community_analyzer.analyze_community() | |
| # Generate visualizations | |
| self.analysis_data["visualizations"] = await self._generate_visualizations() | |
| return self.analysis_data # Return the populated dict | |
| except Exception as e: | |
| self.logger.error(f"Error during analysis: {str(e)}") | |
| raise | |
| async def _analyze_structure(self) -> Dict[str, Any]: | |
| """Analyze the repository's file and directory structure, with sampling.""" | |
| structure = { | |
| "files": defaultdict(int), # File type counts (e.g., .py, .js) | |
| "directories": set(), # Unique directory paths | |
| "total_size": 0, # Total size in bytes | |
| "directory_tree": defaultdict(list), # Parent -> [children] | |
| "file_samples": [] # Sample files for detailed analysis | |
| } | |
| try: | |
| all_files = [] # Store all relevant files first | |
| contents = self.repo.get_contents("") | |
| while contents: | |
| content = contents.pop(0) | |
| if content.type == "dir": | |
| structure["directories"].add(content.path) | |
| # Build directory tree structure | |
| structure["directory_tree"][os.path.dirname(content.path)].append(content.path) #correct way | |
| contents.extend(self.repo.get_contents(content.path)) | |
| else: | |
| ext = Path(content.path).suffix.lower() # Get lowercase extension | |
| # Only consider relevant files | |
| if ext in RELEVANT_EXTENSIONS: | |
| structure["files"][ext] += 1 # Increment count for the file type | |
| structure["total_size"] += content.size | |
| all_files.append(content) | |
| # Smart sampling of files | |
| if all_files: | |
| # Stratified sampling based on file types | |
| samples_per_type = min(5, max(1, len(all_files) // len(structure["files"]) if structure["files"] else 1)) # At least one sample | |
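| # Illustrative arithmetic (assumed numbers): 60 relevant files across 4 extensions -> min(5, max(1, 60 // 4)) = 5 samples per extension | |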
| for ext in structure["files"].keys(): | |
| ext_files = [f for f in all_files if f.path.endswith(ext)] # All files with this extension | |
| if ext_files: | |
| # Sort by size, and select a diverse sample | |
| ext_files.sort(key=lambda x: x.size) | |
| total_samples = min(samples_per_type, len(ext_files)) | |
| # Take samples evenly across the size range | |
| step = max(1, len(ext_files) // total_samples) | |
| for i in range(0, len(ext_files), step)[:total_samples]:# Select diverse files from list. | |
| structure["file_samples"].append({ | |
| "path": ext_files[i].path, | |
| "size": ext_files[i].size, | |
| "type": RELEVANT_EXTENSIONS.get(ext, "Unknown") # Get language | |
| }) | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing structure: {str(e)}") | |
| # Don't need to return defaults if 'structure' dict is initialized. | |
| return { | |
| "file_types": dict(structure["files"]), # Convert defaultdict to dict | |
| "directory_count": len(structure["directories"]), | |
| "total_size": structure["total_size"], | |
| "file_count": sum(structure["files"].values()), # Total relevant files | |
| "directory_tree": dict(structure["directory_tree"]), # convert | |
| "file_samples": structure["file_samples"] | |
| } | |
| async def _analyze_code_metrics(self) -> Dict[str, Any]: | |
| """Analyze code metrics for a sample of files, with parallel processing.""" | |
| metrics = { | |
| "complexity_metrics": defaultdict(list), # Cyclomatic/cognitive, nesting | |
| "duplication_metrics": defaultdict(list), | |
| "function_metrics": defaultdict(list), # From function analysis | |
| "comment_metrics": defaultdict(list), # Comment density | |
| "language_metrics": defaultdict(dict) # Aggregate by language | |
| } | |
| try: | |
| # Get all relevant files | |
| contents = self.repo.get_contents("") | |
| files_to_analyze = [] | |
| while contents: | |
| content = contents.pop(0) | |
| if content.type == "dir": | |
| contents.extend(self.repo.get_contents(content.path)) | |
| elif Path(content.path).suffix.lower() in RELEVANT_EXTENSIONS: # Check file. | |
| files_to_analyze.append(content) | |
| # Use parallel processing for file analysis | |
| with ThreadPoolExecutor(max_workers=min(10, max(1, len(files_to_analyze)))) as executor: # Cap workers at 10, use at least 1 | |
| futures = [] | |
| for file_content in files_to_analyze: | |
| futures.append(executor.submit(self.code_metrics._analyze_file_metrics, file_content)) # Analyze each file in a worker thread | |
| for future in futures: | |
| try: | |
| file_metrics = future.result() # Collect the results from the File Analysis | |
| if file_metrics: | |
| language = RELEVANT_EXTENSIONS.get(Path(file_metrics["path"]).suffix.lower(), "Unknown") | |
| # Aggregate metrics (by language, for example) | |
| # Correctly handle string keys for metrics | |
| for metric_type, value in file_metrics["metrics"].items(): | |
| if isinstance(value, (int, float)): | |
| metrics.setdefault(f"{metric_type}_metrics", defaultdict(list))[language].append(value) # store | |
| # Update language-specific metrics | |
| if language not in metrics["language_metrics"]: | |
| metrics["language_metrics"][language] = { | |
| "file_count": 0, | |
| "total_lines": 0, | |
| "total_complexity": 0 | |
| } | |
| lang_metrics = metrics["language_metrics"][language] #get value based on language. | |
| lang_metrics["file_count"] += 1 | |
| lang_metrics["total_lines"] += file_metrics["metrics"].get("total_lines", 0) # Total lines addition. | |
| lang_metrics["total_complexity"] += file_metrics["metrics"].get("complexity", 0) #complexity count | |
| except Exception as e: | |
| self.logger.error(f"Error processing file metrics: {str(e)}") | |
| return metrics # return aggregated | |
| except Exception as e: | |
| self.logger.error(f"Error analyzing code metrics: {str(e)}") | |
| return metrics # Return the initialized dict (possibly empty) | |
| async def _generate_visualizations(self) -> Dict[str, Any]: | |
| """Generate visualizations from the analyzed data (using matplotlib, seaborn, etc.).""" | |
| visualizations = {} | |
| from io import BytesIO # Needed by every chart branch below, so import once here | |
| try: | |
| # Language distribution pie chart | |
| if self.analysis_data.get("structure", {}).get("file_types"): | |
| fig, ax = plt.subplots() | |
| languages = self.analysis_data["structure"]["file_types"] | |
| plt.pie(languages.values(), labels=languages.keys(), autopct='%1.1f%%') | |
| plt.title("Language Distribution") | |
| buffer = BytesIO() # In-memory buffer for the rendered chart | |
| plt.savefig(buffer, format='png') | |
| visualizations["language_distribution"] = base64.b64encode(buffer.getvalue()).decode() | |
| plt.close() | |
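| # The base64-encoded PNG can be consumed downstream, e.g. (illustrative) as an HTML data URI: | |
| # f'<img src="data:image/png;base64,{visualizations["language_distribution"]}">' | |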
| # Code complexity heatmap (example using average complexity) | |
| if self.analysis_data.get("code_metrics", {}).get("complexity_metrics"): | |
| complexity_data = [] | |
| for lang, values in self.analysis_data["code_metrics"]["complexity_metrics"].items(): | |
| if values: # Ensure there are values to average | |
| complexity_data.append({ | |
| "language": lang, | |
| "avg_complexity": sum(values) / len(values) | |
| }) | |
| if complexity_data: # If Data present generate graph. | |
| df = pd.DataFrame(complexity_data) | |
| plt.figure(figsize=(10, 6)) | |
| sns.barplot(data=df, x="language", y="avg_complexity") | |
| plt.title("Average Code Complexity by Language") | |
| plt.xticks(rotation=45) # Rotate x-axis labels | |
| buffer = BytesIO() | |
| plt.savefig(buffer, format='png', bbox_inches='tight') # Improve layout | |
| visualizations["complexity_distribution"] = base64.b64encode(buffer.getvalue()).decode() | |
| plt.close() | |
| # Commit activity heatmap (example) | |
| if self.analysis_data.get("community", {}).get("commit_history"): #check whether community & commit-history metrics | |
| commit_data = self.analysis_data["community"]["commit_history"] | |
| df = pd.DataFrame(commit_data) | |
| df['date'] = pd.to_datetime(df['date']) # Parse dates for time-based grouping | |
| df = df.set_index('date') | |
| # Resample to daily commit counts | |
| daily = df.resample('D').size().rename('count').to_frame() | |
| plt.figure(figsize=(12, 4)) | |
| sns.heatmap(daily.pivot_table(index=daily.index.dayofweek, columns=daily.index.month, values='count', aggfunc='sum')) # Day-of-week vs. month heatmap | |
| plt.title("Commit Activity Heatmap") | |
| buffer = BytesIO() | |
| plt.savefig(buffer, format='png', bbox_inches='tight') | |
| visualizations["commit_heatmap"] = base64.b64encode(buffer.getvalue()).decode() | |
| plt.close() | |
| # Add more visualizations as needed (e.g., dependency graph, test coverage) | |
| except Exception as e: | |
| self.logger.error(f"Error generating visualizations: {str(e)}") | |
| return visualizations # Even if empty | |
| # --- Prompt Creation and LLM Interaction --- | |
| def create_enhanced_analysis_prompt(analysis_data: Dict[str, Any]) -> str: | |
| """Create an enhanced prompt for the LLM analysis.""" | |
| return f"""You are an expert code analyst with deep experience in software architecture, development practices, and team dynamics. | |
| Analyze the provided repository data and create a detailed, insightful analysis using the following sections: | |
| # Repository Analysis for {analysis_data['basic_info']['name']} | |
| ## 📊 Project Overview | |
| [Analyze the basic repository information, including: | |
| - Project purpose and description | |
| - Repository age and activity level | |
| - Key metrics (stars, forks, contributors) | |
| - Primary technologies used | |
| - Overall project health indicators] | |
| ## 🏗️ Architecture and Code Organization | |
| [Analyze the repository structure and code organization: | |
| - Directory structure and organization patterns | |
| - Code distribution across languages | |
| - File organization and modularity | |
| - Architectural patterns | |
| - Development standards and practices | |
| - Code complexity distribution | |
| - Potential architectural improvements] | |
| ## 💻 Code Quality and Metrics | |
| [Provide detailed analysis of code quality metrics: | |
| - Cyclomatic complexity trends | |
| - Code duplication patterns | |
| - Function length and complexity | |
| - Comment density and documentation quality | |
| - Test coverage and quality | |
| - Areas for potential improvement] | |
| ## 📦 Dependencies and Security | |
| [Analyze the project's dependencies: | |
| - Major dependencies and their versions | |
| - Outdated dependencies | |
| - Security vulnerabilities | |
| - Dependency graph complexity | |
| - Licensing considerations] | |
| ## 📚 Documentation Assessment | |
| [Evaluate the project's documentation: | |
| - README completeness and quality | |
| - API documentation coverage | |
| - Contributing guidelines | |
| - Code comments and inline documentation | |
| - Examples and tutorials | |
| - Documentation maintenance status] | |
| ## 🧪 Testing and Quality Assurance | |
| [Analyze testing practices: | |
| - Test coverage metrics | |
| - Testing patterns and approaches | |
| - CI/CD implementation | |
| - Quality assurance processes | |
| - Areas needing additional testing] | |
| ## 👥 Community Health and Engagement | |
| [Evaluate community aspects: | |
| - Contributor demographics and activity | |
| - Issue and PR response times | |
| - Community engagement metrics | |
| - Communication patterns | |
| - Governance model] | |
| ## 📈 Development Trends | |
| [Analyze development patterns: | |
| - Commit frequency and distribution | |
| - Code change patterns | |
| - Release cycle analysis | |
| - Development velocity | |
| - Team collaboration patterns] | |
| ## 🚀 Performance and Scalability | |
| [Assess technical characteristics: | |
| - Code performance indicators | |
| - Scalability considerations | |
| - Resource usage patterns | |
| - Technical debt indicators | |
| - Optimization opportunities] | |
| ## 💡 Key Insights | |
| [Summarize the most important findings: | |
| - Top 3 strengths | |
| - Top 3 areas for improvement | |
| - Unique characteristics | |
| - Notable patterns or practices | |
| - Risk factors] | |
| ## 🎯 Recommendations | |
| [Provide actionable recommendations: | |
| - Immediate improvement opportunities | |
| - Long-term strategic suggestions | |
| - Specific tools or practices to consider | |
| - Priority areas for focus | |
| - Resource allocation suggestions] | |
| Please analyze the following repository data thoroughly and provide detailed insights for each section: | |
| {json.dumps(analysis_data, indent=2)} | |
| """ | |
| async def analyze_repository(repo_url: str, github_token: str, gemini_key: str, progress=gr.Progress()) -> Tuple[str, str, str]: | |
| """Analyze repository and generate LLM summary (async, with progress).""" | |
| try: | |
| # Re-initialize tokens each time | |
| initialize_tokens(github_token, gemini_key) # Ensure fresh tokens | |
| progress(0, desc="Initializing repository analysis...") | |
| analyzer = RepositoryAnalyzer(repo_url, github_token) | |
| progress(0.3, desc="Analyzing repository structure and patterns...") | |
| analysis_data = await analyzer.analyze() # Await the analysis | |
| progress(0.7, desc="Generating comprehensive analysis...") | |
| # Use the more powerful Gemini 1.5 Pro model | |
| model = genai.GenerativeModel( | |
| model_name="gemini-1.5-pro", # Use 1.5 Pro | |
| generation_config={ | |
| "temperature": 0.7, | |
| "top_p": 0.95, # Use nucleus sampling | |
| "top_k": 40, | |
| "max_output_tokens": 8192, # Increased token limit | |
| } | |
| ) | |
| prompt = create_enhanced_analysis_prompt(analysis_data) # Use a better, sectioned prompt. | |
| # Send the prompt in a single request (no streaming) | |
| chat = model.start_chat(history=[]) # Start a fresh chat session | |
| response = chat.send_message(prompt) | |
| progress(0.9, desc="Saving analysis results...") | |
| # Save analysis data to a temporary file (for follow-up Q&A) | |
| with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f: | |
| json.dump(analysis_data, f, indent=2) | |
| analysis_file = f.name | |
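| # This temp file path is stored in the Gradio State below and re-read by ask_question for follow-up Q&A | |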
| progress(1.0, desc="Analysis complete!") | |
| return response.text, analysis_file, "✅ Analysis completed successfully!" | |
| except Exception as e: | |
| error_message = f"β Error analyzing repository: {str(e)}" | |
| return "", "", error_message # Return empty strings for Markdown and file | |
| async def ask_question(question: str, analysis_file: str, chat_history: List[Tuple[str, str]]) -> List[Tuple[str, str]]: | |
| """Process a follow-up question about the analysis with enhanced context.""" | |
| if not analysis_file: | |
| return chat_history + [(question, "Please analyze a repository first before asking questions.")] | |
| try: | |
| with open(analysis_file, 'r') as f: | |
| analysis_data = json.load(f) | |
| # Initialize chat with system prompt and history | |
| model = genai.GenerativeModel( | |
| model_name="gemini-1.5-pro", # Use 1.5 Pro | |
| generation_config={ | |
| "temperature": 0.7, | |
| "top_p": 0.8, # More focused sampling | |
| "top_k": 40, | |
| "max_output_tokens": 4096, # Increased token limit | |
| } | |
| ) | |
| # Build the context | |
| context = """You are an expert code analyst helping users understand repository analysis results. | |
| Provide detailed, technical, and actionable insights based on the analysis data. When appropriate, | |
| reference specific metrics and patterns from the analysis. If making recommendations, be specific | |
| and explain the reasoning behind them. | |
| Repository Analysis Data: | |
| """ | |
| context += json.dumps(analysis_data, indent=2) + "\n\n" | |
| if chat_history: # Include recent conversation context, if any | |
| context += "Previous conversation:\n" | |
| for user_msg, assistant_msg in chat_history[-3:]: # Only include last 3 exchanges for relevance. | |
| context += f"User: {user_msg}\nAssistant: {assistant_msg}\n" | |
| prompt = f"""{context} | |
| User's Question: {question} | |
| Please provide a detailed analysis that: | |
| 1. Directly addresses the user's question | |
| 2. References relevant metrics and data from the analysis | |
| 3. Provides context and explanations for technical concepts | |
| 4. Suggests actionable next steps or recommendations when appropriate | |
| 5. Maintains technical accuracy while being clear and understandable | |
| Your response:""" | |
| chat = model.start_chat(history=[]) # Start a new chat | |
| response = chat.send_message(prompt) | |
| return chat_history + [(question, response.text)] # Store new | |
| except Exception as e: | |
| error_message = f"Error processing question: {str(e)}" | |
| return chat_history + [(question, error_message)] | |
| # --- Gradio Interface --- | |
| def create_interface(): | |
| with gr.Blocks(theme=gr.themes.Soft()) as app: # Use a theme | |
| gr.Markdown(""" | |
| # 🔍 GitHub Repository Analyzer (Colab Version) | |
| Analyze any public GitHub repository using AI. | |
| """) | |
| # API tokens | |
| with gr.Row(): | |
| github_token = gr.Textbox( | |
| label="GitHub Token", | |
| type="password", | |
| placeholder="Enter your GitHub token" | |
| ) | |
| gemini_key = gr.Textbox( | |
| label="Gemini API Key", | |
| type="password", | |
| placeholder="Enter your Gemini API key" | |
| ) | |
| init_btn = gr.Button("Initialize Tokens", variant="secondary") | |
| # Repository URL and analysis button | |
| with gr.Row(): | |
| repo_url = gr.Textbox( | |
| label="GitHub Repository URL", | |
| placeholder="https://github.com/owner/repo", | |
| scale=4 # Larger input box | |
| ) | |
| analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1) | |
| # Status message | |
| status_msg = gr.Markdown("") # Display Error Status. | |
| # Analysis results | |
| with gr.Tabs(): | |
| with gr.Tab("π Analysis Report"): # report Analysis. | |
| summary = gr.Markdown("") # output report. | |
| with gr.Tab("π Q&A"): # Improved label | |
| chatbot = gr.Chatbot( | |
| [], | |
| label="Ask questions about the analysis", | |
| height=400 | |
| ) | |
| with gr.Row(): | |
| question = gr.Textbox( | |
| label="Your Question", | |
| placeholder="Ask about specific aspects of the analysis...", | |
| scale=4 | |
| ) | |
| ask_btn = gr.Button("Ask", scale=1) | |
| clear_btn = gr.Button("Clear", scale=1) | |
| # Hidden state to store the analysis data file path | |
| analysis_file = gr.State("") | |
| async def safe_analyze(repo_url: str, github_token: str, gemini_key: str): | |
| """Wrapper function to handle analysis and errors gracefully.""" | |
| try: | |
| if not repo_url: | |
| return None, None, "β Please enter a GitHub repository URL" | |
| if not github_token or not gemini_key: | |
| return None, None, "β Please initialize tokens first" | |
| if not re.match(r'https?://github\.com/[\w.-]+/[\w.-]+/?$', repo_url): # Allow dots in owner/repo names | |
| return None, None, "❌ Invalid GitHub repository URL format" | |
| summary, analysis_file, status = await analyze_repository(repo_url, github_token, gemini_key) | |
| return summary, analysis_file, status | |
| except Exception as e: | |
| return None, None, f"β Analysis failed: {str(e)}" | |
| # Event handlers | |
| init_btn.click( | |
| initialize_tokens, | |
| inputs=[github_token, gemini_key], | |
| outputs=status_msg | |
| ) | |
| analyze_btn.click( | |
| fn=lambda: "β³ Analysis in progress...", # Immediate feedback | |
| inputs=None, | |
| outputs=status_msg, | |
| queue=False # Don't queue this click | |
| ).then( | |
| safe_analyze, # Call the wrapper | |
| inputs=[repo_url, github_token, gemini_key], | |
| outputs=[summary, analysis_file, status_msg] | |
| ) | |
| ask_btn.click( | |
| ask_question, | |
| inputs=[question, analysis_file, chatbot], # Include chatbot history | |
| outputs=[chatbot] | |
| ).then( | |
| lambda: "", # Clear the question box after asking | |
| None, | |
| question, | |
| queue=False | |
| ) | |
| clear_btn.click( | |
| lambda: ([], ""), # Clear chatbot and question | |
| outputs=[chatbot, question] | |
| ) | |
| return app | |
| # Run the interface | |
| if __name__ == "__main__": | |
| app = create_interface() | |
| app.launch(debug=True, share=True) |