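"""GitHub implementations of the OpenHands resolver issue handler interface."""
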
from typing import Any

import httpx

from openhands.core.logger import openhands_logger as logger
from openhands.resolver.interfaces.issue import (
    Issue,
    IssueHandlerInterface,
    ReviewThread,
)
from openhands.resolver.utils import extract_issue_references

class GithubIssueHandler(IssueHandlerInterface):
    def __init__(
        self,
        owner: str,
        repo: str,
        token: str,
        username: str | None = None,
        base_domain: str = 'github.com',
    ):
        """Initialize a GitHub issue handler.

        Args:
            owner: The owner of the repository
            repo: The name of the repository
            token: The GitHub personal access token
            username: Optional GitHub username
            base_domain: The domain for GitHub Enterprise (default: "github.com")
        """
        self.owner = owner
        self.repo = repo
        self.token = token
        self.username = username
        self.base_domain = base_domain
        self.base_url = self.get_base_url()
        self.download_url = self.get_download_url()
        self.clone_url = self.get_clone_url()
        self.headers = self.get_headers()

    def set_owner(self, owner: str) -> None:
        self.owner = owner

    def get_headers(self) -> dict[str, str]:
        return {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.v3+json',
        }

    def get_base_url(self) -> str:
        if self.base_domain == 'github.com':
            return f'https://api.github.com/repos/{self.owner}/{self.repo}'
        else:
            return f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}'

    def get_authorize_url(self) -> str:
        return f'https://{self.username}:{self.token}@{self.base_domain}/'

    def get_branch_url(self, branch_name: str) -> str:
        return self.get_base_url() + f'/branches/{branch_name}'

    def get_download_url(self) -> str:
        return f'{self.base_url}/issues'

    def get_clone_url(self) -> str:
        username_and_token = (
            f'{self.username}:{self.token}'
            if self.username
            else f'x-auth-token:{self.token}'
        )
        return f'https://{username_and_token}@{self.base_domain}/{self.owner}/{self.repo}.git'

    def get_graphql_url(self) -> str:
        if self.base_domain == 'github.com':
            return 'https://api.github.com/graphql'
        else:
            return f'https://{self.base_domain}/api/graphql'

    def get_compare_url(self, branch_name: str) -> str:
        return f'https://{self.base_domain}/{self.owner}/{self.repo}/compare/{branch_name}?expand=1'

    def get_converted_issues(
        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
    ) -> list[Issue]:
        """Download issues from GitHub.

        Args:
            issue_numbers: The numbers of the issues to download
            comment_id: The ID of a single comment, if provided, otherwise all comments

        Returns:
            List of GitHub issues.
        """
        if not issue_numbers:
            raise ValueError('Unspecified issue number')

        all_issues = self.download_issues()
        logger.info(f'Limiting resolving to issues {issue_numbers}.')
        all_issues = [
            issue
            for issue in all_issues
            if issue['number'] in issue_numbers and 'pull_request' not in issue
        ]

        if len(issue_numbers) == 1 and not all_issues:
            raise ValueError(f'Issue {issue_numbers[0]} not found')

        converted_issues = []
        for issue in all_issues:
            # Check for required fields (number and title)
            if any([issue.get(key) is None for key in ['number', 'title']]):
                logger.warning(
                    f'Skipping issue {issue} as it is missing number or title.'
                )
                continue

            # Handle empty body by using empty string
            if issue.get('body') is None:
                issue['body'] = ''

            # Get issue thread comments
            thread_comments = self.get_issue_comments(
                issue['number'], comment_id=comment_id
            )

            # Convert empty lists to None for optional fields
            issue_details = Issue(
                owner=self.owner,
                repo=self.repo,
                number=issue['number'],
                title=issue['title'],
                body=issue['body'],
                thread_comments=thread_comments,
                review_comments=None,  # Initialize review comments as None for regular issues
            )

            converted_issues.append(issue_details)

        return converted_issues

    def download_issues(self) -> list[Any]:
        params: dict[str, int | str] = {'state': 'open', 'per_page': 100, 'page': 1}
        all_issues = []

        while True:
            response = httpx.get(self.download_url, headers=self.headers, params=params)
            response.raise_for_status()
            issues = response.json()

            if not issues:
                break

            if not isinstance(issues, list) or any(
                [not isinstance(issue, dict) for issue in issues]
            ):
                raise ValueError('Expected list of dictionaries from GitHub API.')

            all_issues.extend(issues)
            assert isinstance(params['page'], int)
            params['page'] += 1

        return all_issues

    def get_issue_comments(
        self, issue_number: int, comment_id: int | None = None
    ) -> list[str] | None:
        """Download comments for a specific issue from GitHub."""
        url = f'{self.download_url}/{issue_number}/comments'
        params = {'per_page': 100, 'page': 1}
        all_comments = []

        while True:
            response = httpx.get(url, headers=self.headers, params=params)
            response.raise_for_status()
            comments = response.json()

            if not comments:
                break

            if comment_id:
                matching_comment = next(
                    (
                        comment['body']
                        for comment in comments
                        if comment['id'] == comment_id
                    ),
                    None,
                )
                if matching_comment:
                    return [matching_comment]
            else:
                all_comments.extend([comment['body'] for comment in comments])

            params['page'] += 1

        return all_comments if all_comments else None

    def branch_exists(self, branch_name: str) -> bool:
        logger.info(f'Checking if branch {branch_name} exists...')
        response = httpx.get(
            f'{self.base_url}/branches/{branch_name}', headers=self.headers
        )
        exists = response.status_code == 200
        logger.info(f'Branch {branch_name} exists: {exists}')
        return exists

    def get_branch_name(self, base_branch_name: str) -> str:
        branch_name = base_branch_name
        attempt = 1
        while self.branch_exists(branch_name):
            attempt += 1
            branch_name = f'{base_branch_name}-try{attempt}'
        return branch_name

    def reply_to_comment(self, pr_number: int, comment_id: str, reply: str) -> None:
        # Opting for graphql as REST API doesn't allow reply to replies in comment threads
        query = """
            mutation($body: String!, $pullRequestReviewThreadId: ID!) {
                addPullRequestReviewThreadReply(input: { body: $body, pullRequestReviewThreadId: $pullRequestReviewThreadId }) {
                    comment {
                        id
                        body
                        createdAt
                    }
                }
            }
        """

        comment_reply = f'Openhands fix success summary\n\n\n{reply}'
        variables = {'body': comment_reply, 'pullRequestReviewThreadId': comment_id}
        url = self.get_graphql_url()
        headers = {
            'Authorization': f'Bearer {self.token}',
            'Content-Type': 'application/json',
        }

        response = httpx.post(
            url, json={'query': query, 'variables': variables}, headers=headers
        )
        response.raise_for_status()

    def get_pull_url(self, pr_number: int) -> str:
        return f'https://{self.base_domain}/{self.owner}/{self.repo}/pull/{pr_number}'

    def get_default_branch_name(self) -> str:
        response = httpx.get(f'{self.base_url}', headers=self.headers)
        response.raise_for_status()
        data = response.json()
        return str(data['default_branch'])

    def create_pull_request(self, data: dict[str, Any] | None = None) -> dict[str, Any]:
        if data is None:
            data = {}
        response = httpx.post(f'{self.base_url}/pulls', headers=self.headers, json=data)
        if response.status_code == 403:
            raise RuntimeError(
                'Failed to create pull request due to missing permissions. '
                'Make sure that the provided token has push permissions for the repository.'
            )
        response.raise_for_status()
        pr_data = response.json()
        return dict(pr_data)

    def request_reviewers(self, reviewer: str, pr_number: int) -> None:
        review_data = {'reviewers': [reviewer]}
        review_response = httpx.post(
            f'{self.base_url}/pulls/{pr_number}/requested_reviewers',
            headers=self.headers,
            json=review_data,
        )
        if review_response.status_code != 201:
            logger.warning(
                f'Failed to request review from {reviewer}: {review_response.text}'
            )

    def send_comment_msg(self, issue_number: int, msg: str) -> None:
        """Send a comment message to a GitHub issue or pull request.

        Args:
            issue_number: The issue or pull request number
            msg: The message content to post as a comment
        """
        # Post a comment on the PR
        comment_url = f'{self.base_url}/issues/{issue_number}/comments'
        comment_data = {'body': msg}
        comment_response = httpx.post(
            comment_url, headers=self.headers, json=comment_data
        )
        if comment_response.status_code != 201:
            logger.error(
                f'Failed to post comment: {comment_response.status_code} {comment_response.text}'
            )
        else:
            logger.info(f'Comment added to the PR: {msg}')

    def get_context_from_external_issues_references(
        self,
        closing_issues: list[str],
        closing_issue_numbers: list[int],
        issue_body: str,
        review_comments: list[str] | None,
        review_threads: list[ReviewThread],
        thread_comments: list[str] | None,
    ) -> list[str]:
        return []
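

# A minimal usage sketch of GithubIssueHandler (illustrative only; the owner, repo,
# token, and issue number below are placeholder values, not part of the original module).
def _example_fetch_issue() -> None:
    handler = GithubIssueHandler(owner='octocat', repo='hello-world', token='ghp_xxx')
    # Fetch a single open issue together with its thread comments
    issues = handler.get_converted_issues(issue_numbers=[1])
    for issue in issues:
        logger.info(f'Issue #{issue.number}: {issue.title}')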


class GithubPRHandler(GithubIssueHandler):
    def __init__(
        self,
        owner: str,
        repo: str,
        token: str,
        username: str | None = None,
        base_domain: str = 'github.com',
    ):
        """Initialize a GitHub PR handler.

        Args:
            owner: The owner of the repository
            repo: The name of the repository
            token: The GitHub personal access token
            username: Optional GitHub username
            base_domain: The domain for GitHub Enterprise (default: "github.com")
        """
        super().__init__(owner, repo, token, username, base_domain)
        if self.base_domain == 'github.com':
            self.download_url = (
                f'https://api.github.com/repos/{self.owner}/{self.repo}/pulls'
            )
        else:
            self.download_url = f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}/pulls'

    def download_pr_metadata(
        self, pull_number: int, comment_id: int | None = None
    ) -> tuple[list[str], list[int], list[str], list[ReviewThread], list[str]]:
        """Run a GraphQL query against the GitHub API for PR information.

        Retrieves information about:
            1. unresolved review comments
            2. referenced issues the pull request would close

        Args:
            pull_number: The number of the pull request to query.
            comment_id: Optional ID of a specific comment to focus on.

        Returns:
            A tuple of (closing issue bodies, closing issue numbers, review bodies,
            unresolved review threads, thread IDs).
        """
        # Using graphql as REST API doesn't indicate resolved status for review comments
        # TODO: grabbing the first 10 issues, 100 review threads, and 100 comments; add pagination to retrieve all
        query = """
            query($owner: String!, $repo: String!, $pr: Int!) {
                repository(owner: $owner, name: $repo) {
                    pullRequest(number: $pr) {
                        closingIssuesReferences(first: 10) {
                            edges {
                                node {
                                    body
                                    number
                                }
                            }
                        }
                        url
                        reviews(first: 100) {
                            nodes {
                                body
                                state
                                fullDatabaseId
                            }
                        }
                        reviewThreads(first: 100) {
                            edges{
                                node{
                                    id
                                    isResolved
                                    comments(first: 100) {
                                        totalCount
                                        nodes {
                                            body
                                            path
                                            fullDatabaseId
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        """

        variables = {'owner': self.owner, 'repo': self.repo, 'pr': pull_number}
        url = self.get_graphql_url()
        headers = {
            'Authorization': f'Bearer {self.token}',
            'Content-Type': 'application/json',
        }

        response = httpx.post(
            url, json={'query': query, 'variables': variables}, headers=headers
        )
        response.raise_for_status()
        response_json = response.json()

        # Parse the response to get closing issue references and unresolved review comments
        pr_data = (
            response_json.get('data', {}).get('repository', {}).get('pullRequest', {})
        )

        # Get closing issues
        closing_issues = pr_data.get('closingIssuesReferences', {}).get('edges', [])
        closing_issues_bodies = [issue['node']['body'] for issue in closing_issues]
        closing_issue_numbers = [
            issue['node']['number'] for issue in closing_issues
        ]  # Extract issue numbers

        # Get review comments
        reviews = pr_data.get('reviews', {}).get('nodes', [])
        if comment_id is not None:
            reviews = [
                review
                for review in reviews
                if int(review['fullDatabaseId']) == comment_id
            ]
        review_bodies = [review['body'] for review in reviews]

        # Get unresolved review threads
        review_threads = []
        thread_ids = []  # Store thread IDs; agent replies to the thread
        raw_review_threads = pr_data.get('reviewThreads', {}).get('edges', [])
        for thread in raw_review_threads:
            node = thread.get('node', {})
            if not node.get(
                'isResolved', True
            ):  # Check if the review thread is unresolved
                id = node.get('id')
                thread_contains_comment_id = False
                my_review_threads = node.get('comments', {}).get('nodes', [])
                message = ''
                files = []
                for i, review_thread in enumerate(my_review_threads):
                    if (
                        comment_id is not None
                        and int(review_thread['fullDatabaseId']) == comment_id
                    ):
                        thread_contains_comment_id = True

                    if (
                        i == len(my_review_threads) - 1
                    ):  # Check if it's the last comment in the thread
                        if len(my_review_threads) > 1:
                            message += '---\n'  # Add "---" before the last message if there's more than one comment
                        message += 'latest feedback:\n' + review_thread['body'] + '\n'
                    else:
                        message += (
                            review_thread['body'] + '\n'
                        )  # Add each comment on its own line

                    file = review_thread.get('path')
                    if file and file not in files:
                        files.append(file)

                if comment_id is None or thread_contains_comment_id:
                    unresolved_thread = ReviewThread(comment=message, files=files)
                    review_threads.append(unresolved_thread)
                    thread_ids.append(id)

        return (
            closing_issues_bodies,
            closing_issue_numbers,
            review_bodies,
            review_threads,
            thread_ids,
        )

    # Override processing of downloaded issues
    def get_pr_comments(
        self, pr_number: int, comment_id: int | None = None
    ) -> list[str] | None:
        """Download comments for a specific pull request from GitHub."""
        if self.base_domain == 'github.com':
            url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{pr_number}/comments'
        else:
            url = f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}/issues/{pr_number}/comments'
        headers = {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.v3+json',
        }
        params = {'per_page': 100, 'page': 1}
        all_comments = []

        while True:
            response = httpx.get(url, headers=headers, params=params)
            response.raise_for_status()
            comments = response.json()

            if not comments:
                break

            if comment_id is not None:
                matching_comment = next(
                    (
                        comment['body']
                        for comment in comments
                        if comment['id'] == comment_id
                    ),
                    None,
                )
                if matching_comment:
                    return [matching_comment]
            else:
                all_comments.extend([comment['body'] for comment in comments])

            params['page'] += 1

        return all_comments if all_comments else None

    def get_context_from_external_issues_references(
        self,
        closing_issues: list[str],
        closing_issue_numbers: list[int],
        issue_body: str,
        review_comments: list[str] | None,
        review_threads: list[ReviewThread],
        thread_comments: list[str] | None,
    ) -> list[str]:
        new_issue_references = []

        if issue_body:
            new_issue_references.extend(extract_issue_references(issue_body))

        if review_comments:
            for comment in review_comments:
                new_issue_references.extend(extract_issue_references(comment))

        if review_threads:
            for review_thread in review_threads:
                new_issue_references.extend(
                    extract_issue_references(review_thread.comment)
                )

        if thread_comments:
            for thread_comment in thread_comments:
                new_issue_references.extend(extract_issue_references(thread_comment))

        non_duplicate_references = set(new_issue_references)
        unique_issue_references = non_duplicate_references.difference(
            closing_issue_numbers
        )

        for issue_number in unique_issue_references:
            try:
                if self.base_domain == 'github.com':
                    url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
                else:
                    url = f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}/issues/{issue_number}'
                headers = {
                    'Authorization': f'Bearer {self.token}',
                    'Accept': 'application/vnd.github.v3+json',
                }
                response = httpx.get(url, headers=headers)
                response.raise_for_status()
                issue_data = response.json()
                issue_body = issue_data.get('body', '')
                if issue_body:
                    closing_issues.append(issue_body)
            except httpx.HTTPError as e:
                logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')

        return closing_issues

    def get_converted_issues(
        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
    ) -> list[Issue]:
        if not issue_numbers:
            raise ValueError('Unspecified issue numbers')

        all_issues = self.download_issues()
        logger.info(f'Limiting resolving to issues {issue_numbers}.')
        all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]

        converted_issues = []
        for issue in all_issues:
            # For PRs, body can be None
            if any([issue.get(key) is None for key in ['number', 'title']]):
                logger.warning(f'Skipping issue {issue} as it is missing number or title.')
                continue

            # Handle None body for PRs
            body = issue.get('body') if issue.get('body') is not None else ''
            (
                closing_issues,
                closing_issues_numbers,
                review_comments,
                review_threads,
                thread_ids,
            ) = self.download_pr_metadata(issue['number'], comment_id=comment_id)
            head_branch = issue['head']['ref']

            # Get PR thread comments
            thread_comments = self.get_pr_comments(
                issue['number'], comment_id=comment_id
            )

            closing_issues = self.get_context_from_external_issues_references(
                closing_issues,
                closing_issues_numbers,
                body,
                review_comments,
                review_threads,
                thread_comments,
            )

            issue_details = Issue(
                owner=self.owner,
                repo=self.repo,
                number=issue['number'],
                title=issue['title'],
                body=body,
                closing_issues=closing_issues,
                review_comments=review_comments,
                review_threads=review_threads,
                thread_ids=thread_ids,
                head_branch=head_branch,
                thread_comments=thread_comments,
            )

            converted_issues.append(issue_details)

        return converted_issues
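

# A minimal usage sketch of GithubPRHandler (illustrative only; the owner, repo,
# token, and PR number below are placeholder values, not part of the original module).
def _example_fetch_pr() -> None:
    pr_handler = GithubPRHandler(owner='octocat', repo='hello-world', token='ghp_xxx')
    # Fetch a single PR with its closing issues, review comments, and unresolved review threads
    prs = pr_handler.get_converted_issues(issue_numbers=[42])
    for pr in prs:
        logger.info(
            f'PR #{pr.number} on branch {pr.head_branch} has '
            f'{len(pr.review_threads or [])} unresolved review thread(s).'
        )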