Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import re | |
| import json | |
| import os | |
| from pathlib import Path | |
| import tempfile | |
| import base64 | |
def parse_repo_url(url):
    """Parse a GitHub repository URL into owner, repo, ref and path.

    Accepted forms are ``https://github.com/owner/repo`` and
    ``https://github.com/owner/repo/tree/<ref>[/<path>]``. Surrounding
    whitespace, trailing slashes and a clone-style ``.git`` suffix on the
    repository name are tolerated.

    Args:
        url: The repository URL as typed by the user.

    Returns:
        A dict with the keys ``owner``, ``repo``, ``ref_from_url`` and
        ``path_from_url``; the last two are ``None`` when the URL has no
        ``/tree/...`` part.

    Raises:
        ValueError: If the URL does not match one of the accepted forms.
    """
    # Pasted URLs often carry stray whitespace or a trailing slash.
    url = url.strip().rstrip('/')
    url_pattern = r'^https://github\.com/([^/]+)/([^/]+)(/tree/([^/]+)(/(.+))?)?$'
    match = re.match(url_pattern, url)
    if not match:
        raise ValueError('Invalid GitHub repository URL. Please ensure the URL is in the correct format: '
                         'https://github.com/owner/repo or https://github.com/owner/repo/tree/branch/path')

    repo = match.group(2)
    # Clone URLs end in ".git"; drop it so the API is asked for the bare name.
    git_suffix = '.git'
    if repo.endswith(git_suffix) and len(repo) > len(git_suffix):
        repo = repo[:-len(git_suffix)]

    return {
        'owner': match.group(1),
        'repo': repo,
        'ref_from_url': match.group(4),
        'path_from_url': match.group(6),
    }
def fetch_repo_sha(owner, repo, ref, path, token=None):
    """Fetch the git SHA of ``path`` at ``ref`` via the GitHub contents API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        ref: Branch, tag or commit to look at; falsy means GitHub's default.
        path: Path inside the repository; falsy means the repository root.
        token: Optional access token sent in the ``Authorization`` header.

    Returns:
        The ``sha`` field of the API response.

    Raises:
        ValueError: On rate limiting, a 404, any other non-OK status, or a
            response that carries no ``sha``.
        requests.RequestException: On network failures, including timeouts.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # Percent-encode the path (slashes are kept) so names containing spaces,
    # '#' or '?' do not corrupt the URL.
    encoded_path = quote(path) if path else ''
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{encoded_path}"
    # Let requests build the query string so the ref is encoded as well.
    params = {'ref': ref} if ref else None

    headers = {'Accept': 'application/vnd.github.object+json'}
    if token:
        headers['Authorization'] = f"token {token}"

    # Without a timeout a stalled connection would block the UI forever.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    if not response.ok:
        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
        if response.status_code == 404:
            raise ValueError('Repository, branch, or path not found. Please check that the URL, branch/tag, and path are correct.')
        raise ValueError(f'Failed to fetch repository SHA. Status: {response.status_code}. Please check your input.')

    data = response.json()
    sha = data.get('sha') if isinstance(data, dict) else None
    if not sha:
        # Fail here with a clear message rather than requesting ".../git/trees/None" later.
        raise ValueError('Failed to fetch repository SHA. The GitHub response did not include a SHA.')
    return sha
def fetch_repo_tree(owner, repo, sha, token=None):
    """Fetch the recursive git tree rooted at ``sha`` from the GitHub API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        sha: SHA of the tree to list.
        token: Optional access token sent in the ``Authorization`` header.

    Returns:
        The list stored under ``tree`` in the API response, or ``[]`` when
        that key is absent.

    Raises:
        ValueError: On rate limiting or any other non-OK status.
        requests.RequestException: On network failures, including timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{sha}?recursive=1"
    headers = {'Accept': 'application/vnd.github+json'}
    if token:
        headers['Authorization'] = f"token {token}"

    # Without a timeout a stalled connection would block the UI forever.
    response = requests.get(url, headers=headers, timeout=30)
    if not response.ok:
        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
        raise ValueError(f'Failed to fetch repository tree. Status: {response.status_code}. Please check your input.')

    # NOTE(review): very large repositories may come back truncated; the
    # response is not checked for that here -- confirm against the API docs.
    data = response.json()
    return data.get('tree', [])
def sort_contents(contents):
    """Return a new list of items ordered by their ``path``.

    Paths are compared segment by segment (split on ``/``) rather than as
    plain strings. Items without a ``path`` key are treated as having an
    empty path. The input list is left untouched.
    """
    def path_key(entry):
        # Empty segments (for example from a leading '/') compare as '.'.
        return [segment if segment else '.' for segment in entry.get('path', '').split('/')]

    ordered = list(contents)
    ordered.sort(key=path_key)
    return ordered
def create_directory_structure(tree):
    """Turn a flat list of tree entries into a nested dict keyed by path segment.

    Only entries whose ``type`` is ``'blob'`` are used. Every path is rooted
    under a top-level ``'./'`` key. Directories become nested dicts and each
    file leaf holds the original entry dict.
    """
    blobs = sort_contents([entry for entry in tree if entry.get('type') == 'blob'])

    root = {}
    for blob in blobs:
        path = blob.get('path', '')
        rooted = path if path.startswith('/') else '/' + path
        # The empty segment before the leading '/' (and any other empty
        # segment) is shown as './'.
        segments = [segment or './' for segment in rooted.split('/')]
        *parents, leaf = segments

        node = root
        for directory in parents:
            node = node.setdefault(directory, {})
        # An existing leaf is kept; the first entry for a path wins.
        node.setdefault(leaf, blob)
    return root
def build_directory_html(structure, prefix=''):
    """Render a nested directory structure as an HTML list with checkboxes.

    Args:
        structure: Mapping of name -> child. A child dict whose ``type`` is
            ``'blob'`` is a file entry; any other dict is a sub-directory
            mapping. Non-dict children are skipped.
        prefix: Path of the directory being rendered, extended on each
            recursive call.

    Returns:
        A ``<ul class="directory">`` fragment. Files whose path ends with a
        common source extension are pre-checked, and each file checkbox
        carries a JSON ``{"url", "path"}`` payload in its ``value``.
    """
    # Local import keeps this function self-contained.
    from html import escape

    common_extensions = ('.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx')

    pieces = ['<ul class="directory">']
    for name, item in sorted(structure.items(), key=lambda entry: entry[0]):
        if not isinstance(item, dict):
            continue
        # Names come from the repository, so escape them before embedding.
        label = escape(str(name))

        # File entries are dicts too, so they must be told apart by their
        # 'type' field; a directory mapping never maps 'type' to 'blob'.
        if item.get('type') == 'blob':
            file_path = item.get('path') or ''
            file_url = item.get('url') or ''
            checked = 'checked' if file_path.lower().endswith(common_extensions) else ''
            # The JSON contains double quotes, so escape it for the attribute.
            payload = escape(json.dumps({"url": file_url, "path": file_path}), quote=True)
            pieces.append(f'<li><input type="checkbox" {checked} value="{payload}" class="file-checkbox"> ')
            pieces.append(f'<span class="file">{label}</span></li>')
        else:
            child_prefix = prefix + '/' + name if prefix else name
            pieces.append(f'<li><input type="checkbox" class="directory-checkbox"> <span class="folder">{label}</span>')
            pieces.append(build_directory_html(item, child_prefix))
            pieces.append('</li>')
    pieces.append('</ul>')
    return ''.join(pieces)
# Script appended to the directory HTML: toggling a directory checkbox
# toggles every checkbox nested inside the same list item.
_DIRECTORY_CHECKBOX_JS = """
<script>
// Check/uncheck all child checkboxes when directory checkbox is changed
document.querySelectorAll('.directory-checkbox').forEach(checkbox => {
checkbox.addEventListener('change', function() {
const parent = this.parentElement;
const childCheckboxes = parent.querySelectorAll('input[type="checkbox"]');
childCheckboxes.forEach(childBox => {
childBox.checked = this.checked;
});
});
});
</script>
"""


def fetch_repo_contents(repo_url, ref, path, token):
    """Fetch a repository tree and render it as an HTML directory listing.

    Explicit ``ref`` and ``path`` arguments take precedence over whatever is
    embedded in ``repo_url``.

    Returns:
        A 3-tuple ``(html, error_message, tree)``. On success the error
        message is ``""`` and ``tree`` is the raw tree list. On any failure
        ``html`` is ``""``, the message explains the problem, and ``tree``
        is ``None``.
    """
    try:
        parsed = parse_repo_url(repo_url)
        owner = parsed.get('owner')
        repo = parsed.get('repo')
        final_ref = ref or parsed.get('ref_from_url')
        final_path = path or parsed.get('path_from_url') or ''

        sha = fetch_repo_sha(owner, repo, final_ref, final_path, token)
        tree = fetch_repo_tree(owner, repo, sha, token)
        html_structure = build_directory_html(create_directory_structure(tree))
        return html_structure + _DIRECTORY_CHECKBOX_JS, "", tree
    except Exception as e:
        # UI boundary: every failure is reported as a message, never raised.
        return "", f"Error fetching repository contents: {e!s}\n\nPlease ensure:\n1. The repository URL is correct and accessible.\n2. You have the necessary permissions.\n3. If it's a private repository, you've provided a valid access token.\n4. The specified branch/tag and path exist.", None
def fetch_selected_files(selected_files_json, token):
    """Download the selected files and return them as one formatted text.

    Args:
        selected_files_json: JSON array of objects with ``url`` and ``path``.
        token: Optional access token sent in the ``Authorization`` header.

    Returns:
        The output of ``format_repo_contents`` on success, otherwise a
        string starting with ``"Error"``. This function does not raise;
        every failure is turned into such a message.
    """
    try:
        selected_files = json.loads(selected_files_json)
        if not selected_files:
            return "Error: No files selected. Please select at least one file from the directory structure."

        # The raw media type asks GitHub for the file body instead of JSON metadata.
        headers = {'Accept': 'application/vnd.github.v3.raw'}
        if token:
            headers['Authorization'] = f"token {token}"

        file_contents = []
        for file_info in selected_files:
            url = file_info.get('url')
            path = file_info.get('path')
            # Without a timeout one stalled download would block the whole request.
            response = requests.get(url, headers=headers, timeout=30)
            if not response.ok:
                if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
                    raise ValueError(f"GitHub API rate limit exceeded while fetching {path}. Please try again later or provide a valid access token.")
                raise ValueError(f"Failed to fetch content for {path}. Status: {response.status_code}. Please check your permissions.")
            file_contents.append({'url': url, 'path': path, 'text': response.text})
        return format_repo_contents(file_contents)
    except Exception as e:
        # UI boundary: report the problem as text instead of raising.
        return f"Error generating text file: {str(e)}\n\nPlease ensure:\n1. You have selected at least one file.\n2. Your access token (if provided) is valid.\n3. You have a stable internet connection.\n4. The GitHub API is accessible."
def format_repo_contents(contents):
    """Format fetched files as a directory index followed by each file's text.

    Args:
        contents: List of dicts with ``path`` and ``text`` keys.

    Returns:
        A string that starts with ``"Directory Structure:"``, then a tree
        drawing of all paths, then one ``File: <path>`` section per file in
        sorted path order.
    """
    contents = sort_contents(contents)

    # Nested dict of path segments: directories map to dicts, files to None.
    tree = {}
    for item in contents:
        parts = item.get('path', '').split('/')
        node = tree
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node.setdefault(parts[-1], None)

    def build_index(node, prefix=''):
        """Return the tree-drawing lines for ``node`` as one string."""
        lines = []
        entries = sorted(node.items())
        last_position = len(entries) - 1
        for position, (name, subnode) in enumerate(entries):
            is_last = position == last_position
            line_prefix = '└── ' if is_last else '├── '
            # Child prefixes are as wide as the 4-column branch markers so
            # nested entries line up under their parent.
            child_prefix = '    ' if is_last else '│   '
            if name == '':
                name = './'
            lines.append(f"{prefix}{line_prefix}{name}\n")
            if subnode and isinstance(subnode, dict):
                lines.append(build_index(subnode, f"{prefix}{child_prefix}"))
        return ''.join(lines)

    index = build_index(tree)
    text = ''.join(
        f"\n\n---\nFile: {item.get('path', '')}\n---\n\n{item.get('text', '')}\n"
        for item in contents
    )
    return f"Directory Structure:\n\n{index}\n{text}"
def get_selected_files(html_structure, repo_tree):
    """List the files of a repository tree with a default selection flag.

    Args:
        html_structure: Unused; kept so existing callers keep working.
        repo_tree: List of tree entries, or ``None``/empty when no
            repository has been fetched.

    Returns:
        One dict per ``'blob'`` entry with the keys ``name``, ``url``,
        ``path`` and ``selected``. ``selected`` is ``True`` when the path
        ends with a common source extension. An empty list is returned for
        a missing or empty tree.
    """
    # A failed fetch leaves the tree as None; treat that as "no files".
    if not repo_tree:
        return []

    common_extensions = ('.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx')
    file_list = []
    for item in repo_tree:
        if item.get('type') != 'blob':
            continue
        file_path = item.get('path', '')
        file_list.append({
            'name': file_path,
            'url': item.get('url', ''),
            'path': file_path,
            'selected': file_path.lower().endswith(common_extensions),
        })
    return file_list
def generate_file_checkboxes(tree):
    """Map each file path in ``tree`` to its ``{'url', 'path'}`` descriptor.

    Only entries whose ``type`` is ``'blob'`` are included. A missing or
    empty tree yields an empty dict.
    """
    if not tree:
        return {}

    return {
        blob.get('path', ''): {
            'url': blob.get('url', ''),
            'path': blob.get('path', ''),
        }
        for blob in tree
        if blob.get('type') == 'blob'
    }
def process_selections(file_options, selections, token):
    """Resolve the ticked paths to file descriptors and fetch their contents.

    Selections that are not keys of ``file_options`` are ignored. Returns an
    error string when nothing usable is selected, otherwise whatever
    ``fetch_selected_files`` returns.
    """
    if not (selections and file_options):
        return "Error: No files selected or no files available."

    chosen = [file_options[key] for key in selections if key in file_options]
    if not chosen:
        return "Error: No valid files selected."

    # fetch_selected_files expects its input as a JSON string.
    return fetch_selected_files(json.dumps(chosen), token)
def save_output(output_text):
    """Write ``output_text`` to a new temporary ``.txt`` file.

    Returns the path of the file, or an error string when the text is empty
    or whitespace only. The file is UTF-8 encoded and is not deleted
    automatically.
    """
    has_content = bool(output_text) and bool(output_text.strip())
    if not has_content:
        return "Error: No content to download. Please generate the text file first."

    # delete=False keeps the file on disk after the handle is closed.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.txt')
    with temp_file:
        temp_file.write(output_text.encode('utf-8'))
    return temp_file.name
# Create Gradio interface
_APP_CSS = """
.directory { list-style-type: none; padding-left: 20px; }
.folder { color: #e67e22; font-weight: bold; }
.file { color: #3498db; }
.gr-box { border-radius: 8px; }
"""

_USAGE_MARKDOWN = """
## How to use
1. Enter a GitHub repository URL (e.g., https://github.com/username/repo)
2. Optionally specify branch/tag and path
3. For private repositories, provide an access token
4. Click "Fetch Repository" to load the directory structure
5. Select files from the directory structure
6. Click "Generate Text" to fetch and format file contents
7. Copy or download the generated text
## Access Token Information
To access private repositories or increase API rate limits, you'll need a GitHub personal access token.
To create one:
1. Go to GitHub Settings > Developer settings > Personal access tokens
2. Generate a new token with the 'repo' scope
3. Copy the token and paste it in the Access Token field
"""


def _file_selector_update(file_options):
    """Return the update that lists every fetched file path as a choice.

    Returning a bare list would set the CheckboxGroup's selected value
    rather than its choices, leaving nothing to tick. The selection is reset
    because paths from a previous repository are no longer valid.
    """
    return gr.update(choices=list(file_options or {}), value=[])


def _download_status(result):
    """Turn the result of ``save_output`` into a status message update.

    ``save_output`` returns either a file path or an ``"Error: ..."``
    string; the latter is shown as-is instead of being reported as a path.
    """
    if isinstance(result, str) and result.startswith("Error:"):
        return gr.update(value=result)
    return gr.update(value=f"File saved at: {result}. You can download it from there.")


# NOTE(review): the container nesting below is reconstructed; confirm it
# against the intended layout.
with gr.Blocks(css=_APP_CSS) as demo:
    gr.Markdown("# GitHub Repository Explorer")

    with gr.Row():
        with gr.Column(scale=2):
            repo_url = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repo")
            with gr.Row():
                ref = gr.Textbox(label="Branch/Tag (optional)", placeholder="main")
                path = gr.Textbox(label="Path (optional)", placeholder="src")
            token = gr.Textbox(label="Access Token (optional, for private repos)", placeholder="ghp_xxxxxxxxxxxx", type="password")
            fetch_button = gr.Button("Fetch Repository")
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.TabItem("Info"):
                    gr.Markdown(_USAGE_MARKDOWN)

    # Directory structure display and file selection
    dir_structure_html = gr.HTML(label="Directory Structure")

    # Store repo tree data for use in file selection
    repo_tree_state = gr.State(None)
    file_options_state = gr.State(None)

    # File selection
    file_selector = gr.CheckboxGroup(label="Select Files", interactive=True)

    # Output and action buttons
    output_text = gr.Textbox(label="Output", lines=20)
    with gr.Row():
        generate_button = gr.Button("Generate Text")
        copy_button = gr.Button("Copy to Clipboard")
        download_button = gr.Button("Download")
    error_output = gr.Textbox(label="Status/Error Messages")
    download_path = gr.State(None)

    # Define events
    # Fetch the tree, derive the path -> descriptor mapping, then offer the
    # paths as choices in the file selector.
    fetch_button.click(
        fn=fetch_repo_contents,
        inputs=[repo_url, ref, path, token],
        outputs=[dir_structure_html, error_output, repo_tree_state],
    ).then(
        fn=generate_file_checkboxes,
        inputs=[repo_tree_state],
        outputs=[file_options_state],
    ).then(
        fn=_file_selector_update,
        inputs=[file_options_state],
        outputs=[file_selector],
    )

    generate_button.click(
        fn=process_selections,
        inputs=[file_options_state, file_selector, token],
        outputs=[output_text],
    )

    # NOTE(review): this handler only echoes the text back into the output
    # box; nothing here writes to the clipboard.
    copy_button.click(
        fn=lambda x: x,  # Just pass through the text
        inputs=[output_text],
        outputs=[output_text],
    )

    download_button.click(
        fn=save_output,
        inputs=[output_text],
        outputs=[download_path],
    ).then(
        fn=_download_status,
        inputs=[download_path],
        outputs=[error_output],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(share=True)  # Added share=True to create a public link