Spaces:

nihalaninihal
/

Repo2TxT

Sleeping

App Files Files Community

nihalaninihal commited on Mar 31

Commit

290988d

verified ·

1 Parent(s): c57500e

Create app.py

Browse files

Files changed (1) hide show

app.py +408 -0

app.py ADDED Viewed

	@@ -0,0 +1,408 @@

+import gradio as gr
+import requests
+import re
+import json
+import os
+from pathlib import Path
+import tempfile
+import base64
+def parse_repo_url(url):
+    """Parse GitHub repository URL to extract owner, repo, reference, and path."""
+    url = url.rstrip('/')
+    url_pattern = r'^https://github\.com/([^/]+)/([^/]+)(/tree/([^/]+)(/(.+))?)?$'
+    match = re.match(url_pattern, url)
+    if not match:
+        raise ValueError('Invalid GitHub repository URL. Please ensure the URL is in the correct format: '
+                        'https://github.com/owner/repo or https://github.com/owner/repo/tree/branch/path')
+    return {
+        'owner': match.group(1),
+        'repo': match.group(2),
+        'ref_from_url': match.group(4),
+        'path_from_url': match.group(6)
+    }
+def fetch_repo_sha(owner, repo, ref, path, token=None):
+    """Fetch repository SHA for specified path and reference."""
+    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path if path else ''}"
+    if ref:
+        url += f"?ref={ref}"
+    headers = {'Accept': 'application/vnd.github.object+json'}
+    if token:
+        headers['Authorization'] = f"token {token}"
+    response = requests.get(url, headers=headers)
+    if not response.ok:
+        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
+            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
+        if response.status_code == 404:
+            raise ValueError('Repository, branch, or path not found. Please check that the URL, branch/tag, and path are correct.')
+        raise ValueError(f'Failed to fetch repository SHA. Status: {response.status_code}. Please check your input.')
+    data = response.json()
+    return data.get('sha')
+def fetch_repo_tree(owner, repo, sha, token=None):
+    """Fetch repository tree structure."""
+    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{sha}?recursive=1"
+    headers = {'Accept': 'application/vnd.github+json'}
+    if token:
+        headers['Authorization'] = f"token {token}"
+    response = requests.get(url, headers=headers)
+    if not response.ok:
+        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
+            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
+        raise ValueError(f'Failed to fetch repository tree. Status: {response.status_code}. Please check your input.')
+    data = response.json()
+    return data.get('tree', [])
+def sort_contents(contents):
+    """Sort contents by path."""
+    def get_path(item):
+        return item.get('path', '')
+    sorted_contents = sorted(contents, key=lambda x: [p or '.' for p in get_path(x).split('/')])
+    return sorted_contents
+def create_directory_structure(tree):
+    """Create directory structure dictionary from tree."""
+    tree = [item for item in tree if item.get('type') == 'blob']
+    tree = sort_contents(tree)
+    directory_structure = {}
+    for item in tree:
+        path = item.get('path', '')
+        if not path.startswith('/'):
+            path = '/' + path
+        path_parts = path.split('/')
+        current_level = directory_structure
+        for i, part in enumerate(path_parts):
+            if not part:
+                part = './'
+            if part not in current_level:
+                current_level[part] = item if i == len(path_parts) - 1 else {}
+            if i < len(path_parts) - 1:
+                current_level = current_level[part]
+    return directory_structure
+def build_directory_html(structure, prefix=''):
+    """Build HTML representation of directory structure with checkboxes."""
+    html = '<ul class="directory">'
+    for name, item in sorted(structure.items(), key=lambda x: x[0]):
+        if isinstance(item, dict):
+            # This is a directory
+            html += f'<li><input type="checkbox" class="directory-checkbox"> <span class="folder">{name}</span>'
+            html += build_directory_html(item, prefix + '/' + name if prefix else name)
+            html += '</li>'
+        else:
+            # This is a file
+            file_path = item.get('path', '')
+            file_url = item.get('url', '')
+            common_extensions = ['.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx']
+            is_common = any(file_path.lower().endswith(ext) for ext in common_extensions)
+            checked = 'checked' if is_common else ''
+            html += f'<li><input type="checkbox" {checked} value="{json.dumps({"url": file_url, "path": file_path})}" class="file-checkbox"> '
+            html += f'<span class="file">{name}</span></li>'
+    html += '</ul>'
+    return html
+def fetch_repo_contents(repo_url, ref, path, token):
+    """Fetch repository contents and return HTML representation of directory structure."""
+    try:
+        repo_info = parse_repo_url(repo_url)
+        final_ref = ref or repo_info.get('ref_from_url')
+        final_path = path or repo_info.get('path_from_url') or ''
+        owner = repo_info.get('owner')
+        repo = repo_info.get('repo')
+        sha = fetch_repo_sha(owner, repo, final_ref, final_path, token)
+        tree = fetch_repo_tree(owner, repo, sha, token)
+        structure = create_directory_structure(tree)
+        # Create HTML for directory structure display
+        html_structure = build_directory_html(structure)
+        # Add JavaScript for checkbox behavior
+        js = """
+        <script>
+        // Check/uncheck all child checkboxes when directory checkbox is changed
+        document.querySelectorAll('.directory-checkbox').forEach(checkbox => {
+            checkbox.addEventListener('change', function() {
+                const parent = this.parentElement;
+                const childCheckboxes = parent.querySelectorAll('input[type="checkbox"]');
+                childCheckboxes.forEach(childBox => {
+                    childBox.checked = this.checked;
+                });
+            });
+        });
+        </script>
+        """
+        return html_structure + js, "", tree
+    except Exception as e:
+        error_message = str(e)
+        return "", f"Error fetching repository contents: {error_message}\n\nPlease ensure:\n1. The repository URL is correct and accessible.\n2. You have the necessary permissions.\n3. If it's a private repository, you've provided a valid access token.\n4. The specified branch/tag and path exist.", None
+def fetch_selected_files(selected_files_json, token):
+    """Fetch contents of selected files."""
+    try:
+        selected_files = json.loads(selected_files_json)
+        if not selected_files:
+            return "Error: No files selected. Please select at least one file from the directory structure."
+        file_contents = []
+        headers = {'Accept': 'application/vnd.github.v3.raw'}
+        if token:
+            headers['Authorization'] = f"token {token}"
+        for file_info in selected_files:
+            url = file_info.get('url')
+            path = file_info.get('path')
+            response = requests.get(url, headers=headers)
+            if not response.ok:
+                if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
+                    raise ValueError(f"GitHub API rate limit exceeded while fetching {path}. Please try again later or provide a valid access token.")
+                raise ValueError(f"Failed to fetch content for {path}. Status: {response.status_code}. Please check your permissions.")
+            text = response.text
+            file_contents.append({'url': url, 'path': path, 'text': text})
+        return format_repo_contents(file_contents)
+    except Exception as e:
+        return f"Error generating text file: {str(e)}\n\nPlease ensure:\n1. You have selected at least one file.\n2. Your access token (if provided) is valid.\n3. You have a stable internet connection.\n4. The GitHub API is accessible."
+def format_repo_contents(contents):
+    """Format repository contents for display."""
+    text = ''
+    index = ''
+    contents = sort_contents(contents)
+    # Create a directory tree structure
+    tree = {}
+    for item in contents:
+        parts = item.get('path', '').split('/')
+        current_level = tree
+        for i, part in enumerate(parts):
+            if part not in current_level:
+                current_level[part] = {} if i < len(parts) - 1 else None
+            if i < len(parts) - 1:
+                current_level = current_level[part]
+    # Function to recursively build the index
+    def build_index(node, prefix=''):
+        result = ''
+        entries = sorted(node.items())
+        for i, (name, subnode) in enumerate(entries):
+            is_last = i == len(entries) - 1
+            line_prefix = '└── ' if is_last else '├── '
+            child_prefix = '    ' if is_last else '│   '
+            if name == '':
+                name = './'
+            result += f"{prefix}{line_prefix}{name}\n"
+            if subnode and isinstance(subnode, dict):
+                result += build_index(subnode, f"{prefix}{child_prefix}")
+        return result
+    index = build_index(tree)
+    for item in contents:
+        text += f"\n\n---\nFile: {item.get('path', '')}\n---\n\n{item.get('text', '')}\n"
+    return f"Directory Structure:\n\n{index}\n{text}"
+def get_selected_files(html_structure, repo_tree):
+    """Parse selected files from HTML structure."""
+    # This would normally be done with JavaScript on the client side,
+    # but since Gradio doesn't support direct DOM manipulation,
+    # we'll provide a list of files for selection instead.
+    blob_items = [item for item in repo_tree if item.get('type') == 'blob']
+    file_list = []
+    for item in blob_items:
+        file_path = item.get('path', '')
+        common_extensions = ['.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx']
+        is_common = any(file_path.lower().endswith(ext) for ext in common_extensions)
+        file_list.append({
+            'name': file_path,
+            'url': item.get('url', ''),
+            'path': file_path,
+            'selected': is_common
+        })
+    return file_list
+def generate_file_checkboxes(tree):
+    """Generate file checkboxes for selection."""
+    if not tree:
+        return []
+    blob_items = [item for item in tree if item.get('type') == 'blob']
+    file_options = {}
+    for item in blob_items:
+        path = item.get('path', '')
+        file_options[path] = {
+            'url': item.get('url', ''),
+            'path': path
+        }
+    return file_options
+def process_selections(file_options, selections, token):
+    """Process selected files and fetch their contents."""
+    if not selections or not file_options:
+        return "Error: No files selected or no files available."
+    selected_files = []
+    for selection in selections:
+        if selection in file_options:
+            selected_files.append(file_options[selection])
+    if not selected_files:
+        return "Error: No valid files selected."
+    # Convert to JSON for the fetch function
+    selected_files_json = json.dumps(selected_files)
+    return fetch_selected_files(selected_files_json, token)
+def save_output(output_text):
+    """Save output text to a file and return download link."""
+    if not output_text or not output_text.strip():
+        return "Error: No content to download. Please generate the text file first."
+    # Create a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
+        temp_file.write(output_text.encode('utf-8'))
+        temp_path = temp_file.name
+    return temp_path
+# Create Gradio interface
+with gr.Blocks(css="""
+    .directory { list-style-type: none; padding-left: 20px; }
+    .folder { color: #e67e22; font-weight: bold; }
+    .file { color: #3498db; }
+    .gr-box { border-radius: 8px; }
+""") as demo:
+    gr.Markdown("# GitHub Repository Explorer")
+    with gr.Row():
+        with gr.Column(scale=2):
+            repo_url = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repo")
+            with gr.Row():
+                ref = gr.Textbox(label="Branch/Tag (optional)", placeholder="main")
+                path = gr.Textbox(label="Path (optional)", placeholder="src")
+            token = gr.Textbox(label="Access Token (optional, for private repos)", placeholder="ghp_xxxxxxxxxxxx", type="password")
+            fetch_button = gr.Button("Fetch Repository")
+        with gr.Column(scale=3):
+            with gr.Tabs():
+                with gr.TabItem("Info"):
+                    gr.Markdown("""
+                    ## How to use
+                    1. Enter a GitHub repository URL (e.g., https://github.com/username/repo)
+                    2. Optionally specify branch/tag and path
+                    3. For private repositories, provide an access token
+                    4. Click "Fetch Repository" to load the directory structure
+                    5. Select files from the directory structure
+                    6. Click "Generate Text" to fetch and format file contents
+                    7. Copy or download the generated text
+                    ## Access Token Information
+                    To access private repositories or increase API rate limits, you'll need a GitHub personal access token.
+                    To create one:
+                    1. Go to GitHub Settings > Developer settings > Personal access tokens
+                    2. Generate a new token with the 'repo' scope
+                    3. Copy the token and paste it in the Access Token field
+                    """)
+    # Directory structure display and file selection
+    dir_structure_html = gr.HTML(label="Directory Structure")
+    # Store repo tree data for use in file selection
+    repo_tree_state = gr.State(None)
+    file_options_state = gr.State(None)
+    # File selection
+    file_selector = gr.CheckboxGroup(label="Select Files", interactive=True)
+    # Output and action buttons
+    output_text = gr.Textbox(label="Output", lines=20)
+    with gr.Row():
+        generate_button = gr.Button("Generate Text")
+        copy_button = gr.Button("Copy to Clipboard")
+        download_button = gr.Button("Download")
+    error_output = gr.Textbox(label="Status/Error Messages")
+    download_path = gr.State(None)
+    # Define events
+    fetch_button.click(
+        fn=fetch_repo_contents,
+        inputs=[repo_url, ref, path, token],
+        outputs=[dir_structure_html, error_output, repo_tree_state]
+    ).then(
+        fn=generate_file_checkboxes,
+        inputs=[repo_tree_state],
+        outputs=[file_options_state]
+    ).then(
+        fn=lambda tree: [path for path in generate_file_checkboxes(tree).keys()],
+        inputs=[repo_tree_state],
+        outputs=[file_selector]
+    )
+    generate_button.click(
+        fn=process_selections,
+        inputs=[file_options_state, file_selector, token],
+        outputs=[output_text]
+    )
+    copy_button.click(
+        fn=lambda x: x,  # Just pass through the text
+        inputs=[output_text],
+        outputs=[output_text]
+    )
+    download_button.click(
+        fn=save_output,
+        inputs=[output_text],
+        outputs=[download_path]
+    ).then(
+        fn=lambda path: gr.update(value=f"File saved at: {path}. You can download it from there."),
+        inputs=[download_path],
+        outputs=[error_output]
+    )
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()