nihalaninihal committed on
Commit
290988d
·
verified ·
1 Parent(s): c57500e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +408 -0
app.py ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import re
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ import tempfile
8
+ import base64
9
+
10
def parse_repo_url(url):
    """Split a GitHub repository URL into owner, repo, ref and sub-path.

    Accepted forms are ``https://github.com/<owner>/<repo>`` and
    ``https://github.com/<owner>/<repo>/tree/<ref>[/<path>]``. Surrounding
    whitespace, trailing slashes and a ``.git`` suffix on the repository
    name are tolerated, because pasted URLs frequently carry them.

    Args:
        url: The repository URL as entered by the user.

    Returns:
        A dict with the keys ``owner``, ``repo``, ``ref_from_url`` and
        ``path_from_url``. The last two are ``None`` when the URL has no
        ``/tree/...`` part.

    Raises:
        ValueError: If ``url`` is empty or matches neither form.
    """
    cleaned = (url or '').strip().rstrip('/')

    # The ref is matched as a single path segment, so a branch name that
    # itself contains "/" cannot be told apart from a sub-path here.
    url_pattern = (
        r'^https://github\.com/'
        r'(?P<owner>[^/]+)/'
        r'(?P<repo>[^/]+?)(?:\.git)?'
        r'(?:/tree/(?P<ref>[^/]+)(?:/(?P<path>.+))?)?$'
    )
    match = re.match(url_pattern, cleaned)

    if not match:
        raise ValueError('Invalid GitHub repository URL. Please ensure the URL is in the correct format: '
                         'https://github.com/owner/repo or https://github.com/owner/repo/tree/branch/path')

    return {
        'owner': match.group('owner'),
        'repo': match.group('repo'),
        'ref_from_url': match.group('ref'),
        'path_from_url': match.group('path'),
    }
26
+
27
def fetch_repo_sha(owner, repo, ref, path, token=None, timeout=30):
    """Look up the SHA of ``path`` in a repository via the GitHub contents API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        ref: Branch, tag or commit to resolve against; falsy means the
            ``ref`` query parameter is omitted.
        path: Path inside the repository; falsy means the repository root.
        token: Optional access token sent in the ``Authorization`` header.
        timeout: Seconds to wait for GitHub before giving up.

    Returns:
        The ``sha`` field of the JSON response, or ``None`` if it is absent.

    Raises:
        ValueError: On a rate-limit response, a 404, or any other non-OK status.
        requests.exceptions.RequestException: On network failures or timeouts.
    """
    # Function-scope import so this block does not depend on a file-level import.
    from urllib.parse import quote

    # Percent-encode the path (keeping "/") so characters such as "#" or "?"
    # in a file name are not interpreted as URL syntax.
    encoded_path = quote(path) if path else ''
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{encoded_path}"

    # Let requests build the query string so the ref is encoded correctly.
    params = {'ref': ref} if ref else None

    headers = {'Accept': 'application/vnd.github.object+json'}
    if token:
        headers['Authorization'] = f"token {token}"

    # Without a timeout a stalled connection would block the UI callback forever.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    if not response.ok:
        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
        if response.status_code == 404:
            raise ValueError('Repository, branch, or path not found. Please check that the URL, branch/tag, and path are correct.')
        raise ValueError(f'Failed to fetch repository SHA. Status: {response.status_code}. Please check your input.')

    return response.json().get('sha')
48
+
49
def fetch_repo_tree(owner, repo, sha, token=None, timeout=30):
    """Fetch the recursive git tree rooted at ``sha`` from the GitHub API.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        sha: SHA of the tree to list.
        token: Optional access token sent in the ``Authorization`` header.
        timeout: Seconds to wait for GitHub before giving up.

    Returns:
        The list stored under ``tree`` in the JSON response, or ``[]`` when
        that key is missing.

    Raises:
        ValueError: On a rate-limit response or any other non-OK status.
        requests.exceptions.RequestException: On network failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{sha}?recursive=1"

    headers = {'Accept': 'application/vnd.github+json'}
    if token:
        headers['Authorization'] = f"token {token}"

    # Without a timeout a stalled connection would block the UI callback forever.
    response = requests.get(url, headers=headers, timeout=timeout)

    if not response.ok:
        if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
            raise ValueError('GitHub API rate limit exceeded. Please try again later or provide a valid access token.')
        raise ValueError(f'Failed to fetch repository tree. Status: {response.status_code}. Please check your input.')

    # NOTE(review): the response may also carry a "truncated" flag for very
    # large trees; it is not surfaced here -- confirm whether callers need it.
    return response.json().get('tree', [])
66
+
67
def sort_contents(contents):
    """Return a new list of ``contents`` ordered by each entry's ``path``.

    Paths are compared segment by segment (split on "/") rather than as one
    string; an empty segment is compared as ".". Entries without a ``path``
    key are treated as having an empty path.
    """
    def segment_key(entry):
        segments = entry.get('path', '').split('/')
        return [segment if segment else '.' for segment in segments]

    ordered = list(contents)
    ordered.sort(key=segment_key)
    return ordered
74
+
75
def create_directory_structure(tree):
    """Turn a flat list of tree entries into a nested dict keyed by path segment.

    Only entries whose ``type`` is ``'blob'`` are kept. Every path is rooted
    under a top-level ``'./'`` key; directories map to nested dicts and each
    file name maps to its original tree entry.
    """
    blobs = sort_contents([entry for entry in tree if entry.get('type') == 'blob'])

    root = {}
    for entry in blobs:
        raw_path = entry.get('path', '')
        rooted = raw_path if raw_path.startswith('/') else '/' + raw_path

        # Empty segments (notably the one before the leading slash) are
        # displayed as './'.
        *folders, leaf = [segment or './' for segment in rooted.split('/')]

        level = root
        for folder in folders:
            if folder not in level:
                level[folder] = {}
            level = level[folder]

        # First entry wins if the same leaf name shows up twice.
        if leaf not in level:
            level[leaf] = entry

    return root
101
+
102
def build_directory_html(structure, prefix=''):
    """Render a nested directory structure as an HTML list with checkboxes.

    Args:
        structure: Nested dict as produced by ``create_directory_structure``:
            directory names map to dicts of children, file names map to the
            file's tree entry (a dict with at least ``path`` and ``url``).
        prefix: Path of the directory being rendered. It is threaded through
            the recursion but does not affect the generated markup.

    Returns:
        A ``<ul class="directory">`` fragment. Directories get a
        ``directory-checkbox`` input; files get a ``file-checkbox`` input whose
        ``value`` is the JSON ``{"url": ..., "path": ...}`` and which is
        pre-checked for common source-code extensions.
    """
    # Function-scope import: the file does not import the html module.
    from html import escape

    common_extensions = ('.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx')
    parts = ['<ul class="directory">']

    for name, item in sorted(structure.items(), key=lambda pair: pair[0]):
        # Files and directories are both dicts, so isinstance() alone cannot
        # tell them apart. A file entry carries type == 'blob' and/or a string
        # 'path'; inside a directory dict every value is itself a dict.
        is_file = not isinstance(item, dict) or (
            item.get('type') == 'blob' or isinstance(item.get('path'), str)
        )
        label = escape(str(name))

        if not is_file:
            parts.append(f'<li><input type="checkbox" class="directory-checkbox"> <span class="folder">{label}</span>')
            parts.append(build_directory_html(item, prefix + '/' + name if prefix else name))
            parts.append('</li>')
            continue

        file_path = item.get('path', '')
        file_url = item.get('url', '')
        checked = 'checked' if file_path.lower().endswith(common_extensions) else ''

        # The JSON payload contains double quotes, so it must be escaped before
        # being placed inside a double-quoted attribute.
        payload = escape(json.dumps({"url": file_url, "path": file_path}), quote=True)
        parts.append(f'<li><input type="checkbox" {checked} value="{payload}" class="file-checkbox"> ')
        parts.append(f'<span class="file">{label}</span></li>')

    parts.append('</ul>')
    return ''.join(parts)
125
+
126
def fetch_repo_contents(repo_url, ref, path, token):
    """Load a repository's file tree and render it for the UI.

    Args:
        repo_url: GitHub repository URL entered by the user.
        ref: Optional branch/tag; overrides any ref embedded in ``repo_url``.
        path: Optional sub-path; overrides any path embedded in ``repo_url``.
        token: Optional GitHub access token.

    Returns:
        A 3-tuple ``(html, error_message, tree)``. On success ``html`` is the
        directory markup plus a checkbox script, ``error_message`` is ``""``
        and ``tree`` is the raw tree list. On any failure the tuple is
        ``("", <explanatory message>, None)``; no exception escapes.
    """
    try:
        repo_info = parse_repo_url(repo_url)

        # Text boxes readily pick up stray whitespace or slashes, which would
        # otherwise end up in the API URL and produce a misleading 404.
        final_ref = (ref or '').strip() or repo_info.get('ref_from_url')
        final_path = (path or '').strip().strip('/') or repo_info.get('path_from_url') or ''

        owner = repo_info.get('owner')
        repo = repo_info.get('repo')

        sha = fetch_repo_sha(owner, repo, final_ref, final_path, token)
        tree = fetch_repo_tree(owner, repo, sha, token)

        structure = create_directory_structure(tree)

        # Create HTML for directory structure display
        html_structure = build_directory_html(structure)

        # Add JavaScript for checkbox behavior.
        # NOTE(review): a <script> injected through an HTML component may not
        # be executed by the browser -- confirm this handler actually runs.
        js = """
        <script>
        // Check/uncheck all child checkboxes when directory checkbox is changed
        document.querySelectorAll('.directory-checkbox').forEach(checkbox => {
            checkbox.addEventListener('change', function() {
                const parent = this.parentElement;
                const childCheckboxes = parent.querySelectorAll('input[type="checkbox"]');
                childCheckboxes.forEach(childBox => {
                    childBox.checked = this.checked;
                });
            });
        });
        </script>
        """

        return html_structure + js, "", tree

    except Exception as e:
        # UI boundary: report every failure as text instead of raising.
        error_message = str(e)
        return "", f"Error fetching repository contents: {error_message}\n\nPlease ensure:\n1. The repository URL is correct and accessible.\n2. You have the necessary permissions.\n3. If it's a private repository, you've provided a valid access token.\n4. The specified branch/tag and path exist.", None
165
+
166
def fetch_selected_files(selected_files_json, token, timeout=30):
    """Download the selected files and return them as one formatted text.

    Args:
        selected_files_json: JSON array of objects with ``url`` and ``path``.
        token: Optional GitHub access token.
        timeout: Seconds to wait for each download before giving up.

    Returns:
        The text produced by ``format_repo_contents`` on success. If the
        selection is empty, or anything fails (invalid JSON, HTTP error,
        network error), an ``"Error..."`` message string is returned instead;
        no exception escapes.
    """
    try:
        selected_files = json.loads(selected_files_json)
        if not selected_files:
            return "Error: No files selected. Please select at least one file from the directory structure."

        headers = {'Accept': 'application/vnd.github.v3.raw'}
        if token:
            headers['Authorization'] = f"token {token}"

        file_contents = []
        # One session for all downloads so the connection to GitHub is reused.
        with requests.Session() as session:
            session.headers.update(headers)

            for file_info in selected_files:
                url = file_info.get('url')
                path = file_info.get('path')

                # A timeout keeps one stalled download from hanging the request.
                response = session.get(url, timeout=timeout)
                if not response.ok:
                    if response.status_code == 403 and response.headers.get('X-RateLimit-Remaining') == '0':
                        raise ValueError(f"GitHub API rate limit exceeded while fetching {path}. Please try again later or provide a valid access token.")
                    raise ValueError(f"Failed to fetch content for {path}. Status: {response.status_code}. Please check your permissions.")

                file_contents.append({'url': url, 'path': path, 'text': response.text})

        return format_repo_contents(file_contents)

    except Exception as e:
        # UI boundary: report every failure as text instead of raising.
        return f"Error generating text file: {str(e)}\n\nPlease ensure:\n1. You have selected at least one file.\n2. Your access token (if provided) is valid.\n3. You have a stable internet connection.\n4. The GitHub API is accessible."
195
+
196
def format_repo_contents(contents):
    """Format fetched files as a directory index followed by each file's text.

    Args:
        contents: Iterable of dicts with ``path`` and ``text`` keys.

    Returns:
        A string starting with ``"Directory Structure:"`` and an ASCII-art
        tree of the paths, followed by one ``File: <path>`` section per entry
        in path order.
    """
    contents = sort_contents(contents)

    # Nested dict mirroring the directories; files are stored as None.
    tree = {}
    for item in contents:
        parts = item.get('path', '').split('/')
        level = tree
        for part in parts[:-1]:
            child = level.get(part)
            if child is None:
                # Missing, or previously recorded as a file: make it a directory.
                child = level[part] = {}
            level = child
        level.setdefault(parts[-1], None)

    def build_index(node, prefix=''):
        """Render ``node`` as tree lines, each starting with ``prefix``."""
        lines = []
        entries = sorted(node.items(), key=lambda pair: pair[0])
        last = len(entries) - 1
        for i, (name, subnode) in enumerate(entries):
            is_last = i == last
            connector = '└── ' if is_last else '├── '
            # Children of the last entry get blank padding; others continue
            # the vertical rule. Both are four columns wide to stay aligned.
            child_prefix = '    ' if is_last else '│   '

            lines.append(f"{prefix}{connector}{name or './'}\n")
            if subnode and isinstance(subnode, dict):
                lines.append(build_index(subnode, f"{prefix}{child_prefix}"))
        return ''.join(lines)

    index = build_index(tree)

    text = ''.join(
        f"\n\n---\nFile: {item.get('path', '')}\n---\n\n{item.get('text', '')}\n"
        for item in contents
    )

    return f"Directory Structure:\n\n{index}\n{text}"
237
+
238
def get_selected_files(html_structure, repo_tree):
    """List every file in ``repo_tree`` with a default selection flag.

    ``html_structure`` is accepted but not read. Selection is not parsed out
    of the rendered HTML; instead each ``'blob'`` entry of the tree is
    returned as a dict with ``name``, ``url``, ``path`` and ``selected``,
    where ``selected`` is True for common source-code extensions.
    """
    common_extensions = ('.js', '.py', '.java', '.cpp', '.html', '.css', '.ts', '.jsx', '.tsx')

    def describe(entry):
        file_path = entry.get('path', '')
        return {
            'name': file_path,
            'url': entry.get('url', ''),
            'path': file_path,
            'selected': file_path.lower().endswith(common_extensions),
        }

    return [describe(entry) for entry in repo_tree if entry.get('type') == 'blob']
259
+
260
def generate_file_checkboxes(tree):
    """Map each file path in ``tree`` to the data needed to fetch it.

    Args:
        tree: List of tree entries, or a falsy value when nothing was loaded.

    Returns:
        A dict ``{path: {'url': ..., 'path': path}}`` covering every entry
        whose ``type`` is ``'blob'``. An empty dict is returned for a falsy
        ``tree`` so the result is always a mapping and callers can use
        ``.keys()`` unconditionally.
    """
    if not tree:
        return {}

    return {
        item.get('path', ''): {
            'url': item.get('url', ''),
            'path': item.get('path', ''),
        }
        for item in tree
        if item.get('type') == 'blob'
    }
276
+
277
def process_selections(file_options, selections, token):
    """Resolve the ticked file names and fetch their contents.

    ``selections`` are looked up in ``file_options``; names that are not
    present are ignored. Returns an ``"Error: ..."`` string when nothing is
    selected, nothing is available, or none of the names resolve; otherwise
    returns whatever ``fetch_selected_files`` produces.
    """
    if not (selections and file_options):
        return "Error: No files selected or no files available."

    chosen = [file_options[name] for name in selections if name in file_options]
    if not chosen:
        return "Error: No valid files selected."

    # fetch_selected_files takes its selection as a JSON string.
    return fetch_selected_files(json.dumps(chosen), token)
293
+
294
def save_output(output_text):
    """Write ``output_text`` to a new temporary ``.txt`` file.

    Returns the path of the file, which is left on disk for the caller. When
    ``output_text`` is empty or only whitespace, an ``"Error: ..."`` string is
    returned instead and no file is created.
    """
    has_content = bool(output_text) and bool(output_text.strip())
    if not has_content:
        return "Error: No content to download. Please generate the text file first."

    # delete=False keeps the file around after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as handle:
        handle.write(output_text.encode('utf-8'))
        return handle.name
305
+
306
# Create Gradio interface

def _file_selector_update(tree):
    """Return a CheckboxGroup update offering every file path in ``tree``.

    The paths must be supplied as ``choices``; returning a bare list would set
    the component's selected *value* and leave nothing to pick from.
    """
    # list() accepts both a dict (its keys) and an empty list, so this works
    # whatever empty container generate_file_checkboxes hands back.
    return gr.update(choices=list(generate_file_checkboxes(tree)), value=[])


def _download_status(saved_path):
    """Turn the result of ``save_output`` into a status-box update."""
    # save_output reports problems through the same return value as the path.
    if not saved_path or saved_path.startswith("Error:"):
        return gr.update(value=saved_path or "")
    return gr.update(value=f"File saved at: {saved_path}. You can download it from there.")


# NOTE(review): the nesting of the components below the tabs is reconstructed;
# confirm they belong at the top level of the Blocks rather than in a column.
with gr.Blocks(css="""
    .directory { list-style-type: none; padding-left: 20px; }
    .folder { color: #e67e22; font-weight: bold; }
    .file { color: #3498db; }
    .gr-box { border-radius: 8px; }
""") as demo:
    gr.Markdown("# GitHub Repository Explorer")

    with gr.Row():
        with gr.Column(scale=2):
            repo_url = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/username/repo")

            with gr.Row():
                ref = gr.Textbox(label="Branch/Tag (optional)", placeholder="main")
                path = gr.Textbox(label="Path (optional)", placeholder="src")

            token = gr.Textbox(label="Access Token (optional, for private repos)", placeholder="ghp_xxxxxxxxxxxx", type="password")

            fetch_button = gr.Button("Fetch Repository")

        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.TabItem("Info"):
                    gr.Markdown("""
                    ## How to use
                    1. Enter a GitHub repository URL (e.g., https://github.com/username/repo)
                    2. Optionally specify branch/tag and path
                    3. For private repositories, provide an access token
                    4. Click "Fetch Repository" to load the directory structure
                    5. Select files from the directory structure
                    6. Click "Generate Text" to fetch and format file contents
                    7. Copy or download the generated text

                    ## Access Token Information
                    To access private repositories or increase API rate limits, you'll need a GitHub personal access token.
                    To create one:
                    1. Go to GitHub Settings > Developer settings > Personal access tokens
                    2. Generate a new token with the 'repo' scope
                    3. Copy the token and paste it in the Access Token field
                    """)

    # Directory structure display and file selection
    dir_structure_html = gr.HTML(label="Directory Structure")

    # Store repo tree data for use in file selection
    repo_tree_state = gr.State(None)
    file_options_state = gr.State(None)

    # File selection
    file_selector = gr.CheckboxGroup(label="Select Files", interactive=True)

    # Output and action buttons
    output_text = gr.Textbox(label="Output", lines=20)

    with gr.Row():
        generate_button = gr.Button("Generate Text")
        copy_button = gr.Button("Copy to Clipboard")
        download_button = gr.Button("Download")

    error_output = gr.Textbox(label="Status/Error Messages")
    download_path = gr.State(None)

    # Define events
    fetch_button.click(
        fn=fetch_repo_contents,
        inputs=[repo_url, ref, path, token],
        outputs=[dir_structure_html, error_output, repo_tree_state]
    ).then(
        fn=generate_file_checkboxes,
        inputs=[repo_tree_state],
        outputs=[file_options_state]
    ).then(
        # Populate the selector's choices from the freshly loaded tree.
        fn=_file_selector_update,
        inputs=[repo_tree_state],
        outputs=[file_selector]
    )

    generate_button.click(
        fn=process_selections,
        inputs=[file_options_state, file_selector, token],
        outputs=[output_text]
    )

    # NOTE(review): this handler only echoes the text back to the same box;
    # nothing is written to the clipboard. A client-side handler is probably
    # needed -- confirm the intended behaviour.
    copy_button.click(
        fn=lambda x: x,  # Just pass through the text
        inputs=[output_text],
        outputs=[output_text]
    )

    download_button.click(
        fn=save_output,
        inputs=[output_text],
        outputs=[download_path]
    ).then(
        fn=_download_status,
        inputs=[download_path],
        outputs=[error_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()