Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,10 +13,7 @@ if not hf_token:
|
|
| 13 |
if not hf_user:
|
| 14 |
raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
|
| 15 |
|
| 16 |
-
|
| 17 |
-
# login(token=hf_token, add_to_git_credential=True)
|
| 18 |
-
|
| 19 |
-
SUPPORTED_FILE_TYPES = ["txt", "shell", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html", "ini"]
|
| 20 |
|
| 21 |
def validate_url(url):
    """Report whether *url* starts with the ``https://`` scheme prefix.

    Only the leading scheme is inspected; nothing else about the URL
    (host, path, well-formedness) is checked here.

    NOTE(review): clone_repo embeds the HF token into whatever URL passes
    this check -- confirm that accepting any https host is intended.
    """
    required_prefix = 'https://'
    is_https = url.startswith(required_prefix)
    return is_https
|
|
@@ -24,7 +21,6 @@ def validate_url(url):
|
|
| 24 |
def clone_repo(url, repo_dir, hf_token, hf_user):
|
| 25 |
env = os.environ.copy()
|
| 26 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
| 27 |
-
# Construct the Git URL with the token and author name for authentication
|
| 28 |
token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
|
| 29 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
| 30 |
if result.returncode != 0:
|
|
@@ -37,11 +33,16 @@ def get_file_summary(file_path, file_type):
|
|
| 37 |
"name": os.path.relpath(file_path),
|
| 38 |
"type": file_type,
|
| 39 |
"size": size,
|
|
|
|
|
|
|
| 40 |
}
|
| 41 |
|
| 42 |
-
def read_file_content(file_path):
|
| 43 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def validate_file_types(directory):
|
| 47 |
m = Magika()
|
|
@@ -88,7 +89,6 @@ def extract_repo_content(url, hf_token, hf_user):
|
|
| 88 |
|
| 89 |
extracted_content.append(content)
|
| 90 |
|
| 91 |
-
# Cleanup temporary directory
|
| 92 |
subprocess.run(["rm", "-rf", repo_dir])
|
| 93 |
|
| 94 |
return extracted_content
|
|
@@ -100,6 +100,8 @@ def format_output(extracted_content, repo_url):
|
|
| 100 |
formatted_output += f"### File: {file_data['header']['name']}\n"
|
| 101 |
formatted_output += f"**Type:** {file_data['header']['type']}\n"
|
| 102 |
formatted_output += f"**Size:** {file_data['header']['size']} bytes\n"
|
|
|
|
|
|
|
| 103 |
formatted_output += "#### Content:\n"
|
| 104 |
formatted_output += f"```\n{file_data['content']}\n```\n\n"
|
| 105 |
else:
|
|
@@ -130,4 +132,4 @@ with app:
|
|
| 130 |
|
| 131 |
extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
|
| 132 |
|
| 133 |
-
app.launch()
|
|
|
|
| 13 |
if not hf_user:
|
| 14 |
raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
|
| 15 |
|
| 16 |
+
SUPPORTED_FILE_TYPES = ["txt", "shell", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html", "ini", "jsonl", "ipynb"]
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def validate_url(url):
    """Report whether *url* starts with the ``https://`` scheme prefix.

    Only the leading scheme is inspected; nothing else about the URL
    (host, path, well-formedness) is checked here.

    NOTE(review): clone_repo embeds the HF token into whatever URL passes
    this check -- confirm that accepting any https host is intended.
    """
    required_prefix = 'https://'
    is_https = url.startswith(required_prefix)
    return is_https
|
|
|
|
| 21 |
def clone_repo(url, repo_dir, hf_token, hf_user):
|
| 22 |
env = os.environ.copy()
|
| 23 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
|
|
|
| 24 |
token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
|
| 25 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
| 26 |
if result.returncode != 0:
|
|
|
|
| 33 |
"name": os.path.relpath(file_path),
|
| 34 |
"type": file_type,
|
| 35 |
"size": size,
|
| 36 |
+
"creation_date": os.path.getctime(file_path),
|
| 37 |
+
"modification_date": os.path.getmtime(file_path)
|
| 38 |
}
|
| 39 |
|
| 40 |
+
def read_file_content(file_path, max_size=32*1024):
    """Return the text of *file_path*, truncated to at most *max_size* characters.

    The file is decoded as UTF-8 with undecodable bytes silently dropped.
    When more text remains after the first *max_size* characters, a
    ``"\\n... [Content Truncated] ..."`` marker is appended to the result.

    Args:
        file_path: Path of the file to read.
        max_size: Maximum number of characters to return before truncating.

    Returns:
        The file's text, followed by the truncation marker only if content
        was actually left unread.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
        content = file.read(max_size)
        # Decide on truncation by probing for leftover text rather than by
        # comparing the on-disk byte size with max_size: read() counts
        # characters, so multi-byte or CRLF files could be flagged as
        # truncated even though they were returned in full.
        if file.read(1):
            return content + "\n... [Content Truncated] ..."
        return content
|
| 46 |
|
| 47 |
def validate_file_types(directory):
|
| 48 |
m = Magika()
|
|
|
|
| 89 |
|
| 90 |
extracted_content.append(content)
|
| 91 |
|
|
|
|
| 92 |
subprocess.run(["rm", "-rf", repo_dir])
|
| 93 |
|
| 94 |
return extracted_content
|
|
|
|
| 100 |
formatted_output += f"### File: {file_data['header']['name']}\n"
|
| 101 |
formatted_output += f"**Type:** {file_data['header']['type']}\n"
|
| 102 |
formatted_output += f"**Size:** {file_data['header']['size']} bytes\n"
|
| 103 |
+
formatted_output += f"**Created:** {file_data['header']['creation_date']}\n"
|
| 104 |
+
formatted_output += f"**Modified:** {file_data['header']['modification_date']}\n"
|
| 105 |
formatted_output += "#### Content:\n"
|
| 106 |
formatted_output += f"```\n{file_data['content']}\n```\n\n"
|
| 107 |
else:
|
|
|
|
| 132 |
|
| 133 |
extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
|
| 134 |
|
| 135 |
+
app.launch()
|