Spaces:
Running
Running
fix links + prevent duplication (#1)
Browse files
- convert the URL strings to html links (9d5e5fbd94321699a9bcebd1f12fe9aa0a9172bf)
- check if the URL or dataset name already exists to avoid duplication (86a186ec83c9ba05fba39d994cd42367718f4170)
Co-authored-by: Mahir Daiyan <MHRDYN7@users.noreply.huggingface.co>
app.py
CHANGED
|
@@ -98,6 +98,14 @@ def submit_entry(
|
|
| 98 |
errors.append("Approximate size must be a non-negative number.")
|
| 99 |
if not field.strip():
|
| 100 |
errors.append("Please provide a field.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
if errors:
|
| 103 |
return gr.update(value=f"Submission failed:\n- " + "\n- ".join(errors), visible=True), gr.update(visible=False)
|
|
@@ -121,7 +129,7 @@ def submit_entry(
|
|
| 121 |
ok = f"Thanks, {user_display}. Your entry has been saved locally and will sync to the Hub within ~{COMMIT_EVERY_MIN} minutes."
|
| 122 |
updated = read_all_records()
|
| 123 |
rows = [
|
| 124 |
-
[r["dataset_name"], r["dataset_url"], r["description"], f"{r['approx_size']} {r['size_unit']}", r["field"], r["user"], r["created_at"]]
|
| 125 |
for r in updated
|
| 126 |
]
|
| 127 |
return gr.update(value=ok, visible=True), rows
|
|
@@ -131,7 +139,7 @@ def refresh_table(field: str, search: str):
|
|
| 131 |
data = read_all_records()
|
| 132 |
data = filter_records(data, field, search)
|
| 133 |
rows = [
|
| 134 |
-
[r["dataset_name"], r["dataset_url"], r["description"], f"{r['approx_size']} {r['size_unit']}", r["field"], r["user"], r["created_at"]]
|
| 135 |
for r in data
|
| 136 |
]
|
| 137 |
return rows
|
|
@@ -164,7 +172,7 @@ with gr.Blocks(title="Community Dataset Index", css=".wrap {max-width: 1200px; m
|
|
| 164 |
refresh = gr.Button("Refresh")
|
| 165 |
table = gr.Dataframe(
|
| 166 |
headers=["Name", "URL", "Description", "Size", "Field", "User", "Created"],
|
| 167 |
-
datatype=["str", "str", "str", "str", "str", "str", "str"],
|
| 168 |
interactive=False,
|
| 169 |
wrap=True,
|
| 170 |
)
|
|
|
|
| 98 |
errors.append("Approximate size must be a non-negative number.")
|
| 99 |
if not field.strip():
|
| 100 |
errors.append("Please provide a field.")
|
| 101 |
+
|
| 102 |
+
# Check for existing dataset URL and name
|
| 103 |
+
existing_records = read_all_records()
|
| 104 |
+
for record in existing_records:
|
| 105 |
+
if record.get("dataset_url", "").strip().lower() == dataset_url.strip().lower():
|
| 106 |
+
errors.append(f"Dataset URL already exists: {record.get('dataset_url')}")
|
| 107 |
+
if record.get("dataset_name", "").strip().lower() == dataset_name.strip().lower():
|
| 108 |
+
errors.append(f"Dataset name already exists: {record.get('dataset_name')}")
|
| 109 |
|
| 110 |
if errors:
|
| 111 |
return gr.update(value=f"Submission failed:\n- " + "\n- ".join(errors), visible=True), gr.update(visible=False)
|
|
|
|
| 129 |
ok = f"Thanks, {user_display}. Your entry has been saved locally and will sync to the Hub within ~{COMMIT_EVERY_MIN} minutes."
|
| 130 |
updated = read_all_records()
|
| 131 |
rows = [
|
| 132 |
+
[r["dataset_name"], f'<a href="{r["dataset_url"]}" target="_blank">{r["dataset_url"]}</a>', r["description"], f"{r['approx_size']} {r['size_unit']}", r["field"], r["user"], r["created_at"]]
|
| 133 |
for r in updated
|
| 134 |
]
|
| 135 |
return gr.update(value=ok, visible=True), rows
|
|
|
|
| 139 |
data = read_all_records()
|
| 140 |
data = filter_records(data, field, search)
|
| 141 |
rows = [
|
| 142 |
+
[r["dataset_name"], f'<a href="{r["dataset_url"]}" target="_blank">{r["dataset_url"]}</a>', r["description"], f"{r['approx_size']} {r['size_unit']}", r["field"], r["user"], r["created_at"]]
|
| 143 |
for r in data
|
| 144 |
]
|
| 145 |
return rows
|
|
|
|
| 172 |
refresh = gr.Button("Refresh")
|
| 173 |
table = gr.Dataframe(
|
| 174 |
headers=["Name", "URL", "Description", "Size", "Field", "User", "Created"],
|
| 175 |
+
datatype=["str", "html", "str", "str", "str", "str", "str"],
|
| 176 |
interactive=False,
|
| 177 |
wrap=True,
|
| 178 |
)
|