cgeorgiaw (HF Staff) committed
Commit: 334508f
Parent(s): a2b2adc

bug fix with OAuth
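
What was going wrong: `gr.OAuthProfile` is a session dataclass, not a UI component, so listing `gr.OAuthProfile()` in the `inputs` of `submit.click` breaks the event wiring. Gradio instead injects the profile automatically into any handler parameter annotated `gr.OAuthProfile | None`, which is exactly how `submit_entry` declares it; the fix is simply to drop it from `inputs`. A minimal sketch of the working pattern (component names here are illustrative, not taken from app.py):

import gradio as gr

def greet(dataset_name: str, profile: gr.OAuthProfile | None):
    # `profile` never appears in `inputs`; Gradio fills it in from the
    # OAuth session and passes None when nobody is logged in.
    user = profile.username if profile else "anonymous"
    return f"Recorded '{dataset_name}' for {user}"

with gr.Blocks() as demo:
    gr.LoginButton()  # enables "Sign in with Hugging Face" on a Space
    name = gr.Textbox(label="Dataset name")
    out = gr.Markdown()
    btn = gr.Button("Submit")
    btn.click(greet, inputs=[name], outputs=[out])  # only real components

demo.launch()

The `| None` in the annotation is load-bearing: with a bare `gr.OAuthProfile` annotation Gradio rejects calls from anonymous users, while `| None` keeps the handler callable either way.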

Files changed (1)
  1. app.py +9 -29
app.py CHANGED
@@ -12,10 +12,9 @@ from huggingface_hub import CommitScheduler
 # ------------------------------
 # Config
 # ------------------------------
-DATASET_REPO_ID = "hugging-science/dataset-quest-index"
+DATASET_REPO_ID = "hugging-science/dataset-quest-index"
 COMMIT_EVERY_MIN = 2
 
-# Local folder where submissions are accumulated before CommitScheduler pushes them
 LOCAL_SUBMISSIONS_DIR = Path("submissions")
 LOCAL_SUBMISSIONS_DIR.mkdir(parents=True, exist_ok=True)
 LOCAL_FILE = LOCAL_SUBMISSIONS_DIR / f"records_{uuid.uuid4().hex}.jsonl"
@@ -28,7 +27,6 @@ scheduler = CommitScheduler(
     every=COMMIT_EVERY_MIN,
 )
 
-
 # ------------------------------
 # Utilities
 # ------------------------------
@@ -37,7 +35,6 @@ def _now_iso() -> str:
 
 
 def read_all_records() -> List[Dict[str, Any]]:
-    """Read all jsonl records from LOCAL_SUBMISSIONS_DIR into a list."""
     records: List[Dict[str, Any]] = []
     for p in sorted(LOCAL_SUBMISSIONS_DIR.glob("*.jsonl")):
         try:
@@ -49,7 +46,6 @@ def read_all_records() -> List[Dict[str, Any]]:
                 try:
                     records.append(json.loads(line))
                 except Exception:
-                    # Skip malformed lines
                     pass
         except FileNotFoundError:
             pass
@@ -81,17 +77,6 @@ def filter_records(records: List[Dict[str, Any]], field: str | None, search: str
 # ------------------------------
 # App logic
 # ------------------------------
-FIELDS = [
-    "NLP",
-    "Computer Vision",
-    "Audio",
-    "Multimodal",
-    "Reinforcement Learning",
-    "Time Series",
-    "Tabular",
-    "Other",
-]
-
 SIZE_UNITS = ["KB", "MB", "GB", "TB"]
 
 
@@ -104,7 +89,6 @@ def submit_entry(
     field: str,
     profile: gr.OAuthProfile | None,
 ):
-    # Basic validation
     errors = []
     if not dataset_name.strip():
         errors.append("Dataset name is required.")
@@ -112,8 +96,8 @@ def submit_entry(
         errors.append("Dataset URL must be an http(s) link.")
     if size_value is None or size_value < 0:
         errors.append("Approximate size must be a non-negative number.")
-    if field not in FIELDS:
-        errors.append("Please choose a field.")
+    if not field.strip():
+        errors.append("Please provide a field.")
 
     if errors:
         return gr.update(value=f"Submission failed:\n- " + "\n- ".join(errors), visible=True), gr.update(visible=False)
@@ -129,15 +113,13 @@ def submit_entry(
         "description": description.strip(),
         "approx_size": float(size_value),
         "size_unit": size_unit,
-        "field": field,
+        "field": field.strip(),
         "user": user_handle or user_display,
     }
 
     append_record(record)
-    # Return success notice and refresh table
     ok = f"Thanks, {user_display}. Your entry has been saved locally and will sync to the Hub within ~{COMMIT_EVERY_MIN} minutes."
     updated = read_all_records()
-    # Project to a neat table
     rows = [
         [r["dataset_name"], r["dataset_url"], r["description"], f"{r['approx_size']} {r['size_unit']}", r["field"], r["user"], r["created_at"]]
         for r in updated
@@ -171,13 +153,13 @@ with gr.Blocks(title="Community Dataset Index", css=".wrap {max-width: 1200px; m
             with gr.Row():
                 size_val = gr.Number(label="Approx. size", minimum=0, value=0)
                 size_unit = gr.Dropdown(SIZE_UNITS, value="GB", label="Unit")
-            field = gr.Dropdown(FIELDS, label="Field", value="NLP")
+            field = gr.Textbox(label="Field (e.g. PDEs, multi-omics, single-cell, catalysts, etc.)")
             submit = gr.Button("Submit", variant="primary")
             notice = gr.Markdown(visible=False)
         with gr.Column(scale=2):
             gr.Markdown("### Browse & filter")
             with gr.Row():
-                field_filter = gr.Dropdown(["All"] + FIELDS, value="All", label="Field filter")
+                field_filter = gr.Textbox(label="Field filter (leave blank for all)")
                 search = gr.Textbox(label="Search", placeholder="Search name, URL, description, user…")
             refresh = gr.Button("Refresh")
             table = gr.Dataframe(
@@ -187,10 +169,9 @@ with gr.Blocks(title="Community Dataset Index", css=".wrap {max-width: 1200px; m
             wrap=True,
         )
 
-    # Wire events
     submit.click(
         submit_entry,
-        inputs=[name, url, desc, size_val, size_unit, field, gr.OAuthProfile()],
+        inputs=[name, url, desc, size_val, size_unit, field],
        outputs=[notice, table],
         show_progress="minimal",
     )
@@ -199,9 +180,8 @@ with gr.Blocks(title="Community Dataset Index", css=".wrap {max-width: 1200px; m
     field_filter.change(refresh_table, inputs=[field_filter, search], outputs=table)
    search.submit(refresh_table, inputs=[field_filter, search], outputs=table)
 
-    # Populate on launch
-    demo.load(lambda: refresh_table("All", ""), inputs=None, outputs=table)
+    demo.load(lambda: refresh_table("", ""), inputs=None, outputs=table)
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
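
Background on where submissions go (the config context lines in the first hunk, unchanged in substance by this commit): `CommitScheduler` from huggingface_hub watches a local folder and commits its contents to a Hub dataset repo every few minutes from a background thread, so the app only ever appends JSONL lines locally. A sketch of that pattern, reusing the repo id, folder, and interval from the diff; the body of `append_record` is an assumption, since the diff does not show it:

import json
import uuid
from pathlib import Path
from huggingface_hub import CommitScheduler

LOCAL_SUBMISSIONS_DIR = Path("submissions")
LOCAL_SUBMISSIONS_DIR.mkdir(parents=True, exist_ok=True)
LOCAL_FILE = LOCAL_SUBMISSIONS_DIR / f"records_{uuid.uuid4().hex}.jsonl"

scheduler = CommitScheduler(
    repo_id="hugging-science/dataset-quest-index",
    repo_type="dataset",
    folder_path=LOCAL_SUBMISSIONS_DIR,
    every=2,  # minutes between background pushes
)

def append_record(record: dict) -> None:
    # Hold the scheduler's lock so a background commit never snapshots a
    # half-written line (assumed body; the diff does not show this helper).
    with scheduler.lock:
        with LOCAL_FILE.open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

The per-process `uuid` suffix in the filename lets several app replicas append concurrently without clobbering each other, and `read_all_records` globbing `*.jsonl` is the matching read side.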