guipenedo HF Staff committed on
Commit
bf84519
·
1 Parent(s): b31a11e

clean up design

Browse files
Files changed (1) hide show
  1. app.py +41 -59
app.py CHANGED
@@ -1,16 +1,3 @@
1
- # import os
2
- # _LOCAL_TMP = "/fsx/guilherme/tmp"
3
- # try:
4
- # os.makedirs(_LOCAL_TMP, exist_ok=True)
5
- # os.environ.setdefault("TMPDIR", _LOCAL_TMP)
6
- # os.environ.setdefault("TEMP", _LOCAL_TMP)
7
- # os.environ.setdefault("TMP", _LOCAL_TMP)
8
- # _GRADIO_TMP = os.path.join(_LOCAL_TMP, "gradio")
9
- # os.makedirs(_GRADIO_TMP, exist_ok=True)
10
- # os.environ.setdefault("GRADIO_TEMP_DIR", _GRADIO_TMP)
11
- # except Exception:
12
- # pass
13
-
14
  import gradio as gr
15
  from datatrove.pipeline.readers import ParquetReader
16
  from default_wiki_pipeline import _parse_and_clean_wikicode, mwparserfromhell
@@ -39,7 +26,7 @@ def matches_filters(doc, require_has_math: bool | None, require_has_infobox: boo
39
  meta = doc.metadata or {}
40
  if require_has_math and not bool(meta.get("has_math")):
41
  return False
42
- if require_has_infobox and not meta.get("infoboxes"):
43
  return False
44
  return True
45
 
@@ -243,6 +230,18 @@ def on_next(docs_cache, idx: int, reader_iter, require_has_math: bool, require_h
243
  return new_idx, docs_cache, reader_iter, left, left_meta, header, md, info, right
244
 
245
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  with gr.Blocks() as demo:
247
  idx_state = gr.State(value=-1, time_to_live=900)
248
  docs_state = gr.State(value=[], time_to_live=900)
@@ -256,8 +255,6 @@ with gr.Blocks() as demo:
256
  with gr.Row():
257
  prev_btn = gr.Button("Previous")
258
  next_btn = gr.Button("Next")
259
- left_text = gr.Textbox(label="FineWiki extraction", lines=30)
260
- left_meta = gr.JSON(label="Metadata")
261
 
262
  with gr.Column():
263
  with gr.Row():
@@ -266,55 +263,40 @@ with gr.Blocks() as demo:
266
  with gr.Column(scale=1):
267
  require_has_math = gr.Checkbox(label="Has math", value=False)
268
  require_has_infobox = gr.Checkbox(label="Has infobox", value=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  with gr.Tab("Preview"):
270
  right_iframe = gr.HTML(label="Original Page")
271
  with gr.Tab("wikimedia/wikipedia"):
272
- right_markdown = gr.Textbox(label="wikimedia/wikipedia extraction", lines=30)
273
- right_infoboxes = gr.JSON(label="Infoboxes")
274
-
275
-
276
-
277
- # with gr.Row():
278
- # with gr.Column(scale=1):
279
- # language_select = gr.Dropdown(choices=lang_list, value="en", label="Language")
280
- # with gr.Column(scale=1):
281
- # require_has_math = gr.Checkbox(label="Has math", value=False)
282
- # require_has_infobox = gr.Checkbox(label="Has infobox", value=False)
283
- # with gr.Row():
284
- # with gr.Column():
285
- # with gr.Row():
286
- # prev_btn = gr.Button("Previous")
287
- # next_btn = gr.Button("Next")
288
- # with gr.Column():
289
- # header_md = gr.Markdown()
290
- # # with gr.Row():
291
- # # id_input = gr.Textbox(label="Wikidata ID/URL/Page ID", placeholder="e.g., Q42 or https://... or 12345", lines=1)
292
- # # find_btn = gr.Button("Find")
293
- # # with gr.Row():
294
- # # show_preview = gr.Checkbox(label="Show preview", value=True)
295
- # # show_wiki = gr.Checkbox(label="Show `wikimedia/wikipedia` extraction", value=False)
296
- # # show_infoboxes = gr.Checkbox(label="Show infoboxes", value=True)
297
- # with gr.Row():
298
- # with gr.Column():
299
- # left_text = gr.Textbox(label="FineWiki extraction", lines=30)
300
- # left_meta = gr.JSON(label="Metadata")
301
- # with gr.Column():
302
- # with gr.Tab("Preview"):
303
- # right_iframe = gr.HTML(label="Original Page")
304
- # with gr.Tab("wikimedia/wikipedia"):
305
- # right_markdown = gr.Textbox(label="wikimedia/wikipedia extraction", lines=30)
306
- # right_infoboxes = gr.JSON(label="Infoboxes")
307
-
308
- language_select.change(on_select_language, inputs=[language_select, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
309
- demo.load(on_select_language, inputs=[language_select, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
310
  # find_btn.click(on_find, inputs=[docs_state, idx_state, iter_state, id_input, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
311
 
312
  # Visibility toggles driven directly by checkbox changes
313
- # show_wiki.change(lambda v: gr.update(visible=v), inputs=[show_wiki], outputs=[right_markdown])
314
- # show_preview.change(lambda v: gr.update(visible=v), inputs=[show_preview], outputs=[right_iframe])
315
- # show_infoboxes.change(lambda v: gr.update(visible=v), inputs=[show_infoboxes], outputs=[right_infoboxes])
316
- prev_btn.click(on_prev, inputs=[docs_state, idx_state, iter_state, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
317
- next_btn.click(on_next, inputs=[docs_state, idx_state, iter_state, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
 
 
 
318
 
319
  # Enable global queue to coordinate concurrent requests safely
320
  demo.queue(default_concurrency_limit=1, max_size=128)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from datatrove.pipeline.readers import ParquetReader
3
  from default_wiki_pipeline import _parse_and_clean_wikicode, mwparserfromhell
 
26
  meta = doc.metadata or {}
27
  if require_has_math and not bool(meta.get("has_math")):
28
  return False
29
+ if require_has_infobox and not meta.get("infoboxes") and len(meta.get("infoboxes", [])) == 0:
30
  return False
31
  return True
32
 
 
230
  return new_idx, docs_cache, reader_iter, left, left_meta, header, md, info, right
231
 
232
 
233
+ SCROLL_TO_TOP_JS = """
234
+ () => {
235
+ const ids = ["left_text_box", "right_markdown_box"];
236
+ for (const id of ids) {
237
+ const root = document.getElementById(id);
238
+ if (!root) continue;
239
+ const ta = root.querySelector('textarea');
240
+ if (ta) ta.scrollTop = 0;
241
+ }
242
+ }
243
+ """
244
+
245
  with gr.Blocks() as demo:
246
  idx_state = gr.State(value=-1, time_to_live=900)
247
  docs_state = gr.State(value=[], time_to_live=900)
 
255
  with gr.Row():
256
  prev_btn = gr.Button("Previous")
257
  next_btn = gr.Button("Next")
 
 
258
 
259
  with gr.Column():
260
  with gr.Row():
 
263
  with gr.Column(scale=1):
264
  require_has_math = gr.Checkbox(label="Has math", value=False)
265
  require_has_infobox = gr.Checkbox(label="Has infobox", value=False)
266
+ with gr.Row():
267
+ with gr.Column():
268
+ with gr.Tab("FineWiki"):
269
+ left_text = gr.Textbox(label="FineWiki extraction", lines=30, elem_id="left_text_box")
270
+ with gr.Tab("Metadata"):
271
+ left_meta = gr.JSON(label="Metadata")
272
+ with gr.Tab("Infoboxes"):
273
+ right_infoboxes = gr.JSON(label="Infoboxes")
274
+
275
+ with gr.Row():
276
+ prev_btn2 = gr.Button("Previous")
277
+ next_btn2 = gr.Button("Next")
278
+ with gr.Column():
279
  with gr.Tab("Preview"):
280
  right_iframe = gr.HTML(label="Original Page")
281
  with gr.Tab("wikimedia/wikipedia"):
282
+ right_markdown = gr.Textbox(label="wikimedia/wikipedia extraction", lines=30, elem_id="right_markdown_box")
283
+
284
+
285
+ _ev1 = language_select.change(on_select_language, inputs=[language_select, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
286
+ _ev1.then(js=SCROLL_TO_TOP_JS)
287
+ _ev2 = demo.load(on_select_language, inputs=[language_select, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
288
+ _ev2.then(js=SCROLL_TO_TOP_JS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  # find_btn.click(on_find, inputs=[docs_state, idx_state, iter_state, id_input, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
290
 
291
  # Visibility toggles driven directly by checkbox changes
292
+ _ev4 = prev_btn.click(on_prev, inputs=[docs_state, idx_state, iter_state, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
293
+ _ev4.then(js=SCROLL_TO_TOP_JS)
294
+ _ev5 = next_btn.click(on_next, inputs=[docs_state, idx_state, iter_state, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
295
+ _ev5.then(js=SCROLL_TO_TOP_JS)
296
+ _ev4 = prev_btn2.click(on_prev, inputs=[docs_state, idx_state, iter_state, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
297
+ _ev4.then(js=SCROLL_TO_TOP_JS)
298
+ _ev5 = next_btn2.click(on_next, inputs=[docs_state, idx_state, iter_state, require_has_math, require_has_infobox], outputs=[idx_state, docs_state, iter_state, left_text, left_meta, header_md, right_markdown, right_infoboxes, right_iframe], concurrency_limit=1)
299
+ _ev5.then(js=SCROLL_TO_TOP_JS)
300
 
301
  # Enable global queue to coordinate concurrent requests safely
302
  demo.queue(default_concurrency_limit=1, max_size=128)