Spaces:

13ze
/

html-to-markdown-cpu

Running

13ze committed on Apr 16

Commit

acf83b2

verified ·

1 Parent(s): 5e18683

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,16 +15,16 @@ def main_fn(html: str, check: list[int]):
         for t in soup.find_all(tag):
             t.decompose()
     body = soup.find("body")
     main = soup.find("main")
-    strip_tags = []  # Futuramente pode-se usar `check` para filtrar tags
-    if main:
-        markdown = md(main, strip=strip_tags)
-    else:
-        markdown = md(body, strip=strip_tags)
     title_tag = soup.find("title")
     title = title_tag.get_text(strip=True) if title_tag else "Sem título"
@@ -49,7 +49,7 @@ demo = gr.Interface(
     outputs=[
         gr.TextArea(label="Markdown gerado", show_copy_button=True)
     ],
-    allow_flagging="never",
 )
 demo.launch(server_name="0.0.0.0")

         for t in soup.find_all(tag):
             t.decompose()
+    # Tenta usar <main>, depois <body>, ou como fallback o próprio soup
     body = soup.find("body")
     main = soup.find("main")
+    target = main or body or soup
+    strip_tags = []  # Pode usar `check` no futuro se quiser ignorar certas tags
+    markdown = md(target, strip=strip_tags)
+    # Tenta extrair o título da página
     title_tag = soup.find("title")
     title = title_tag.get_text(strip=True) if title_tag else "Sem título"
     outputs=[
         gr.TextArea(label="Markdown gerado", show_copy_button=True)
     ],
+    flagging_mode="never",
 )
 demo.launch(server_name="0.0.0.0")