File size: 1,485 Bytes
ed9451a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
from bs4 import BeautifulSoup
from markdownify import MarkdownConverter


def md(soup, **options):
    """Render an already-parsed BeautifulSoup node as Markdown.

    Args:
        soup: Parsed node handed to ``MarkdownConverter.convert_soup``.
        **options: Keyword options forwarded unchanged to the
            ``MarkdownConverter`` constructor.

    Returns:
        Whatever ``convert_soup`` returns for the given node.
    """
    converter = MarkdownConverter(**options)
    return converter.convert_soup(soup)


def main_fn(html: str, check: list[str]):
    """Convert raw HTML into Markdown, headed by the page title.

    The content is taken from ``<main>`` when present, otherwise from
    ``<body>``, otherwise from the whole parsed document (so bare HTML
    fragments such as ``<p>hi</p>`` work too).

    Args:
        html: Raw HTML source. ``None`` or an empty string is treated as an
            empty document.
        check: Tag names ticked in the UI. Currently ignored: no tag is
            stripped from the output.

    Returns:
        A string made of the title, a ``======`` underline, a blank line and
        the Markdown rendering of the selected content.
    """
    soup = BeautifulSoup(html or "", features="html.parser")

    # Remove <script> and <style>: their contents are not readable text.
    for tag in ["script", "style"]:
        for t in soup.find_all(tag):
            t.decompose()

    body = soup.find("body")
    main = soup.find("main")

    strip_tags = []  # `check` could be used here in the future to filter tags

    # html.parser does not synthesize a <body>, so a pasted fragment has
    # neither <main> nor <body>; fall back to the whole document instead of
    # passing None to the converter.
    if main is not None:
        root = main
    elif body is not None:
        root = body
    else:
        root = soup

    markdown = md(root, strip=strip_tags)

    title_tag = soup.find("title")
    title = title_tag.get_text(strip=True) if title_tag is not None else ""
    if not title:
        # Missing or blank <title>: use the placeholder rather than emitting
        # an empty heading line.
        title = "Sem título"

    return f"{title}\n======\n\n{markdown}"


# Centered blurb shown under the app title (rendered as HTML by Gradio).
_DESCRIPTION = """
    <div style="width: fit-content; margin: 0 auto;">
        Cole aqui seu HTML bruto e o app vai converter para Markdown.
    </div>"""

# Input widgets, in the order main_fn receives them: (html, check).
_html_input = gr.TextArea(
    label="HTML",
    placeholder="Cole seu código HTML aqui...",
    lines=20,
)
_ignored_tags_input = gr.CheckboxGroup(
    label="Ignorar tags (sem efeito - tudo será extraído)",
    choices=["a", "img", "noscript"],
    value=[],
)

# Single output box holding the generated Markdown.
_markdown_output = gr.TextArea(label="Markdown gerado", show_copy_button=True)

demo = gr.Interface(
    fn=main_fn,
    title="HTML para Markdown",
    description=_DESCRIPTION,
    inputs=[_html_input, _ignored_tags_input],
    outputs=[_markdown_output],
    allow_flagging="never",
)

# Bind to all interfaces so the app is reachable from outside the host.
demo.launch(server_name="0.0.0.0")