maslionok
committed on
Commit
·
66d1427
1
Parent(s):
21f4048
reverted
Browse files
app.py
CHANGED
|
@@ -30,46 +30,16 @@ def normalize(text, lang_choice):
|
|
| 30 |
print("❌ Pipeline error:", e)
|
| 31 |
return f"Error: {e}"
|
| 32 |
|
| 33 |
-
# Define example inputs for different languages
|
| 34 |
-
examples = [
|
| 35 |
-
["The quick brown fox jumps over the lazy dog. This is a sample text for testing.", "en"],
|
| 36 |
-
["Der schnelle braune Fuchs springt über den faulen Hund. Dies ist ein Beispieltext zum Testen.", "de"],
|
| 37 |
-
["Le renard brun rapide saute par-dessus le chien paresseux. Ceci est un texte d'exemple pour les tests.", "fr"],
|
| 38 |
-
["El zorro marrón rápido salta sobre el perro perezoso. Este es un texto de ejemplo para pruebas.", "es"],
|
| 39 |
-
["La volpe marrone veloce salta sopra il cane pigro. Questo è un testo di esempio per i test.", "it"],
|
| 40 |
-
["Auto-detect language: Mixed content with English and Français words together!", "Auto-detect"]
|
| 41 |
-
]
|
| 42 |
-
|
| 43 |
demo = gr.Interface(
|
| 44 |
fn=normalize,
|
| 45 |
inputs=[
|
| 46 |
-
gr.Textbox(
|
| 47 |
-
|
| 48 |
-
placeholder="Type your text here or try one of the examples below...",
|
| 49 |
-
lines=3
|
| 50 |
-
),
|
| 51 |
-
gr.Dropdown(choices=["Auto-detect"] + LANGUAGES, value="Auto-detect", label="Language")
|
| 52 |
],
|
| 53 |
-
outputs=gr.Textbox(label="Normalized Output"
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
description="""
|
| 57 |
-
<div style="text-align: center; margin-bottom: 20px;">
|
| 58 |
-
<img src="file/logo.jpeg" alt="Logo" style="max-width: 200px; height: auto; border-radius: 8px;">
|
| 59 |
-
</div>
|
| 60 |
-
|
| 61 |
-
**Solr normalization is intended to give an idea of what kind of normalization is happening behind Impresso.**
|
| 62 |
-
|
| 63 |
-
This demo replicates Solr's text analysis functionality, showing how text is processed through various normalization steps including tokenization, stopword removal, and language-specific analysis.
|
| 64 |
-
|
| 65 |
-
Try the examples below or enter your own text to see how different languages are processed!
|
| 66 |
-
""",
|
| 67 |
-
article="""
|
| 68 |
-
### About
|
| 69 |
-
This tool demonstrates the text normalization pipeline used in the Impresso project, which mirrors Apache Solr's text analysis capabilities.
|
| 70 |
-
""",
|
| 71 |
-
theme=gr.themes.Soft(),
|
| 72 |
allow_flagging="never"
|
| 73 |
)
|
| 74 |
|
| 75 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 30 |
print("❌ Pipeline error:", e)
|
| 31 |
return f"Error: {e}"
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
demo = gr.Interface(
|
| 34 |
fn=normalize,
|
| 35 |
inputs=[
|
| 36 |
+
gr.Textbox(label="Enter Text"),
|
| 37 |
+
gr.Dropdown(choices=LANGUAGES, value="Auto-detect", label="Language")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
],
|
| 39 |
+
outputs=gr.Textbox(label="Normalized Output"),
|
| 40 |
+
title="Solr Normalization Pipeline",
|
| 41 |
+
description="Text normalization replicating Solr functionality.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
allow_flagging="never"
|
| 43 |
)
|
| 44 |
|
| 45 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|