import gradio as gr
from impresso_pipelines.solrnormalization import SolrNormalizationPipeline
# Single pipeline instance, created at import time and shared by every
# call to normalize().
pipeline = SolrNormalizationPipeline()

# Choices shown in the language dropdown. "Auto-detect" is the sentinel
# that normalize() translates into lang=None.
LANGUAGES = [
    "Auto-detect",
    "de",
    "fr",
    "el",
    "ru",
]
def normalize(text, lang_choice):
    """Run the normalization pipeline on *text* and format the result.

    Args:
        text: Raw text typed into the input box.
        lang_choice: One of the ``LANGUAGES`` entries. ``"Auto-detect"``
            is passed to the pipeline as ``lang=None``; any other value
            is forwarded unchanged.

    Returns:
        A multi-line string listing the language, the tokens and the
        detected stopwords reported by the pipeline, or a short prompt
        when *text* is empty or contains only whitespace.
    """
    # Nothing to analyze: answer directly instead of sending blank input
    # through the pipeline.
    if not text or not text.strip():
        return "Please enter some text to normalize."

    lang = None if lang_choice == "Auto-detect" else lang_choice
    result = pipeline(text, lang=lang, diagnostics=True)
    return (
        f"Language: {result['language']}\n\n"
        f"Tokens:\n{result['tokens']}\n\n"
        f"Detected stopwords:\n{result['stopwords_detected']}"
    )
# Gradio front end: a free-text box plus a language dropdown feed
# normalize(); its string result is shown in a single output box.
demo = gr.Interface(
    normalize,
    [
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=LANGUAGES, value="Auto-detect", label="Language"),
    ],
    gr.Textbox(label="Normalized Output"),
    title="Solr Normalization Pipeline",
    description="Text normalization using Lucene analyzers. Language auto-detected if not selected.",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()