maslionok
committed on
Commit
·
442b8bb
1
Parent(s):
38629ba
fixed the part about the pipeline
Browse files
app.py
CHANGED
|
@@ -81,8 +81,13 @@ with gr.Blocks(title="Solr Normalization Demo") as demo:
|
|
| 81 |
gr.Markdown(
|
| 82 |
"""
|
| 83 |
- **Tokenization**: Splits text into individual tokens
|
| 84 |
-
- **
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
"""
|
| 87 |
)
|
| 88 |
|
|
|
|
| 81 |
gr.Markdown(
|
| 82 |
"""
|
| 83 |
- **Tokenization**: Splits text into individual tokens
|
| 84 |
+
- **Token filters**: Apply a series of transformations to the tokens:
|
| 85 |
+
- elision: removes elided articles and their apostrophes (e.g. "l'avion" becomes "avion")
|
| 86 |
+
- lowercase: converts to lowercase
|
| 87 |
+
- asciifolding: converts accented characters to ASCII
|
| 88 |
+
- stop: removes common stopwords
|
| 89 |
+
- stemmer: reduces words to their root form
|
| 90 |
+
- normalization: applies language-specific normalization
|
| 91 |
"""
|
| 92 |
)
|
| 93 |
|