Spaces:
Runtime error
Runtime error
Commit
·
649ea6a
1
Parent(s):
8f0da78
add warning message
Browse files
- app.py +13 -1
- explanation_filtering_pipeline.pdf +0 -0
app.py
CHANGED
|
@@ -54,6 +54,15 @@ class Visualization:
|
|
| 54 |
lang_dataset_id, path_kenlm_model
|
| 55 |
)
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def preamble(self):
|
| 58 |
st.markdown(
|
| 59 |
"Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail."
|
|
@@ -170,7 +179,9 @@ class Visualization:
|
|
| 170 |
)
|
| 171 |
self.docs = self.docs_checkpoint
|
| 172 |
for i in range(len(self.docs["repetitions_ratio"])):
|
| 173 |
-
self.docs["repetitions_ratio"].iloc[i] = self.docs[
|
|
|
|
|
|
|
| 174 |
|
| 175 |
cutoff_def = "If the repetitions ratio of a document is higher than this number, the document is removed."
|
| 176 |
cutoff_repetitions_ratio = st.sidebar.slider(
|
|
@@ -526,6 +537,7 @@ class Visualization:
|
|
| 526 |
)
|
| 527 |
|
| 528 |
def visualization(self):
|
|
|
|
| 529 |
self.preamble()
|
| 530 |
self.open_data()
|
| 531 |
self.set_title()
|
|
|
|
| 54 |
lang_dataset_id, path_kenlm_model
|
| 55 |
)
|
| 56 |
|
| 57 |
+
def warning_preamble(self):
|
| 58 |
+
st.markdown(
|
| 59 |
+
"This demo can be a little slow, and only allows you to process up to 5000 documents "
|
| 60 |
+
"for a decent speed. If you want to display up to three times more documents and have "
|
| 61 |
+
"a faster visualization, we invite you to run this "
|
| 62 |
+
"[code](https://github.com/bigscience-workshop/data_tooling/tree/master/ac_dc/visualization) "
|
| 63 |
+
"on your computer."
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
def preamble(self):
|
| 67 |
st.markdown(
|
| 68 |
"Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail."
|
|
|
|
| 179 |
)
|
| 180 |
self.docs = self.docs_checkpoint
|
| 181 |
for i in range(len(self.docs["repetitions_ratio"])):
|
| 182 |
+
self.docs["repetitions_ratio"].iloc[i] = self.docs[
|
| 183 |
+
"repetitions_ratio"
|
| 184 |
+
].iloc[i][repetitions_length]
|
| 185 |
|
| 186 |
cutoff_def = "If the repetitions ratio of a document is higher than this number, the document is removed."
|
| 187 |
cutoff_repetitions_ratio = st.sidebar.slider(
|
|
|
|
| 537 |
)
|
| 538 |
|
| 539 |
def visualization(self):
|
| 540 |
+
self.warning_preamble()
|
| 541 |
self.preamble()
|
| 542 |
self.open_data()
|
| 543 |
self.set_title()
|
explanation_filtering_pipeline.pdf
CHANGED
|
Binary files a/explanation_filtering_pipeline.pdf and b/explanation_filtering_pipeline.pdf differ
|
|
|