Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
·
cda45dd
1
Parent(s):
b256a5f
Handling for no words
Browse files
data_measurements/streamlit_utils.py
CHANGED
|
@@ -111,30 +111,33 @@ def expander_general_stats(dstats, column_id):
|
|
| 111 |
"Use this widget to check whether the terms you see most represented"
|
| 112 |
" in the dataset make sense for the goals of the dataset."
|
| 113 |
)
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
"
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
"[open class words](https://dictionary.apa.org/open-class-words) "
|
| 122 |
-
"and their counts are: "
|
| 123 |
-
)
|
| 124 |
-
st.dataframe(dstats.sorted_top_vocab_df)
|
| 125 |
-
st.markdown(
|
| 126 |
-
"There are {0} missing values in the dataset.".format(
|
| 127 |
-
str(dstats.text_nan_count)
|
| 128 |
)
|
| 129 |
-
)
|
| 130 |
-
if dstats.dedup_total > 0:
|
| 131 |
st.markdown(
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
-
"
|
| 135 |
)
|
| 136 |
-
|
| 137 |
-
st.markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
### Show the label distribution from the datasets
|
|
|
|
| 111 |
"Use this widget to check whether the terms you see most represented"
|
| 112 |
" in the dataset make sense for the goals of the dataset."
|
| 113 |
)
|
| 114 |
+
if dstats.total_words == 0:
|
| 115 |
+
st.markdown("Eh oh...not finding the file I need. 😭 Probably it will be there soon. 🤞 Check back later!")
|
| 116 |
+
else:
|
| 117 |
+
st.markdown("There are {0} total words".format(str(dstats.total_words)))
|
| 118 |
+
st.markdown(
|
| 119 |
+
"There are {0} words after removing closed "
|
| 120 |
+
"class words".format(str(dstats.total_open_words))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
)
|
|
|
|
|
|
|
| 122 |
st.markdown(
|
| 123 |
+
"The most common "
|
| 124 |
+
"[open class words](https://dictionary.apa.org/open-class-words) "
|
| 125 |
+
"and their counts are: "
|
| 126 |
)
|
| 127 |
+
st.dataframe(dstats.sorted_top_vocab_df)
|
| 128 |
+
st.markdown(
|
| 129 |
+
"There are {0} missing values in the dataset.".format(
|
| 130 |
+
str(dstats.text_nan_count)
|
| 131 |
+
)
|
| 132 |
+
)
|
| 133 |
+
if dstats.dedup_total > 0:
|
| 134 |
+
st.markdown(
|
| 135 |
+
"There are {0} duplicate items in the dataset. "
|
| 136 |
+
"For more information about the duplicates, "
|
| 137 |
+
"click the 'Duplicates' tab below.".format(str(dstats.dedup_total))
|
| 138 |
+
)
|
| 139 |
+
else:
|
| 140 |
+
st.markdown("There are 0 duplicate items in the dataset. ")
|
| 141 |
|
| 142 |
|
| 143 |
### Show the label distribution from the datasets
|