Paula Leonova
committed on
Commit
·
c0937ef
1
Parent(s):
43481f8
Update how ground truth label is uploaded for multiple texts
Browse files
app.py
CHANGED
|
@@ -81,16 +81,16 @@ with st.form(key='my_form'):
|
|
| 81 |
glabels = list(set([x.strip() for x in glabels.strip().split(',') if len(x.strip()) > 0]))
|
| 82 |
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
|
| 95 |
|
| 96 |
# threshold_value = st.slider(
|
|
@@ -280,12 +280,19 @@ if submit_button or example_button:
|
|
| 280 |
else:
|
| 281 |
label_match_df = labels_full_df.copy()
|
| 282 |
|
| 283 |
-
# TO DO: ADD Flexibility for csv import and multiple texts
|
| 284 |
if len(glabels) > 0:
|
| 285 |
gdata = pd.DataFrame({'label': glabels})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
gdata['correct_match'] = True
|
| 287 |
-
|
| 288 |
-
label_match_df = pd.merge(label_match_df, gdata, how = 'left', on = ['label'])
|
| 289 |
label_match_df['correct_match'].fillna(False, inplace=True)
|
| 290 |
|
| 291 |
st.dataframe(label_match_df)
|
|
@@ -296,7 +303,6 @@ if submit_button or example_button:
|
|
| 296 |
mime='title_label_sum_full/csv',
|
| 297 |
)
|
| 298 |
|
| 299 |
-
|
| 300 |
# if len(glabels) > 0:
|
| 301 |
# st.markdown("### Evaluation Metrics")
|
| 302 |
# with st.spinner('Evaluating output against ground truth...'):
|
|
@@ -313,4 +319,4 @@ if submit_button or example_button:
|
|
| 313 |
# st.dataframe(df_report)
|
| 314 |
|
| 315 |
st.success('All done!')
|
| 316 |
-
|
|
|
|
| 81 |
glabels = list(set([x.strip() for x in glabels.strip().split(',') if len(x.strip()) > 0]))
|
| 82 |
|
| 83 |
|
| 84 |
+
glabels_csv_expander = st.expander(label=f'Have a file with labels for the text? Click here to upload your CSV file.', expanded=False)
|
| 85 |
+
with glabels_csv_expander:
|
| 86 |
+
st.markdown('##### Choose one of the options below:')
|
| 87 |
+
st.write("__Option A:__")
|
| 88 |
+
uploaded_onetext_glabels_file = st.file_uploader("Single Text: Choose a CSV file with one column and no header, where each cell is a separate label",
|
| 89 |
+
key = 'onetext_glabels_uploader')
|
| 90 |
+
st.write("__Option B:__")
|
| 91 |
+
uploaded_multitext_glabels_file = st.file_uploader('Multiple Text: Choose a CSV file with two columns "title" and "label", with the cells in the title column matching the name of the files uploaded in step #1.',
|
| 92 |
+
key = 'multitext_glabels_uploader')
|
| 93 |
+
|
| 94 |
|
| 95 |
|
| 96 |
# threshold_value = st.slider(
|
|
|
|
| 280 |
else:
|
| 281 |
label_match_df = labels_full_df.copy()
|
| 282 |
|
|
|
|
| 283 |
if len(glabels) > 0:
|
| 284 |
gdata = pd.DataFrame({'label': glabels})
|
| 285 |
+
join_list = ['label']
|
| 286 |
+
elif uploaded_onetext_glabels_file is not None:
|
| 287 |
+
# The single-text CSV has one column and no header row; name that column
# 'label' so it matches the key used when merging against the predicted labels
# (without `names`, pandas would call the column 0 and the merge would fail).
gdata = pd.read_csv(uploaded_onetext_glabels_file, header=None, names=['label'])
|
| 288 |
+
join_list = ['label']
|
| 289 |
+
elif uploaded_multitext_glabels_file is not None:
|
| 290 |
+
gdata = pd.read_csv(uploaded_multitext_glabels_file)
|
| 291 |
+
join_list = ['title', 'label']
|
| 292 |
+
|
| 293 |
+
if len(glabels) > 0 or uploaded_onetext_glabels_file is not None or uploaded_multitext_glabels_file is not None:
|
| 294 |
gdata['correct_match'] = True
|
| 295 |
+
label_match_df = pd.merge(label_match_df, gdata, how='outer', on=join_list)
|
|
|
|
| 296 |
label_match_df['correct_match'].fillna(False, inplace=True)
|
| 297 |
|
| 298 |
st.dataframe(label_match_df)
|
|
|
|
| 303 |
mime='title_label_sum_full/csv',
|
| 304 |
)
|
| 305 |
|
|
|
|
| 306 |
# if len(glabels) > 0:
|
| 307 |
# st.markdown("### Evaluation Metrics")
|
| 308 |
# with st.spinner('Evaluating output against ground truth...'):
|
|
|
|
| 319 |
# st.dataframe(df_report)
|
| 320 |
|
| 321 |
st.success('All done!')
|
| 322 |
+
st.balloons()
|