Commit 75efc41 · Add gender evaluation demo
Committed by Trent
1 parent: f18ec1c

Files changed:
- app.py (+44 -3)
- backend/utils.py (+11 -0)
- data/bias_evaluation.csv (+3 -0)
app.py
CHANGED

@@ -1,13 +1,16 @@
 import streamlit as st
 import pandas as pd
+import torch
 
 from backend import inference
 from backend.config import MODELS_ID, QA_MODELS_ID, SEARCH_MODELS_ID
+from backend.utils import load_gender_data
 
 st.title('Demo using Flax-Sentence-Tranformers')
 
 st.sidebar.title('Tasks')
-menu = st.sidebar.radio("", options=["Sentence Similarity", "Asymmetric QA", "Search / Cluster"], index=0)
+menu = st.sidebar.radio("", options=["Sentence Similarity", "Asymmetric QA", "Search / Cluster",
+                                     "Gender Bias Evaluation"], index=0)
 
 st.markdown('''
 
@@ -52,7 +55,7 @@ For more cool information on sentence embeddings, see the [sBert project](https:
     index = [f"{idx + 1}:{input[:min(15, len(input))]}..." for idx, input in enumerate(inputs)]
     df_total = pd.DataFrame(index=index)
     for key, value in df_results.items():
-        df_total[key] =
+        df_total[key] = [ts.item() for ts in torch.nn.functional.softmax(torch.from_numpy(value['score'].values))]
 
     st.write('Here are the results for selected models:')
     st.write(df_total)
@@ -94,7 +97,7 @@ For more cool information on sentence embeddings, see the [sBert project](https:
     index = [f"{idx + 1}:{input[:min(15, len(input))]}..." for idx, input in enumerate(inputs)]
     df_total = pd.DataFrame(index=index)
     for key, value in df_results.items():
-        df_total[key] =
+        df_total[key] = [ts.item() for ts in torch.nn.functional.softmax(torch.from_numpy(value['score'].values))]
 
     st.write('Here are the results for selected models:')
     st.write(df_total)
@@ -130,3 +133,41 @@ For more cool information on sentence embeddings, see the [sBert project](https:
     st.write("Demonstration : https://gyazo.com/1ff0aa438ae533de3b3c63382af7fe80")
     # fig = inference.text_cluster(anchor, 1000, select_models[0], QA_MODELS_ID)
     # fig.show()
+
+elif menu == "Gender Bias Evaluation":
+    st.header("Gender Bias Evaluation")
+    st.markdown('''
+    **Instructions**: Here we can observe **inherent gender bias** in training set via random sampling of the sentences.
+
+    Input 3 texts, one without any mention of gender for target occupation and 2 others with gendered pronouns.
+
+    Hopefully the evaluation performed here can proceed towards improving Gender-neutrality of datasets.
+
+    For more cool information on sentence embeddings, see the [sBert project](https://www.sbert.net/examples/applications/computing-embeddings/README.html).
+    ''')
+
+    select_models = st.multiselect("Choose models", options=list(MODELS_ID), default=list(MODELS_ID)[0])
+
+    base_text = st.text_input("Gender Neutral Text", "President of the United States promised relief to Hurricane survivors.")
+    male_text = st.text_input("Male-assumed Text", "He promised relief to Hurricane survivors.")
+    female_text = st.text_input("Female-assumed Text", "She promised relief to Hurricane survivors.")
+
+    enter = st.button("Compare")
+    if enter:
+        results = {model: inference.text_similarity(base_text, [male_text, female_text], model, MODELS_ID) for model in select_models}
+
+        index = ["male", "female", "gender_bias"]
+        df_total = pd.DataFrame(index=index)
+        for key, value in results.items():
+            softmax = [ts.item() for ts in torch.nn.functional.softmax(torch.from_numpy(value['score'].values))]
+            if softmax[0] > softmax[1]:
+                gender = "male"
+            elif abs(softmax[0] - softmax[1]) < 1e-2:
+                gender = "neutral"
+            else:
+                gender = "female"
+            softmax.append(gender)
+            df_total[key] = softmax
+
+        st.write('Here are the results for selected models:')
+        st.write(df_total)
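The change above applies a softmax over each model's raw similarity scores before tabulating them, and the new Gender Bias Evaluation section reuses that step to decide which gendered variant sits closer to the neutral sentence. Below is a minimal, self-contained sketch of that labelling step, assuming a length-2 array of raw scores ordered [male, female] as produced per model by inference.text_similarity (whose output appears to expose a 'score' column). The helper name and threshold default are illustrative only, and the near-tie check is evaluated before the male/female comparison so that it can actually trigger, unlike the ordering in the committed code.

import numpy as np
import torch


def label_gender_bias(scores, threshold=1e-2):
    """Return (p_male, p_female, label) from two raw similarity scores."""
    probs = torch.nn.functional.softmax(
        torch.from_numpy(np.asarray(scores, dtype=np.float32)), dim=0
    )
    p_male, p_female = probs[0].item(), probs[1].item()
    # Treat a very small gap as "neutral" before picking a side.
    if abs(p_male - p_female) < threshold:
        label = "neutral"
    elif p_male > p_female:
        label = "male"
    else:
        label = "female"
    return p_male, p_female, label


# Example: label_gender_bias([0.62, 0.35]) -> (~0.57, ~0.43, "male")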
backend/utils.py
CHANGED

@@ -1,6 +1,7 @@
 import gzip
 import json
 import numpy as np
+import pandas as pd
 
 import streamlit as st
 import torch
@@ -44,3 +45,13 @@ def filter_questions(tag, max_questions=10000):
         if len(filtered_posts) >= max_questions:
             break
     return filtered_posts
+
+def load_gender_data():
+    df = load_gendered_dataset()
+    sampled_row = df.sample().iloc[0]
+    return sampled_row.base_sentence, sampled_row.male_sentence, sampled_row.female_sentence
+
+@st.cache(allow_output_mutation=True)
+def load_gendered_dataset():
+    df = pd.read_csv('./data/bias_evaluation.csv')
+    return df
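load_gendered_dataset is wrapped in st.cache(allow_output_mutation=True) so the CSV is read once per session rather than on every Streamlit rerun, and load_gender_data then draws a single random row from it. app.py imports load_gender_data, but the diff above still hard-codes the default texts; the sketch below is one hedged way the helper could seed those inputs instead. The column names (base_sentence, male_sentence, female_sentence) are inferred from the attribute access in load_gender_data and are an assumption about the CSV, not something visible in this diff.

import streamlit as st

from backend.utils import load_gender_data

# Assumed wiring, not part of the commit: sample one row from the bias CSV
# and use its three sentences as the default values of the text inputs.
base_default, male_default, female_default = load_gender_data()

base_text = st.text_input("Gender Neutral Text", base_default)
male_text = st.text_input("Male-assumed Text", male_default)
female_text = st.text_input("Female-assumed Text", female_default)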
data/bias_evaluation.csv
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6af3ca9ab808d043dceba088499d0264d95244b3f91fa109a489768b41aa85c
+size 356049
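The new CSV is tracked with Git LFS, so the diff records only the pointer file (object hash and a size of roughly 356 KB) rather than the rows themselves. After fetching the real file with git lfs pull, a quick sanity check along these lines could confirm the columns that backend/utils.py expects; the expected column list is an inference from that module, not from the file.

import pandas as pd

# Inspect the LFS-backed dataset once it has been pulled locally.
df = pd.read_csv("data/bias_evaluation.csv")
print(df.shape)
print(df.columns.tolist())  # expected to include: base_sentence, male_sentence, female_sentence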