Spaces:
Running
Running
Commit
·
25dd383
1
Parent(s):
7c94469
update markdown
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# Model card: https://huggingface.co/emilylearning/selection-induced-collider-bias
|
| 2 |
# %%
|
| 3 |
import gradio as gr
|
| 4 |
import matplotlib.pyplot as plt
|
|
@@ -17,7 +16,6 @@ MODEL_NAME_DICT = {
|
|
| 17 |
"bert-large-uncased": "BERT-large",
|
| 18 |
"roberta-base": "RoBERTa-base",
|
| 19 |
"bert-base-uncased": "BERT-base",
|
| 20 |
-
"olm/olm-roberta-base-oct-2022": "OLM_RoBERTa-base",
|
| 21 |
OWN_MODEL_NAME: "Your model's"
|
| 22 |
}
|
| 23 |
MODEL_NAMES = list(MODEL_NAME_DICT.keys())
|
|
@@ -51,10 +49,10 @@ GENDERED_LIST = [
|
|
| 51 |
]
|
| 52 |
|
| 53 |
|
| 54 |
-
|
| 55 |
# %%
|
| 56 |
# Fire up the models
|
| 57 |
-
models = {m
|
|
|
|
| 58 |
|
| 59 |
# %%
|
| 60 |
# Get the winogender sentences
|
|
@@ -63,7 +61,9 @@ occs = sorted(list({sentence_id.split('_')[0]
|
|
| 63 |
for sentence_id in winogender_sentences}))
|
| 64 |
|
| 65 |
# %%
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
male_gendered_tokens = [list[0] for list in GENDERED_LIST]
|
| 68 |
female_gendered_tokens = [list[1] for list in GENDERED_LIST]
|
| 69 |
|
|
@@ -109,8 +109,9 @@ def get_figure(df, model_name, occ):
|
|
| 109 |
ax.bar(xs, ys)
|
| 110 |
ax.axis('tight')
|
| 111 |
ax.set_xlabel("Sentence number")
|
| 112 |
-
ax.set_ylabel("
|
| 113 |
-
ax.set_title(
|
|
|
|
| 114 |
return fig
|
| 115 |
|
| 116 |
|
|
@@ -121,7 +122,7 @@ def predict_gender_pronouns(
|
|
| 121 |
texts,
|
| 122 |
occ,
|
| 123 |
):
|
| 124 |
-
"""Run inference on input_text for selected model type, returning
|
| 125 |
"""
|
| 126 |
|
| 127 |
# TODO: make these selectable by user
|
|
@@ -130,8 +131,8 @@ def predict_gender_pronouns(
|
|
| 130 |
|
| 131 |
# For debugging
|
| 132 |
print('input_texts', texts)
|
| 133 |
-
|
| 134 |
-
if model_name is None or model_name == '':
|
| 135 |
model_name = MODEL_NAMES[0]
|
| 136 |
model = models[model_name]
|
| 137 |
elif model_name == OWN_MODEL_NAME:
|
|
@@ -143,7 +144,7 @@ def predict_gender_pronouns(
|
|
| 143 |
|
| 144 |
indie_vars_list = indie_vars.split(',')
|
| 145 |
|
| 146 |
-
male_gendered_tokens, female_gendered_tokens =
|
| 147 |
|
| 148 |
masked_texts = [text.replace('MASK', mask_token) for text in texts]
|
| 149 |
|
|
@@ -193,7 +194,7 @@ def predict_gender_pronouns(
|
|
| 193 |
/ num_ave), DECIMAL_PLACES)
|
| 194 |
|
| 195 |
uncertain_df = pd.DataFrame.from_dict(
|
| 196 |
-
all_uncertainty_f, orient='index', columns=['
|
| 197 |
|
| 198 |
uncertain_df = uncertain_df.reset_index().rename(
|
| 199 |
columns={'index': 'Sentence number'})
|
|
@@ -208,28 +209,26 @@ def predict_gender_pronouns(
|
|
| 208 |
demo = gr.Blocks()
|
| 209 |
with demo:
|
| 210 |
input_texts = gr.Variable([])
|
| 211 |
-
gr.Markdown("
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
gr.Markdown("
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
gr.Markdown("#### TL;DR")
|
| 227 |
-
gr.Markdown("Follow steps below to test out one of the pre-loaded options. Once you get the hang of it, you can load a new model and/or provide your own input texts.")
|
| 228 |
|
| 229 |
with gr.Row():
|
| 230 |
model_name = gr.Radio(
|
| 231 |
MODEL_NAMES,
|
| 232 |
-
|
|
|
|
| 233 |
)
|
| 234 |
own_model_name = gr.Textbox(
|
| 235 |
label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
|
|
@@ -246,11 +245,11 @@ with demo:
|
|
| 246 |
lines=2,
|
| 247 |
label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
|
| 248 |
to include a single MASK-ed out pronoun. \
|
| 249 |
-
If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
|
| 250 |
)
|
| 251 |
|
| 252 |
with gr.Row():
|
| 253 |
-
get_text_btn = gr.Button("3)
|
| 254 |
|
| 255 |
get_text_btn.click(
|
| 256 |
fn=display_input_texts,
|
|
@@ -261,16 +260,17 @@ with demo:
|
|
| 261 |
)
|
| 262 |
|
| 263 |
with gr.Row():
|
| 264 |
-
uncertain_btn = gr.Button("4)
|
| 265 |
gr.Markdown(
|
| 266 |
"If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
|
| 267 |
|
| 268 |
with gr.Row():
|
| 269 |
-
female_fig = gr.Plot()
|
| 270 |
with gr.Row():
|
| 271 |
female_df = gr.Dataframe()
|
| 272 |
with gr.Row():
|
| 273 |
-
display_text = gr.Textbox(
|
|
|
|
| 274 |
|
| 275 |
uncertain_btn.click(
|
| 276 |
fn=predict_gender_pronouns,
|
|
@@ -281,4 +281,4 @@ with demo:
|
|
| 281 |
|
| 282 |
demo.launch(debug=True)
|
| 283 |
|
| 284 |
-
# %%
|
|
|
|
|
|
|
| 1 |
# %%
|
| 2 |
import gradio as gr
|
| 3 |
import matplotlib.pyplot as plt
|
|
|
|
| 16 |
"bert-large-uncased": "BERT-large",
|
| 17 |
"roberta-base": "RoBERTa-base",
|
| 18 |
"bert-base-uncased": "BERT-base",
|
|
|
|
| 19 |
OWN_MODEL_NAME: "Your model's"
|
| 20 |
}
|
| 21 |
MODEL_NAMES = list(MODEL_NAME_DICT.keys())
|
|
|
|
| 49 |
]
|
| 50 |
|
| 51 |
|
|
|
|
| 52 |
# %%
|
| 53 |
# Fire up the models
|
| 54 |
+
models = {m: pipeline("fill-mask", model=m)
|
| 55 |
+
for m in MODEL_NAMES if m != OWN_MODEL_NAME}
|
| 56 |
|
| 57 |
# %%
|
| 58 |
# Get the winogender sentences
|
|
|
|
| 61 |
for sentence_id in winogender_sentences}))
|
| 62 |
|
| 63 |
# %%
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_gendered_token_ids():
|
| 67 |
male_gendered_tokens = [list[0] for list in GENDERED_LIST]
|
| 68 |
female_gendered_tokens = [list[1] for list in GENDERED_LIST]
|
| 69 |
|
|
|
|
| 109 |
ax.bar(xs, ys)
|
| 110 |
ax.axis('tight')
|
| 111 |
ax.set_xlabel("Sentence number")
|
| 112 |
+
ax.set_ylabel("Specification Metric")
|
| 113 |
+
ax.set_title(
|
| 114 |
+
f"Task Specification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
|
| 115 |
return fig
|
| 116 |
|
| 117 |
|
|
|
|
| 122 |
texts,
|
| 123 |
occ,
|
| 124 |
):
|
| 125 |
+
"""Run inference on input_text for selected model type, returning Task Specification metric results.
|
| 126 |
"""
|
| 127 |
|
| 128 |
# TODO: make these selectable by user
|
|
|
|
| 131 |
|
| 132 |
# For debugging
|
| 133 |
print('input_texts', texts)
|
| 134 |
+
|
| 135 |
+
if model_name is None or model_name == '':
|
| 136 |
model_name = MODEL_NAMES[0]
|
| 137 |
model = models[model_name]
|
| 138 |
elif model_name == OWN_MODEL_NAME:
|
|
|
|
| 144 |
|
| 145 |
indie_vars_list = indie_vars.split(',')
|
| 146 |
|
| 147 |
+
male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()
|
| 148 |
|
| 149 |
masked_texts = [text.replace('MASK', mask_token) for text in texts]
|
| 150 |
|
|
|
|
| 194 |
/ num_ave), DECIMAL_PLACES)
|
| 195 |
|
| 196 |
uncertain_df = pd.DataFrame.from_dict(
|
| 197 |
+
all_uncertainty_f, orient='index', columns=['Specification Metric'])
|
| 198 |
|
| 199 |
uncertain_df = uncertain_df.reset_index().rename(
|
| 200 |
columns={'index': 'Sentence number'})
|
|
|
|
| 209 |
demo = gr.Blocks()
|
| 210 |
with demo:
|
| 211 |
input_texts = gr.Variable([])
|
| 212 |
+
gr.Markdown("**Detect Task Specification at Inference-time.**")
|
| 213 |
+
|
| 214 |
+
gr.Markdown("**Follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
|
| 215 |
+
gr.Markdown(f"""1) Pick a preloaded BERT-like model.
|
| 216 |
+
*Note: RoBERTa-large performance is best.*
|
| 217 |
+
2) Pick an Occupation type from the Winogender Schemas evaluation set.
|
| 218 |
+
*Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
|
| 219 |
+
3) Click button to load input texts.
|
| 220 |
+
*Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
|
| 221 |
+
4) Click button to get Task Specification Metric results!
|
| 222 |
+
""")
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
with gr.Row():
|
| 228 |
model_name = gr.Radio(
|
| 229 |
MODEL_NAMES,
|
| 230 |
+
type="value",
|
| 231 |
+
label="1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).",
|
| 232 |
)
|
| 233 |
own_model_name = gr.Textbox(
|
| 234 |
label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
|
|
|
|
| 245 |
lines=2,
|
| 246 |
label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
|
| 247 |
to include a single MASK-ed out pronoun. \
|
| 248 |
+
If unsure of the required format, click an occupation above instead to see some example input texts for this round."
|
| 249 |
)
|
| 250 |
|
| 251 |
with gr.Row():
|
| 252 |
+
get_text_btn = gr.Button("3) Click to load input texts.")
|
| 253 |
|
| 254 |
get_text_btn.click(
|
| 255 |
fn=display_input_texts,
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
with gr.Row():
|
| 263 |
+
uncertain_btn = gr.Button("4) Click to get Task Specification Metric results!")
|
| 264 |
gr.Markdown(
|
| 265 |
"If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
|
| 266 |
|
| 267 |
with gr.Row():
|
| 268 |
+
female_fig = gr.Plot(type="auto")
|
| 269 |
with gr.Row():
|
| 270 |
female_df = gr.Dataframe()
|
| 271 |
with gr.Row():
|
| 272 |
+
display_text = gr.Textbox(
|
| 273 |
+
type="auto", label="Sample of text fed to model")
|
| 274 |
|
| 275 |
uncertain_btn.click(
|
| 276 |
fn=predict_gender_pronouns,
|
|
|
|
| 281 |
|
| 282 |
demo.launch(debug=True)
|
| 283 |
|
| 284 |
+
# %%
|