Spaces:

emilylearning
/

llm_uncertainty

Running

App Files Files Community

emilylearning committed on Mar 4, 2023

Commit

25dd383

1 Parent(s): 7c94469

update markdown

Browse files

Files changed (1) hide show

app.py +36 -36

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Model card: https://huggingface.co/emilylearning/selection-induced-collider-bias
 # %%
 import gradio as gr
 import matplotlib.pyplot as plt
@@ -17,7 +16,6 @@ MODEL_NAME_DICT = {
     "bert-large-uncased": "BERT-large",
     "roberta-base": "RoBERTa-base",
     "bert-base-uncased": "BERT-base",
-    "olm/olm-roberta-base-oct-2022": "OLM_RoBERTa-base",
     OWN_MODEL_NAME: "Your model's"
 }
 MODEL_NAMES = list(MODEL_NAME_DICT.keys())
@@ -51,10 +49,10 @@ GENDERED_LIST = [
 ]
 # %%
 # Fire up the models
-models = {m : pipeline("fill-mask", model=m) for m in MODEL_NAMES if m != OWN_MODEL_NAME}
 # %%
 # Get the winogender sentences
@@ -63,7 +61,9 @@ occs = sorted(list({sentence_id.split('_')[0]
                     for sentence_id in winogender_sentences}))
 # %%
-def get_gendered_tokens():
     male_gendered_tokens = [list[0] for list in GENDERED_LIST]
     female_gendered_tokens = [list[1] for list in GENDERED_LIST]
@@ -109,8 +109,9 @@ def get_figure(df, model_name, occ):
     ax.bar(xs, ys)
     ax.axis('tight')
     ax.set_xlabel("Sentence number")
-    ax.set_ylabel("Uncertainty metric")
-    ax.set_title(f"{MODEL_NAME_DICT[model_name]} gender pronoun uncertainty in '{occ}' sentences")
     return fig
@@ -121,7 +122,7 @@ def predict_gender_pronouns(
     texts,
     occ,
 ):
-    """Run inference on input_text for selected model type, returning uncertainty results.
     """
     # TODO: make these selectable by user
@@ -130,8 +131,8 @@ def predict_gender_pronouns(
     # For debugging
     print('input_texts', texts)
-    if model_name is None or model_name == '':
         model_name = MODEL_NAMES[0]
         model = models[model_name]
     elif model_name == OWN_MODEL_NAME:
@@ -143,7 +144,7 @@ def predict_gender_pronouns(
     indie_vars_list = indie_vars.split(',')
-    male_gendered_tokens, female_gendered_tokens = get_gendered_tokens()
     masked_texts = [text.replace('MASK', mask_token) for text in texts]
@@ -193,7 +194,7 @@ def predict_gender_pronouns(
                                                 / num_ave), DECIMAL_PLACES)
     uncertain_df = pd.DataFrame.from_dict(
-        all_uncertainty_f, orient='index', columns=['Uncertainty metric'])
     uncertain_df = uncertain_df.reset_index().rename(
         columns={'index': 'Sentence number'})
@@ -208,28 +209,26 @@ def predict_gender_pronouns(
 demo = gr.Blocks()
 with demo:
     input_texts = gr.Variable([])
-    gr.Markdown("## Are you certain?")
-    gr.Markdown(
-        "#### LLMs are pretty good at reporting their uncertainty. We just need to ask the right way.")
-    gr.Markdown("Using our uncertainty metric informed by applying causal inference techniques in \
-        ['Selection Induced Collider Bias: A Gender Pronoun Uncertainty Case Study'](https://arxiv.org/abs/2210.00131 ), \
-        we are able to identify likely spurious correlations and exploit them in \
-        the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
-        below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
-    gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
-        eight syntactically similar sentences. However semantically, \
-        only two of the sentences are gender-specified while the rest remain gender-underspecified")
-    gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
-        an appropriate heuristic or information retrieval process.")
-    gr.Markdown("#### TL;DR")
-    gr.Markdown("Follow steps below to test out one of the pre-loaded options. Once you get the hang of it, you can load a new model and/or provide your own input texts.")
     with gr.Row():
         model_name = gr.Radio(
             MODEL_NAMES,
-            label="1) Pick a preloaded BERT-like model for uncertainty evaluation (note: RoBERTa-large performance is best)...",
         )
         own_model_name = gr.Textbox(
             label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
@@ -246,11 +245,11 @@ with demo:
             lines=2,
             label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
             to include a single MASK-ed out pronoun. \
-            If unsure on the required format, click an occupation above instead, to see some example input texts for this round.",
         )
     with gr.Row():
-        get_text_btn = gr.Button("3) Load input texts")
     get_text_btn.click(
         fn=display_input_texts,
@@ -261,16 +260,17 @@ with demo:
     )
     with gr.Row():
-        uncertain_btn = gr.Button("4) Get uncertainty results!")
     gr.Markdown(
         "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
     with gr.Row():
-        female_fig = gr.Plot()
     with gr.Row():
         female_df = gr.Dataframe()
     with gr.Row():
-        display_text = gr.Textbox(label="Sample of text fed to model")
     uncertain_btn.click(
         fn=predict_gender_pronouns,
@@ -281,4 +281,4 @@ with demo:
 demo.launch(debug=True)
-# %%

 # %%
 import gradio as gr
 import matplotlib.pyplot as plt
     "bert-large-uncased": "BERT-large",
     "roberta-base": "RoBERTa-base",
     "bert-base-uncased": "BERT-base",
     OWN_MODEL_NAME: "Your model's"
 }
 MODEL_NAMES = list(MODEL_NAME_DICT.keys())
 ]
 # %%
 # Fire up the models
+models = {m: pipeline("fill-mask", model=m)
+          for m in MODEL_NAMES if m != OWN_MODEL_NAME}
 # %%
 # Get the winogender sentences
                     for sentence_id in winogender_sentences}))
 # %%
+def get_gendered_token_ids():
     male_gendered_tokens = [list[0] for list in GENDERED_LIST]
     female_gendered_tokens = [list[1] for list in GENDERED_LIST]
     ax.bar(xs, ys)
     ax.axis('tight')
     ax.set_xlabel("Sentence number")
+    ax.set_ylabel("Specification Metric")
+    ax.set_title(
+        f"Task Specification Metric on {MODEL_NAME_DICT[model_name]}  for '{occ}' sentences")
     return fig
     texts,
     occ,
 ):
+    """Run inference on input_text for selected model type, returning Task Specification metric results.
     """
     # TODO: make these selectable by user
     # For debugging
     print('input_texts', texts)
+    if model_name is None or model_name == '':
         model_name = MODEL_NAMES[0]
         model = models[model_name]
     elif model_name == OWN_MODEL_NAME:
     indie_vars_list = indie_vars.split(',')
+    male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()
     masked_texts = [text.replace('MASK', mask_token) for text in texts]
                                                 / num_ave), DECIMAL_PLACES)
     uncertain_df = pd.DataFrame.from_dict(
+        all_uncertainty_f, orient='index', columns=['Specification Metric'])
     uncertain_df = uncertain_df.reset_index().rename(
         columns={'index': 'Sentence number'})
 demo = gr.Blocks()
 with demo:
     input_texts = gr.Variable([])
+    gr.Markdown("**Detect Task Specification at Inference-time.**")
+    gr.Markdown("**Follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
+    gr.Markdown(f"""1) Pick a preloaded BERT-like model.
+        *Note: RoBERTa-large performance is best.*
+    2) Pick an Occupation type from the Winogender Schemas evaluation set.
+        *Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
+    3) Click button to load input texts.
+        *Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
+    4) Click button to get Task Specification Metric results!
+    """)
     with gr.Row():
         model_name = gr.Radio(
             MODEL_NAMES,
+            type="value",
+            label="1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).",
         )
         own_model_name = gr.Textbox(
             label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
             lines=2,
             label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
             to include a single MASK-ed out pronoun. \
+            If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
         )
     with gr.Row():
+        get_text_btn = gr.Button("3) Click to load input texts.)")
     get_text_btn.click(
         fn=display_input_texts,
     )
     with gr.Row():
+        uncertain_btn = gr.Button("4) Click to get Task Specification Metric results!")
     gr.Markdown(
         "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
     with gr.Row():
+        female_fig = gr.Plot(type="auto")
     with gr.Row():
         female_df = gr.Dataframe()
     with gr.Row():
+        display_text = gr.Textbox(
+            type="auto", label="Sample of text fed to model")
     uncertain_btn.click(
         fn=predict_gender_pronouns,
 demo.launch(debug=True)
+# %%