Rafal committed
Commit · f250d4a
Parent: a6d6e4e

Added testing FALCON

Files changed:
- app.py (+1 -1)
- mgr_bias_scoring.py (+19 -5)
app.py CHANGED

@@ -872,7 +872,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
     gen_title = gr.Markdown("### Select Tested Model", visible=True)
 
     # Tested Model Selection - "openlm-research/open_llama_7b"
-    tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b"], value="bert-base-uncased",
+    tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b", "tiiuae/falcon-7b"], value="bert-base-uncased",
         multiselect=None,
         interactive=True,
         label="Tested Language Model",
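The app.py change only exposes the new model in the UI picker. For context, a minimal standalone sketch of the same dropdown pattern, assuming gradio is installed; the MODELS list and echo handler here are illustrative, not the app's actual wiring:

```python
import gradio as gr

# Hypothetical minimal repro of the model picker; the real app feeds the
# selection into its bias-testing pipeline rather than echoing it back.
MODELS = ["bert-base-uncased", "gpt2", "openlm-research/open_llama_3b",
          "openlm-research/open_llama_7b", "tiiuae/falcon-7b"]

with gr.Blocks() as demo:
    tested_model_name = gr.Dropdown(MODELS, value="bert-base-uncased",
                                    interactive=True,
                                    label="Tested Language Model")
    chosen = gr.Textbox(label="Selected model")
    tested_model_name.change(lambda m: m, tested_model_name, chosen)

demo.launch()
```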
mgr_bias_scoring.py CHANGED

@@ -20,6 +20,8 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
 from transformers import BioGptForCausalLM, BioGptTokenizer
 # LLAMA
 from transformers import LlamaTokenizer, LlamaForCausalLM
+# FALCON
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 import mgr_sentences as smgr
 import mgr_biases as bmgr
@@ -71,7 +73,18 @@ def _getModel(model_name, device):
             offload_folder="offload",
             offload_state_dict = True,
             device_map='auto')
-
+    elif "falcon" in model_name.lower():
+        print(f"Getting FALCON model: {model_name}")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True, ##
+            #use_safetensors=True, ##
+            offload_folder="offload",
+            offload_state_dict = True,
+            device_map='auto')
+        #model.tie_weights()
     if model == None:
         print("Model is empty!!!")
     else:
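The new branch mirrors the existing LLAMA path but loads through the Auto classes. A minimal sketch of the same recipe, assuming torch, transformers, and accelerate are installed; load_falcon is a hypothetical helper, not a function in this repo:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_falcon(model_name="tiiuae/falcon-7b"):
    # Hypothetical helper mirroring the branch added above.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,   # halves memory vs. float32
        trust_remote_code=True,       # Falcon shipped custom modeling code at the time
        low_cpu_mem_usage=True,       # stream weights instead of materializing them twice
        offload_folder="offload",     # spill layers to disk when memory runs short
        offload_state_dict=True,
        device_map="auto")            # let accelerate place layers across devices
    return model, tokenizer
```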
@@ -634,7 +647,7 @@ def testModelProbability(model_name, model, tokenizer, device):
         print(f"Testing on GPT-2 family model: {model_name}")
         #print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
     elif 'llama' in model_name:
-        print(f"Testing on LLAMA family model: {model_name}")
+        print(f"Testing on LLAMA or FALCON family model: {model_name}")
         #print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
 
 # bias test on one row of a dataframe -> row is one sentence template with target terms
@@ -645,7 +658,8 @@ def checkBias(row, biasProbFunc, model, tokenizer, device, progress, df_len):
     if progress != None:
         progress(row.name/df_len, desc=f"{row['template']}")
 
-    test_res = [1
+    test_res = [0,1]
+    random.shuffle(test_res) # fail-safe
     try:
         test_res, sentences = biasProbFunc(model, tokenizer, row['template'].replace("[T]","[MASK]"), grp_terms, device)
     except ValueError as err:
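The shuffled default matters: a template whose scoring raises ValueError now falls back to a random outcome instead of a fixed one, so failures average out to chance rather than systematically shifting the stereotype rate. A minimal sketch of the pattern, with a hypothetical prob_func standing in for biasProbFunc:

```python
import random

def score_with_failsafe(prob_func, template):
    test_res = [0, 1]
    random.shuffle(test_res)  # fail-safe: coin-flip default if scoring fails
    try:
        test_res = prob_func(template)  # normal path overwrites the default
    except ValueError:
        pass  # keep the shuffled default
    return test_res
```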
@@ -740,8 +754,8 @@ def testBiasOnPairs(gen_pairs_df, bias_spec, model_name, model, tokenizer, devic
         gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
             biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
 
-    elif 'llama' in model_name.lower():
-        print(f"Testing on LLAMA family model: {model_name}")
+    elif 'llama' in model_name.lower() or 'falcon' in model_name.lower():
+        print(f"Testing on LLAMA or FALCON family model: {model_name}")
         gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
             biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
 
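Falcon is routed to modelGPT2TestFunc, which is reasonable: GPT-2, LLaMA, and Falcon are all causal LMs and can be scored the same way. A sketch of that style of scoring, sequence log-probability under a causal LM; sequence_logprob is illustrative, not the repo's actual function:

```python
import torch

def sequence_logprob(model, tokenizer, sentence, device):
    ids = tokenizer(sentence, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        logits = model(ids).logits          # [1, seq_len, vocab]
    # log P(token_t | tokens_<t): the prediction at position t-1 scores token t
    logprobs = torch.log_softmax(logits[:, :-1], dim=-1)
    token_lp = logprobs.gather(2, ids[:, 1:].unsqueeze(-1)).squeeze(-1)
    return token_lp.sum().item()            # higher = model finds the sentence more likely

# A pair test would fill the same template with two group terms and mark
# whichever completion scores higher as the stereotyped direction.
```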