import argparse

import gradio as gr
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt

from biasprobe import BinaryProbe, PairwiseExtractionRunner, SimplePairPromptBuilder, ProbeConfig
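
# Gradio demo for BiasProbe: probes Mistral-7B-Instruct's hidden states to
# estimate how positively the model represents user-supplied words.
# Assumed entry point (the filename is a guess, per Hugging Face Space
# convention): python app.py --port 8080
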
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', '-s', type=int, default=0, help='the random seed')
    parser.add_argument('--port', '-p', type=int, default=8080, help='the port to launch the demo')
    parser.add_argument('--no-cuda', action='store_true', help='use CPUs instead of GPUs')
    return parser.parse_args()

def main():
    args = get_args()
    plt.switch_backend('agg')  # headless backend; the server has no display

    # Cap GPU 0 at 24 GiB and let the device map place the rest automatically.
    dmap = 'auto'
    mdict = {0: '24GiB'}

    config = ProbeConfig.create_for_model('mistralai/Mistral-7B-Instruct-v0.1')
    probe = BinaryProbe(config).cuda()
    probe.load_state_dict(torch.load('probe.pt'))
    runner = PairwiseExtractionRunner.from_pretrained(
        'mistralai/Mistral-7B-Instruct-v0.1', optimize=False, torch_dtype=torch.float16,
        max_memory=mdict, device_map=dmap, low_cpu_mem_usage=True)
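
    # Pipeline sketch (our reading of this script, not the biasprobe docs):
    # the runner prompts the model to compare word pairs, caches the layer-15
    # hidden states, and the linear probe scores each pair; the sign of the
    # score picks which word of the pair is "more positive".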
    def run_extraction(prompt):
        builder = SimplePairPromptBuilder(criterion='more positive')
        # Cap the prompt at 300 characters and 100 comma-separated words.
        lst = [x.strip() for x in prompt.lower()[:300].split(',')][:100]
        exp = runner.run_extraction(
            lst, lst, layers=[15], num_repeat=50, builder=builder, parallel=False,
            run_inference=True, debug=True, max_new_tokens=2)
        test_ds = exp.make_dataset(15)

        raw_scores = []
        preds_list = []
        hs = []

        for idx, (tensor, labels) in enumerate(test_ds):
            with torch.no_grad():
                labels = labels - 1  # labels are 1-indexed
                if tensor.shape[0] != 2:
                    continue

                h = tensor[1] - tensor[0]
                hs.append(h)

                try:
                    x = probe(tensor.unsqueeze(0).cuda().float()).squeeze()
                except IndexError:
                    continue

                # A positive score keeps the pair's order; a negative score
                # swaps it.
                pred = np.array([0, 1] if x.item() > 0 else [1, 0])

                if test_ds.original_examples is not None:
                    items = [hit.content for hit in test_ds.original_examples[idx].hits]
                    # Append in lockstep so the DataFrame columns stay aligned.
                    preds_list.append(np.array(items, dtype=object)[labels][pred].tolist())
                    raw_scores.append(x.item())
        # Aggregate the per-comparison outcomes into a win rate per word.
        df = pd.DataFrame({'Win Rate': np.array(raw_scores) > 0, 'Word': [p[0] for p in preds_list]})
        win_df = df.groupby('Word')['Win Rate'].mean().reset_index().sort_values('Win Rate')
        win_df['Win Rate'] = [f'{x}%' for x in (win_df['Win Rate'] * 100).round(2).tolist()]
        return win_df
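
    # Illustrative example of the aggregation above (made-up values): with
    # raw_scores == [0.7, -0.2] and preds_list == [['joy', 'fear'],
    # ['war', 'peace']], each row of df is one comparison; grouping by word
    # and averaging gives 'joy' a 100% win rate and 'war' 0%.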
    with gr.Blocks(css='scrollbar.css') as demo:
        md = '''# BiasProbe: Revealing Preference Biases in Language Model Representations

What do llamas really "think" about controversial words? Type some words below
to see how Mistral-7B-Instruct associates them with positive and negative
emotions. Higher win rates indicate that the word is more likely to be
associated with positive emotions than the other words in the list.

Check out our paper, [What Do Llamas Really Think? Revealing Preference Biases in Language Model Representations](http://arxiv.org/abs/2311.18812).
See our [codebase](https://github.com/castorini/biasprobe) on GitHub.
'''
        gr.Markdown(md)
        with gr.Row():
            with gr.Column():
                text = gr.Textbox(label='Words', value='Republican, democrat, libertarian, authoritarian')
                submit_btn = gr.Button('Submit', elem_id='submit-btn')
                output = gr.DataFrame(pd.DataFrame({
                    'Word': ['authoritarian', 'republican', 'democrat', 'libertarian'],
                    'Win Rate': ['44.44%', '81.82%', '100%', '100%'],
                }))

        submit_btn.click(fn=run_extraction, inputs=[text], outputs=[output])
    # Relaunch on transient socket errors; Ctrl-C shuts the demo down cleanly.
    while True:
        try:
            demo.launch(server_name='0.0.0.0', server_port=args.port)
        except OSError:
            gr.close_all()
        except KeyboardInterrupt:
            gr.close_all()
            break

if __name__ == '__main__':
    main()