Spaces:

firefighter
/

TransDis-CreativityAutoAssessment

Running

Qifan Zhang committed on Apr 1, 2023

Commit

cf575f8

1 Parent(s): f32101e

add feature 1

Files changed (5) hide show

.gitignore ADDED Viewed

+data
+.idea
+*.csv

app.py ADDED Viewed

+from typing import Optional
+import gradio as gr
+import pandas as pd
+from utils.similarity import batch_cos_sim
+def read_data(filepath: str) -> Optional[pd.DataFrame]:
+    if not filepath:
+        return None
+    if filepath.endswith('.xlsx'):
+        df = pd.read_csv(filepath)
+    elif filepath.endswith('.csv'):
+        df = pd.read_csv(filepath)
+    else:
+        raise Exception('File type not supported')
+    return df
+def process(model_name: str,
+            prompt: str,
+            file=None,
+            ):
+    df = read_data(file.name)
+    df = batch_cos_sim(df, model_name)
+    path = 'output.csv'
+    df.to_csv(path, index=False, encoding='utf-8-sig')
+    return df.to_markdown(), path
+model_name_input = gr.components.Textbox(
+    value='paraphrase-multilingual-MiniLM-L12-v2',
+    lines=1,
+    type="text"
+)
+prompt_input = gr.components.Textbox(
+    value='prompt,response',
+    lines=10,
+    type="text"
+)
+file_output = gr.components.File(label="Output File",
+                                 file_count="single",
+                                 file_types=["", ".", ".csv", ".xls", ".xlsx"])
+app = gr.Interface(
+    fn=process,
+    inputs=[model_name_input, prompt_input, "file" ],
+    outputs=["text", file_output]
+)
+app.launch()

output.csv ADDED Viewed

+prompt,response,originality
+床单,当空调被,0.6427325010299683
+床单,保暖,0.5928247570991516
+床单,绑在树上做成吊床,0.5714011490345001
+床单,当燃料烧,0.7625655382871628
+床单,包裹东西,0.41448450088500977
+床单,裁剪成衣服,0.5791812241077423
+牙刷,用来刷首饰,0.5138461589813232
+牙刷,刷鞋,0.5954866111278534
+牙刷,洗水果,0.6339634656906128
+牙刷,捅人,0.5337955951690674
+牙刷,洗马桶,0.5022678673267365
+牙刷,刷桃子的毛,0.6439318358898163

utils/models.py ADDED Viewed

+from functools import lru_cache
+import torch
+from sentence_transformers import SentenceTransformer
+import numpy as np
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+class SBert:
+    def __init__(self, path):
+        self.model = SentenceTransformer(path, device=DEVICE)
+    @lru_cache(maxsize=10000)
+    def __call__(self, x) -> np.ndarray:
+        y = self.model.encode(x)
+        return y

utils/similarity.py ADDED Viewed

+import pandas as pd
+from sentence_transformers.util import cos_sim
+from utils.models import SBert
+def get_cos_sim(model, prompt: str, response: str) -> float:
+    prompt_vec = model(prompt)
+    response_vec = model(response)
+    score = cos_sim(prompt_vec, response_vec).item()
+    return score
+def batch_cos_sim(df: pd.DataFrame, model_name) -> pd.DataFrame:
+    # df.columns = ['prompt', 'response']
+    assert 'prompt' in df.columns
+    assert 'response' in df.columns
+    model = SBert(model_name)
+    df['originality'] = df.apply(lambda x: 1 - get_cos_sim(model, x['prompt'], x['response']), axis=1)
+    return df
+if __name__ == '__main__':
+    _df = pd.read_csv('data/example_1.csv')
+    _df_o = batch_cos_sim(_df, 'paraphrase-multilingual-MiniLM-L12-v2')