Erlangshen-UniMC-Zero-Shot

Build error

App Files Files Community

suolyer committed on Nov 19, 2022

Commit

6d11d05

1 Parent(s): 2e08a92

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -37

app.py CHANGED Viewed

@@ -30,6 +30,8 @@ from transformers.optimization import get_linear_schedule_with_warmup
 from transformers import BertForMaskedLM, AlbertTokenizer
 from transformers import AutoConfig
 from transformers import MegatronBertForMaskedLM
 import argparse
 import copy
 import streamlit as st
@@ -297,9 +299,12 @@ class UniMCModel(nn.Module):
         self.config = AutoConfig.from_pretrained(pre_train_dir)
         if self.config.model_type == 'megatron-bert':
             self.bert = MegatronBertForMaskedLM.from_pretrained(pre_train_dir)
         else:
             self.bert = BertForMaskedLM.from_pretrained(pre_train_dir)
         self.loss_func = torch.nn.CrossEntropyLoss()
         self.yes_token = yes_token
@@ -626,54 +631,82 @@ def load_model(model_path):
     model = UniMCPipelines(args)
     return model
 def main():
     text_dict={
-        '文本分类':"微软披露拓扑量子计算机计划！",
-        '情感分析':"刚买iphone13 pro 还不到一个月，天天死机最差的一次购物体验",
-        '语义匹配':"今天心情不好,我很不开心",
-        '自然语言推理':"小明正在上高中[unused1]小明是一个初中生",
-        '多项式阅读理解':"这个男的是什么意思?[unused1][SEP]女：您看这件衣服挺不错的，质量好，价钱也不贵。\n男：再看看吧。",
     }
     question_dict={
-        '文本分类':"故事；文化；娱乐；体育；财经；房产；汽车；教育；科技",
-        '情感分析':"好评；差评",
-        '语义匹配':"可以理解为；不能理解为",
-        '自然语言推理':"可以推断出；不能推断出；很难推断出",
-        '多项式阅读理解':"不想要这件；衣服挺好的；衣服质量不好",
     }
     choice_dict={
-        '文本分类':"故事；文化；娱乐；体育；财经；房产；汽车；教育；科技",
-        '情感分析':"好评；差评",
-        '语义匹配':"可以理解为；不能理解为",
-        '自然语言推理':"可以推断出；不能推断出；很难推断出",
-        '多项式阅读理解':"不想要这件；衣服挺好的；衣服质量不好",
     }
     st.subheader("UniMC Zero-shot 体验")
-    st.sidebar.header("参数配置")
     sbform = st.sidebar.form("固定参数设置")
-    language = sbform.selectbox('选择语言', ['中文', 'English'])
-    sbform.form_submit_button("配置")
-    if language == '中文':
         model = load_model('IDEA-CCNL/Erlangshen-UniMC-RoBERTa-110M-Chinese')
     else:
-        model = load_model('IDEA-CCNL/Erlangshen-UniMC-RoBERTa-110M-Chinese')
-    st.info("请输入以下信息...")
-    model_type = st.selectbox('选择任务类型',['文本分类','情感分析','语义匹配','自然语言推理','多项式阅读理解'])
-    sentences = st.text_area("请输入句子：", text_dict[model_type])
-    question = st.text_input("请输入问题（不输入问题也可以）：", "")
-    choice = st.text_input("输入标签（以中文；分割）:", choice_dict[model_type])
     choice = choice.split('；')
     data = [{"texta": sentences,
@@ -683,15 +716,13 @@ def main():
             "answer": "", "label": 0,
             "id": 0}]
-    if st.button("点击一下，开始预测！"):
-        start=time.time()
-        result = model.predict(data, cuda=False)
-        st.success(f"Prediction is successful, consumes {str(time.time()-start)} seconds")
-        st.json(result[0])
-    else:
-        st.info(
-            "**Enter a text** above and **press the button** to predict the category."
-        )

 from transformers import BertForMaskedLM, AlbertTokenizer
 from transformers import AutoConfig
 from transformers import MegatronBertForMaskedLM
+from modeling_deberta_v2 import DebertaV2ForMaskedLM
+from modeling_albert import AlbertForMaskedLM
 import argparse
 import copy
 import streamlit as st
         self.config = AutoConfig.from_pretrained(pre_train_dir)
         if self.config.model_type == 'megatron-bert':
             self.bert = MegatronBertForMaskedLM.from_pretrained(pre_train_dir)
+        elif self.config.model_type == 'deberta-v2':
+            self.bert = DebertaV2ForMaskedLM.from_pretrained(pre_train_dir)
+        elif self.config.model_type == 'albert':
+            self.bert = AlbertForMaskedLM.from_pretrained(pre_train_dir)
         else:
             self.bert = BertForMaskedLM.from_pretrained(pre_train_dir)
         self.loss_func = torch.nn.CrossEntropyLoss()
         self.yes_token = yes_token
     model = UniMCPipelines(args)
     return model
 def main():
     text_dict={
+        'Text classification「文本分类」':"彭于晏不着急，胡歌不着急，那我也不着急",
+        'Sentiment「情感分析」':"刚买iphone13 pro 还不到一个月，天天死机最差的一次购物体验",
+        'Similarity「语义匹配」':"今天心情不好",
+        'NLI 「自然语言推理」':"小明正在上高中",
+        'Multiple Choice「多项式阅读理解」':"女：您看这件衣服挺不错的，质量好，价钱也不贵。\n男：再看看吧。",
     }
     question_dict={
+        'Text classification「文本分类」':"这是什么类型的新闻？",
+        'Sentiment「情感分析」':"",
+        'Similarity「语义匹配」':"",
+        'NLI 「自然语言推理」':"",
+        'Multiple Choice「多项式阅读理解」':"这个男的是什么意思?",
     }
     choice_dict={
+        'Text classification「文本分类」':"故事；文化；娱乐；体育；财经；房产；汽车；教育；科技",
+        'Sentiment「情感分析」':"这是一条好评；这是一条差评",
+        'Similarity「语义匹配」':"可以理解为：我很不开心；不能理解为：我很不开心",
+        'NLI 「自然语言推理」':"可以推断出：小明是一个初中生；不能推断出：小明是一个初中生；很难推断出：小明是一个初中生",
+        'Multiple Choice「多项式阅读理解」':"不想要这件；衣服挺好的；衣服质量不好",
+    }
+    text_dict_en={
+        'Text classification「文本分类」':"Henkel AG & Company KGaA operates worldwide with leading brands and technologies in three business areas: Laundry & Home Care Beauty Care and Adhesive Technologies. Henkel is the name behind some of America’s favorite brands.",
+        'Sentiment「情感分析」':"a gorgeous , high-spirited musical from india that exquisitely blends music , dance , song , and high drama . ",
+        'Similarity「语义匹配」':"Ricky Clemons ' brief , troubled Missouri basketball career is over .",
+        'NLI 「自然语言推理」':"That was then, and then's gone. It's now now. I don't mean I 've done a sudden transformation.",
+        'Multiple Choice「多项式阅读理解」':"A huge crowd is in the stands in an arena. A man throws a javelin. Photographers take pictures in the background. several men",
+    }
+    question_dict_en={
+        'Text classification「文本分类」':"",
+        'Sentiment「情感分析」':"",
+        'Similarity「语义匹配」':"",
+        'NLI 「自然语言推理」':"",
+        'Multiple Choice「多项式阅读理解」':"",
+    }
+    choice_dict_en={
+        'Text classification「文本分类」':"Company;Educational Institution;Artist;Athlete;Office Holder",
+        'Sentiment「情感分析」':"it's great;it's terrible",
+        'Similarity「语义匹配」':"That can be interpreted as  Missouri kicked Ricky Clemons off its team , ending his troubled career there .;That cannot be interpreted as  Missouri kicked Ricky Clemons off its team , ending his troubled career there .",
+        'NLI 「自然语言推理」':"we can infer that she has done a sudden transformation;we can not infer that she has done a sudden transformation;it is diffcult for us to infer that she has done a sudden transformation",
+        'Multiple Choice「多项式阅读理解」':"are water boarding in a river.;are shown throwing balls.;challenge the man to jump onto the rope.;run to where the javelin lands.",
     }
     st.subheader("UniMC Zero-shot 体验")
+    st.sidebar.header("Configuration「参数配置」")
     sbform = st.sidebar.form("固定参数设置")
+    language = sbform.selectbox('Select a language「选择语言」', ['中文「Chinese」', 'English「英文」'])
+    sbform.form_submit_button("Submit configuration「提交配置」")
+    if '中文' in language:
         model = load_model('IDEA-CCNL/Erlangshen-UniMC-RoBERTa-110M-Chinese')
     else:
+        model = load_model('IDEA-CCNL/Erlangshen-UniMC-Albert-235M-English')
+    st.info("Please input the following information「请输入以下信息...」")
+    model_type = st.selectbox('Select task type「选择任务类型」',['Text classification「文本分类」','Sentiment「情感分析」','Similarity「语义匹配」','NLI 「自然语言推理」','Multiple Choice「多项式阅读理解」'])
+    if '中文' in language:
+        sentences = st.text_area("Please input the context「请输入句子」", text_dict[model_type])
+        question = st.text_input("Please input the question「请输入问题（不输入问题也可以）」", question_dict[model_type])
+        choice = st.text_input("Please input the label「输入标签（以中文；分割）」", choice_dict[model_type])
+    else:
+        sentences = st.text_area("Please input the context「请输入句子」", text_dict_en[model_type])
+        question = st.text_input("Please input the question「请输入问题（不输入问题也可以）」", question_dict_en[model_type])
+        choice = st.text_input("Please input the label「输入标签（以中文；分割）」", choice_dict[model_type])
     choice = choice.split('；')
     data = [{"texta": sentences,
             "answer": "", "label": 0,
             "id": 0}]
+    start=time.time()
+    result = model.predict(data, cuda=False)
+    st.success(f"Prediction is successful, consumes {str(time.time()-start)} seconds")
+    st.json(result[0])
+    f1.form_submit_button("Submit「点击一下，开始预测！」")