Spaces:
Sleeping
Sleeping
Update app.py
#2
by
hyomin
- opened
app.py
CHANGED
|
@@ -3,30 +3,24 @@ import numpy as np
|
|
| 3 |
import re
|
| 4 |
import os
|
| 5 |
import sys
|
| 6 |
-
import random
|
| 7 |
import transformers
|
| 8 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 9 |
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
| 10 |
import torch
|
| 11 |
import torch.nn.functional as F
|
| 12 |
-
from torch.utils.data import Dataset, DataLoader
|
| 13 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
| 14 |
import gradio as gr
|
| 15 |
|
| 16 |
|
|
|
|
| 17 |
|
| 18 |
-
def greet(co):
|
| 19 |
-
code_text = []
|
| 20 |
-
|
| 21 |
-
code_text.append(co)
|
| 22 |
-
|
| 23 |
-
code_text = ' '.join(code_text)
|
| 24 |
code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
|
| 25 |
code_text = re.sub('\/\/.*', '', code_text)
|
| 26 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
| 27 |
|
| 28 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
| 29 |
-
path = os.getcwd() + '
|
| 30 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
| 31 |
input_ids = tokenizer.encode(
|
| 32 |
code_text, max_length=512, truncation=True, padding='max_length')
|
|
@@ -38,7 +32,7 @@ def greet(co):
|
|
| 38 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
| 39 |
|
| 40 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
| 41 |
-
path = os.getcwd() + '
|
| 42 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
| 43 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 44 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
@@ -49,7 +43,7 @@ def greet(co):
|
|
| 49 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
| 50 |
|
| 51 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
| 52 |
-
path = os.getcwd() + '
|
| 53 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
| 54 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 55 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
@@ -60,7 +54,7 @@ def greet(co):
|
|
| 60 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
| 61 |
|
| 62 |
# 4. codeT5 finetuning model
|
| 63 |
-
path = os.getcwd() + '
|
| 64 |
model_params = {
|
| 65 |
# model_type: t5-base/t5-large
|
| 66 |
"MODEL": path,
|
|
@@ -80,16 +74,14 @@ def greet(co):
|
|
| 80 |
pred_4 = int(pred_4[0])
|
| 81 |
|
| 82 |
# ensemble
|
| 83 |
-
tot_result = (pred_1 * 0.
|
| 84 |
-
pred_3 * 0.
|
| 85 |
if tot_result == 0:
|
| 86 |
return "false positive !!"
|
| 87 |
else:
|
| 88 |
return "true positive !!"
|
| 89 |
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
# codeT5
|
| 94 |
class YourDataSetClass(Dataset):
|
| 95 |
|
|
@@ -194,18 +186,19 @@ demo.launch(share=True)
|
|
| 194 |
'''
|
| 195 |
with gr.Blocks() as demo1:
|
| 196 |
gr.Markdown(
|
| 197 |
-
|
| 198 |
<h1 align="center">
|
| 199 |
False-Alarm-Detector
|
| 200 |
</h1>
|
| 201 |
""")
|
| 202 |
|
| 203 |
gr.Markdown(
|
| 204 |
-
|
| 205 |
-
์ ์
|
| 206 |
-
오류가 True-positive 인지 False-positive 인지 분류 해 주는
|
| 207 |
""")
|
| 208 |
|
|
|
|
| 209 |
with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
|
| 210 |
gr.Markdown(
|
| 211 |
"""
|
|
@@ -218,14 +211,16 @@ with gr.Blocks() as demo1:
|
|
| 218 |
- codeT5 설명
|
| 219 |
"""
|
| 220 |
)
|
|
|
|
| 221 |
with gr.Row():
|
| 222 |
with gr.Column():
|
| 223 |
-
|
|
|
|
| 224 |
with gr.Row():
|
| 225 |
btn = gr.Button("결과 출력")
|
| 226 |
with gr.Column():
|
| 227 |
-
|
| 228 |
-
btn.click(fn
|
| 229 |
|
| 230 |
if __name__ == "__main__":
|
| 231 |
demo1.launch()
|
|
|
|
| 3 |
import re
|
| 4 |
import os
|
| 5 |
import sys
|
|
|
|
| 6 |
import transformers
|
| 7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 8 |
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
| 9 |
import torch
|
| 10 |
import torch.nn.functional as F
|
| 11 |
+
from torch.utils.data import Dataset, DataLoader
|
| 12 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
|
| 16 |
+
def is_false_alarm(code_text):
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
|
| 19 |
code_text = re.sub('\/\/.*', '', code_text)
|
| 20 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
| 21 |
|
| 22 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
| 23 |
+
path = os.getcwd() + '\models\CFA-CodeBERTa-small.pt'
|
| 24 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
| 25 |
input_ids = tokenizer.encode(
|
| 26 |
code_text, max_length=512, truncation=True, padding='max_length')
|
|
|
|
| 32 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
| 33 |
|
| 34 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
| 35 |
+
path = os.getcwd() + '\models\CFA-codebert-c.pt'
|
| 36 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
| 37 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 38 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
|
| 43 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
| 44 |
|
| 45 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
| 46 |
+
path = os.getcwd() + '\models\CFA-codebert-c-v2.pt'
|
| 47 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
| 48 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 49 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
|
| 54 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
| 55 |
|
| 56 |
# 4. codeT5 finetuning model
|
| 57 |
+
path = os.getcwd() + '\models\CFA-codeT5'
|
| 58 |
model_params = {
|
| 59 |
# model_type: t5-base/t5-large
|
| 60 |
"MODEL": path,
|
|
|
|
| 74 |
pred_4 = int(pred_4[0])
|
| 75 |
|
| 76 |
# ensemble
|
| 77 |
+
tot_result = (pred_1 * 0.1 + pred_2 * 0.1 +
|
| 78 |
+
pred_3 * 0.7 + pred_4 * 0.1).argmax()
|
| 79 |
if tot_result == 0:
|
| 80 |
return "false positive !!"
|
| 81 |
else:
|
| 82 |
return "true positive !!"
|
| 83 |
|
| 84 |
|
|
|
|
|
|
|
| 85 |
# codeT5
|
| 86 |
class YourDataSetClass(Dataset):
|
| 87 |
|
|
|
|
| 186 |
'''
|
| 187 |
with gr.Blocks() as demo1:
|
| 188 |
gr.Markdown(
|
| 189 |
+
"""
|
| 190 |
<h1 align="center">
|
| 191 |
False-Alarm-Detector
|
| 192 |
</h1>
|
| 193 |
""")
|
| 194 |
|
| 195 |
gr.Markdown(
|
| 196 |
+
"""
|
| 197 |
+
정적 분석기를 통해 오류라고 보고된 C언어 코드의 함수를 입력하면,
|
| 198 |
+
오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램입니다.
|
| 199 |
""")
|
| 200 |
|
| 201 |
+
'''
|
| 202 |
with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
|
| 203 |
gr.Markdown(
|
| 204 |
"""
|
|
|
|
| 211 |
- codeT5 설명
|
| 212 |
"""
|
| 213 |
)
|
| 214 |
+
'''
|
| 215 |
with gr.Row():
|
| 216 |
with gr.Column():
|
| 217 |
+
inputs = gr.Textbox(
|
| 218 |
+
lines=10, placeholder="코드를 입력하시오.", label='Code')
|
| 219 |
with gr.Row():
|
| 220 |
btn = gr.Button("결과 출력")
|
| 221 |
with gr.Column():
|
| 222 |
+
output = gr.Text(label='Result')
|
| 223 |
+
btn.click(fn=is_false_alarm, inputs=inputs, outputs=output)
|
| 224 |
|
| 225 |
if __name__ == "__main__":
|
| 226 |
demo1.launch()
|