Spaces:
Sleeping
Sleeping
Commit
·
4c479ed
1
Parent(s):
ea6c7bf
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,8 @@ import gradio as gr
|
|
| 15 |
|
| 16 |
def greet(co):
|
| 17 |
code_text = []
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
if not code:
|
| 21 |
-
break
|
| 22 |
-
code_text.append(code)
|
| 23 |
|
| 24 |
code_text = ' '.join(code_text)
|
| 25 |
code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
|
|
@@ -27,7 +24,7 @@ def greet(co):
|
|
| 27 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
| 28 |
|
| 29 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
| 30 |
-
path = 'models/CFA-CodeBERTa-small.pt'
|
| 31 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
| 32 |
input_ids = tokenizer.encode(
|
| 33 |
code_text, max_length=512, truncation=True, padding='max_length')
|
|
@@ -39,7 +36,7 @@ def greet(co):
|
|
| 39 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
| 40 |
|
| 41 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
| 42 |
-
path = 'models/CFA-codebert-c.pt'
|
| 43 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
| 44 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 45 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
@@ -50,7 +47,7 @@ def greet(co):
|
|
| 50 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
| 51 |
|
| 52 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
| 53 |
-
path = 'models/CFA-codebert-c-v2.pt'
|
| 54 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
| 55 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 56 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
@@ -61,7 +58,7 @@ def greet(co):
|
|
| 61 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
| 62 |
|
| 63 |
# 4. codeT5 finetuning model
|
| 64 |
-
path = 'models/CFA-codeT5'
|
| 65 |
model_params = {
|
| 66 |
# model_type: t5-base/t5-large
|
| 67 |
"MODEL": path,
|
|
@@ -83,8 +80,11 @@ def greet(co):
|
|
| 83 |
# ensemble
|
| 84 |
tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
|
| 85 |
pred_3 * 0.1 + pred_4 * 0.1).argmax()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
return tot_result
|
| 88 |
|
| 89 |
|
| 90 |
|
|
@@ -200,25 +200,30 @@ with gr.Blocks() as demo1:
|
|
| 200 |
|
| 201 |
gr.Markdown(
|
| 202 |
"""
|
| 203 |
-
정적 분석기로 오류라고 보고된 코드를
|
| 204 |
오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
|
| 205 |
""")
|
| 206 |
|
| 207 |
with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
|
| 208 |
gr.Markdown(
|
| 209 |
"""
|
| 210 |
-
총 3개의 모델을
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
"""
|
| 213 |
)
|
| 214 |
with gr.Row():
|
| 215 |
with gr.Column():
|
| 216 |
-
inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='
|
| 217 |
with gr.Row():
|
| 218 |
-
btn = gr.Button("
|
| 219 |
with gr.Column():
|
| 220 |
-
outputs_1 = gr.
|
| 221 |
btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
|
| 222 |
|
| 223 |
if __name__ == "__main__":
|
| 224 |
-
demo1.launch()
|
|
|
|
| 15 |
|
| 16 |
def greet(co):
|
| 17 |
code_text = []
|
| 18 |
+
|
| 19 |
+
code_text.append(co)
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
code_text = ' '.join(code_text)
|
| 22 |
code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
|
|
|
|
| 24 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
| 25 |
|
| 26 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
| 27 |
+
path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
|
| 28 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
| 29 |
input_ids = tokenizer.encode(
|
| 30 |
code_text, max_length=512, truncation=True, padding='max_length')
|
|
|
|
| 36 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
| 37 |
|
| 38 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
| 39 |
+
path = os.getcwd() + '/models/CFA-codebert-c.pt'
|
| 40 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
| 41 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 42 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
|
| 47 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
| 48 |
|
| 49 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
| 50 |
+
path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
|
| 51 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
| 52 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
| 53 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
|
| 58 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
| 59 |
|
| 60 |
# 4. codeT5 finetuning model
|
| 61 |
+
path = os.getcwd() + '/models/CFA-codeT5'
|
| 62 |
model_params = {
|
| 63 |
# model_type: t5-base/t5-large
|
| 64 |
"MODEL": path,
|
|
|
|
| 80 |
# ensemble
|
| 81 |
tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
|
| 82 |
pred_3 * 0.1 + pred_4 * 0.1).argmax()
|
| 83 |
+
if tot_result == 0:
|
| 84 |
+
return "false positive !!"
|
| 85 |
+
else:
|
| 86 |
+
return "true positive !!"
|
| 87 |
|
|
|
|
| 88 |
|
| 89 |
|
| 90 |
|
|
|
|
| 200 |
|
| 201 |
gr.Markdown(
|
| 202 |
"""
|
| 203 |
+
정적 분석기로 오류라고 보고된 코드를 입력하면,
|
| 204 |
오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
|
| 205 |
""")
|
| 206 |
|
| 207 |
with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
|
| 208 |
gr.Markdown(
|
| 209 |
"""
|
| 210 |
+
총 3개의 모델을 사용하였다.
|
| 211 |
+
1. codeBERTa-small-v1
|
| 212 |
+
- codeBERTa-small-v1 설명
|
| 213 |
+
2. codeBERT - C
|
| 214 |
+
- codeBERT - C 설명
|
| 215 |
+
3. codeT5
|
| 216 |
+
- codeT5 설명
|
| 217 |
"""
|
| 218 |
)
|
| 219 |
with gr.Row():
|
| 220 |
with gr.Column():
|
| 221 |
+
inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Code')
|
| 222 |
with gr.Row():
|
| 223 |
+
btn = gr.Button("결과 출력")
|
| 224 |
with gr.Column():
|
| 225 |
+
outputs_1 = gr.Text(label = 'Result')
|
| 226 |
btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
|
| 227 |
|
| 228 |
if __name__ == "__main__":
|
| 229 |
+
demo1.launch(share=True)
|