Spaces:

jorgemarcc
/

graphcodebert-interpretability

Sleeping

App Files Community

jorgemarcc committed on Jul 22

Commit

cadb900

verified ·

1 Parent(s): 795c1ba

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -15

app.py CHANGED Viewed

@@ -1,10 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-[Martinez-Gil2025] Martinez-Gil, J. (2025).
-Augmenting the Interpretability of GraphCodeBERT for Code Similarity Tasks.
-International Journal of Software Engineering and Knowledge Engineering, 35(05), 657-678.
-@author: Jorge Martinez-Gil
 """
 import numpy as np
@@ -16,11 +13,11 @@ import gradio as gr
 from io import BytesIO
 from PIL import Image
-# Load GraphCodeBERT model
-tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base")
-model = RobertaModel.from_pretrained("microsoft/graphcodebert-base")
-# Define sorting algorithms as strings
 sorting_algorithms = {
     "Bubble_Sort": """
 def bubble_sort(arr):
@@ -61,10 +58,8 @@ def merge_sort(arr):
         mid = len(arr) // 2
         L = arr[:mid]
         R = arr[mid:]
         merge_sort(L)
         merge_sort(R)
         i = j = k = 0
         while i < len(L) and j < len(R):
             if L[i] < R[j]:
@@ -74,12 +69,10 @@ def merge_sort(arr):
                 arr[k] = R[j]
                 j += 1
             k += 1
         while i < len(L):
             arr[k] = L[i]
             i += 1
             k += 1
         while j < len(R):
             arr[k] = R[j]
             j += 1
@@ -97,7 +90,6 @@ def partition(arr, low, high):
             arr[i], arr[j] = arr[j], arr[i]
     arr[i+1], arr[high] = arr[high], arr[i+1]
     return (i + 1)
 def quick_sort(arr, low, high):
     if low < high:
         pi = partition(arr, low, high)
@@ -116,7 +108,7 @@ def get_token_embeddings(code):
     tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze())
     return token_embeddings, tokens
-# Compare two algorithms and return PCA scatter plot
 def compare_algorithms(algo1_name, algo2_name):
     code1 = sorting_algorithms[algo1_name]
     code2 = sorting_algorithms[algo2_name]
@@ -155,3 +147,4 @@ interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch()

 # -*- coding: utf-8 -*-
 """
+Martinez-Gil, J. (2025). Augmenting the Interpretability of GraphCodeBERT for Code Similarity Tasks.
+International Journal of Software Engineering and Knowledge Engineering, 35(05), 657–678.
 """
 import numpy as np
 from io import BytesIO
 from PIL import Image
+# Load GraphCodeBERT from Hugging Face (with cache)
+tokenizer = RobertaTokenizer.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
+model = RobertaModel.from_pretrained("microsoft/graphcodebert-base", cache_dir="models/")
+# Define sorting algorithms
 sorting_algorithms = {
     "Bubble_Sort": """
 def bubble_sort(arr):
         mid = len(arr) // 2
         L = arr[:mid]
         R = arr[mid:]
         merge_sort(L)
         merge_sort(R)
         i = j = k = 0
         while i < len(L) and j < len(R):
             if L[i] < R[j]:
                 arr[k] = R[j]
                 j += 1
             k += 1
         while i < len(L):
             arr[k] = L[i]
             i += 1
             k += 1
         while j < len(R):
             arr[k] = R[j]
             j += 1
             arr[i], arr[j] = arr[j], arr[i]
     arr[i+1], arr[high] = arr[high], arr[i+1]
     return (i + 1)
 def quick_sort(arr, low, high):
     if low < high:
         pi = partition(arr, low, high)
     tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze())
     return token_embeddings, tokens
+# Plot comparison between two algorithms
 def compare_algorithms(algo1_name, algo2_name):
     code1 = sorting_algorithms[algo1_name]
     code2 = sorting_algorithms[algo2_name]
 if __name__ == "__main__":
     interface.launch()