Avoid re-loading already-loaded models; mark the unload functionality as not implemented.
detect-pretrain-code-contamination/src/run.py
CHANGED
@@ -23,6 +23,8 @@ import sys
 import gc
 import pickle
 
+models = {}
+
 def save_data(filename, data):
     with open(filename, 'wb') as filehandle:
         # store the data as binary data stream
@@ -36,20 +38,18 @@ def load_data(filename):
     return loaded_data
 
 def unload_model(model,tokenizer):
-    model
-    del model
-    del tokenizer
-    time.sleep(0.5)
-    gc.collect()
-    torch.cuda.empty_cache()
+    print("[X] Cannot unload model! Functionality not implemented!")
 
 def load_model(name1):
-    …
-    …
-    …
+    if name1 not in models:
+        model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
+        model1.eval()
+        tokenizer1 = AutoTokenizer.from_pretrained(name1)
 
-    …
-    …
+        tokenizer1.pad_token = tokenizer1.eos_token
+        models[name1] = model1
+        models[name1 + "_tokenizer"] = tokenizer1
+    return models[name1], models[name1 + "_tokenizer"]
 
 def calculatePerplexity(sentence, model, tokenizer, gpu):
     """
@@ -105,6 +105,7 @@ def get_neighbors(text,ref_loss,model2,tokenizer2,ratio_gen):
     return neighbors_dl
 
 def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_name):
+    global model1,model2,tokenizer1,tokenizer2
     print(f"all data size: {len(test_data)}")
     random.seed(0)
     random.shuffle(test_data)