Spaces:

rshakked
/

safe-talk

Sleeping

rshakked committed on May 13

Commit

5071ec6

1 Parent(s): bc56514

fix: make label_columns explicit parameter in label_row_soft

Files changed (2) hide show

train_abuse_model.py CHANGED Viewed

@@ -176,7 +176,7 @@ logger.info(np.shape(df))
 df = df.dropna(subset=[text_column])
 logger.info(np.shape(df))
-df["label_vector"] = df.apply(label_row_soft, axis=1)
 label_matrix = df["label_vector"].tolist()
 # Proper 3-way split: train / val / test

 df = df.dropna(subset=[text_column])
 logger.info(np.shape(df))
+df["label_vector"] = df.apply(lambda row: label_row_soft(row, label_columns), axis=1)
 label_matrix = df["label_vector"].tolist()
 # Proper 3-way split: train / val / test

utils.py CHANGED Viewed

@@ -71,7 +71,7 @@ def tune_thresholds(probs, true_labels, verbose=True):
     return best_low, best_high, best_macro_f1
 #  Convert label values to soft scores: "yes" = 1.0, "plausibly" = 0.5, others = 0.0
-def label_row_soft(row):
     labels = []
     for col in label_columns:
         val = str(row[col]).strip().lower()

     return best_low, best_high, best_macro_f1
 #  Convert label values to soft scores: "yes" = 1.0, "plausibly" = 0.5, others = 0.0
+def label_row_soft(row, label_columns):
     labels = []
     for col in label_columns:
         val = str(row[col]).strip().lower()