fix: make label_columns an explicit parameter of label_row_soft
Browse files
- train_abuse_model.py +1 -1
- utils.py +1 -1
train_abuse_model.py
CHANGED
|
@@ -176,7 +176,7 @@ logger.info(np.shape(df))
|
|
| 176 |
df = df.dropna(subset=[text_column])
|
| 177 |
logger.info(np.shape(df))
|
| 178 |
|
| 179 |
-
df["label_vector"] = df.apply(label_row_soft, axis=1)
|
| 180 |
label_matrix = df["label_vector"].tolist()
|
| 181 |
|
| 182 |
# Proper 3-way split: train / val / test
|
|
|
|
| 176 |
df = df.dropna(subset=[text_column])
|
| 177 |
logger.info(np.shape(df))
|
| 178 |
|
| 179 |
+
df["label_vector"] = df.apply(lambda row: label_row_soft(row, label_columns), axis=1)
|
| 180 |
label_matrix = df["label_vector"].tolist()
|
| 181 |
|
| 182 |
# Proper 3-way split: train / val / test
|
utils.py
CHANGED
|
@@ -71,7 +71,7 @@ def tune_thresholds(probs, true_labels, verbose=True):
|
|
| 71 |
return best_low, best_high, best_macro_f1
|
| 72 |
|
| 73 |
# Convert label values to soft scores: "yes" = 1.0, "plausibly" = 0.5, others = 0.0
|
| 74 |
-
def label_row_soft(row):
|
| 75 |
labels = []
|
| 76 |
for col in label_columns:
|
| 77 |
val = str(row[col]).strip().lower()
|
|
|
|
| 71 |
return best_low, best_high, best_macro_f1
|
| 72 |
|
| 73 |
# Convert label values to soft scores: "yes" = 1.0, "plausibly" = 0.5, others = 0.0
|
| 74 |
+
def label_row_soft(row, label_columns):
|
| 75 |
labels = []
|
| 76 |
for col in label_columns:
|
| 77 |
val = str(row[col]).strip().lower()
|