loodvanniekerkginkgo committed on
Commit
25b07ba
·
1 Parent(s): ea07f64

Submission changes for better validation

Browse files
Files changed (2) hide show
  1. submit.py +2 -1
  2. validation.py +5 -0
submit.py CHANGED
@@ -91,6 +91,7 @@ def make_submission(
91
  # if profile:
92
  # user_state = profile.name
93
  validate_username(user_state)
 
94
 
95
  model_name = model_name.strip()
96
  model_description = model_description.strip()
@@ -139,7 +140,7 @@ def make_submission(
139
  # Upload submission without re-validating (already done)
140
  upload_submission(
141
  file_content=file_content,
142
- user_state=user_state,
143
  submission_type=submission_type,
144
  model_name=model_name,
145
  model_description=model_description,
 
91
  # if profile:
92
  # user_state = profile.name
93
  validate_username(user_state)
94
+ username = user_state.strip()
95
 
96
  model_name = model_name.strip()
97
  model_description = model_description.strip()
 
140
  # Upload submission without re-validating (already done)
141
  upload_submission(
142
  file_content=file_content,
143
+ user_state=username,
144
  submission_type=submission_type,
145
  model_name=model_name,
146
  model_description=model_description,
validation.py CHANGED
@@ -195,6 +195,11 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
195
  missing_count = df[col].isnull().sum()
196
  if missing_count > 0:
197
  raise gr.Error(f"❌ Column '{col}' contains {missing_count} missing values")
 
 
 
 
 
198
 
199
  # All names should be unique
200
  n_duplicates = df["antibody_name"].duplicated().sum()
 
195
  missing_count = df[col].isnull().sum()
196
  if missing_count > 0:
197
  raise gr.Error(f"❌ Column '{col}' contains {missing_count} missing values")
198
+
199
+ # No constant values in assay columns
200
+ for col in assay_columns:
201
+ if df[col].nunique() == 1:
202
+ raise gr.Error(f"❌ Column '{col}' contains a constant value ({df[col].iloc[0]}), and therefore the Spearman correlation is undefined.")
203
 
204
  # All names should be unique
205
  n_duplicates = df["antibody_name"].duplicated().sum()