loodvanniekerkginkgo committed on
Commit
094a347
·
1 Parent(s): fb66aff

Added more encoding options

Browse files
Files changed (1) hide show
  1. submit.py +19 -4
submit.py CHANGED
@@ -57,6 +57,16 @@ def upload_submission(
57
  Path(tmp_name).unlink()
58
 
59
 
 
 
 
 
 
 
 
 
 
 
60
  def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
61
  """Validate basic file requirements and return Path object"""
62
  file_path = file.name
@@ -70,6 +80,13 @@ def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
70
  raise gr.Error(
71
  f"{file_type} file must be a CSV file. Please upload a .csv file."
72
  )
 
 
 
 
 
 
 
73
 
74
  return path_obj
75
 
@@ -115,15 +132,13 @@ def make_submission(
115
  files = {}
116
  # Validate CV file
117
  cv_path = validate_file_requirements(cv_file, "GDPa1 Cross-Validation")
118
- with cv_path.open("rb") as f:
119
- cv_content = f.read().decode("utf-8")
120
  validate_csv_file(cv_content, "GDPa1_cross_validation")
121
  files["cv"] = cv_content
122
 
123
  # Validate test file
124
  test_path = validate_file_requirements(test_file, "Private Test Set")
125
- with test_path.open("rb") as f:
126
- test_content = f.read().decode("utf-8")
127
  validate_csv_file(test_content, "Heldout Test Set")
128
  files["test"] = test_content
129
 
 
57
  Path(tmp_name).unlink()
58
 
59
 
60
def safe_read_encoding(path_obj: Path) -> str:
    """Read the file at *path_obj* and return its contents as text.

    The bytes are read once and decoded as UTF-8; if that fails, they are
    decoded as Latin-1 instead (can tweak as needed).

    Args:
        path_obj: Path of the file to read.

    Returns:
        The decoded file contents, without a leading UTF-8 byte-order mark.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    raw = path_obj.read_bytes()
    try:
        # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM
        # so it does not end up glued to the first field as "\ufeff".
        return raw.decode("utf-8-sig")
    except UnicodeDecodeError:
        # Latin-1 maps every byte value to a code point, so this fallback
        # cannot raise a decode error.
        return raw.decode("latin-1")
68
+
69
+
70
  def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
71
  """Validate basic file requirements and return Path object"""
72
  file_path = file.name
 
80
  raise gr.Error(
81
  f"{file_type} file must be a CSV file. Please upload a .csv file."
82
  )
83
+ # Check it can be read
84
+ try:
85
+ safe_read_encoding(path_obj)
86
+ except Exception as e:
87
+ raise gr.Error(
88
+ f"Error reading {file_type} file {path_obj.name}. Could be an encoding issue if using Windows. Full error: {e}"
89
+ )
90
 
91
  return path_obj
92
 
 
132
  files = {}
133
  # Validate CV file
134
  cv_path = validate_file_requirements(cv_file, "GDPa1 Cross-Validation")
135
+ cv_content = safe_read_encoding(cv_path)
 
136
  validate_csv_file(cv_content, "GDPa1_cross_validation")
137
  files["cv"] = cv_content
138
 
139
  # Validate test file
140
  test_path = validate_file_requirements(test_file, "Private Test Set")
141
+ test_content = safe_read_encoding(test_path)
 
142
  validate_csv_file(test_content, "Heldout Test Set")
143
  files["test"] = test_content
144