Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -114,76 +114,7 @@ def load_questions():
|
|
| 114 |
logger.info(f"Stored file path mappings for {len(task_file_paths)} tasks.")
|
| 115 |
if not questions_for_api:
|
| 116 |
logger.error("CRITICAL: No valid questions loaded after filtering/processing.")
|
| 117 |
-
# --- Add this endpoint definition to your FastAPI app ---
|
| 118 |
-
|
| 119 |
-
# Determine a base path for security. This should be the root directory
|
| 120 |
-
# where Hugging Face datasets cache is allowed to serve files from.
|
| 121 |
-
# IMPORTANT: Adjust this path based on your server's environment or use
|
| 122 |
-
# environment variables for configuration.
|
| 123 |
-
# Using expanduser handles '~' correctly.
|
| 124 |
-
ALLOWED_CACHE_BASE = os.path.abspath(os.path.expanduser("~/.cache/huggingface/datasets"))
|
| 125 |
-
logger.info(f"Configured allowed base path for file serving: {ALLOWED_CACHE_BASE}")
|
| 126 |
-
|
| 127 |
-
@app.get("/files/{task_id}",
|
| 128 |
-
summary="Get Associated File by Task ID",
|
| 129 |
-
description="Downloads the file associated with the given task_id, if one exists and is mapped.",
|
| 130 |
-
responses={
|
| 131 |
-
200: {
|
| 132 |
-
"description": "File content.",
|
| 133 |
-
"content": {"*/*": {}} # Indicates response can be any file type
|
| 134 |
-
},
|
| 135 |
-
403: {"model": ErrorResponse, "description": "Access denied (e.g., path traversal attempt)."},
|
| 136 |
-
404: {"model": ErrorResponse, "description": "Task ID not found, no file associated, or file missing on server."},
|
| 137 |
-
500: {"model": ErrorResponse, "description": "Server error reading file."}
|
| 138 |
-
})
|
| 139 |
-
async def get_task_file(task_id: str):
|
| 140 |
-
"""
|
| 141 |
-
Serves the file associated with a specific task ID.
|
| 142 |
-
Includes security checks to prevent accessing arbitrary files.
|
| 143 |
-
"""
|
| 144 |
-
logger.info(f"Request received for file associated with task_id: {task_id}")
|
| 145 |
-
|
| 146 |
-
if task_id not in task_file_paths:
|
| 147 |
-
logger.warning(f"File request failed: task_id '{task_id}' not found in file path mapping.")
|
| 148 |
-
raise HTTPException(status_code=404, detail=f"No file path associated with task_id {task_id}.")
|
| 149 |
-
|
| 150 |
-
local_file_path = task_file_paths[task_id]
|
| 151 |
-
logger.debug(f"Mapped task_id '{task_id}' to local path: {local_file_path}")
|
| 152 |
|
| 153 |
-
# --- CRUCIAL SECURITY CHECK ---
|
| 154 |
-
try:
|
| 155 |
-
# Resolve to absolute paths to prevent '..' tricks
|
| 156 |
-
abs_file_path = os.path.abspath(local_file_path)
|
| 157 |
-
abs_base_path = ALLOWED_CACHE_BASE # Already absolute
|
| 158 |
-
|
| 159 |
-
# Check if the resolved file path starts with the allowed base directory
|
| 160 |
-
if not abs_file_path.startswith(abs_base_path):
|
| 161 |
-
logger.error(f"SECURITY ALERT: Path traversal attempt denied for task_id '{task_id}'. Path '{local_file_path}' resolves outside base '{abs_base_path}'.")
|
| 162 |
-
raise HTTPException(status_code=403, detail="File access denied.")
|
| 163 |
-
|
| 164 |
-
# Check if the file exists at the resolved, validated path
|
| 165 |
-
if not os.path.exists(abs_file_path) or not os.path.isfile(abs_file_path):
|
| 166 |
-
logger.error(f"File not found on server for task_id '{task_id}' at expected path: {abs_file_path}")
|
| 167 |
-
raise HTTPException(status_code=404, detail=f"File associated with task_id {task_id} not found on server disk.")
|
| 168 |
-
|
| 169 |
-
except HTTPException as http_exc:
|
| 170 |
-
raise http_exc # Re-raise our own security/404 exceptions
|
| 171 |
-
except Exception as path_err:
|
| 172 |
-
logger.error(f"Error resolving or checking path '{local_file_path}' for task_id '{task_id}': {path_err}", exc_info=True)
|
| 173 |
-
raise HTTPException(status_code=500, detail="Server error validating file path.")
|
| 174 |
-
# --- END SECURITY CHECK ---
|
| 175 |
-
|
| 176 |
-
# Determine MIME type for the Content-Type header
|
| 177 |
-
mime_type, _ = mimetypes.guess_type(abs_file_path)
|
| 178 |
-
media_type = mime_type if mime_type else "application/octet-stream" # Default if unknown
|
| 179 |
-
|
| 180 |
-
# Extract filename for the Content-Disposition header (suggests filename to browser/client)
|
| 181 |
-
file_name_for_download = os.path.basename(abs_file_path)
|
| 182 |
-
|
| 183 |
-
logger.info(f"Serving file '{file_name_for_download}' (type: {media_type}) for task_id '{task_id}' from path: {abs_file_path}")
|
| 184 |
-
|
| 185 |
-
# Use FileResponse to efficiently stream the file
|
| 186 |
-
return FileResponse(path=abs_file_path, media_type=media_type, filename=file_name_for_download)
|
| 187 |
|
| 188 |
|
| 189 |
class Question(BaseModel):
|
|
@@ -259,8 +190,76 @@ async def startup_event():
|
|
| 259 |
# import sys
|
| 260 |
# sys.exit(1) # Consider exiting if questions are critical
|
| 261 |
|
| 262 |
-
# ---
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
def update_huggingface_dataset(username: str, score: float):
|
| 265 |
"""Loads the dataset, updates the score if higher, and pushes back."""
|
| 266 |
try:
|
|
|
|
| 114 |
logger.info(f"Stored file path mappings for {len(task_file_paths)} tasks.")
|
| 115 |
if not questions_for_api:
|
| 116 |
logger.error("CRITICAL: No valid questions loaded after filtering/processing.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
class Question(BaseModel):
|
|
|
|
| 190 |
# import sys
|
| 191 |
# sys.exit(1) # Consider exiting if questions are critical
|
| 192 |
|
| 193 |
+
# --- Add this endpoint definition to your FastAPI app ---
|
| 194 |
+
|
| 195 |
+
# Determine a base path for security. This should be the root directory
|
| 196 |
+
# where Hugging Face datasets cache is allowed to serve files from.
|
| 197 |
+
# IMPORTANT: Adjust this path based on your server's environment or use
|
| 198 |
+
# environment variables for configuration.
|
| 199 |
+
# Using expanduser handles '~' correctly.
|
| 200 |
+
ALLOWED_CACHE_BASE = os.path.abspath(os.path.expanduser("~/.cache/huggingface/datasets"))
|
| 201 |
+
logger.info(f"Configured allowed base path for file serving: {ALLOWED_CACHE_BASE}")
|
| 202 |
+
|
| 203 |
+
@app.get("/files/{task_id}",
|
| 204 |
+
summary="Get Associated File by Task ID",
|
| 205 |
+
description="Downloads the file associated with the given task_id, if one exists and is mapped.",
|
| 206 |
+
responses={
|
| 207 |
+
200: {
|
| 208 |
+
"description": "File content.",
|
| 209 |
+
"content": {"*/*": {}} # Indicates response can be any file type
|
| 210 |
+
},
|
| 211 |
+
403: {"model": ErrorResponse, "description": "Access denied (e.g., path traversal attempt)."},
|
| 212 |
+
404: {"model": ErrorResponse, "description": "Task ID not found, no file associated, or file missing on server."},
|
| 213 |
+
500: {"model": ErrorResponse, "description": "Server error reading file."}
|
| 214 |
+
})
|
| 215 |
+
async def get_task_file(task_id: str):
|
| 216 |
+
"""
|
| 217 |
+
Serves the file associated with a specific task ID.
|
| 218 |
+
Includes security checks to prevent accessing arbitrary files.
|
| 219 |
+
"""
|
| 220 |
+
logger.info(f"Request received for file associated with task_id: {task_id}")
|
| 221 |
+
|
| 222 |
+
if task_id not in task_file_paths:
|
| 223 |
+
logger.warning(f"File request failed: task_id '{task_id}' not found in file path mapping.")
|
| 224 |
+
raise HTTPException(status_code=404, detail=f"No file path associated with task_id {task_id}.")
|
| 225 |
+
|
| 226 |
+
local_file_path = task_file_paths[task_id]
|
| 227 |
+
logger.debug(f"Mapped task_id '{task_id}' to local path: {local_file_path}")
|
| 228 |
+
|
| 229 |
+
# --- CRUCIAL SECURITY CHECK ---
|
| 230 |
+
try:
|
| 231 |
+
# Resolve to absolute paths to prevent '..' tricks
|
| 232 |
+
abs_file_path = os.path.abspath(local_file_path)
|
| 233 |
+
abs_base_path = ALLOWED_CACHE_BASE # Already absolute
|
| 234 |
+
|
| 235 |
+
# Check if the resolved file path starts with the allowed base directory
|
| 236 |
+
if not abs_file_path.startswith(abs_base_path):
|
| 237 |
+
logger.error(f"SECURITY ALERT: Path traversal attempt denied for task_id '{task_id}'. Path '{local_file_path}' resolves outside base '{abs_base_path}'.")
|
| 238 |
+
raise HTTPException(status_code=403, detail="File access denied.")
|
| 239 |
+
|
| 240 |
+
# Check if the file exists at the resolved, validated path
|
| 241 |
+
if not os.path.exists(abs_file_path) or not os.path.isfile(abs_file_path):
|
| 242 |
+
logger.error(f"File not found on server for task_id '{task_id}' at expected path: {abs_file_path}")
|
| 243 |
+
raise HTTPException(status_code=404, detail=f"File associated with task_id {task_id} not found on server disk.")
|
| 244 |
+
|
| 245 |
+
except HTTPException as http_exc:
|
| 246 |
+
raise http_exc # Re-raise our own security/404 exceptions
|
| 247 |
+
except Exception as path_err:
|
| 248 |
+
logger.error(f"Error resolving or checking path '{local_file_path}' for task_id '{task_id}': {path_err}", exc_info=True)
|
| 249 |
+
raise HTTPException(status_code=500, detail="Server error validating file path.")
|
| 250 |
+
# --- END SECURITY CHECK ---
|
| 251 |
+
|
| 252 |
+
# Determine MIME type for the Content-Type header
|
| 253 |
+
mime_type, _ = mimetypes.guess_type(abs_file_path)
|
| 254 |
+
media_type = mime_type if mime_type else "application/octet-stream" # Default if unknown
|
| 255 |
+
|
| 256 |
+
# Extract filename for the Content-Disposition header (suggests filename to browser/client)
|
| 257 |
+
file_name_for_download = os.path.basename(abs_file_path)
|
| 258 |
+
|
| 259 |
+
logger.info(f"Serving file '{file_name_for_download}' (type: {media_type}) for task_id '{task_id}' from path: {abs_file_path}")
|
| 260 |
+
|
| 261 |
+
# Use FileResponse to efficiently stream the file
|
| 262 |
+
return FileResponse(path=abs_file_path, media_type=media_type, filename=file_name_for_download)
|
| 263 |
def update_huggingface_dataset(username: str, score: float):
|
| 264 |
"""Loads the dataset, updates the score if higher, and pushes back."""
|
| 265 |
try:
|