Spaces:
Sleeping
Sleeping
edbeeching
committed on
Commit
·
a2a9a72
1
Parent(s):
385de10
add default username
Browse files
app.py
CHANGED
|
@@ -194,7 +194,7 @@ def validate_request(request: GenerationRequest, oauth_token: Optional[Union[gr.
|
|
| 194 |
return request
|
| 195 |
|
| 196 |
|
| 197 |
-
def load_dataset_info(dataset_name, model_name, oauth_token=None, dataset_token=None
|
| 198 |
"""Load dataset information and return choices for dropdowns"""
|
| 199 |
if not dataset_name.strip():
|
| 200 |
return (
|
|
@@ -250,28 +250,47 @@ def load_dataset_info(dataset_name, model_name, oauth_token=None, dataset_token=
|
|
| 250 |
# Set slider maximum to the minimum of dataset samples and user limit
|
| 251 |
slider_max = min(dataset_sample_count, user_max_samples) if dataset_sample_count > 0 else user_max_samples
|
| 252 |
|
| 253 |
-
#
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
#
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
-
# Build the output name:
|
| 264 |
-
suggested_output_name = f"{
|
| 265 |
|
| 266 |
# Limit to 86 characters
|
| 267 |
if len(suggested_output_name) > 86:
|
| 268 |
# Truncate dataset name to fit within limit
|
| 269 |
-
available_for_dataset = 86 - len(
|
| 270 |
if available_for_dataset > 0:
|
| 271 |
dataset_base_name = dataset_base_name[:available_for_dataset]
|
| 272 |
-
suggested_output_name = f"{
|
| 273 |
else:
|
| 274 |
-
suggested_output_name = f"{
|
| 275 |
|
| 276 |
status_msg = f"✅ Dataset info loaded successfully! Found {len(config_choices)} config(s), {len(split_choices)} split(s), and {len(column_choices)} column(s)."
|
| 277 |
if dataset_sample_count > 0:
|
|
@@ -582,7 +601,24 @@ def main():
|
|
| 582 |
model_token = False # This is currently not supported
|
| 583 |
input_dataset_token = None # This is currently not supported
|
| 584 |
output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
|
| 585 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
try:
|
| 587 |
request = GenerationRequest(
|
| 588 |
id="", # Will be generated when adding to the database
|
|
@@ -604,7 +640,7 @@ def main():
|
|
| 604 |
input_dataset_token=input_dataset_token if input_dataset_token else None,
|
| 605 |
output_dataset_token=output_dataset_token,
|
| 606 |
num_output_examples=num_output_samples, # will be set after validating the input dataset
|
| 607 |
-
username=
|
| 608 |
email="n/a",
|
| 609 |
)
|
| 610 |
|
|
|
|
| 194 |
return request
|
| 195 |
|
| 196 |
|
| 197 |
+
def load_dataset_info(dataset_name, model_name, oauth_token=None, dataset_token=None):
|
| 198 |
"""Load dataset information and return choices for dropdowns"""
|
| 199 |
if not dataset_name.strip():
|
| 200 |
return (
|
|
|
|
| 250 |
# Set slider maximum to the minimum of dataset samples and user limit
|
| 251 |
slider_max = min(dataset_sample_count, user_max_samples) if dataset_sample_count > 0 else user_max_samples
|
| 252 |
|
| 253 |
+
# Get username from OAuth token
|
| 254 |
+
username = "anonymous"
|
| 255 |
+
if oauth_token:
|
| 256 |
+
try:
|
| 257 |
+
if isinstance(oauth_token, gr.OAuthToken):
|
| 258 |
+
token_str = oauth_token.token
|
| 259 |
+
elif isinstance(oauth_token, str):
|
| 260 |
+
token_str = oauth_token
|
| 261 |
+
else:
|
| 262 |
+
token_str = None
|
| 263 |
|
| 264 |
+
if token_str:
|
| 265 |
+
user_info = whoami(token=token_str)
|
| 266 |
+
username = user_info.get("name", "anonymous")
|
| 267 |
+
except Exception:
|
| 268 |
+
username = "anonymous"
|
| 269 |
+
|
| 270 |
+
# Generate a suggested output dataset name: username-model-dataset
|
| 271 |
+
dataset_base_name = dataset_name.split('/')[-1] if '/' in dataset_name else dataset_name
|
| 272 |
|
| 273 |
+
# Extract model short name (e.g., "Qwen/Qwen3-4B-Instruct-2507" -> "qwen3-4b")
|
| 274 |
+
model_short_name = model_name.split('/')[-1].lower()
|
| 275 |
+
# Remove common suffixes and simplify
|
| 276 |
+
model_short_name = model_short_name.replace('-instruct', '').replace('-2507', '').replace('_', '-')
|
| 277 |
+
# Take first part if it's still long
|
| 278 |
+
if len(model_short_name) > 15:
|
| 279 |
+
parts = model_short_name.split('-')
|
| 280 |
+
model_short_name = '-'.join(parts[:2]) if len(parts) > 1 else parts[0][:15]
|
| 281 |
|
| 282 |
+
# Build the output name: username-model-dataset
|
| 283 |
+
suggested_output_name = f"{username}-{model_short_name}-{dataset_base_name}"
|
| 284 |
|
| 285 |
# Limit to 86 characters
|
| 286 |
if len(suggested_output_name) > 86:
|
| 287 |
# Truncate dataset name to fit within limit
|
| 288 |
+
available_for_dataset = 86 - len(username) - len(model_short_name) - 2 # -2 for the hyphens
|
| 289 |
if available_for_dataset > 0:
|
| 290 |
dataset_base_name = dataset_base_name[:available_for_dataset]
|
| 291 |
+
suggested_output_name = f"{username}-{model_short_name}-{dataset_base_name}"
|
| 292 |
else:
|
| 293 |
+
suggested_output_name = f"{username}-{model_short_name}"
|
| 294 |
|
| 295 |
status_msg = f"✅ Dataset info loaded successfully! Found {len(config_choices)} config(s), {len(split_choices)} split(s), and {len(column_choices)} column(s)."
|
| 296 |
if dataset_sample_count > 0:
|
|
|
|
| 601 |
model_token = False # This is currently not supported
|
| 602 |
input_dataset_token = None # This is currently not supported
|
| 603 |
output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
|
| 604 |
+
|
| 605 |
+
# Get username from OAuth token
|
| 606 |
+
username = "anonymous"
|
| 607 |
+
if oauth_token:
|
| 608 |
+
try:
|
| 609 |
+
if isinstance(oauth_token, gr.OAuthToken):
|
| 610 |
+
token_str = oauth_token.token
|
| 611 |
+
elif isinstance(oauth_token, str):
|
| 612 |
+
token_str = oauth_token
|
| 613 |
+
else:
|
| 614 |
+
token_str = None
|
| 615 |
+
|
| 616 |
+
if token_str:
|
| 617 |
+
user_info = whoami(token=token_str)
|
| 618 |
+
username = user_info.get("name", "unknown")
|
| 619 |
+
except Exception:
|
| 620 |
+
username = "unknown"
|
| 621 |
+
|
| 622 |
try:
|
| 623 |
request = GenerationRequest(
|
| 624 |
id="", # Will be generated when adding to the database
|
|
|
|
| 640 |
input_dataset_token=input_dataset_token if input_dataset_token else None,
|
| 641 |
output_dataset_token=output_dataset_token,
|
| 642 |
num_output_examples=num_output_samples, # will be set after validating the input dataset
|
| 643 |
+
username=username,
|
| 644 |
email="n/a",
|
| 645 |
)
|
| 646 |
|