edbeeching committed on
Commit
a2a9a72
·
1 Parent(s): 385de10

add default username

Browse files
Files changed (1) hide show
  1. app.py +51 -15
app.py CHANGED
@@ -194,7 +194,7 @@ def validate_request(request: GenerationRequest, oauth_token: Optional[Union[gr.
194
  return request
195
 
196
 
197
- def load_dataset_info(dataset_name, model_name, oauth_token=None, dataset_token=None, ):
198
  """Load dataset information and return choices for dropdowns"""
199
  if not dataset_name.strip():
200
  return (
@@ -250,28 +250,47 @@ def load_dataset_info(dataset_name, model_name, oauth_token=None, dataset_token=
250
  # Set slider maximum to the minimum of dataset samples and user limit
251
  slider_max = min(dataset_sample_count, user_max_samples) if dataset_sample_count > 0 else user_max_samples
252
 
253
- # Generate a suggested output dataset name with model name and timestamp
254
- dataset_base_name = dataset_name.split('/')[-1] if '/' in dataset_name else dataset_name
 
 
 
 
 
 
 
 
255
 
256
- # Extract model short name (e.g., "Qwen/Qwen3-4B-Instruct-2507" -> "Qwen3-4B-Instruct-2507")
257
- model_short_name = model_name.split('/')[-1]
 
 
 
 
 
 
258
 
259
- # Create a compact timestamp (YYMMDD-HHMM format)
260
- from datetime import datetime
261
- timestamp = datetime.now().strftime("%y%m%d-%H%M")
 
 
 
 
 
262
 
263
- # Build the output name: MODEL-dataset-timestamp
264
- suggested_output_name = f"{model_short_name}-{dataset_base_name}-{timestamp}"
265
 
266
  # Limit to 86 characters
267
  if len(suggested_output_name) > 86:
268
  # Truncate dataset name to fit within limit
269
- available_for_dataset = 86 - len(model_short_name) - len(timestamp) - 2 # -2 for the hyphens
270
  if available_for_dataset > 0:
271
  dataset_base_name = dataset_base_name[:available_for_dataset]
272
- suggested_output_name = f"{model_short_name}-{dataset_base_name}-{timestamp}"
273
  else:
274
- suggested_output_name = f"{model_short_name}-{timestamp}"
275
 
276
  status_msg = f"✅ Dataset info loaded successfully! Found {len(config_choices)} config(s), {len(split_choices)} split(s), and {len(column_choices)} column(s)."
277
  if dataset_sample_count > 0:
@@ -582,7 +601,24 @@ def main():
582
  model_token = False # This is currently not supported
583
  input_dataset_token = None # This is currently not supported
584
  output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
585
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
  try:
587
  request = GenerationRequest(
588
  id="", # Will be generated when adding to the database
@@ -604,7 +640,7 @@ def main():
604
  input_dataset_token=input_dataset_token if input_dataset_token else None,
605
  output_dataset_token=output_dataset_token,
606
  num_output_examples=num_output_samples, # will be set after validating the input dataset
607
- username="user",
608
  email="n/a",
609
  )
610
 
 
194
  return request
195
 
196
 
197
+ def load_dataset_info(dataset_name, model_name, oauth_token=None, dataset_token=None):
198
  """Load dataset information and return choices for dropdowns"""
199
  if not dataset_name.strip():
200
  return (
 
250
  # Set slider maximum to the minimum of dataset samples and user limit
251
  slider_max = min(dataset_sample_count, user_max_samples) if dataset_sample_count > 0 else user_max_samples
252
 
253
+ # Get username from OAuth token
254
+ username = "anonymous"
255
+ if oauth_token:
256
+ try:
257
+ if isinstance(oauth_token, gr.OAuthToken):
258
+ token_str = oauth_token.token
259
+ elif isinstance(oauth_token, str):
260
+ token_str = oauth_token
261
+ else:
262
+ token_str = None
263
 
264
+ if token_str:
265
+ user_info = whoami(token=token_str)
266
+ username = user_info.get("name", "anonymous")
267
+ except Exception:
268
+ username = "anonymous"
269
+
270
+ # Generate a suggested output dataset name: username-model-dataset
271
+ dataset_base_name = dataset_name.split('/')[-1] if '/' in dataset_name else dataset_name
272
 
273
+ # Extract model short name (e.g., "Qwen/Qwen3-4B-Instruct-2507" -> "qwen3-4b")
274
+ model_short_name = model_name.split('/')[-1].lower()
275
+ # Remove common suffixes and simplify
276
+ model_short_name = model_short_name.replace('-instruct', '').replace('-2507', '').replace('_', '-')
277
+ # Take first part if it's still long
278
+ if len(model_short_name) > 15:
279
+ parts = model_short_name.split('-')
280
+ model_short_name = '-'.join(parts[:2]) if len(parts) > 1 else parts[0][:15]
281
 
282
+ # Build the output name: username-model-dataset
283
+ suggested_output_name = f"{username}-{model_short_name}-{dataset_base_name}"
284
 
285
  # Limit to 86 characters
286
  if len(suggested_output_name) > 86:
287
  # Truncate dataset name to fit within limit
288
+ available_for_dataset = 86 - len(username) - len(model_short_name) - 2 # -2 for the hyphens
289
  if available_for_dataset > 0:
290
  dataset_base_name = dataset_base_name[:available_for_dataset]
291
+ suggested_output_name = f"{username}-{model_short_name}-{dataset_base_name}"
292
  else:
293
+ suggested_output_name = f"{username}-{model_short_name}"
294
 
295
  status_msg = f"✅ Dataset info loaded successfully! Found {len(config_choices)} config(s), {len(split_choices)} split(s), and {len(column_choices)} column(s)."
296
  if dataset_sample_count > 0:
 
601
  model_token = False # This is currently not supported
602
  input_dataset_token = None # This is currently not supported
603
  output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
604
+
605
+ # Get username from OAuth token
606
+ username = "anonymous"
607
+ if oauth_token:
608
+ try:
609
+ if isinstance(oauth_token, gr.OAuthToken):
610
+ token_str = oauth_token.token
611
+ elif isinstance(oauth_token, str):
612
+ token_str = oauth_token
613
+ else:
614
+ token_str = None
615
+
616
+ if token_str:
617
+ user_info = whoami(token=token_str)
618
+ username = user_info.get("name", "unknown")
619
+ except Exception:
620
+ username = "unknown"
621
+
622
  try:
623
  request = GenerationRequest(
624
  id="", # Will be generated when adding to the database
 
640
  input_dataset_token=input_dataset_token if input_dataset_token else None,
641
  output_dataset_token=output_dataset_token,
642
  num_output_examples=num_output_samples, # will be set after validating the input dataset
643
+ username=username,
644
  email="n/a",
645
  )
646