edbeeching committed on
Commit
5fad7f1
·
1 Parent(s): ab5bf76

fix bugs with model token

Browse files
Files changed (1) hide show
  1. app.py +27 -8
app.py CHANGED
@@ -100,6 +100,7 @@ def validate_request(request: GenerationRequest) -> GenerationRequest:
100
  request.input_dataset_split = f"{request.input_dataset_split}[:{request.num_output_examples}]"
101
 
102
 
 
103
  if request.num_output_examples > MAX_SAMPLES:
104
  raise Exception(f"Requested number of output examples {request.num_output_examples} exceeds the max limit of {MAX_SAMPLES}.")
105
 
@@ -107,12 +108,28 @@ def validate_request(request: GenerationRequest) -> GenerationRequest:
107
  if request.prompt_column not in input_dataset_info.features:
108
  raise Exception(f"Prompt column {request.prompt_column} does not exist in dataset {request.input_dataset_name}. Available columns: {list(input_dataset_info.features.keys())}")
109
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  # check the models exists
111
  try:
112
- model_config = AutoConfig.from_pretrained(request.model_name_or_path, revision=request.model_revision, token=request.model_token)
 
 
 
 
113
  except Exception as e:
114
  print(e)
115
- raise Exception(f"Model {request.model_name_or_path} revision {request.model_revision} does not exist or cannot be accessed with the provided token.")
116
 
117
  # check the model max position embeddings is greater than the requested max tokens and less than MAX_TOKENS
118
  if model_config.max_position_embeddings < request.max_tokens:
@@ -265,7 +282,7 @@ def main():
265
  max_tok, temp, top_k_val, top_p_val, email_addr, num_output_samples):
266
 
267
  MASTER_ORG = "synthetic-data-universe/"
268
- model_token = None # This is currently not supported
269
  input_dataset_token = None # This is currently not supported
270
  output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
271
 
@@ -281,7 +298,7 @@ def main():
281
  prompt_column=prompt_col,
282
  model_name_or_path=model_name,
283
  model_revision=model_rev,
284
- model_token=model_token if model_token else None,
285
  system_prompt=sys_prompt if sys_prompt else None,
286
  max_tokens=int(max_tok),
287
  temperature=temp,
@@ -319,10 +336,12 @@ def main():
319
  "To unlock this and many other cool stuff, please consider upgrading your account.\n\n"
320
  "### [**Become a PRO Today!**](http://huggingface.co/subscribe/pro?source=synthetic-data-universe)"
321
  )
322
- return gr.update(visible=False), gr.update(visible=True, value=message)
323
-
324
- demo.load(control_access, inputs=None, outputs=[main_interface, pro_message])
325
- demo.queue(max_size=None, default_concurrency_limit=None).launch(show_error=True)
 
 
326
 
327
  if __name__ == "__main__":
328
  main()
 
100
  request.input_dataset_split = f"{request.input_dataset_split}[:{request.num_output_examples}]"
101
 
102
 
103
+
104
  if request.num_output_examples > MAX_SAMPLES:
105
  raise Exception(f"Requested number of output examples {request.num_output_examples} exceeds the max limit of {MAX_SAMPLES}.")
106
 
 
108
  if request.prompt_column not in input_dataset_info.features:
109
  raise Exception(f"Prompt column {request.prompt_column} does not exist in dataset {request.input_dataset_name}. Available columns: {list(input_dataset_info.features.keys())}")
110
 
111
+ # This is currently not supported, the output dataset will be created under the org 'synthetic-data-universe'
112
+ # check output_dataset name is valid
113
+ if request.output_dataset_name.count("/") != 1:
114
+ raise Exception("Output dataset name must be in the format 'dataset_name', e.g., 'my-dataset'. The dataset will be created under the org 'synthetic-data-universe/my-dataset'.")
115
+
116
+ # check the output dataset is valid and accessible with the provided token
117
+ try:
118
+ output_dataset_info = get_dataset_infos(request.output_dataset_name, token=request.output_dataset_token)
119
+ raise Exception(f"Output dataset {request.output_dataset_name} already exists. Please choose a different name.")
120
+ except Exception as e:
121
+ pass # dataset does not exist, which is expected
122
+
123
  # check the models exists
124
  try:
125
+ model_config = AutoConfig.from_pretrained(request.model_name_or_path,
126
+ revision=request.model_revision,
127
+ force_download=True,
128
+ token=False
129
+ )
130
  except Exception as e:
131
  print(e)
132
+ raise Exception(f"Model {request.model_name_or_path} revision {request.model_revision} does not exist or cannot be accessed. The model may be private or gated, which is not supported at this time.")
133
 
134
  # check the model max position embeddings is greater than the requested max tokens and less than MAX_TOKENS
135
  if model_config.max_position_embeddings < request.max_tokens:
 
282
  max_tok, temp, top_k_val, top_p_val, email_addr, num_output_samples):
283
 
284
  MASTER_ORG = "synthetic-data-universe/"
285
+ model_token = False # This is currently not supported
286
  input_dataset_token = None # This is currently not supported
287
  output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
288
 
 
298
  prompt_column=prompt_col,
299
  model_name_or_path=model_name,
300
  model_revision=model_rev,
301
+ model_token=model_token,
302
  system_prompt=sys_prompt if sys_prompt else None,
303
  max_tokens=int(max_tok),
304
  temperature=temp,
 
336
  "To unlock this and many other cool stuff, please consider upgrading your account.\n\n"
337
  "### [**Become a PRO Today!**](http://huggingface.co/subscribe/pro?source=synthetic-data-universe)"
338
  )
339
+ return gr.update(visible=False), gr.update(visible=True, value=message)
340
+
341
+ login_button = gr.LoginButton() # this is required or AUTH will not work
342
+
343
+ demo.load(control_access, inputs=None, outputs=[main_interface, pro_message])
344
+ demo.queue(max_size=None, default_concurrency_limit=None).launch(show_error=True)
345
 
346
  if __name__ == "__main__":
347
  main()