Spaces:
Sleeping
Sleeping
edbeeching
committed on
Commit
·
5fad7f1
1
Parent(s):
ab5bf76
fix bugs with model token
Browse files
app.py
CHANGED
|
@@ -100,6 +100,7 @@ def validate_request(request: GenerationRequest) -> GenerationRequest:
|
|
| 100 |
request.input_dataset_split = f"{request.input_dataset_split}[:{request.num_output_examples}]"
|
| 101 |
|
| 102 |
|
|
|
|
| 103 |
if request.num_output_examples > MAX_SAMPLES:
|
| 104 |
raise Exception(f"Requested number of output examples {request.num_output_examples} exceeds the max limit of {MAX_SAMPLES}.")
|
| 105 |
|
|
@@ -107,12 +108,28 @@ def validate_request(request: GenerationRequest) -> GenerationRequest:
|
|
| 107 |
if request.prompt_column not in input_dataset_info.features:
|
| 108 |
raise Exception(f"Prompt column {request.prompt_column} does not exist in dataset {request.input_dataset_name}. Available columns: {list(input_dataset_info.features.keys())}")
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# check the model exists
|
| 111 |
try:
|
| 112 |
-
model_config = AutoConfig.from_pretrained(request.model_name_or_path,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
except Exception as e:
|
| 114 |
print(e)
|
| 115 |
-
raise Exception(f"Model {request.model_name_or_path} revision {request.model_revision} does not exist or cannot be accessed
|
| 116 |
|
| 117 |
# check the model max position embeddings is greater than the requested max tokens and less than MAX_TOKENS
|
| 118 |
if model_config.max_position_embeddings < request.max_tokens:
|
|
@@ -265,7 +282,7 @@ def main():
|
|
| 265 |
max_tok, temp, top_k_val, top_p_val, email_addr, num_output_samples):
|
| 266 |
|
| 267 |
MASTER_ORG = "synthetic-data-universe/"
|
| 268 |
-
model_token =
|
| 269 |
input_dataset_token = None # This is currently not supported
|
| 270 |
output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
|
| 271 |
|
|
@@ -281,7 +298,7 @@ def main():
|
|
| 281 |
prompt_column=prompt_col,
|
| 282 |
model_name_or_path=model_name,
|
| 283 |
model_revision=model_rev,
|
| 284 |
-
model_token=model_token
|
| 285 |
system_prompt=sys_prompt if sys_prompt else None,
|
| 286 |
max_tokens=int(max_tok),
|
| 287 |
temperature=temp,
|
|
@@ -319,10 +336,12 @@ def main():
|
|
| 319 |
"To unlock this and many other cool stuff, please consider upgrading your account.\n\n"
|
| 320 |
"### [**Become a PRO Today!**](http://huggingface.co/subscribe/pro?source=synthetic-data-universe)"
|
| 321 |
)
|
| 322 |
-
return gr.update(visible=False), gr.update(visible=True, value=message)
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
| 326 |
|
| 327 |
if __name__ == "__main__":
|
| 328 |
main()
|
|
|
|
| 100 |
request.input_dataset_split = f"{request.input_dataset_split}[:{request.num_output_examples}]"
|
| 101 |
|
| 102 |
|
| 103 |
+
|
| 104 |
if request.num_output_examples > MAX_SAMPLES:
|
| 105 |
raise Exception(f"Requested number of output examples {request.num_output_examples} exceeds the max limit of {MAX_SAMPLES}.")
|
| 106 |
|
|
|
|
| 108 |
if request.prompt_column not in input_dataset_info.features:
|
| 109 |
raise Exception(f"Prompt column {request.prompt_column} does not exist in dataset {request.input_dataset_name}. Available columns: {list(input_dataset_info.features.keys())}")
|
| 110 |
|
| 111 |
+
# This is currently not supported; the output dataset will be created under the org 'synthetic-data-universe'
|
| 112 |
+
# check output_dataset name is valid
|
| 113 |
+
if request.output_dataset_name.count("/") != 1:
|
| 114 |
+
raise Exception("Output dataset name must be in the format 'dataset_name', e.g., 'my-dataset'. The dataset will be created under the org 'synthetic-data-universe/my-dataset'.")
|
| 115 |
+
|
| 116 |
+
# check the output dataset is valid and accessible with the provided token
|
| 117 |
+
try:
|
| 118 |
+
output_dataset_info = get_dataset_infos(request.output_dataset_name, token=request.output_dataset_token)
|
| 119 |
+
raise Exception(f"Output dataset {request.output_dataset_name} already exists. Please choose a different name.")
|
| 120 |
+
except Exception as e:
|
| 121 |
+
pass # dataset does not exist, which is expected
|
| 122 |
+
|
| 123 |
# check the model exists
|
| 124 |
try:
|
| 125 |
+
model_config = AutoConfig.from_pretrained(request.model_name_or_path,
|
| 126 |
+
revision=request.model_revision,
|
| 127 |
+
force_download=True,
|
| 128 |
+
token=False
|
| 129 |
+
)
|
| 130 |
except Exception as e:
|
| 131 |
print(e)
|
| 132 |
+
raise Exception(f"Model {request.model_name_or_path} revision {request.model_revision} does not exist or cannot be accessed. The model may be private or gated, which is not supported at this time.")
|
| 133 |
|
| 134 |
# check the model max position embeddings is greater than the requested max tokens and less than MAX_TOKENS
|
| 135 |
if model_config.max_position_embeddings < request.max_tokens:
|
|
|
|
| 282 |
max_tok, temp, top_k_val, top_p_val, email_addr, num_output_samples):
|
| 283 |
|
| 284 |
MASTER_ORG = "synthetic-data-universe/"
|
| 285 |
+
model_token = False # This is currently not supported
|
| 286 |
input_dataset_token = None # This is currently not supported
|
| 287 |
output_dataset_token = os.getenv("OUTPUT_DATASET_TOKEN")
|
| 288 |
|
|
|
|
| 298 |
prompt_column=prompt_col,
|
| 299 |
model_name_or_path=model_name,
|
| 300 |
model_revision=model_rev,
|
| 301 |
+
model_token=model_token,
|
| 302 |
system_prompt=sys_prompt if sys_prompt else None,
|
| 303 |
max_tokens=int(max_tok),
|
| 304 |
temperature=temp,
|
|
|
|
| 336 |
"To unlock this and many other cool stuff, please consider upgrading your account.\n\n"
|
| 337 |
"### [**Become a PRO Today!**](http://huggingface.co/subscribe/pro?source=synthetic-data-universe)"
|
| 338 |
)
|
| 339 |
+
return gr.update(visible=False), gr.update(visible=True, value=message)
|
| 340 |
+
|
| 341 |
+
login_button = gr.LoginButton() # this is required or AUTH will not work
|
| 342 |
+
|
| 343 |
+
demo.load(control_access, inputs=None, outputs=[main_interface, pro_message])
|
| 344 |
+
demo.queue(max_size=None, default_concurrency_limit=None).launch(show_error=True)
|
| 345 |
|
| 346 |
if __name__ == "__main__":
|
| 347 |
main()
|