generate with contrastive search
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
- app.py +33 -26
- requirements.txt +1 -1
app.py
CHANGED

```diff
@@ -17,12 +17,11 @@ use_gpu = torch.cuda.is_available()
 def generate_text(
     prompt: str,
     gen_length=64,
-
+    penalty_alpha=0.6,
+    top_k=6,
     no_repeat_ngram_size=2,
     length_penalty=1.0,
-    num_beam_groups=1,
     # perma params (not set by user)
-    repetition_penalty=3.5,
     abs_max_length=512,
     verbose=False,
 ):
@@ -53,15 +52,13 @@ def generate_text(
     logging.info(f"Input too long {input_len} > {abs_max_length}, may cause errors")
     result = generator(
         prompt,
-
+        max_new_tokens=gen_length,
+        max_length=None,  # in case of default max_length
         min_length=input_len + 4,
-
-
-        repetition_penalty=repetition_penalty,
+        penalty_alpha=penalty_alpha,
+        top_k=top_k,
         no_repeat_ngram_size=no_repeat_ngram_size,
         length_penalty=length_penalty,
-        do_sample=False,
-        early_stopping=True,
     )  # generate
     response = result[0]["generated_text"]
     rt = time.perf_counter() - st
@@ -118,18 +115,19 @@ def get_parser():
     )

     parser.add_argument(
-        "-
-        "--
-        type=
-        default=
-        help="
+        "-a",
+        "--penalty_alpha",
+        type=float,
+        default=0.6,
+        help="The penalty alpha for the text generation pipeline (contrastive search) - default 0.6",
     )

     parser.add_argument(
-        "
+        "-k",
+        "--top_k",
         type=int,
-        default=
-        help="
+        default=6,
+        help="The top k for the text generation pipeline (contrastive search) - default 6",
     )
     return parser

@@ -146,11 +144,18 @@ available_models = [
 ]

 if __name__ == "__main__":
+
     logging.info("\n\n\nStarting new instance of app.py")
     args = get_parser().parse_args()
     logging.info(f"received args:\t{args}")
     model_tag = args.model
     verbose = args.verbose
+    top_k = args.top_k
+    alpha = args.penalty_alpha
+
+    assert top_k > 0, "top_k must be greater than 0"
+    assert alpha >= 0.0 and alpha <= 1.0, "penalty_alpha must be between 0 and 1"
+
     logging.info(f"Loading model: {model_tag}, use GPU = {use_gpu}")
     generator = pipeline(
         "text-generation",
@@ -228,16 +233,18 @@ if __name__ == "__main__":
                 value=2,
             )
         with gr.Row():
-
-                choices=[2, 4, 8],
+            contrastive_top_k = gr.Radio(
+                choices=[2, 4, 6, 8],
                 label="Number of Beams",
-                value=
+                value=top_k,
             )

-
-
-
-
+            penalty_alpha = gr.Slider(
+                label="Penalty Alpha",
+                value=alpha,
+                maximum=1.0,
+                minimum=0.0,
+                step=0.1,
             )
             length_penalty = gr.Slider(
                 minimum=0.5,
@@ -269,10 +276,10 @@ if __name__ == "__main__":
             inputs=[
                 prompt_text,
                 num_gen_tokens,
-
+                penalty_alpha,
+                contrastive_top_k,
                 no_repeat_ngram_size,
                 length_penalty,
-                num_beam_groups,
             ],
             outputs=[email_mailto_button, generated_email],
         )
```
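For context on the new parameters: in transformers, passing penalty_alpha > 0 together with top_k > 1 to a generate call (the text-generation pipeline forwards both) switches decoding to contrastive search, which re-ranks the top-k candidates at each step by model confidence minus a degeneration penalty (the candidate's similarity to the context generated so far). Below is a minimal standalone sketch of the call pattern this commit adopts; the gpt2 model tag and the prompt are placeholders for illustration, not what this Space actually loads:

```python
from transformers import pipeline

# "gpt2" is a stand-in model tag; the Space selects its model
# at startup via the --model CLI argument.
generator = pipeline("text-generation", model="gpt2")

result = generator(
    "Dear Professor Smith,",
    max_new_tokens=64,       # cap on newly generated tokens
    penalty_alpha=0.6,       # degeneration penalty; > 0 enables contrastive search
    top_k=6,                 # candidates re-ranked at each decoding step
    no_repeat_ngram_size=2,  # never repeat a bigram verbatim
)
print(result[0]["generated_text"])
```

With penalty_alpha=0 the same call reduces to greedy decoding, which is consistent with the 0 <= alpha <= 1 assertion the commit adds in the `__main__` block.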
requirements.txt
CHANGED

```diff
@@ -1,3 +1,3 @@
 gradio
 torch
-transformers
+transformers>=4.24.0
```
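A note on the new version floor: contrastive search first shipped in transformers v4.24.0, so the penalty_alpha/top_k arguments used in app.py need at least that release — presumably why the bare transformers entry gained a specifier. A quick sanity check in the same spirit as the asserts above (packaging is already a transformers dependency):

```python
from importlib.metadata import version

from packaging.version import Version

# contrastive search (penalty_alpha / top_k) was introduced in transformers 4.24.0
assert Version(version("transformers")) >= Version("4.24.0"), version("transformers")
```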