Spaces:
Runtime error
Runtime error
peter szemraj
commited on
Commit
·
81d65e8
1
Parent(s):
d9f8cf2
:tada: init
Browse files- .gitignore +21 -0
- app.py +182 -0
.gitignore
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# ignore gradio db files# sys files
|
| 3 |
+
*__pycache__*
|
| 4 |
+
*__pycache__/
|
| 5 |
+
|
| 6 |
+
# data
|
| 7 |
+
|
| 8 |
+
*.txt
|
| 9 |
+
*.pkl
|
| 10 |
+
*flagged/
|
| 11 |
+
|
| 12 |
+
# ignore log files
|
| 13 |
+
*.log
|
| 14 |
+
*logs/
|
| 15 |
+
|
| 16 |
+
# scratch
|
| 17 |
+
*scratch/
|
| 18 |
+
*scratch*
|
| 19 |
+
|
| 20 |
+
# notebooks
|
| 21 |
+
*notebooks/
|
app.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import torch
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
|
| 8 |
+
logging.basicConfig(
|
| 9 |
+
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
use_gpu = torch.cuda.is_available()
|
| 13 |
+
|
| 14 |
+
def generate_text(
|
| 15 |
+
prompt: str,
|
| 16 |
+
gen_length=64,
|
| 17 |
+
num_beams=4,
|
| 18 |
+
no_repeat_ngram_size=2,
|
| 19 |
+
length_penalty=1.0,
|
| 20 |
+
# perma params (not set by user)
|
| 21 |
+
repetition_penalty=3.5,
|
| 22 |
+
abs_max_length=512,
|
| 23 |
+
verbose=False,
|
| 24 |
+
):
|
| 25 |
+
"""
|
| 26 |
+
generate_text - generate text from a prompt using a text generation pipeline
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
prompt (str): the prompt to generate text from
|
| 30 |
+
model_input (_type_): the text generation pipeline
|
| 31 |
+
max_length (int, optional): the maximum length of the generated text. Defaults to 128.
|
| 32 |
+
method (str, optional): the generation method. Defaults to "Sampling".
|
| 33 |
+
verbose (bool, optional): the verbosity of the output. Defaults to False.
|
| 34 |
+
|
| 35 |
+
Returns:
|
| 36 |
+
str: the generated text
|
| 37 |
+
"""
|
| 38 |
+
global generator
|
| 39 |
+
logging.info(f"Generating text from prompt: {prompt}")
|
| 40 |
+
st = time.perf_counter()
|
| 41 |
+
|
| 42 |
+
input_tokens = generator.tokenizer(prompt)
|
| 43 |
+
input_len = len(input_tokens['input_ids'])
|
| 44 |
+
if input_len > abs_max_length:
|
| 45 |
+
logging.info(f"Input too long {input_len} > {abs_max_length}, may cause errors")
|
| 46 |
+
result = generator(
|
| 47 |
+
prompt,
|
| 48 |
+
max_length=gen_length + input_len,
|
| 49 |
+
min_length=input_len + 4,
|
| 50 |
+
num_beams=num_beams,
|
| 51 |
+
repetition_penalty=repetition_penalty,
|
| 52 |
+
no_repeat_ngram_size=no_repeat_ngram_size,
|
| 53 |
+
length_penalty=length_penalty,
|
| 54 |
+
do_sample=False,
|
| 55 |
+
early_stopping=True,
|
| 56 |
+
# tokenizer
|
| 57 |
+
truncation=True,
|
| 58 |
+
|
| 59 |
+
) # generate
|
| 60 |
+
response = result[0]['generated_text']
|
| 61 |
+
rt = time.perf_counter() - st
|
| 62 |
+
if verbose:
|
| 63 |
+
logging.info(f"Generated text: {response}")
|
| 64 |
+
logging.info(f"Generation time: {rt:.2f}s")
|
| 65 |
+
return response
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def get_parser():
|
| 69 |
+
"""
|
| 70 |
+
get_parser - a helper function for the argparse module
|
| 71 |
+
"""
|
| 72 |
+
parser = argparse.ArgumentParser(
|
| 73 |
+
description="Text Generation demo for postbot",
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
parser.add_argument(
|
| 77 |
+
'-m',
|
| 78 |
+
'--model',
|
| 79 |
+
required=False,
|
| 80 |
+
type=str,
|
| 81 |
+
default="postbot/distilgpt2-emailgen",
|
| 82 |
+
help='Pass an different huggingface model tag to use a custom model',
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
parser.add_argument(
|
| 86 |
+
"-v",
|
| 87 |
+
"--verbose",
|
| 88 |
+
required=False,
|
| 89 |
+
action="store_true",
|
| 90 |
+
help="Verbose output",
|
| 91 |
+
)
|
| 92 |
+
return parser
|
| 93 |
+
|
| 94 |
+
default_prompt = """
|
| 95 |
+
Hello,
|
| 96 |
+
|
| 97 |
+
Following up on the bubblegum shipment."""
|
| 98 |
+
|
| 99 |
+
if __name__ == "__main__":
|
| 100 |
+
logging.info("\n\n\nStarting new instance of app.py")
|
| 101 |
+
args = get_parser().parse_args()
|
| 102 |
+
logging.info(f"received args:\t{args}")
|
| 103 |
+
model_tag = args.model
|
| 104 |
+
verbose = args.verbose
|
| 105 |
+
logging.info(f"Loading model: {model_tag}, use GPU = {use_gpu}")
|
| 106 |
+
generator = pipeline(
|
| 107 |
+
"text-generation",
|
| 108 |
+
model_tag,
|
| 109 |
+
device=0 if use_gpu else -1,
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
demo = gr.Blocks()
|
| 114 |
+
|
| 115 |
+
logging.info("launching interface...")
|
| 116 |
+
|
| 117 |
+
with demo:
|
| 118 |
+
gr.Markdown("# Autocompleting Emails with Textgen - Demo")
|
| 119 |
+
gr.Markdown(
|
| 120 |
+
"Enter part of an email, and the model will autocomplete it for you!"
|
| 121 |
+
)
|
| 122 |
+
gr.Markdown('The model used is [postbot/distilgpt2-emailgen](https://huggingface.co/postbot/distilgpt2-emailgen)')
|
| 123 |
+
gr.Markdown("---")
|
| 124 |
+
|
| 125 |
+
with gr.Column():
|
| 126 |
+
|
| 127 |
+
gr.Markdown("## Generate Text")
|
| 128 |
+
gr.Markdown(
|
| 129 |
+
"Enter/edit the prompt and adjust the parameters as needed. Then press the Generate button!"
|
| 130 |
+
)
|
| 131 |
+
prompt_text = gr.Textbox(
|
| 132 |
+
lines=4,
|
| 133 |
+
label="Email Prompt",
|
| 134 |
+
value=default_prompt,
|
| 135 |
+
)
|
| 136 |
+
num_gen_tokens = gr.Slider(
|
| 137 |
+
label="Generation Tokens",
|
| 138 |
+
default=64,
|
| 139 |
+
maximum=128,
|
| 140 |
+
minimum=32,
|
| 141 |
+
step=16,
|
| 142 |
+
)
|
| 143 |
+
num_beams = gr.Radio(
|
| 144 |
+
choices=[4, 8, 16],
|
| 145 |
+
label="num beams",
|
| 146 |
+
value=4,
|
| 147 |
+
)
|
| 148 |
+
no_repeat_ngram_size = gr.Radio(
|
| 149 |
+
choices=[1, 2, 3, 4],
|
| 150 |
+
label="no repeat ngram size",
|
| 151 |
+
value=2,
|
| 152 |
+
)
|
| 153 |
+
length_penalty = gr.Slider(
|
| 154 |
+
minimum=0.5, maximum=1.0, label="length penalty", default=0.8, step=0.05
|
| 155 |
+
)
|
| 156 |
+
generated_email = gr.Textbox(
|
| 157 |
+
label="Generated Result", placeholder="The completed email will appear here"
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
+
generate_button = gr.Button(
|
| 161 |
+
"Generate!",
|
| 162 |
+
)
|
| 163 |
+
gr.Markdown("---")
|
| 164 |
+
|
| 165 |
+
with gr.Column():
|
| 166 |
+
|
| 167 |
+
gr.Markdown("## About")
|
| 168 |
+
gr.Markdown(
|
| 169 |
+
"This model is a fine-tuned version of distilgpt2 on a dataset of 50k emails sourced from the internet, including the classic `aeslc` dataset."
|
| 170 |
+
)
|
| 171 |
+
gr.Markdown("The intended use of this model is to provide suggestions to _auto-complete_ the rest of your email. Said another way, it should serve as a **tool to write predictable emails faster**. It is not intended to write entire emails; at least **some input** is required to guide the direction of the model.\n\nPlease verify any suggestions by the model for A) False claims and B) negation statements before accepting/sending something.")
|
| 172 |
+
gr.Markdown("---")
|
| 173 |
+
|
| 174 |
+
generate_button.click(
|
| 175 |
+
fn=generate_text,
|
| 176 |
+
inputs=[prompt_text, num_gen_tokens, num_beams, no_repeat_ngram_size, length_penalty],
|
| 177 |
+
outputs=[generated_email],
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
demo.launch(
|
| 181 |
+
enable_queue=True,
|
| 182 |
+
)
|