Spaces:

pszemraj
/

summarize-long-text

Running on CPU Upgrade

App Files Community

Peter committed on May 23, 2022

Commit

98a3ea7

1 Parent(s): 8281a66

add base model for faster rt

Browse files

Files changed (1) hide show

app.py +7 -3

app.py CHANGED Viewed

@@ -43,6 +43,7 @@ def truncate_word_count(text, max_words=512):
 def proc_submission(
     input_text: str,
     num_beams,
     token_batch_length,
     length_penalty,
@@ -74,6 +75,7 @@ def proc_submission(
     history = {}
     clean_text = clean(input_text, lower=False)
     processed = truncate_word_count(clean_text, max_input_length)
     if processed["was_truncated"]:
         tr_in = processed["truncated_text"]
@@ -86,8 +88,8 @@ def proc_submission(
     _summaries = summarize_via_tokenbatches(
         tr_in,
-        model,
-        tokenizer,
         batch_length=token_batch_length,
         **settings,
     )
@@ -128,6 +130,7 @@ def load_examples(examples_dir="examples"):
 if __name__ == "__main__":
     model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
     title = "Long-Form Summarization: LED & BookSum"
     description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
@@ -137,8 +140,9 @@ if __name__ == "__main__":
             gr.inputs.Textbox(
                 lines=10,
                 label="input text",
-                placeholder="Enter text to summarize, the text will be cleaned and truncated to 512 words on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well.",
             ),
             gr.inputs.Slider(
                 minimum=1, maximum=6, label="num_beams", default=4, step=1
             ),

 def proc_submission(
     input_text: str,
+    model_size: str,
     num_beams,
     token_batch_length,
     length_penalty,
     history = {}
     clean_text = clean(input_text, lower=False)
+    max_input_length = 1024 if model_size == "base" else max_input_length
     processed = truncate_word_count(clean_text, max_input_length)
     if processed["was_truncated"]:
         tr_in = processed["truncated_text"]
     _summaries = summarize_via_tokenbatches(
         tr_in,
+        model_sm if model_size == "base" else model,
+        tokenizer_sm if model_size == "base" else tokenizer,
         batch_length=token_batch_length,
         **settings,
     )
 if __name__ == "__main__":
     model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
+    model_sm, tokenizer_sm = load_model_and_tokenizer("pszemraj/led-base-book-summary")
     title = "Long-Form Summarization: LED & BookSum"
     description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
             gr.inputs.Textbox(
                 lines=10,
                 label="input text",
+                placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
             ),
+            gr.inputs.radio(choices=['base', 'large'], label='model size', default='base'),
             gr.inputs.Slider(
                 minimum=1, maximum=6, label="num_beams", default=4, step=1
             ),