Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Peter
committed on
Commit
·
98a3ea7
1
Parent(s):
8281a66
add base model for faster rt
Browse files
app.py
CHANGED
|
@@ -43,6 +43,7 @@ def truncate_word_count(text, max_words=512):
|
|
| 43 |
|
| 44 |
def proc_submission(
|
| 45 |
input_text: str,
|
|
|
|
| 46 |
num_beams,
|
| 47 |
token_batch_length,
|
| 48 |
length_penalty,
|
|
@@ -74,6 +75,7 @@ def proc_submission(
|
|
| 74 |
|
| 75 |
history = {}
|
| 76 |
clean_text = clean(input_text, lower=False)
|
|
|
|
| 77 |
processed = truncate_word_count(clean_text, max_input_length)
|
| 78 |
if processed["was_truncated"]:
|
| 79 |
tr_in = processed["truncated_text"]
|
|
@@ -86,8 +88,8 @@ def proc_submission(
|
|
| 86 |
|
| 87 |
_summaries = summarize_via_tokenbatches(
|
| 88 |
tr_in,
|
| 89 |
-
model,
|
| 90 |
-
tokenizer,
|
| 91 |
batch_length=token_batch_length,
|
| 92 |
**settings,
|
| 93 |
)
|
|
@@ -128,6 +130,7 @@ def load_examples(examples_dir="examples"):
|
|
| 128 |
if __name__ == "__main__":
|
| 129 |
|
| 130 |
model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
|
|
|
|
| 131 |
title = "Long-Form Summarization: LED & BookSum"
|
| 132 |
description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
|
| 133 |
|
|
@@ -137,8 +140,9 @@ if __name__ == "__main__":
|
|
| 137 |
gr.inputs.Textbox(
|
| 138 |
lines=10,
|
| 139 |
label="input text",
|
| 140 |
-
placeholder="Enter text to summarize, the text will be cleaned and truncated
|
| 141 |
),
|
|
|
|
| 142 |
gr.inputs.Slider(
|
| 143 |
minimum=1, maximum=6, label="num_beams", default=4, step=1
|
| 144 |
),
|
|
|
|
| 43 |
|
| 44 |
def proc_submission(
|
| 45 |
input_text: str,
|
| 46 |
+
model_size: str,
|
| 47 |
num_beams,
|
| 48 |
token_batch_length,
|
| 49 |
length_penalty,
|
|
|
|
| 75 |
|
| 76 |
history = {}
|
| 77 |
clean_text = clean(input_text, lower=False)
|
| 78 |
+
max_input_length = 1024 if model_size == "base" else max_input_length
|
| 79 |
processed = truncate_word_count(clean_text, max_input_length)
|
| 80 |
if processed["was_truncated"]:
|
| 81 |
tr_in = processed["truncated_text"]
|
|
|
|
| 88 |
|
| 89 |
_summaries = summarize_via_tokenbatches(
|
| 90 |
tr_in,
|
| 91 |
+
model_sm if model_size == "base" else model,
|
| 92 |
+
tokenizer_sm if model_size == "base" else tokenizer,
|
| 93 |
batch_length=token_batch_length,
|
| 94 |
**settings,
|
| 95 |
)
|
|
|
|
| 130 |
if __name__ == "__main__":
|
| 131 |
|
| 132 |
model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
|
| 133 |
+
model_sm, tokenizer_sm = load_model_and_tokenizer("pszemraj/led-base-book-summary")
|
| 134 |
title = "Long-Form Summarization: LED & BookSum"
|
| 135 |
description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
|
| 136 |
|
|
|
|
| 140 |
gr.inputs.Textbox(
|
| 141 |
lines=10,
|
| 142 |
label="input text",
|
| 143 |
+
placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
|
| 144 |
),
|
| 145 |
+
gr.inputs.radio(choices=['base', 'large'], label='model size', default='base'),
|
| 146 |
gr.inputs.Slider(
|
| 147 |
minimum=1, maximum=6, label="num_beams", default=4, step=1
|
| 148 |
),
|