Spaces:
Runtime error
Runtime error
docs
Browse files
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
app.py
CHANGED
|
@@ -212,6 +212,7 @@ def proc_submission(
|
|
| 212 |
length_penalty (float): the length penalty to use
|
| 213 |
repetition_penalty (float): the repetition penalty to use
|
| 214 |
no_repeat_ngram_size (int): the no repeat ngram size to use
|
|
|
|
| 215 |
max_input_length (int, optional): the maximum input length to use. Defaults to 6144.
|
| 216 |
|
| 217 |
Note:
|
|
@@ -219,7 +220,7 @@ def proc_submission(
|
|
| 219 |
environment variable APP_MAX_WORDS to a different value.
|
| 220 |
|
| 221 |
Returns:
|
| 222 |
-
|
| 223 |
"""
|
| 224 |
|
| 225 |
remove_stagnant_files() # clean up old files
|
|
@@ -257,7 +258,7 @@ def proc_submission(
|
|
| 257 |
msg = f"""
|
| 258 |
<div style="background-color: #FFA500; color: white; padding: 20px;">
|
| 259 |
<h3>Warning</h3>
|
| 260 |
-
<p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/
|
| 261 |
<p>Dropping stopwords is set to {predrop_stopwords}. If this is not what you intended, please validate the advanced settings.</p>
|
| 262 |
</div>
|
| 263 |
"""
|
|
@@ -267,6 +268,22 @@ def proc_submission(
|
|
| 267 |
model_input_text = truncation_validated["processed_text"]
|
| 268 |
msg = None
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
if len(input_text) < 50:
|
| 271 |
# this is essentially a different case from the above
|
| 272 |
msg = f"""
|
|
@@ -589,8 +606,8 @@ if __name__ == "__main__":
|
|
| 589 |
)
|
| 590 |
gr.Markdown(
|
| 591 |
f"""Aggregate the above batches into a cohesive summary.
|
| 592 |
-
-
|
| 593 |
-
-
|
| 594 |
"""
|
| 595 |
)
|
| 596 |
with gr.Column(variant="panel"):
|
|
|
|
| 212 |
length_penalty (float): the length penalty to use
|
| 213 |
repetition_penalty (float): the repetition penalty to use
|
| 214 |
no_repeat_ngram_size (int): the no repeat ngram size to use
|
| 215 |
+
predrop_stopwords (bool): whether to pre-drop stopwords before truncating/summarizing
|
| 216 |
max_input_length (int, optional): the maximum input length to use. Defaults to 6144.
|
| 217 |
|
| 218 |
Note:
|
|
|
|
| 220 |
environment variable APP_MAX_WORDS to a different value.
|
| 221 |
|
| 222 |
Returns:
|
| 223 |
+
tuple (4): a tuple containing the following:
|
| 224 |
"""
|
| 225 |
|
| 226 |
remove_stagnant_files() # clean up old files
|
|
|
|
| 258 |
msg = f"""
|
| 259 |
<div style="background-color: #FFA500; color: white; padding: 20px;">
|
| 260 |
<h3>Warning</h3>
|
| 261 |
+
<p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/input_wc:.2f}% of the original text.</p>
|
| 262 |
<p>Dropping stopwords is set to {predrop_stopwords}. If this is not what you intended, please validate the advanced settings.</p>
|
| 263 |
</div>
|
| 264 |
"""
|
|
|
|
| 268 |
model_input_text = truncation_validated["processed_text"]
|
| 269 |
msg = None
|
| 270 |
|
| 271 |
+
if predrop_stopwords:
|
| 272 |
+
# TODO: remove this
|
| 273 |
+
|
| 274 |
+
outdir = Path.cwd() / "scratch" / "predrop_stopwords-v4"
|
| 275 |
+
outdir.mkdir(parents=True, exist_ok=True)
|
| 276 |
+
keywords_cln = " ".join(extract_keywords(cln_text, kw_max_len=4))
|
| 277 |
+
keywords_sw_removed = "_".join(extract_keywords(model_input_text, kw_max_len=4))
|
| 278 |
+
cln_filename = f"{keywords_cln}_{len(cln_text)}.txt"
|
| 279 |
+
cln_outdir = outdir.parent / "source-text"
|
| 280 |
+
cln_outdir.mkdir(parents=True, exist_ok=True)
|
| 281 |
+
with open(cln_outdir / cln_filename, "w", encoding="utf-8") as f:
|
| 282 |
+
f.write(cln_text)
|
| 283 |
+
sw_rm_filename = f"{keywords_sw_removed}_{len(model_input_text)}.txt"
|
| 284 |
+
with open(outdir / sw_rm_filename, "w", encoding="utf-8") as f:
|
| 285 |
+
f.write(model_input_text)
|
| 286 |
+
logging.info(f"saved predrop_stopwords file to {outdir / sw_rm_filename}")
|
| 287 |
if len(input_text) < 50:
|
| 288 |
# this is essentially a different case from the above
|
| 289 |
msg = f"""
|
|
|
|
| 606 |
)
|
| 607 |
gr.Markdown(
|
| 608 |
f"""Aggregate the above batches into a cohesive summary.
|
| 609 |
+
- A secondary instruct-tuned LM consolidates info
|
| 610 |
+
- Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
|
| 611 |
"""
|
| 612 |
)
|
| 613 |
with gr.Column(variant="panel"):
|