Spaces:
Runtime error
Runtime error
html UI updates
Browse files
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
app.py
CHANGED
|
@@ -36,7 +36,12 @@ from doctr.models import ocr_predictor
|
|
| 36 |
|
| 37 |
from pdf2text import convert_PDF_to_Text
|
| 38 |
from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
|
| 39 |
-
from utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
_here = Path(__file__).parent
|
| 42 |
|
|
@@ -179,15 +184,13 @@ def proc_submission(
|
|
| 179 |
token_batch_length=token_batch_length,
|
| 180 |
**settings,
|
| 181 |
)
|
| 182 |
-
sum_text = [
|
| 183 |
-
f"Batch {i}:\n\t" + s["summary"][0] for i, s in enumerate(_summaries, start=1)
|
| 184 |
-
]
|
| 185 |
sum_scores = [
|
| 186 |
f" - Batch Summary {i}: {round(s['summary_score'],4)}"
|
| 187 |
for i, s in enumerate(_summaries)
|
| 188 |
]
|
| 189 |
|
| 190 |
-
|
| 191 |
history["Summary Scores"] = "<br><br>"
|
| 192 |
scores_out = "\n".join(sum_scores)
|
| 193 |
rt = round((time.perf_counter() - st) / 60, 2)
|
|
@@ -203,7 +206,7 @@ def proc_submission(
|
|
| 203 |
settings["model_name"] = model_name
|
| 204 |
saved_file = saves_summary(summarize_output=_summaries, outpath=None, **settings)
|
| 205 |
|
| 206 |
-
return html,
|
| 207 |
|
| 208 |
|
| 209 |
def load_single_example_text(
|
|
@@ -356,22 +359,26 @@ if __name__ == "__main__":
|
|
| 356 |
|
| 357 |
output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
|
| 358 |
gr.Markdown("### Summary Output")
|
| 359 |
-
summary_text = gr.
|
| 360 |
-
label="Summary",
|
| 361 |
-
)
|
| 362 |
-
gr.Markdown(
|
| 363 |
-
"The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
|
| 364 |
-
)
|
| 365 |
-
summary_scores = gr.Textbox(
|
| 366 |
-
label="Summary Scores", placeholder="Summary scores will appear here"
|
| 367 |
-
)
|
| 368 |
-
|
| 369 |
-
text_file = gr.File(
|
| 370 |
-
label="Download as Text File",
|
| 371 |
-
file_count="single",
|
| 372 |
-
type="file",
|
| 373 |
-
interactive=False,
|
| 374 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
gr.Markdown("---")
|
| 377 |
with gr.Column():
|
|
|
|
| 36 |
|
| 37 |
from pdf2text import convert_PDF_to_Text
|
| 38 |
from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
|
| 39 |
+
from utils import (
|
| 40 |
+
load_example_filenames,
|
| 41 |
+
saves_summary,
|
| 42 |
+
textlist2html,
|
| 43 |
+
truncate_word_count,
|
| 44 |
+
)
|
| 45 |
|
| 46 |
_here = Path(__file__).parent
|
| 47 |
|
|
|
|
| 184 |
token_batch_length=token_batch_length,
|
| 185 |
**settings,
|
| 186 |
)
|
| 187 |
+
sum_text = [s["summary"][0].strip() + "\n" for i, s in _summaries]
|
|
|
|
|
|
|
| 188 |
sum_scores = [
|
| 189 |
f" - Batch Summary {i}: {round(s['summary_score'],4)}"
|
| 190 |
for i, s in enumerate(_summaries)
|
| 191 |
]
|
| 192 |
|
| 193 |
+
full_summary = textlist2html(sum_text)
|
| 194 |
history["Summary Scores"] = "<br><br>"
|
| 195 |
scores_out = "\n".join(sum_scores)
|
| 196 |
rt = round((time.perf_counter() - st) / 60, 2)
|
|
|
|
| 206 |
settings["model_name"] = model_name
|
| 207 |
saved_file = saves_summary(summarize_output=_summaries, outpath=None, **settings)
|
| 208 |
|
| 209 |
+
return html, full_summary, scores_out, saved_file
|
| 210 |
|
| 211 |
|
| 212 |
def load_single_example_text(
|
|
|
|
| 359 |
|
| 360 |
output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
|
| 361 |
gr.Markdown("### Summary Output")
|
| 362 |
+
summary_text = gr.HTML(
|
| 363 |
+
label="Summary", value="<i>Summary will appear here!</i>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
)
|
| 365 |
+
with gr.Column():
|
| 366 |
+
gr.Markdown("Export & Summary Scores")
|
| 367 |
+
with gr.Row(variant="panel"):
|
| 368 |
+
text_file = gr.File(
|
| 369 |
+
label="Download as Text File",
|
| 370 |
+
file_count="single",
|
| 371 |
+
type="file",
|
| 372 |
+
interactive=False,
|
| 373 |
+
)
|
| 374 |
+
with gr.Row(variant="panel"):
|
| 375 |
+
gr.Markdown(
|
| 376 |
+
"The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
|
| 377 |
+
)
|
| 378 |
+
summary_scores = gr.Textbox(
|
| 379 |
+
label="Summary Scores",
|
| 380 |
+
placeholder="Summary scores will appear here",
|
| 381 |
+
)
|
| 382 |
|
| 383 |
gr.Markdown("---")
|
| 384 |
with gr.Column():
|
utils.py
CHANGED
|
@@ -106,6 +106,28 @@ def load_example_filenames(example_path: str or Path):
|
|
| 106 |
return examples
|
| 107 |
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
def extract_keywords(
|
| 110 |
text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
|
| 111 |
) -> List[str]:
|
|
|
|
| 106 |
return examples
|
| 107 |
|
| 108 |
|
| 109 |
+
def textlist2html(text_batches):
    """Render a sequence of text batches as styled HTML cards.

    Each entry becomes one ``<div>`` containing a ``Batch N:`` heading
    (numbered from 1) followed by the entry's text in a ``<p>`` styled with
    ``white-space: pre-line`` so newlines in the text show as line breaks.

    Args:
        text_batches: iterable of texts, one per batch. Each item is
            converted with ``str()`` before being inserted.

    Returns:
        str: the per-batch HTML fragments joined with newlines, or an empty
        string when ``text_batches`` is empty.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from html import escape

    html_list = [
        f"""
        <div style="
            margin-bottom: 20px;
            font-size: 18px;
            line-height: 1.5em;
            color: #333;
            background-color: #f9f9f9;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 20px;
        ">
            <h2 style="font-size: 22px; color: #555;">Batch {i}:</h2>
            <p style="white-space: pre-line;">{escape(str(s), quote=False)}</p>
        </div>
        """
        # The text is placed in element content, so only <, > and & need
        # escaping; without it, such characters would be parsed as markup.
        for i, s in enumerate(text_batches, start=1)
    ]
    return "\n".join(html_list)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
def extract_keywords(
|
| 132 |
text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
|
| 133 |
) -> List[str]:
|