Spaces:
Runtime error
Runtime error
consolidate outname
Browse files
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
utils.py
CHANGED
|
@@ -107,16 +107,16 @@ def load_example_filenames(example_path: str or Path):
|
|
| 107 |
|
| 108 |
|
| 109 |
def extract_keywords(
|
| 110 |
-
text: str, num_keywords: int = 3, window_size: int = 5
|
| 111 |
) -> List[str]:
|
| 112 |
"""
|
| 113 |
Extracts keywords from a text using a simplified TextRank algorithm.
|
| 114 |
|
| 115 |
Args:
|
| 116 |
text: The text to extract keywords from.
|
| 117 |
-
num_keywords: The number of keywords to extract. Default
|
| 118 |
-
window_size: The number of words considered for co-occurrence. Default
|
| 119 |
-
|
| 120 |
Returns:
|
| 121 |
A list of strings, where each string is a keyword extracted from the input text.
|
| 122 |
"""
|
|
@@ -155,8 +155,8 @@ def extract_keywords(
|
|
| 155 |
final_keywords = []
|
| 156 |
for keyword in keywords:
|
| 157 |
if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
|
| 158 |
-
final_keywords.append(keyword)
|
| 159 |
-
logger.debug(f"Keywords (
|
| 160 |
return final_keywords
|
| 161 |
|
| 162 |
|
|
@@ -177,10 +177,10 @@ def saves_summary(
|
|
| 177 |
scores_text = "\n".join(sum_scores)
|
| 178 |
full_summary = "\n".join(sum_text)
|
| 179 |
|
| 180 |
-
keywords = "_".join(extract_keywords(full_summary))
|
| 181 |
logger.debug(f"kw:\t{keywords}")
|
| 182 |
outpath = (
|
| 183 |
-
Path.cwd() / f"
|
| 184 |
if outpath is None
|
| 185 |
else Path(outpath)
|
| 186 |
)
|
|
|
|
| 107 |
|
| 108 |
|
| 109 |
def extract_keywords(
|
| 110 |
+
text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
|
| 111 |
) -> List[str]:
|
| 112 |
"""
|
| 113 |
Extracts keywords from a text using a simplified TextRank algorithm.
|
| 114 |
|
| 115 |
Args:
|
| 116 |
text: The text to extract keywords from.
|
| 117 |
+
num_keywords: The number of keywords to extract. Default: 3
|
| 118 |
+
window_size: The number of words considered for co-occurrence. Default: 5
|
| 119 |
+
kw_max_len: The maximum length of a keyword (truncate longer keywords to max). Default: 20
|
| 120 |
Returns:
|
| 121 |
A list of strings, where each string is a keyword extracted from the input text.
|
| 122 |
"""
|
|
|
|
| 155 |
final_keywords = []
|
| 156 |
for keyword in keywords:
|
| 157 |
if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
|
| 158 |
+
final_keywords.append(keyword[:kw_max_len])
|
| 159 |
+
logger.debug(f"Keywords (max len. {kw_max_len}):\t{final_keywords}")
|
| 160 |
return final_keywords
|
| 161 |
|
| 162 |
|
|
|
|
| 177 |
scores_text = "\n".join(sum_scores)
|
| 178 |
full_summary = "\n".join(sum_text)
|
| 179 |
|
| 180 |
+
keywords = "_".join(extract_keywords(full_summary, kw_max_len=4))
|
| 181 |
logger.debug(f"kw:\t{keywords}")
|
| 182 |
outpath = (
|
| 183 |
+
Path.cwd() / f"DocSummary_{keywords}_{get_timestamp()}.txt"
|
| 184 |
if outpath is None
|
| 185 |
else Path(outpath)
|
| 186 |
)
|