Update Wikidata_Text_Parser.py
Browse files
- Wikidata_Text_Parser.py +4 -1
Wikidata_Text_Parser.py
CHANGED
|
@@ -870,8 +870,11 @@ def html2text(html_set):
|
|
| 870 |
splitter = SentenceSplitter(language='en')
|
| 871 |
|
| 872 |
seg = pysbd.Segmenter(language="en", clean=False)
|
|
|
|
|
|
|
|
|
|
| 873 |
|
| 874 |
-
nlp = spacy.load("
|
| 875 |
|
| 876 |
text = reference_html_df.loc[0,'extracted_text']
|
| 877 |
|
|
|
|
| 870 |
splitter = SentenceSplitter(language='en')
|
| 871 |
|
| 872 |
seg = pysbd.Segmenter(language="en", clean=False)
|
| 873 |
+
|
| 874 |
+
if not spacy.util.is_package("en_core_web_lg"):
|
| 875 |
+
os.system("python -m spacy download en_core_web_lg")
|
| 876 |
|
| 877 |
+
nlp = spacy.load("en_core_web_lg")
|
| 878 |
|
| 879 |
text = reference_html_df.loc[0,'extracted_text']
|
| 880 |
|