Spaces:
Runtime error
Runtime error
Ignoring book if missing.
Browse files
utils.py
CHANGED
|
@@ -183,12 +183,13 @@ def get_links(index_url, paths):
|
|
| 183 |
|
| 184 |
def get_document_data(book_file, book_url):
|
| 185 |
document_list = []
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
| 192 |
|
| 193 |
# print("document list" + str(len(document_list)))
|
| 194 |
return document_list
|
|
|
|
| 183 |
|
| 184 |
def get_document_data(book_file, book_url):
    """Build one ``Document`` per page of the PDF at *book_file*.

    Args:
        book_file: Filesystem path of the PDF to read.
        book_url: Value stored under the ``"source"`` key of every page's
            metadata.

    Returns:
        A list of ``Document`` objects in page order. The list is empty when
        *book_file* is not an existing regular file, so a missing book is
        ignored instead of raising.
    """
    # A missing book is skipped on purpose: callers get an empty list.
    if not os.path.isfile(book_file):
        return []

    document_list = []
    with open(book_file, 'rb') as f:
        pdf_reader = PdfReader(f)
        # Text is extracted while the file is still open, since the reader
        # was constructed from this handle.
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            # A fresh dict per page so documents never share mutable metadata.
            metadata = {"source": book_url}
            document_list.append(Document(page_content=page_text, metadata=metadata))
    return document_list
|