Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
|
|
| 6 |
import os
|
| 7 |
from threading import Thread
|
| 8 |
|
| 9 |
-
import
|
| 10 |
import docx
|
| 11 |
from pptx import Presentation
|
| 12 |
|
|
@@ -56,7 +56,7 @@ def extract_text(path):
|
|
| 56 |
return open(path, 'r').read()
|
| 57 |
|
| 58 |
def extract_pdf(path):
|
| 59 |
-
doc =
|
| 60 |
text = ""
|
| 61 |
for page in doc:
|
| 62 |
text += page.get_text()
|
|
@@ -82,12 +82,13 @@ def extract_pptx(path):
|
|
| 82 |
def mode_load(path):
|
| 83 |
choice = ""
|
| 84 |
file_type = path.split(".")[-1]
|
|
|
|
| 85 |
if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
|
| 86 |
-
if file_type.endswith("
|
| 87 |
content = extract_pdf(path)
|
| 88 |
-
elif file_type.endswith("
|
| 89 |
content = extract_docx(path)
|
| 90 |
-
elif file_type.endswith("
|
| 91 |
content = extract_pptx(path)
|
| 92 |
else:
|
| 93 |
content = extract_text(path)
|
|
|
|
| 6 |
import os
|
| 7 |
from threading import Thread
|
| 8 |
|
| 9 |
+
import pymupdf
|
| 10 |
import docx
|
| 11 |
from pptx import Presentation
|
| 12 |
|
|
|
|
| 56 |
return open(path, 'r').read()
|
| 57 |
|
| 58 |
def extract_pdf(path):
|
| 59 |
+
doc = pymupdf.open(path)
|
| 60 |
text = ""
|
| 61 |
for page in doc:
|
| 62 |
text += page.get_text()
|
|
|
|
| 82 |
def mode_load(path):
|
| 83 |
choice = ""
|
| 84 |
file_type = path.split(".")[-1]
|
| 85 |
+
print(file_type)
|
| 86 |
if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
|
| 87 |
+
if file_type.endswith("pdf"):
|
| 88 |
content = extract_pdf(path)
|
| 89 |
+
elif file_type.endswith("docx"):
|
| 90 |
content = extract_docx(path)
|
| 91 |
+
elif file_type.endswith("pptx"):
|
| 92 |
content = extract_pptx(path)
|
| 93 |
else:
|
| 94 |
content = extract_text(path)
|