Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
| 6 |
import os
|
| 7 |
from threading import Thread
|
| 8 |
|
| 9 |
-
|
| 10 |
import docx
|
| 11 |
from pptx import Presentation
|
| 12 |
|
|
@@ -56,11 +56,11 @@ def extract_text(path):
|
|
| 56 |
return open(path, 'r').read()
|
| 57 |
|
| 58 |
def extract_pdf(path):
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
return
|
| 64 |
|
| 65 |
def extract_docx(path):
|
| 66 |
doc = docx.Document(path)
|
|
@@ -68,6 +68,7 @@ def extract_docx(path):
|
|
| 68 |
for paragraph in doc.paragraphs:
|
| 69 |
data.append(paragraph.text)
|
| 70 |
content = '\n\n'.join(data)
|
|
|
|
| 71 |
|
| 72 |
def extract_pptx(path):
|
| 73 |
prs = Presentation(path)
|
|
@@ -91,8 +92,8 @@ def mode_load(path):
|
|
| 91 |
else:
|
| 92 |
content = extract_text(path)
|
| 93 |
choice = "doc"
|
| 94 |
-
print(content)
|
| 95 |
-
return choice, content
|
| 96 |
elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
|
| 97 |
content = Image.open(path).convert('RGB')
|
| 98 |
choice = "image"
|
|
|
|
| 6 |
import os
|
| 7 |
from threading import Thread
|
| 8 |
|
| 9 |
+
import fitz
|
| 10 |
import docx
|
| 11 |
from pptx import Presentation
|
| 12 |
|
|
|
|
| 56 |
return open(path, 'r').read()
|
| 57 |
|
| 58 |
def extract_pdf(path):
|
| 59 |
+
doc = fitz.open(path)
|
| 60 |
+
text = ""
|
| 61 |
+
for page in doc:
|
| 62 |
+
text += page.get_text()
|
| 63 |
+
return text
|
| 64 |
|
| 65 |
def extract_docx(path):
|
| 66 |
doc = docx.Document(path)
|
|
|
|
| 68 |
for paragraph in doc.paragraphs:
|
| 69 |
data.append(paragraph.text)
|
| 70 |
content = '\n\n'.join(data)
|
| 71 |
+
return content
|
| 72 |
|
| 73 |
def extract_pptx(path):
|
| 74 |
prs = Presentation(path)
|
|
|
|
| 92 |
else:
|
| 93 |
content = extract_text(path)
|
| 94 |
choice = "doc"
|
| 95 |
+
print(content[:100])
|
| 96 |
+
return choice, content[:5000]
|
| 97 |
elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
|
| 98 |
content = Image.open(path).convert('RGB')
|
| 99 |
choice = "image"
|