Spaces:
Running
Running
Update retrival.py
Browse files: retrival.py +2 -1
retrival.py
CHANGED
|
@@ -18,6 +18,7 @@ pytesseract.pytesseract.tesseract_cmd = (r'/usr/bin/tesseract')
|
|
| 18 |
# Configurations
|
| 19 |
UPLOAD_FOLDER = "./uploads"
|
| 20 |
VECTOR_DB_FOLDER = "./VectorDB"
|
|
|
|
| 21 |
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 22 |
os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)
|
| 23 |
|
|
@@ -41,7 +42,7 @@ def load_document(data_path):
|
|
| 41 |
try:
|
| 42 |
# Determine the file type based on extension
|
| 43 |
filename, file_extension = os.path.splitext(file.lower())
|
| 44 |
-
image_output = f"
|
| 45 |
# Use specific partition techniques based on file extension
|
| 46 |
if file_extension == ".pdf":
|
| 47 |
elements = partition_pdf(
|
|
|
|
| 18 |
# Configurations
|
| 19 |
UPLOAD_FOLDER = "./uploads"
|
| 20 |
VECTOR_DB_FOLDER = "./VectorDB"
|
| 21 |
+
IMAGE_DB_FOLDER = "./Images"
|
| 22 |
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 23 |
os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)
|
| 24 |
|
|
|
|
| 42 |
try:
|
| 43 |
# Determine the file type based on extension
|
| 44 |
filename, file_extension = os.path.splitext(file.lower())
|
| 45 |
+
image_output = f"./Images/{filename}/"
|
| 46 |
# Use specific partition techniques based on file extension
|
| 47 |
if file_extension == ".pdf":
|
| 48 |
elements = partition_pdf(
|