Spaces:
Sleeping
Sleeping
Pranjal Gupta
committed on
Commit
·
d272c6c
1
Parent(s):
157b2e3
meta chunks error fixed
Browse files
app.py
CHANGED
|
@@ -45,7 +45,24 @@ def process_pdf(file_path):
|
|
| 45 |
|
| 46 |
# Ingest into the vector store
|
| 47 |
# Note: A docId is added to group these documents
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
gr.Info("PDF processed and ready for questions!")
|
| 50 |
|
| 51 |
# Your existing functions
|
|
|
|
| 45 |
|
| 46 |
# Ingest into the vector store
|
| 47 |
# Note: A docId is added to group these documents
|
| 48 |
+
metadata_chunks = []
|
| 49 |
+
# Wrap each chunk in a Document that carries its metadata
|
| 50 |
+
for i, chunk in enumerate(texts):
|
| 51 |
+
# Add metadata to each chunk
|
| 52 |
+
metadata = {"source": f"example_source_{i}", "docId":str("42")}
|
| 53 |
+
id = str(i)
|
| 54 |
+
doc_with_metadata = Document(
|
| 55 |
+
page_content=chunk.page_content, metadata=metadata, id=id,docId="42"
|
| 56 |
+
)
|
| 57 |
+
metadata_chunks.append(doc_with_metadata)
|
| 58 |
+
|
| 59 |
+
print("Done")
|
| 60 |
+
|
| 61 |
+
# Add the documents to the vector database
|
| 62 |
+
try:
|
| 63 |
+
vectorDB.add_documents(metadata_chunks)
|
| 64 |
+
except:
|
| 65 |
+
raise Exception()
|
| 66 |
gr.Info("PDF processed and ready for questions!")
|
| 67 |
|
| 68 |
# Your existing functions
|