Spaces:
Sleeping
Sleeping
| import pickle | |
| from pathlib import Path | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_openai import OpenAIEmbeddings | |
# Ingestion script: load the pickled chunks from <BASE_DIR>/output/chunks.pkl
# and add them, batch by batch, to a Chroma store rooted at <BASE_DIR>/db.

# Upper bound on how many chunks go into a single add_documents() call.
BATCH_SIZE = 100

# BASE_DIR is the directory one level above the folder containing this file.
_script_dir = Path(__file__).resolve().parent
BASE_DIR = _script_dir.parent
CHUNKS_PATH = BASE_DIR / "output" / "chunks.pkl"
DB_DIR = BASE_DIR / "db"

# NOTE(review): unpickling can execute arbitrary code, so chunks.pkl should
# only ever be a file produced by this project's own pipeline.
with CHUNKS_PATH.open("rb") as chunk_file:
    # The loaded object must support len() and slicing (used below);
    # presumably a list of LangChain documents - confirm against the producer.
    chunks = pickle.load(chunk_file)

# Chroma is handed the embedding object and a string path to the db directory.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma(
    embedding_function=embedding,
    persist_directory=str(DB_DIR),
)

print(f"🧠 Embedding and adding {len(chunks)} chunks in batches...")

# Walk the chunks in strides of BATCH_SIZE; the last slice may be shorter.
# Progress is reported with a 1-based batch number after each slice is added.
for i in range(0, len(chunks), BATCH_SIZE):
    vectorstore.add_documents(chunks[i:i + BATCH_SIZE])
    print(f"✅ Added batch {i // BATCH_SIZE + 1}")