Spaces:

bhardwaj08sarthak
/

STEM-Question-Generator

Sleeping

bhardwaj08sarthak committed on Sep 25

Commit

022c0a9

verified ·

1 Parent(s): 69796e9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,22 +70,28 @@ except Exception:
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 DATASET_REPO = "bhardwaj08sarthak/my-stem-index"   # your dataset repo id
 PERSIST_SUBDIR = "index_store"                      # the folder you uploaded
-LOCAL_BASE = "/data/index"                          # where to place files in the Space
-# Download the persisted index folder into ephemeral storage
-os.makedirs(LOCAL_BASE, exist_ok=True)
-snapshot_download(
-    repo_id=DATASET_REPO,
-    repo_type="dataset",
-    local_dir=LOCAL_BASE,
-    allow_patterns=[f"{PERSIST_SUBDIR}/**"],  # only grab the index folder
-    local_dir_use_symlinks=False,             # real files (safer in Spaces)
-)
-persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)
 # Recreate the SAME embedding model used to build the index
 emb = HuggingFaceEmbeddings(
     model_name="google/embeddinggemma-300m",
@@ -94,7 +100,14 @@ emb = HuggingFaceEmbeddings(
 )
 # Load the index from storage
-storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
 index = load_index_from_storage(storage_context, embed_model=emb)
 # Datasets & GPU build code remains commented out...

 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 DATASET_REPO = "bhardwaj08sarthak/my-stem-index"   # your dataset repo id
 PERSIST_SUBDIR = "index_store"                      # the folder you uploaded
+def _pick_writable_base() -> Path:
+    # Prefer home, fall back to /tmp
+    for base in (Path.home(), Path("/tmp")):
+        try:
+            base.mkdir(parents=True, exist_ok=True)
+            test = base / ".write_test"
+            with open(test, "w") as f:
+                f.write("ok")
+            test.unlink(missing_ok=True)
+            return base
+        except Exception:
+            continue
+    # Last resort: current working directory
+    return Path.cwd()
+WRITABLE_BASE = _pick_writable_base()
+LOCAL_BASE = WRITABLE_BASE / "my_app_cache" / "index"
+LOCAL_BASE.mkdir(parents=True, exist_ok=True)
 # Recreate the SAME embedding model used to build the index
 emb = HuggingFaceEmbeddings(
     model_name="google/embeddinggemma-300m",
 )
 # Load the index from storage
+snapshot_download(
+    repo_id=DATASET_REPO,
+    repo_type="dataset",
+    local_dir=str(LOCAL_BASE),
+    allow_patterns=[f"{PERSIST_SUBDIR}/**"],
+    local_dir_use_symlinks=False,
+)
+persist_dir = str(LOCAL_BASE / PERSIST_SUBDIR)
 index = load_index_from_storage(storage_context, embed_model=emb)
 # Datasets & GPU build code remains commented out...