bhardwaj08sarthak committed on
Commit
022c0a9
·
verified ·
1 Parent(s): 69796e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -70,22 +70,28 @@ except Exception:
70
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
71
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
72
 
 
73
  DATASET_REPO = "bhardwaj08sarthak/my-stem-index" # your dataset repo id
74
  PERSIST_SUBDIR = "index_store" # the folder you uploaded
75
- LOCAL_BASE = "/data/index" # where to place files in the Space
76
 
77
- # Download the persisted index folder into ephemeral storage
78
- os.makedirs(LOCAL_BASE, exist_ok=True)
79
- snapshot_download(
80
- repo_id=DATASET_REPO,
81
- repo_type="dataset",
82
- local_dir=LOCAL_BASE,
83
- allow_patterns=[f"{PERSIST_SUBDIR}/**"], # only grab the index folder
84
- local_dir_use_symlinks=False, # real files (safer in Spaces)
85
- )
86
-
87
- persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)
 
 
 
88
 
 
 
 
89
  # Recreate the SAME embedding model used to build the index
90
  emb = HuggingFaceEmbeddings(
91
  model_name="google/embeddinggemma-300m",
@@ -94,7 +100,14 @@ emb = HuggingFaceEmbeddings(
94
  )
95
 
96
  # Load the index from storage
97
- storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
 
 
 
 
 
 
 
98
  index = load_index_from_storage(storage_context, embed_model=emb)
99
 
100
  # Datasets & GPU build code remains commented out...
 
70
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
71
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
72
 
73
+
74
  DATASET_REPO = "bhardwaj08sarthak/my-stem-index" # your dataset repo id
75
  PERSIST_SUBDIR = "index_store" # the folder you uploaded
 
76
 
77
+ def _pick_writable_base() -> Path:
78
+ # Prefer home, fall back to /tmp
79
+ for base in (Path.home(), Path("/tmp")):
80
+ try:
81
+ base.mkdir(parents=True, exist_ok=True)
82
+ test = base / ".write_test"
83
+ with open(test, "w") as f:
84
+ f.write("ok")
85
+ test.unlink(missing_ok=True)
86
+ return base
87
+ except Exception:
88
+ continue
89
+ # Last resort: current working directory
90
+ return Path.cwd()
91
 
92
+ WRITABLE_BASE = _pick_writable_base()
93
+ LOCAL_BASE = WRITABLE_BASE / "my_app_cache" / "index"
94
+ LOCAL_BASE.mkdir(parents=True, exist_ok=True)
95
  # Recreate the SAME embedding model used to build the index
96
  emb = HuggingFaceEmbeddings(
97
  model_name="google/embeddinggemma-300m",
 
100
  )
101
 
102
  # Load the index from storage
103
+ snapshot_download(
104
+ repo_id=DATASET_REPO,
105
+ repo_type="dataset",
106
+ local_dir=str(LOCAL_BASE),
107
+ allow_patterns=[f"{PERSIST_SUBDIR}/**"],
108
+ local_dir_use_symlinks=False,
109
+ )
110
+ persist_dir = str(LOCAL_BASE / PERSIST_SUBDIR)
111
  index = load_index_from_storage(storage_context, embed_model=emb)
112
 
113
  # Datasets & GPU build code remains commented out...