vikramvasudevan committed on
Commit
7f4024a
·
verified ·
1 Parent(s): 5b0aa61

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modules/dropbox/audio.py +25 -13
modules/dropbox/audio.py CHANGED
@@ -82,6 +82,8 @@ def list_dropbox_folder_hierarchy(dbx: dropbox.Dropbox, base_path: str = ""):
82
  audio_cache: dict[tuple[str, int, str], dict] = {}
83
  CACHE_TTL = timedelta(hours=3, minutes=30) # refresh before 4h expiry
84
 
 
 
85
 
86
  async def get_audio_urls(req: AudioRequest):
87
  base_path = f"/{req.scripture_name}/audio"
@@ -90,20 +92,29 @@ async def get_audio_urls(req: AudioRequest):
90
  urls = {}
91
  now = datetime.now(timezone.utc)
92
 
93
- try:
94
- # Initial listing (first batch of up to 500)
95
- result = dbx.files_list_folder(base_path)
96
- entries = result.entries
 
 
 
 
 
97
 
98
- # Continue listing if there are more entries
99
- while result.has_more:
100
- result = dbx.files_list_folder_continue(result.cursor)
101
- entries.extend(result.entries)
102
 
103
- except dropbox.exceptions.ApiError:
104
- raise HTTPException(status_code=404, detail="Audio directory not found")
 
 
105
 
106
- # Filter files matching the prefix
 
 
 
107
  matching_files = [
108
  entry
109
  for entry in entries
@@ -113,10 +124,10 @@ async def get_audio_urls(req: AudioRequest):
113
  if not matching_files:
114
  raise HTTPException(status_code=404, detail="No audio files found")
115
 
 
116
  for entry in matching_files:
117
  filename = entry.name
118
- file_type = filename[len(prefix) :].rsplit(".", 1)[0]
119
-
120
  cache_key = (req.scripture_name, req.global_index, file_type)
121
 
122
  cached = audio_cache.get(cache_key)
@@ -135,6 +146,7 @@ async def get_audio_urls(req: AudioRequest):
135
  return urls
136
 
137
 
 
138
  async def cleanup_audio_url_cache(interval_seconds: int = 600):
139
  """Periodically remove expired entries from audio_cache."""
140
  while True:
 
82
  audio_cache: dict[tuple[str, int, str], dict] = {}
83
  CACHE_TTL = timedelta(hours=3, minutes=30) # refresh before 4h expiry
84
 
85
+ AUDIO_LIST_CACHE_TTL = timedelta(hours=24)
86
+ audio_list_cache = {} # {scripture_name: {"entries": [...], "expiry": datetime}}, keyed by the plain scripture name string
87
 
88
  async def get_audio_urls(req: AudioRequest):
89
  base_path = f"/{req.scripture_name}/audio"
 
92
  urls = {}
93
  now = datetime.now(timezone.utc)
94
 
95
+ # --- 1️⃣ Check if folder listing is cached ---
96
+ cache_entry = audio_list_cache.get(req.scripture_name)
97
+ if cache_entry and cache_entry["expiry"] > now:
98
+ entries = cache_entry["entries"]
99
+ else:
100
+ # Fetch fresh listing from Dropbox
101
+ try:
102
+ result = dbx.files_list_folder(base_path)
103
+ entries = result.entries
104
 
105
+ while result.has_more:
106
+ result = dbx.files_list_folder_continue(result.cursor)
107
+ entries.extend(result.entries)
 
108
 
109
+ audio_list_cache[req.scripture_name] = {
110
+ "entries": entries,
111
+ "expiry": now + AUDIO_LIST_CACHE_TTL,
112
+ }
113
 
114
+ except dropbox.exceptions.ApiError:
115
+ raise HTTPException(status_code=404, detail="Audio directory not found")
116
+
117
+ # --- 2️⃣ Filter matching files ---
118
  matching_files = [
119
  entry
120
  for entry in entries
 
124
  if not matching_files:
125
  raise HTTPException(status_code=404, detail="No audio files found")
126
 
127
+ # --- 3️⃣ Generate or reuse cached URLs ---
128
  for entry in matching_files:
129
  filename = entry.name
130
+ file_type = filename[len(prefix):].rsplit(".", 1)[0]
 
131
  cache_key = (req.scripture_name, req.global_index, file_type)
132
 
133
  cached = audio_cache.get(cache_key)
 
146
  return urls
147
 
148
 
149
+
150
  async def cleanup_audio_url_cache(interval_seconds: int = 600):
151
  """Periodically remove expired entries from audio_cache."""
152
  while True: