vikramvasudevan committed on
Commit
064643c
·
verified ·
1 Parent(s): e74e470

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. modules/dropbox/discources.py +109 -85
  2. server.py +6 -7
modules/dropbox/discources.py CHANGED
@@ -1,82 +1,94 @@
1
- import os
2
  import json
3
-
4
- def fetch_all_discourses(folder_path="data/dropbox/discourses"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
- Loads all JSON discourse files from the specified folder and returns
7
- them as a list of Python dictionaries.
8
-
9
- Each file should contain a valid JSON object representing one topic.
10
  """
11
- discourses = []
12
-
13
- if not os.path.exists(folder_path):
14
- print(f"⚠️ Folder not found: {folder_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  return discourses
16
 
17
- for filename in os.listdir(folder_path):
18
- if filename.lower().endswith(".json"):
19
- file_path = os.path.join(folder_path, filename)
20
- try:
21
- with open(file_path, "r", encoding="utf-8") as f:
22
- data = json.load(f)
23
- discourses.append(data)
24
- except Exception as e:
25
- print(f"❌ Error loading {filename}: {e}")
26
 
27
- return discourses
28
 
29
- def get_discourse_summaries(folder_path="data/dropbox/discourses", page=1, per_page=10):
30
  """
31
- Returns a paginated list of discourse topic summaries.
32
-
33
- Each summary contains:
34
- - id
35
- - topic_name
36
- - thumbnail_url
37
-
38
- The results are sorted alphabetically by topic_name.
39
-
40
- Args:
41
- folder_path (str): Path to the folder containing JSON files.
42
- page (int): Page number (1-indexed).
43
- per_page (int): Number of items per page.
44
  """
45
- summaries = []
46
-
47
- if not os.path.exists(folder_path):
48
- print(f"⚠️ Folder not found: {folder_path}")
49
- return []
50
-
51
- # Load and extract only minimal info
52
- for filename in os.listdir(folder_path):
53
- if filename.lower().endswith(".json"):
54
- file_path = os.path.join(folder_path, filename)
55
- try:
56
- with open(file_path, "r", encoding="utf-8") as f:
57
- data = json.load(f)
58
- summaries.append({
59
- "id": data.get("id"),
60
- "topic_name": data.get("topic_name"),
61
- "thumbnail_url": data.get("thumbnail_url")
62
- })
63
- except Exception as e:
64
- print(f"❌ Error loading {filename}: {e}")
65
-
66
- # Sort by topic name
67
  summaries.sort(key=lambda x: (x.get("topic_name") or "").lower())
68
 
69
- # Pagination logic
70
  total_items = len(summaries)
71
- total_pages = (total_items + per_page - 1) // per_page # ceil division
72
-
73
  if page < 1 or page > total_pages:
74
- print(f"⚠️ Invalid page number {page}. Must be between 1 and {total_pages}.")
75
- return []
76
 
77
  start = (page - 1) * per_page
78
  end = start + per_page
79
-
80
  paginated = summaries[start:end]
81
 
82
  return {
@@ -84,31 +96,43 @@ def get_discourse_summaries(folder_path="data/dropbox/discourses", page=1, per_p
84
  "per_page": per_page,
85
  "total_pages": total_pages,
86
  "total_items": total_items,
87
- "data": paginated
88
  }
89
 
90
 
91
- import os
92
- import json
93
-
94
- def get_discourse_by_id(topic_id: int, folder_path="data/dropbox/discourses"):
95
  """
96
- Returns the full discourse JSON for a given topic_id.
97
- Returns None if not found.
98
  """
99
- if not os.path.exists(folder_path):
100
- print(f"⚠️ Folder not found: {folder_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return None
102
-
103
- for filename in os.listdir(folder_path):
104
- if filename.lower().endswith(".json"):
105
- file_path = os.path.join(folder_path, filename)
106
- try:
107
- with open(file_path, "r", encoding="utf-8") as f:
108
- data = json.load(f)
109
- if data.get("id") == topic_id:
110
- return data
111
- except Exception as e:
112
- print(f"❌ Error loading {filename}: {e}")
113
-
114
- return None
 
1
+ import asyncio
2
  import json
3
+ import logging
4
+ from datetime import datetime, timedelta
5
+ from typing import List, Optional
6
+ import dropbox
7
+ from modules.dropbox.client import dbx
8
+
9
+ # Logger
10
+ logger = logging.getLogger(__name__)
11
+ logger.setLevel(logging.INFO)
12
+
13
+ # Cache: key = folder_path, value = {"timestamp": datetime, "data": List[dict]}
14
+ _discourse_cache: dict[str, dict] = {}
15
+ CACHE_TTL = timedelta(hours=1)
16
+ FOLDER_PATH = "/_discourses"
17
+
18
async def fetch_discourses_from_dropbox() -> List[dict]:
    """
    Fetch every discourse JSON document stored under ``FOLDER_PATH`` in Dropbox.

    Results are stored in the module-level ``_discourse_cache`` keyed by the
    folder path and reused until they are older than ``CACHE_TTL``.

    Returns:
        The parsed JSON documents, one per readable ``*.json`` file in the
        folder. Files that cannot be decoded or parsed are logged and skipped.
        If listing or downloading fails, the previously cached list (even if
        expired) is returned when one exists; otherwise an empty list.
    """
    loop = asyncio.get_running_loop()
    folder_path = FOLDER_PATH

    # Serve from the cache while the entry is still fresh.
    cache_entry = _discourse_cache.get(folder_path)
    if cache_entry:
        age = datetime.now() - cache_entry["timestamp"]
        if age < CACHE_TTL:
            logger.info(f"Using cached discourses for '{folder_path}' (age={age})")
            return cache_entry["data"]

    logger.info(f"Fetching discourses from Dropbox folder '{folder_path}'")
    discourses: List[dict] = []

    try:
        # The Dropbox SDK calls are blocking, so each one runs in the default executor.
        res = await loop.run_in_executor(None, dbx.files_list_folder, folder_path)
        entries = list(res.entries)
        # Folder listings are paginated: follow the cursor until nothing is left,
        # otherwise files beyond the first page would be silently dropped.
        while res.has_more:
            res = await loop.run_in_executor(
                None, dbx.files_list_folder_continue, res.cursor
            )
            entries.extend(res.entries)

        for entry in entries:
            if not isinstance(entry, dropbox.files.FileMetadata):
                continue
            if not entry.name.lower().endswith(".json"):
                continue

            metadata, fres = await loop.run_in_executor(
                None, dbx.files_download, f"{folder_path}/{entry.name}"
            )
            try:
                discourses.append(json.loads(fres.content.decode("utf-8")))
            except ValueError as e:
                # UnicodeDecodeError and json.JSONDecodeError are both ValueErrors.
                # One malformed file must not discard every other discourse.
                logger.error(f"Error loading {entry.name}", exc_info=e)

        # Update cache
        _discourse_cache[folder_path] = {"timestamp": datetime.now(), "data": discourses}
        logger.info(f"Cached {len(discourses)} discourses for '{folder_path}'")
        return discourses

    except Exception as e:
        logger.error(f"Error fetching discourses from '{folder_path}'", exc_info=e)
        # Prefer stale data over no data when Dropbox is unreachable.
        if cache_entry:
            logger.warning(f"Returning stale cached discourses for '{folder_path}'")
            return cache_entry["data"]
        logger.warning(f"No cached discourses available for '{folder_path}'")
        return []
62
 
 
63
 
64
async def get_discourse_summaries(page: int = 1, per_page: int = 10):
    """
    Return one page of discourse summaries, sorted case-insensitively by topic name.

    Args:
        page: 1-indexed page number.
        per_page: Number of summaries per page; must be at least 1.

    Returns:
        A dict with the keys ``page``, ``per_page``, ``total_pages``,
        ``total_items`` and ``data``. ``data`` is a list of dicts holding
        ``id``, ``topic_name`` and ``thumbnail_url``; it is empty when ``page``
        falls outside the available range.

    Raises:
        ValueError: If ``per_page`` is smaller than 1.
    """
    if per_page < 1:
        # Guard the ceil division below against ZeroDivisionError / negative sizes.
        raise ValueError(f"per_page must be >= 1, got {per_page}")

    all_discourses = await fetch_discourses_from_dropbox()

    # Keep only the fields a listing needs.
    summaries = [
        {
            "id": d.get("id"),
            "topic_name": d.get("topic_name"),
            "thumbnail_url": d.get("thumbnail_url"),
        }
        for d in all_discourses
    ]
    # Missing or null topic names sort as the empty string.
    summaries.sort(key=lambda x: (x.get("topic_name") or "").lower())

    # Pagination
    total_items = len(summaries)
    total_pages = (total_items + per_page - 1) // per_page  # ceil division

    if 1 <= page <= total_pages:
        start = (page - 1) * per_page
        paginated = summaries[start:start + per_page]
    else:
        # Page 1 of an empty collection is a normal request, not a bad page number.
        if not (page == 1 and total_items == 0):
            logger.warning(f"Invalid page {page}. Must be between 1 and {total_pages}")
        paginated = []

    return {
        "page": page,
        "per_page": per_page,
        "total_pages": total_pages,
        "total_items": total_items,
        "data": paginated,
    }
101
 
102
 
103
async def get_discourse_by_id(topic_id: int) -> Optional[dict]:
    """
    Fetch a single discourse JSON document from Dropbox by its topic id.

    The file is looked up at ``{FOLDER_PATH}/{topic_id}.json``. Parsed results
    are kept in ``_discourse_cache`` keyed by that file path and reused until
    they are older than ``CACHE_TTL``.

    Args:
        topic_id: Identifier used to build the Dropbox file name.

    Returns:
        The parsed discourse, or ``None`` when the file does not exist. On any
        other failure the stale cached copy is returned if one exists,
        otherwise ``None``.
    """
    loop = asyncio.get_running_loop()
    file_path = f"{FOLDER_PATH}/{topic_id}.json"

    # Serve from the cache while the entry is still fresh.
    cache_entry = _discourse_cache.get(file_path)
    if cache_entry:
        age = datetime.now() - cache_entry["timestamp"]
        if age < CACHE_TTL:
            logger.info(f"Using cached discourse for topic {topic_id} (age={age})")
            return cache_entry["data"]

    try:
        logger.info(f"Fetching discourse {topic_id} from Dropbox: {file_path}")
        # The Dropbox SDK call is blocking, so it runs in the default executor.
        metadata, res = await loop.run_in_executor(None, dbx.files_download, file_path)
        discourse = json.loads(res.content.decode("utf-8"))

        # Update cache
        _discourse_cache[file_path] = {"timestamp": datetime.now(), "data": discourse}
        return discourse

    except dropbox.exceptions.ApiError as e:
        # A missing path is reported as ApiError carrying a DownloadError,
        # not as HttpError (which covers HTTP-layer failures).
        err = e.error
        not_found = (
            isinstance(err, dropbox.files.DownloadError)
            and err.is_path()
            and err.get_path().is_not_found()
        )
        if not_found:
            logger.error(f"Dropbox file not found: {file_path}", exc_info=e)
            # The file is gone, so an expired cached copy must not be served either.
            _discourse_cache.pop(file_path, None)
            return None
        logger.error(f"Error fetching discourse {topic_id}", exc_info=e)
    except Exception as e:
        # Includes dropbox.exceptions.HttpError and network/parse failures.
        logger.error(f"Error fetching discourse {topic_id}", exc_info=e)

    # Fall back to cached data if available.
    if cache_entry:
        logger.warning(f"Returning stale cached discourse for topic {topic_id}")
        return cache_entry["data"]
    return None
 
 
 
 
 
 
server.py CHANGED
@@ -1,8 +1,7 @@
1
  # server.py
2
- import json
3
  import random
4
  import traceback
5
- from typing import List, Optional
6
  import uuid
7
  from fastapi import APIRouter, HTTPException, Request, Query
8
  from fastapi.responses import JSONResponse
@@ -15,7 +14,7 @@ from metadata import MetadataWhereClause
15
  from modules.audio.model import AudioRequest, AudioType
16
  from modules.audio.service import svc_get_audio_urls, svc_get_indices_with_audio
17
  from modules.config.categories import get_scripture_categories
18
- from modules.dropbox.discources import fetch_all_discourses, get_discourse_by_id, get_discourse_summaries
19
  from modules.languages.get_v2 import handle_fetch_languages_v2
20
  from modules.quiz.answer_validator import validate_answer
21
  from modules.quiz.models import Question
@@ -567,7 +566,7 @@ def route_get_donation_product_ids(include_tests: bool = False):
567
  return products
568
 
569
  @router.get("/discourse/list")
570
- def get_all_discourses(
571
  page: int = Query(1, ge=1, description="Page number (1-indexed)"),
572
  per_page: int = Query(10, ge=1, le=100, description="Number of items per page")
573
  ):
@@ -578,15 +577,15 @@ def get_all_discourses(
578
  - topic_name
579
  - thumbnail_url
580
  """
581
- result = get_discourse_summaries(page=page, per_page=per_page)
582
  return result
583
 
584
  @router.get("/discourse/find/{topic_id}")
585
- def get_discourse_detail(topic_id: int):
586
  """
587
  Returns the full details of a discourse topic by its unique ID.
588
  """
589
- topic = get_discourse_by_id(topic_id)
590
  if not topic:
591
  raise HTTPException(status_code=404, detail="Discourse topic not found")
592
  return topic
 
1
  # server.py
 
2
  import random
3
  import traceback
4
+ from typing import Optional
5
  import uuid
6
  from fastapi import APIRouter, HTTPException, Request, Query
7
  from fastapi.responses import JSONResponse
 
14
  from modules.audio.model import AudioRequest, AudioType
15
  from modules.audio.service import svc_get_audio_urls, svc_get_indices_with_audio
16
  from modules.config.categories import get_scripture_categories
17
+ from modules.dropbox.discources import get_discourse_by_id, get_discourse_summaries
18
  from modules.languages.get_v2 import handle_fetch_languages_v2
19
  from modules.quiz.answer_validator import validate_answer
20
  from modules.quiz.models import Question
 
566
  return products
567
 
568
  @router.get("/discourse/list")
569
+ async def get_all_discourses(
570
  page: int = Query(1, ge=1, description="Page number (1-indexed)"),
571
  per_page: int = Query(10, ge=1, le=100, description="Number of items per page")
572
  ):
 
577
  - topic_name
578
  - thumbnail_url
579
  """
580
+ result = await get_discourse_summaries(page=page, per_page=per_page)
581
  return result
582
 
583
@router.get("/discourse/find/{topic_id}")
async def get_discourse_detail(topic_id: int):
    """
    Look up one discourse topic by its unique ID and return its full JSON.

    Responds with HTTP 404 when the lookup yields nothing (``None`` or any
    other falsy value).
    """
    discourse = await get_discourse_by_id(topic_id)
    if discourse:
        return discourse
    raise HTTPException(status_code=404, detail="Discourse topic not found")