vikramvasudevan committed on
Commit
4aebf77
·
verified ·
1 Parent(s): a19a3df

Upload folder using huggingface_hub

Browse files
app.py CHANGED
@@ -23,16 +23,23 @@ from db import SanatanDatabase
23
  from drive_downloader import ZipDownloader
24
  from graph_helper import generate_graph
25
  from nalayiram_helper import delete_taniyan
 
26
 
27
  # Logging
28
  logging.basicConfig()
29
  logger = logging.getLogger()
30
- logger.setLevel(logging.INFO)
31
 
32
- graph = generate_graph()
 
 
 
 
 
33
 
34
- import pycountry
 
35
 
 
36
 
37
  def get_all_languages():
38
  """
@@ -89,7 +96,7 @@ message_textbox = gr.Textbox(
89
 
90
  with gr.Blocks(
91
  theme=gr.themes.Citrus(),
92
- title="Sanatan-AI",
93
  css="""
94
  /* hide the additional inputs row under the textbox */
95
  .gr-chat-interface .gr-form {
 
23
  from drive_downloader import ZipDownloader
24
  from graph_helper import generate_graph
25
  from nalayiram_helper import delete_taniyan
26
+ import pycountry
27
 
28
  # Logging
29
  logging.basicConfig()
30
  logger = logging.getLogger()
 
31
 
32
+ logger.setLevel(logging.INFO)
33
+ # Suppress OpenAI debug logs
34
+ logging.getLogger("openai").setLevel(logging.WARNING)
35
+ # Silence httpx + httpcore logs
36
+ logging.getLogger("httpx").setLevel(logging.WARNING)
37
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
38
 
39
+ # (Optional) Silence OpenAI logs too
40
+ logging.getLogger("openai").setLevel(logging.WARNING)
41
 
42
+ graph = generate_graph()
43
 
44
  def get_all_languages():
45
  """
 
96
 
97
  with gr.Blocks(
98
  theme=gr.themes.Citrus(),
99
+ title="Sanatan-AI | Chat",
100
  css="""
101
  /* hide the additional inputs row under the textbox */
102
  .gr-chat-interface .gr-form {
main.py CHANGED
@@ -4,6 +4,8 @@ from fastapi.responses import RedirectResponse
4
  import uvicorn
5
  from fastapi import FastAPI
6
  from modules.dropbox.audio import cleanup_audio_url_cache
 
 
7
  from server import router as mobile_router
8
  from app import gradio_app # your Blocks object
9
  import gradio as gr
@@ -18,12 +20,16 @@ app = FastAPI(title="Sanatan AI Unified Server")
18
  app.include_router(mobile_router, prefix="/api")
19
 
20
  # Convert Gradio Blocks to ASGI app
21
- app = gr.mount_gradio_app(app, gradio_app,"/web")
22
 
23
- # Redirect root URL to /web/
 
 
 
 
24
  @app.get("/")
25
  async def redirect_to_web():
26
- return RedirectResponse(url="/web/")
27
 
28
  @app.middleware("http")
29
  async def log_requests(request: Request, call_next):
@@ -40,4 +46,4 @@ async def lifespan(app: FastAPI):
40
  # Shutdown code (optional) can go here
41
 
42
  if __name__ == "__main__":
43
- uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)
 
4
  import uvicorn
5
  from fastapi import FastAPI
6
  from modules.dropbox.audio import cleanup_audio_url_cache
7
+ from modules.home.app import home_app
8
+ from modules.youtube_metadata.app import youtube_metadata_app
9
  from server import router as mobile_router
10
  from app import gradio_app # your Blocks object
11
  import gradio as gr
 
20
  app.include_router(mobile_router, prefix="/api")
21
 
22
  # Convert Gradio Blocks to ASGI app
23
+ app = gr.mount_gradio_app(app, gradio_app,"/sanatan_ai_web")
24
 
25
+ app = gr.mount_gradio_app(app, youtube_metadata_app,"/yt_web")
26
+
27
+ app = gr.mount_gradio_app(app, home_app,"/home")
28
+
29
+ # Redirect root URL to /home/
30
  @app.get("/")
31
  async def redirect_to_web():
32
+ return RedirectResponse(url="/home/")
33
 
34
  @app.middleware("http")
35
  async def log_requests(request: Request, call_next):
 
46
  # Shutdown code (optional) can go here
47
 
48
  if __name__ == "__main__":
49
+ uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False, access_log=False)
modules/home/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import gradio as gr

# Landing page: a simple navigation hub for the sub-apps mounted in main.py.
with gr.Blocks(title="Sanatana AI - Home") as home_app:
    gr.Markdown("## Welcome to Sanatan AI!")
    with gr.Row():
        gr.Button("Go to Sanatan AI", link="/sanatan_ai_web")  # mounted at /sanatan_ai_web
        gr.Button("Manage Youtube Metadata", link="/yt_web")  # mounted at /yt_web
modules/youtube_metadata/answerer.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -------------------------------
2
+ # 4. Answerer
3
+ # -------------------------------
4
+ from typing import List
5
+ from pydantic import BaseModel
6
+ from openai import OpenAI
7
+ from modules.youtube_metadata.retriever import retrieve_videos
8
+
9
+
10
+ # -------------------------------
11
+ # Structured Output Classes
12
+ # -------------------------------
13
class VideoItem(BaseModel):
    """One candidate video surfaced to the user (structured-output schema)."""

    video_id: str  # YouTube id used to build watch/embed URLs
    title: str
    channel: str  # channel display name
    description: str  # verbatim description from the indexed metadata
18
+
19
+
20
class LLMAnswer(BaseModel):
    """Structured LLM response: a short textual answer plus selected videos."""

    answer_text: str  # concise natural-language answer (prompt caps it at ~100 words)
    top_videos: List[VideoItem]  # only the most relevant candidates
23
+
24
+
25
+ # -------------------------------
26
+ # Main Function
27
+ # -------------------------------
28
def answer_query(
    query: str, collection, top_k: int = 5, channel_id: "str | None" = None
) -> "tuple[str, str]":
    """
    Answer a user query using YouTube video metadata.

    Retrieves candidate videos from the vector store, asks the LLM (structured
    output) to pick the most relevant ones, and returns an
    ``(answer_text, video_html)`` tuple ready for the Gradio UI.

    Args:
        query: Natural-language question from the user.
        collection: Chroma collection holding the indexed video metadata.
        top_k: Number of candidates to retrieve for the LLM to choose from.
        channel_id: Optional channel filter; ``None`` searches all channels.
    """
    results = retrieve_videos(query, collection, top_k=top_k, channel_id=channel_id)

    # BUG FIX: this branch used to return a bare LLMAnswer while the main path
    # returns a (text, html) tuple; callers unpack two values, so the shapes
    # must agree.
    if not results:
        return "No relevant videos found.", build_video_html([])

    # Build one context line per candidate for the LLM prompt.
    context_lines = []
    for r in results:
        if not isinstance(r, dict):
            continue
        vid_id = r.get("video_id", "")
        title = r.get("video_title") or r.get("title", "")
        channel = r.get("channel") or r.get("channel_title", "")
        description = r.get("description", "")
        context_lines.append(
            f"- {title} ({channel}) (https://youtube.com/watch?v={vid_id})\n description: {description}"
        )

    context_text = "\n".join(context_lines)

    # Structured-output call: the SDK parses the response into LLMAnswer.
    client = OpenAI()
    response = client.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that answers questions using YouTube video metadata. "
                    "Return your response strictly as the LLMAnswer class, including 'answer_text' and a list of **only the most relevant** 'top_videos'.\n"
                    "- `answer_text` MUST be very short and concise in natural language (max 100 words).\n"
                    "- Use `top_videos` to include only the top 3 most relevant items from context.\n"
                    "- Do not include all items unless all are clearly relevant.\n"
                    "- Do not makeup `description`. Use the exact descriptions as given in the context"
                ),
            },
            {
                "role": "user",
                "content": f"Question: {query}\n\nCandidate videos:\n{context_text}\n\nPick only the relevant ones.",
            },
        ],
        response_format=LLMAnswer,
    )

    llm_answer = response.choices[0].message.parsed
    # BUG FIX: `.parsed` can be None when the model refuses or fails to conform
    # to the schema; guard before dereferencing.
    if llm_answer is None:
        return "No answer available.", build_video_html([])

    answer_text = "\n## Answer : \n" + llm_answer.answer_text
    video_html = build_video_html(llm_answer.top_videos)
    return answer_text, video_html
83
+
84
+
85
def build_video_html(videos: "list[VideoItem]") -> str:
    """Build an HTML table (description + embedded player) from top_videos.

    Titles, channel names and descriptions come from YouTube — untrusted
    text — so they are HTML-escaped before interpolation to prevent markup
    injection in the Gradio HTML component.
    """
    import html as _html  # local import: keeps file-level imports untouched

    if not videos:
        return "<p>No relevant videos found.</p>"

    out = """
    <table border="1" style="border-collapse: collapse; width: 100%;">
      <tr>
        <th>Description</th>
        <th>Watch</th>
      </tr>
    """
    for v in videos:
        title = _html.escape(v.title)
        channel = _html.escape(v.channel)
        description = _html.escape(v.description)
        video_id = _html.escape(v.video_id, quote=True)  # goes into an attribute
        embed_html = f"""
        <div style="margin-bottom: 20px;">
            <strong>{title}</strong> ({channel})<br>
            <iframe width="360" height="203"
                src="https://www.youtube.com/embed/{video_id}"
                frameborder="0"
                allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
                allowfullscreen>
            </iframe>
        </div>
        """
        out += f"""
        <tr>
            <td>{description}</td>
            <td>{embed_html}</td>
        </tr>
        """
    out += "</table>"
    return out
modules/youtube_metadata/app.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import re
4
+ import threading
5
+ import gradio as gr
6
+ from gradio_modal import Modal
7
+ from modules.youtube_metadata.downloader import export_channel_json
8
+ from modules.youtube_metadata.channel_utils import fetch_channel_dataframe
9
+ from modules.youtube_metadata.db import (
10
+ delete_channel_from_collection,
11
+ get_collection,
12
+ get_indexed_channels,
13
+ )
14
+ from modules.youtube_metadata.answerer import answer_query
15
+ from dotenv import load_dotenv
16
+
17
+ from modules.youtube_metadata.youtube_poller import start_poll
18
+ from modules.youtube_metadata.youtube_sync import sync_channels_from_youtube
19
+
20
+ load_dotenv()
21
+
22
+
23
+ # -------------------------------
24
+ # Utility functions
25
+ # -------------------------------
26
def refresh_channel_list():
    """Gradio update that re-populates the channel Radio's choices."""
    choices = list_channels_radio()
    return gr.update(choices=choices)
28
+
29
+
30
def show_component():
    """Gradio update: make the target component visible."""
    return gr.update(visible=True)


def hide_component():
    """Gradio update: hide the target component."""
    return gr.update(visible=False)


def open_component():
    """Gradio update: open a collapsible component (e.g. the sidebar/modal)."""
    return gr.update(open=True)


def close_component():
    """Gradio update: collapse a collapsible component."""
    return gr.update(open=False)


def enable_component():
    """Gradio update: make the target component interactive."""
    return gr.update(interactive=True)


def disable_component():
    """Gradio update: grey out the target component."""
    return gr.update(interactive=False)


def clear_component():
    """Gradio update: reset the target component's value to an empty string."""
    return gr.update(value="")
56
+
57
+
58
def show_loading(question):
    """Status update shown while *question* is being answered."""
    message = f"⏳Fetching details on [{question}]..."
    return gr.update(value=message)
60
+
61
+
62
def enable_if_not_none(question):
    """Enable the target component only when *question* is set."""
    return disable_component() if question is None else enable_component()
67
+
68
+
69
def index_channels(channel_urls: str):
    """Generator: index the given channels, streaming status to the UI.

    *channel_urls* is a comma- and/or newline-separated string of channel
    URLs. Yields (status_message, radio_update, channel_list) triples;
    intermediate yields leave the two UI outputs untouched via bare
    gr.update().
    """
    yield "saving ...", gr.update(), gr.update()
    yt_api_key = os.environ["YOUTUBE_API_KEY"]

    # Split on commas and/or newlines, dropping empty fragments.
    urls = [u.strip() for u in re.split(r"[\n,]+", channel_urls) if u.strip()]
    total_videos = 0

    # sync all channels, streaming progress
    for message, videos_count in sync_channels_from_youtube(yt_api_key, urls):
        total_videos = videos_count  # keeps the last reported count — presumably a running total; TODO confirm
        yield message, gr.update(), gr.update()

    # final UI update
    yield (
        f"✅ Indexed {total_videos} videos from {len(urls)} channels.",
        refresh_channel_list(),
        list_channels_radio(),
    )
87
+
88
+
89
def youtube_metadata_init(progress: gr.Progress = None):
    """Index the default set of channels, yielding progress messages.

    Used by the background bootstrap thread. The Gradio component updates
    yielded by index_channels are discarded here — only the status message
    is forwarded.
    """
    channels = (
        "https://www.youtube.com/@onedayonepasuram6126,"
        "https://www.youtube.com/@srisookthi,"
        "https://www.youtube.com/@learn-aksharam,"
        "https://www.youtube.com/@SriYadugiriYathirajaMutt,"
        "https://www.youtube.com/@akivasudev,"
        "https://www.youtube.com/@Arulicheyal_Amutham"
    )
    # BUG FIX: the loop previously unpacked into `msg, upd, upd`, rebinding
    # the same name twice; use distinct throwaway names for the unused
    # UI-update values.
    for msg, _radio_update, _state_update in index_channels(channels):
        yield msg
101
+
102
+
103
def refresh_all_channels():
    """Re-index every channel currently in the collection from YouTube.

    Returns a (status_message, radio_update) pair for the Gradio UI.
    """
    yt_api_key = os.environ["YOUTUBE_API_KEY"]
    channels = get_indexed_channels(get_collection())

    if not channels:
        return "⚠️ No channels available to refresh.", refresh_channel_list()

    # Build the list of channel URLs (values may be dicts or plain titles).
    urls = []
    for key, val in channels.items():
        url = val.get("channel_url") if isinstance(val, dict) else key
        if url:
            urls.append(url)

    # BUG FIX: sync_channels_from_youtube is a *generator* of
    # (message, running_count) pairs; it was previously assigned directly to
    # total_videos, so the sync never actually ran and the status message
    # showed a generator repr. Consume it and keep the last reported count.
    total_videos = 0
    for _message, videos_count in sync_channels_from_youtube(yt_api_key, urls):
        total_videos = videos_count

    return (
        f"🔄 Refreshed {len(urls)} channels, re-indexed {total_videos} videos.",
        refresh_channel_list(),
    )
124
+
125
+
126
+ # -------------------------------
127
+ # Channel selection as radio
128
+ # -------------------------------
129
def list_channels_radio():
    """Build (label, value) choice pairs for the channel Radio widget."""
    choices = []
    for key, val in get_indexed_channels(get_collection()).items():
        # Entries may be dicts (title + url) or plain title strings keyed by id.
        if isinstance(val, dict):
            label = val.get("channel_title", "Unknown")
            value = val.get("channel_url")
        else:
            label = val
            value = key
        if value:
            choices.append((label, value))
    return choices
143
+
144
+
145
+ # Delete a channel
146
+ # -------------------------------
147
def delete_channel(channel_url: str):
    """Delete the selected channel's videos, then refresh the Radio choices."""
    delete_channel_from_collection(channel_url)
    return refresh_channel_list()
151
+
152
+
153
+ # -------------------------------
154
+ # LLM query
155
+ # -------------------------------
156
def handle_query(query: str, search_channel_id: str):
    """Run the retrieval + LLM pipeline and normalise outputs for Gradio."""
    answer_text, video_html = answer_query(
        query, get_collection(), channel_id=search_channel_id, top_k=10
    )
    answer_text = answer_text or "No answer available."
    # gr.HTML needs a string — coerce anything else to empty.
    if not video_html or not isinstance(video_html, str):
        video_html = ""
    return answer_text, video_html
165
+
166
+
167
+ # -------------------------------
168
+ # Gradio UI
169
+ # -------------------------------
170
+ with gr.Blocks(title="Sanatana AI - Youtube Metadata Surfer") as youtube_metadata_app:
171
+ gr.Markdown("### 📺 YouTube Channel Surfer")
172
+
173
+ with Modal(visible=False) as download_modal:
174
+ with gr.Row():
175
+ gr.Column()
176
+ download_status = gr.Markdown("## Preparing the file ...")
177
+ gr.Column()
178
+ with gr.Row():
179
+ gr.Column()
180
+ download_ready_btn = gr.DownloadButton(
181
+ label="Click to Download",
182
+ visible=False,
183
+ variant="primary",
184
+ scale=0,
185
+ )
186
+ gr.Column()
187
+
188
+ # Modal to show channel videos
189
+ with Modal(visible=False) as videos_list_modal:
190
+ gr.Markdown("### Videos List")
191
+
192
+ # the HTML table that shows one page of videos
193
+ # modal_html = gr.HTML()
194
+ channel_videos_df = gr.DataFrame(
195
+ show_search=True,
196
+ show_copy_button=True,
197
+ show_fullscreen_button=True,
198
+ datatype=[
199
+ "int",
200
+ "str",
201
+ "str",
202
+ "html",
203
+ ],
204
+ headers=["#", "title", "description", "url"],
205
+ column_widths=["5%", "25%", "60%", "10%"],
206
+ wrap=True,
207
+ col_count=(4, "fixed"),
208
+ )
209
+
210
+ # Modal to add new channels
211
+ with Modal(visible=False) as add_channel_modal:
212
+ channel_input = gr.Textbox(
213
+ label="Channel URLs",
214
+ placeholder="Paste one or more YouTube channel URLs (comma or newline separated)",
215
+ )
216
+ examples = {
217
+ "Comma Separated Channels Example": "https://www.youtube.com/@onedayonepasuram6126,https://www.youtube.com/@srisookthi,https://www.youtube.com/@learn-aksharam,https://www.youtube.com/@SriYadugiriYathirajaMutt",
218
+ "Newline Separated Channels Example": "https://www.youtube.com/@onedayonepasuram6126\nhttps://www.youtube.com/@srisookthi\nhttps://www.youtube.com/@learn-aksharam\nhttps://www.youtube.com/@SriYadugiriYathirajaMutt",
219
+ "One Day One Pasuram": "https://www.youtube.com/@onedayonepasuram6126",
220
+ "Sri Sookthi": "https://www.youtube.com/@srisookthi",
221
+ "Aksharam": "https://www.youtube.com/@learn-aksharam",
222
+ "Cricinfo": "https://www.youtube.com/@espncricinfo",
223
+ "Chanakyaa": "https://www.youtube.com/@ChanakyaaTV",
224
+ "Aptitude Guru": "https://www.youtube.com/@AptitudeGuruHem",
225
+ "Universe Genius": "https://www.youtube.com/@UniverseGenius",
226
+ "Praveen Mohan": "https://www.youtube.com/@RealPraveenMohan",
227
+ "Yathiraja Mutt": "https://www.youtube.com/@SriYadugiriYathirajaMutt",
228
+ "Vasudevan Srinivasachariar": "https://www.youtube.com/@akivasudev",
229
+ }
230
+
231
+ def set_example(label):
232
+ return examples[label]
233
+
234
+ gr.Markdown("Click on any example below and then click on add channels button.")
235
+ with gr.Row():
236
+ for label in examples:
237
+ gr.Button(label, size="sm", variant="huggingface", scale=0).click(
238
+ fn=set_example,
239
+ inputs=gr.State(label),
240
+ outputs=channel_input,
241
+ )
242
+
243
+ with gr.Row():
244
+ gr.Column()
245
+ save_add_channels_btn = gr.Button(
246
+ "Add Channel(s)", scale=0, variant="primary"
247
+ )
248
+ gr.Column()
249
+ index_status = gr.Markdown(label="Index Status", container=False)
250
+
251
+ with gr.Row():
252
+ # Sidebar
253
+ with gr.Sidebar() as my_sidebar:
254
+ gr.Markdown("### 📺 Channels")
255
+ channel_list_values = list_channels_radio()
256
+ channel_list_state = gr.State(channel_list_values)
257
+
258
+ no_channels_message = gr.Markdown(
259
+ "⚠️ **No channels available.**",
260
+ visible=False if channel_list_values else True,
261
+ )
262
+ channel_radio = gr.Radio(
263
+ choices=channel_list_values,
264
+ label="Select a Channel",
265
+ visible=True if channel_list_values else False,
266
+ )
267
+
268
+ with gr.Row():
269
+ export_btn = gr.Button(
270
+ "⏬ Download",
271
+ size="sm",
272
+ scale=0,
273
+ variant="primary",
274
+ interactive=False,
275
+ )
276
+ show_videos_btn = gr.Button(
277
+ "🎬Videos",
278
+ size="sm",
279
+ scale=0,
280
+ variant="secondary",
281
+ interactive=False,
282
+ )
283
+ refresh_btn = gr.Button(
284
+ "⭮ Refresh",
285
+ size="sm",
286
+ scale=0,
287
+ variant="huggingface",
288
+ )
289
+ refresh_all_btn = gr.Button(
290
+ "🔄 Sync from YouTube",
291
+ size="sm",
292
+ scale=0,
293
+ variant="stop",
294
+ visible=False,
295
+ )
296
+ add_channels_btn = gr.Button(
297
+ "➕ Add", size="sm", scale=0, variant="primary"
298
+ )
299
+
300
+ delete_channel_btn = gr.Button(
301
+ "🗑️ Delete", size="sm", scale=0, variant="stop"
302
+ )
303
+
304
+ refresh_status = gr.Markdown(label="Refresh Status", container=False)
305
+
306
+ refresh_all_btn.click(
307
+ fn=refresh_all_channels,
308
+ inputs=None,
309
+ outputs=[refresh_status, channel_radio],
310
+ )
311
+
312
+ refresh_btn.click(fn=refresh_channel_list, outputs=[channel_radio]).then(
313
+ fn=list_channels_radio, outputs=[channel_list_state]
314
+ )
315
+ add_channels_btn.click(close_component, outputs=[my_sidebar]).then(
316
+ show_component, outputs=[add_channel_modal]
317
+ )
318
+
319
+ def toggle_no_data_found(channel_list):
320
+ if channel_list:
321
+ return show_component(), hide_component()
322
+ else:
323
+ return hide_component(), show_component()
324
+
325
+ save_add_channels_btn.click(
326
+ disable_component, outputs=[save_add_channels_btn]
327
+ ).then(
328
+ index_channels,
329
+ inputs=[channel_input],
330
+ outputs=[index_status, channel_radio, channel_list_state],
331
+ ).then(
332
+ hide_component, outputs=[add_channel_modal]
333
+ ).then(
334
+ open_component, outputs=[my_sidebar]
335
+ ).then(
336
+ enable_component, outputs=[save_add_channels_btn]
337
+ ).then(
338
+ toggle_no_data_found,
339
+ inputs=[channel_list_state],
340
+ outputs=[channel_radio, no_channels_message],
341
+ )
342
+ ## Onload refresh the channel list.
343
+ gr.on(fn=refresh_channel_list, outputs=[channel_radio]).then(
344
+ fn=list_channels_radio, outputs=[channel_list_state]
345
+ )
346
+ # Main Column
347
+ main_content_no_channels_html = gr.HTML(
348
+ """
349
+ <div style="
350
+ display: flex;
351
+ justify-content: center;
352
+ align-items: center;
353
+ height: 150px;
354
+ ">
355
+ <div style="
356
+ border: 2px solid #FFA500;
357
+ background-color: #FFF8E1;
358
+ color: #FF6F00;
359
+ padding: 20px 30px;
360
+ border-radius: 12px;
361
+ font-weight: bold;
362
+ font-size: 1.2rem;
363
+ text-align: center;
364
+ box-shadow: 0 4px 10px rgba(0,0,0,0.1);
365
+ ">
366
+ ⚠️ No channels added.<br>
367
+ Please add channels from the side bar
368
+ </div>
369
+ </div>
370
+
371
+ """,
372
+ visible=True if not channel_list_state.value else False,
373
+ )
374
+ with gr.Column(
375
+ scale=3, visible=True if channel_list_state.value else False
376
+ ) as main_content:
377
+ with gr.Row():
378
+ search_channel = gr.Dropdown(
379
+ label="Select a Channel",
380
+ choices=[("All Channels", None)] + channel_list_state.value,
381
+ value=None,
382
+ )
383
+ question = gr.Textbox(
384
+ label="Ask a Question",
385
+ placeholder="e.g., How to write the letter Aa in grantham?",
386
+ submit_btn=True,
387
+ )
388
+ gr.Column(scale=2)
389
+
390
+ gr.Examples(
391
+ [
392
+ "Srirangam",
393
+ "Gajendra moksham",
394
+ "Poorvikalyani",
395
+ "Virutham from chathusloki",
396
+ "Lesson 9.15 from Aksharam",
397
+ ],
398
+ inputs=question,
399
+ )
400
+
401
+ submitted_question = gr.Markdown()
402
+ ask_status = gr.Markdown()
403
+ answer = gr.Markdown()
404
+ video_embed = gr.HTML() # iframe embeds
405
+
406
+ def get_question(q):
407
+ return f"## You asked : {q}\n---"
408
+
409
+ # question.change(enable_if_not_none, inputs=[question], outputs=[question])
410
+ question.submit(show_loading, inputs=[question], outputs=[ask_status]).then(
411
+ get_question, inputs=[question], outputs=[submitted_question]
412
+ ).then(disable_component, outputs=[question]).then(
413
+ handle_query,
414
+ inputs=[question, search_channel],
415
+ outputs=[answer, video_embed],
416
+ ).then(
417
+ enable_component, outputs=[question]
418
+ ).then(
419
+ clear_component, outputs=[ask_status]
420
+ )
421
+
422
+ # Show videos modal when button clicked
423
+ def show_selected_channel_videos(selected_channel_id):
424
+ # print("selected_channel_id = ", selected_channel_id)
425
+ df = fetch_channel_dataframe(selected_channel_id)
426
+ return gr.update(value=df, label=f"{len(df)} videos")
427
+
428
+ channel_radio.change(
429
+ enable_if_not_none, inputs=[channel_radio], outputs=[show_videos_btn]
430
+ ).then(enable_if_not_none, inputs=[channel_radio], outputs=[export_btn])
431
+ show_videos_btn.click(disable_component, outputs=[show_videos_btn]).then(
432
+ close_component, outputs=[my_sidebar]
433
+ ).then(
434
+ show_selected_channel_videos,
435
+ inputs=[channel_radio],
436
+ outputs=[channel_videos_df],
437
+ ).then(
438
+ show_component, outputs=[videos_list_modal]
439
+ ).then(
440
+ enable_component, outputs=[show_videos_btn]
441
+ )
442
+
443
+ delete_channel_btn.click(
444
+ disable_component, outputs=[delete_channel_btn]
445
+ ).then(
446
+ delete_channel, # function
447
+ inputs=[channel_radio], # selected channel name
448
+ outputs=[channel_radio], # update the radio choices
449
+ ).then(
450
+ enable_component, outputs=[delete_channel_btn]
451
+ )
452
+ channel_list_state.change(
453
+ toggle_no_data_found,
454
+ inputs=[channel_list_state],
455
+ outputs=[main_content, main_content_no_channels_html],
456
+ ).then(
457
+ toggle_no_data_found,
458
+ inputs=[channel_list_state],
459
+ outputs=[channel_radio, no_channels_message],
460
+ )
461
+
462
+ def get_channel_choices(channel_list):
463
+ return gr.update(choices=[("All Channels", None)] + channel_list)
464
+
465
+ channel_list_state.change(
466
+ get_channel_choices, inputs=[channel_list_state], outputs=[search_channel]
467
+ )
468
+
469
+ export_btn.click(close_component, outputs=[my_sidebar]).then(
470
+ show_component, outputs=[download_status]
471
+ ).then(hide_component, outputs=[download_ready_btn]).then(
472
+ show_component, outputs=[download_modal]
473
+ ).then(
474
+ export_channel_json, inputs=channel_radio, outputs=download_ready_btn
475
+ ).then(
476
+ hide_component, outputs=[download_status]
477
+ ).then(
478
+ show_component, outputs=[download_ready_btn]
479
+ )
480
+
481
+
482
def initialize_youtube_metadata_and_poll():
    """Index the default channels, then start the YouTube poller.

    Runs synchronously; intended to be launched from a daemon thread at
    import time so the Gradio app is not blocked.
    """
    # Step 1: Initialize metadata (progress messages go to stdout only)
    for msg in youtube_metadata_init():
        print(msg)

    # Step 2: Start polling after init
    start_poll()  # run in the same thread
    # OR if you want it in a separate daemon thread:
    # poll_thread = threading.Thread(target=start_poll, daemon=True)
    # poll_thread.start()
492
+
493
# Launch the whole thing in a background thread
# (daemon=True so it never blocks interpreter shutdown)
yt_init_thread = threading.Thread(target=initialize_youtube_metadata_and_poll, daemon=True)
yt_init_thread.start()

if __name__ == "__main__":
    # NOTE(review): the daemon thread above has already started this same
    # routine at import time; running it again here likely double-indexes —
    # confirm intent.
    initialize_youtube_metadata_and_poll()
    # Start polling in a background thread
    youtube_metadata_app.launch()
modules/youtube_metadata/channel_utils.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.youtube_metadata.db import get_collection
2
+ import pandas as pd
3
+
4
+ page_size = 10 # change if you like
5
+
6
+
7
+ # -------------------------------
8
+ # Fetch channel videos as HTML table with pagination
9
+ # -------------------------------
10
def fetch_channel_html(channel_id: str, page: int = 1, page_size: int = 10):
    """Render one page of a channel's videos as an HTML table.

    Returns a centred warning <div> when the page has no results.
    NOTE(review): the first collection.get() fetches *all* metadatas just to
    compute the total count — O(channel size) per page; fine for small
    channels.
    """
    collection = get_collection()
    offset = (page - 1) * page_size  # Chroma pagination is limit/offset based

    # Unpaginated fetch, used only for the total count shown above the table.
    all_results = collection.get(
        where={"channel_id": channel_id}, include=["metadatas"]
    )
    total_count = (
        len(all_results["metadatas"])
        if all_results and "metadatas" in all_results
        else 0
    )
    # The actual page of results.
    results = collection.get(
        where={"channel_id": channel_id},
        include=["documents", "metadatas"],
        limit=page_size,
        offset=offset,
    )

    # handle empty
    if not results or not results.get("metadatas"):
        return f"""
        <div style="display:flex;justify-content:center;align-items:center;
        height:200px;flex-direction:column;color:#666;">
        ⚠️ No videos found for this channel (page {page}).
        </div>
        """

    videos = results["metadatas"]

    # build table
    html = (
        f"<div>Total: {total_count} videos</div>"
        + """
    <table border="1" style="border-collapse:collapse;width:100%;font-family:sans-serif;">
    <thead style="background:#f0f0f0;">
    <tr>
    <th>#</th>
    <th>Title</th>
    <th>Video URL</th>
    <th>Description</th>
    </tr>
    </thead>
    <tbody>
    """
    )

    # Row numbers continue across pages (offset + 1).
    for idx, v in enumerate(videos, start=offset + 1):
        html += f"""
        <tr>
        <td>{idx}</td>
        <td>{v.get('video_title','')}</td>
        <td><a href="https://youtube.com/watch?v={v.get('video_id')}"
        target="_blank">Watch Video</a></td>
        <td>{v.get('description','')}</td>
        </tr>
        """

    html += "</tbody></table>"
    return html
70
+
71
+
72
+ # -------------------------------
73
+ # Fetch channel videos as a pandas DataFrame
74
+ # -------------------------------
75
def fetch_channel_dataframe(channel_id: str):
    """Return a DataFrame (#, title, description, url) of a channel's videos.

    The ``url`` column is an HTML anchor so the Gradio DataFrame (configured
    with the "html" datatype) renders a clickable watch link.
    """
    collection = get_collection()

    results = collection.get(
        where={"channel_id": channel_id}, include=["documents", "metadatas"]
    )
    # Empty result set -> empty frame (caller shows "0 videos").
    # (The previously computed, unused `total_count` local has been removed.)
    if not results or not results.get("metadatas"):
        return pd.DataFrame(data=[])

    items = []
    for idx, v in enumerate(results["metadatas"], start=1):
        items.append(
            {
                "#": idx,
                "title": v.get("video_title", "-"),
                "description": v.get("description", ""),
                "url": f"""<a style="color: blue" href="https://youtube.com/watch?v={v.get('video_id')}"
            target="_blank">▶️Watch Video</a>""",
            }
        )
    return pd.DataFrame(data=items)
99
+
100
+
101
def update_table(channel_id, page):
    """Render *page* of the channel's videos plus its page label."""
    html = fetch_channel_html(channel_id, page, page_size)
    return html, f"Page {page}"
103
+
104
+
105
def prev_page(channel_id, page):
    """Step one page back (clamped at 1); return (html, label, page)."""
    new_page = max(1, page - 1)
    html = fetch_channel_html(channel_id, new_page, page_size)
    return html, f"Page {new_page}", new_page
112
+
113
+
114
def next_page(channel_id, page):
    """Advance one page; return (html, label, page). No upper bound check."""
    new_page = page + 1
    html = fetch_channel_html(channel_id, new_page, page_size)
    return html, f"Page {new_page}", new_page
modules/youtube_metadata/collector.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -------------------------------
2
+ # 1. Collector
3
+ # -------------------------------
4
+ from googleapiclient.discovery import build
5
+ from modules.youtube_metadata.youtube_utils import get_channel_id
6
+ import logging
7
+
8
+ logging.basicConfig()
9
+ logger=logging.getLogger(__name__)
10
+ logger.setLevel(logging.INFO)
11
+
12
def fetch_all_channel_videos(api_key: str, channel_url: str, max_results_per_call=50):
    """Stream all videos of a channel, yielding ("Fetched N", batch) pairs.

    Resolves the channel URL to a channel id, then pages through the uploads
    playlist. Each yield carries only the *new* batch; a final yield with an
    empty batch reports the grand total.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    channel_id = get_channel_id(youtube, channel_url)

    final_videos = []  # running accumulator, used only for the count in messages
    for videos in fetch_channel_videos_by_id(api_key, channel_id, max_results_per_call):
        final_videos.extend(videos)
        logger.info("fetch_all_channel_videos: Fetched %d", len(final_videos))
        yield (f"Fetched {len(final_videos)}", videos)  # <-- only yield the *new* batch

    yield (f"Fetched {len(final_videos)}", [])  # final "summary"
23
+
24
+
25
def fetch_channel_videos_by_id(api_key: str, channel_id: str, max_results=50):
    """Yield pages of video-metadata dicts for *channel_id*.

    Uses the channel's auto-generated "uploads" playlist and follows
    nextPageToken until exhausted. Each yielded item is a list of dicts with
    video_id, title, description, channel_id and channel_title keys.
    """
    youtube = build("youtube", "v3", developerKey=api_key)

    # Get uploads playlist ID
    channel_response = youtube.channels().list(
        part="contentDetails,snippet", id=channel_id
    ).execute()

    channel_title = channel_response["items"][0]["snippet"]["title"]
    uploads_playlist_id = channel_response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]

    next_page_token = None

    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId=uploads_playlist_id,
            maxResults=max_results,
            pageToken=next_page_token,
        )
        response = request.execute()

        videos = []
        for item in response.get("items", []):
            snippet = item["snippet"]
            videos.append(
                {
                    "video_id": snippet["resourceId"]["videoId"],
                    "title": snippet["title"],
                    "description": snippet.get("description", ""),
                    "channel_id": channel_id,
                    "channel_title": channel_title,
                }
            )

        yield videos  # yield one page worth

        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            break
65
+
modules/youtube_metadata/db.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+
3
+ from config import SanatanConfig
4
+
5
+ config = SanatanConfig()
6
+ YT_METADATA_COLLECTION_NAME = config.get_collection_name(scripture_name="yt_metadata")
7
+
8
def get_client():
    """Return a persistent Chroma client rooted at the configured DB path."""
    return chromadb.PersistentClient(path=config.dbStorePath)
11
+
12
+
13
def get_collection():
    """Fetch the YT-metadata collection, creating it if it doesn't exist yet."""
    client = get_client()
    try:
        return client.get_collection(YT_METADATA_COLLECTION_NAME)
    except Exception:
        # Collection not present (or unreadable) — create a fresh one.
        return client.create_collection(YT_METADATA_COLLECTION_NAME)
32
+
33
+
34
+ # modules/db.py
35
# modules/db.py
def get_indexed_channels(collection=None):
    """Return {channel_id: channel_title} for every channel in the index.

    BUG FIX: the default used to be ``collection=get_collection()``, which is
    evaluated once at import time (triggering a DB call on import) and then
    shares that single collection object across all calls. The default is now
    resolved lazily per call.
    """
    if collection is None:
        collection = get_collection()

    results = collection.get(include=["metadatas"])
    channels = {}

    for meta in results["metadatas"]:
        cid = meta.get("channel_id")  # safe — metadata may lack the key
        cname = meta.get("channel_title", "Unknown Channel")

        if cid:  # only include entries that actually carry a channel_id
            channels[cid] = cname
    return channels
47
+
48
+
49
+ # -------------------------------
50
+ # Delete a channel
51
+ # -------------------------------
52
def delete_channel_from_collection(channel_id: str):
    """Delete every indexed video belonging to *channel_id* from the index."""
    get_collection().delete(where={"channel_id": channel_id})
59
+
60
+
61
def fetch_channel_data(channel_id: str):
    """Return everything stored for a channel: embeddings, metadata, documents."""
    return get_collection().get(
        where={"channel_id": channel_id},
        include=["embeddings", "metadatas", "documents"],
    )
modules/youtube_metadata/downloader.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import tempfile
4
+ import os
5
+
6
+ from modules.youtube_metadata.db import fetch_channel_data
7
+
8
def json_serializer(obj):
    """json.dump fallback: unwrap array-likes via .tolist(), stringify the rest."""
    to_list = getattr(obj, "tolist", None)  # NumPy arrays and friends
    if to_list is not None:
        return to_list()
    return str(obj)
12
+
13
def export_channel_json(channel_id):
    """Dump one channel's indexed data to a temporary .json file.

    Returns the path of the written file (caller is responsible for cleanup).
    """
    payload = fetch_channel_data(channel_id)
    fd, path = tempfile.mkstemp(suffix=".json")
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, indent=2, ensure_ascii=False, default=json_serializer)
    return path
modules/youtube_metadata/embeddings.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from openai import OpenAI
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+
6
+
7
+ # Step 1: Load SentenceTransformer model
8
+ # Old MiniLM version:
9
+ # model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
10
+
11
+ # Better MPNet alternative:
12
+ model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
13
+ client = OpenAI()
14
+
15
def _get_hf_embedding(text: str) -> list:
    """Embed ``text`` with the module-level SentenceTransformer model."""
    vector = model.encode(text)
    return vector.tolist()
17
+
18
def _get_openai_embedding(text: str) -> list:
    """Embed ``text`` via the OpenAI embeddings API (text-embedding-3-large)."""
    response = client.embeddings.create(
        model="text-embedding-3-large",
        input=text,
    )
    return response.data[0].embedding
24
+
25
+
26
def get_embedding(text: str) -> list:
    """Return the embedding vector for ``text``.

    Currently delegates to the OpenAI backend; swap in ``_get_hf_embedding``
    here to use the local SentenceTransformer model instead.
    """
    return _get_openai_embedding(text)
modules/youtube_metadata/indexer.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/indexer.py
2
+ from typing import Dict, List
3
+ from openai import OpenAI
4
+ from modules.youtube_metadata.embeddings import get_embedding
5
+ import logging
6
+
7
+ logging.basicConfig()
8
+ logger = logging.getLogger(__name__)
9
+ logger.setLevel(logging.INFO)
10
+
11
+
12
def index_videos(
    videos: List[Dict], collection, channel_url: str, batch_size: int = 50
):
    """Embed and add ``videos`` to ``collection`` in batches of ``batch_size``.

    Each document is "<title> - <description>"; metadata records the video
    id/title/description and channel provenance. Videos without a
    ``video_id`` are skipped with a warning, since Chroma rejects null ids
    (previously they would have been passed through and broken the whole
    ``collection.add`` batch). The unused per-call ``OpenAI()`` client has
    been removed — embeddings come from ``get_embedding``.

    Args:
        videos: Video dicts (``video_id`` required; ``title``, ``description``,
            ``channel_id``, ``channel_title`` optional).
        collection: Target Chroma collection.
        channel_url: Stored in each entry's metadata.
        batch_size: Videos embedded/inserted per round trip.

    Returns:
        Number of videos indexed.
    """
    valid = [v for v in videos if v.get("video_id")]
    skipped = len(videos) - len(valid)
    if skipped:
        logger.warning("index_videos: skipping %d videos without a video_id", skipped)

    total = len(valid)
    logger.info(
        "index_videos: [INDEX] Starting indexing for %d videos (channel=%s)",
        total,
        channel_url,
    )

    for start in range(0, total, batch_size):
        batch = valid[start : start + batch_size]
        end = start + len(batch)
        percent = round((end / total) * 100, 1)

        logger.info(
            "index_videos: [INDEX] Processing batch %d → %d of %d — %s%%",
            start + 1,
            end,
            total,
            percent,
        )

        # Document text is title + description, matching what gets embedded.
        texts = [
            f"{vid.get('title', '')} - {vid.get('description', '')}" for vid in batch
        ]
        embeddings = [get_embedding(text) for text in texts]

        metadatas, ids = [], []
        for vid in batch:
            metadata = {
                "video_id": vid["video_id"],
                "video_title": vid.get("title", ""),
                "description": vid.get("description", ""),
                "channel_url": channel_url,
            }
            if "channel_id" in vid:
                metadata["channel_id"] = vid["channel_id"]
            if "channel_title" in vid:
                metadata["channel_title"] = vid["channel_title"]

            metadatas.append(metadata)
            ids.append(vid["video_id"])

        collection.add(
            documents=texts,
            embeddings=embeddings,
            metadatas=metadatas,
            ids=ids,
        )

        logger.info(
            "index_videos: [INDEX] ✅ Indexed %d videos (total so far: %d/%d — %s%%)",
            len(batch),
            end,
            total,
            percent,
        )

    logger.info(
        "index_videos: [INDEX] 🎉 Finished indexing %d videos for channel=%s",
        total,
        channel_url,
    )
    return total
modules/youtube_metadata/retriever.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/retriever.py
2
+ from typing import List, Dict
3
+ from openai import OpenAI
4
+
5
+ from modules.youtube_metadata.embeddings import get_embedding
6
+
7
+
8
def retrieve_videos(
    query: str, collection, top_k: int = 3, channel_id: str = None
) -> List[Dict]:
    """Semantic search over the video index.

    Args:
        query: Free-text search string.
        collection: Chroma collection to query.
        top_k: Maximum number of results.
        channel_id: Optional filter restricting results to one channel.

    Returns:
        List of dicts with keys video_id, video_title, channel, description,
        and score (Chroma distance — smaller means closer).

    Previously this built an unused ``OpenAI()`` client on every call and
    duplicated the whole ``collection.query`` call just to toggle ``where``;
    both are fixed.
    """
    embedding = get_embedding(query)

    # Build the query once; add the channel filter only when requested.
    query_kwargs = {
        "query_embeddings": [embedding],
        "n_results": top_k,
        "include": ["metadatas", "documents", "distances"],
    }
    if channel_id:
        query_kwargs["where"] = {"channel_id": channel_id}
    results = collection.query(**query_kwargs)

    # Chroma returns one inner list per query embedding; we sent exactly one.
    metadatas_list = results.get("metadatas", [[]])[0]
    documents_list = results.get("documents", [[]])[0]
    distances_list = results.get("distances", [[]])[0]

    videos = []
    for idx, meta in enumerate(metadatas_list):
        videos.append(
            {
                "video_id": meta.get("video_id", ""),
                "video_title": meta.get(
                    "video_title", meta.get("title", documents_list[idx])
                ),
                "channel": meta.get("channel", meta.get("channel_title", "")),
                "description": documents_list[idx] if idx < len(documents_list) else "",
                "score": distances_list[idx] if idx < len(distances_list) else None,
            }
        )

    return videos
modules/youtube_metadata/youtube_poller.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from chromadb import Collection
2
+ import feedparser
3
+ from modules.youtube_metadata.db import get_collection, get_indexed_channels
4
+ from modules.youtube_metadata.embeddings import get_embedding
5
+ import logging
6
+
7
+ logging.basicConfig()
8
+ logger = logging.getLogger(__name__)
9
+ logger.setLevel(logging.INFO)
10
+
11
+
12
def fetch_channel_videos_rss(channel_id, max_results=50):
    """Fetch recent uploads for a channel from YouTube's public RSS feed.

    Returns a list of video dicts (video_id, video_title, description,
    published, video_url, channel_url, channel_id, channel_title,
    channel_author). NOTE(review): the YouTube RSS feed only exposes the most
    recent uploads, so this suits incremental polling, not full backfills.

    Bug fix: the old description fallback chain started with
    ``entry.title_detail.value`` — which is just the *title* — so the
    description duplicated the title whenever one existed. The media
    description / summary are now preferred.
    """
    feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
    logger.info("fetch_channel_videos_rss: feed_url = %s", feed_url)
    feed = feedparser.parse(feed_url)

    # Channel-level fields come from the <feed> header, not the entries.
    channel_title = getattr(feed.feed, "title", None)
    channel_url = getattr(feed.feed, "link", None)
    channel_author = getattr(feed.feed, "author", "")

    logger.info("fetch_channel_videos_rss: channel_title = %s", channel_title)

    videos = []
    for entry in feed.entries[:max_results]:
        description = (
            getattr(entry, "media_description", None)
            or getattr(entry, "summary", None)
            or ""
        )

        videos.append(
            {
                "video_id": entry.yt_videoid,
                "video_title": entry.title,
                "description": description,
                "published": entry.published,
                "video_url": entry.link,
                "channel_url": channel_url,
                "channel_id": channel_id,
                "channel_title": channel_title,
                "channel_author": channel_author,
            }
        )

    return videos
48
+
49
+
50
def get_existing_video_ids(collection, channel_id):
    """Return the set of video_ids already indexed for ``channel_id``."""
    results = collection.get(where={"channel_id": channel_id})
    return {
        meta["video_id"]
        for meta in results.get("metadatas", [])
        if meta and "video_id" in meta
    }
59
+
60
+
61
def filter_new_videos(videos, existing_ids):
    """Keep only the videos whose video_id is not already indexed."""
    fresh = []
    for video in videos:
        if video["video_id"] not in existing_ids:
            fresh.append(video)
    return fresh
63
+
64
+
65
def add_to_chroma(collection: Collection, new_videos):
    """Embed and insert ``new_videos`` (as produced by
    fetch_channel_videos_rss) into ``collection``.

    Bug fix: the previous version read ``v["title"]`` and ``v["link"]``, but
    fetch_channel_videos_rss emits ``video_title`` and ``video_url`` — every
    poll cycle that found new videos raised KeyError. Metadata now also
    stores ``video_title``/``channel_title`` for consistency with the bulk
    indexer (index_videos).
    """
    if not new_videos:
        return
    titles = [v["video_title"] for v in new_videos]
    collection.add(
        documents=titles,
        embeddings=[get_embedding(title) for title in titles],
        metadatas=[
            {
                "video_id": v["video_id"],
                "channel_id": v["channel_id"],
                "video_title": v["video_title"],
                "channel_title": v.get("channel_title", ""),
                "link": v["video_url"],
            }
            for v in new_videos
        ],
        ids=[v["video_id"] for v in new_videos],
    )
81
+
82
+
83
def incremental_update(collection, channel_id):
    """Fetch the channel's RSS feed and index any videos not yet stored."""
    existing_ids = get_existing_video_ids(collection, channel_id)
    latest_videos = fetch_channel_videos_rss(channel_id)
    new_videos = filter_new_videos(latest_videos, existing_ids)

    if new_videos:
        add_to_chroma(collection, new_videos)
        logger.info(
            "incremental_update: Added %d new videos from %s",
            len(new_videos),
            channel_id,
        )
    else:
        # fixed log-prefix typo: was "incremental_uddate"
        logger.info("incremental_update: No new videos for %s", channel_id)
95
+
96
+
97
def start_poll():
    """Poll every indexed channel for new videos, forever, every 10 minutes.

    Bug fix: the previous version snapshot the channel list once before the
    loop, so channels indexed after the poller started were never polled
    until a process restart. The list is now re-read each cycle.
    """
    import time

    while True:
        collection = get_collection()
        for channel_id in get_indexed_channels(collection).keys():
            incremental_update(collection, channel_id)
        time.sleep(600)  # 10 minutes between poll cycles
modules/youtube_metadata/youtube_sync.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import gradio as gr
3
+ from concurrent.futures import ThreadPoolExecutor, as_completed
4
+
5
+ from modules.youtube_metadata.collector import fetch_all_channel_videos
6
+ from modules.youtube_metadata.db import get_collection
7
+ from modules.youtube_metadata.indexer import index_videos
8
+
9
# Cooperative-cancellation flag shared with sync_channels_from_youtube.
stop_event = threading.Event()
MAX_BATCHES = 200  # safety cutoff


def stop_sync():
    """External call to stop the sync process."""
    stop_event.set()
16
+
17
def sync_channels_from_youtube(api_key, channel_urls: list, progress: gr.Progress = None):
    """Sync multiple channels, yielding (progress_message, videos_indexed_in_batch).

    Honors the module-level stop_event: checked before each channel starts.
    """
    global stop_event
    stop_event.clear()

    total_channels = len(channel_urls)
    total_videos = 0

    for idx, channel_url in enumerate(channel_urls, 1):
        if stop_event.is_set():
            yield f"🛑 Stopped before processing channel: {channel_url}", 0
            break

        yield f"🔄 Syncing {channel_url} ({idx}/{total_channels})", 0

        # Relay per-batch progress from the single-channel worker.
        for message, batch_count in _refresh_single_channel(api_key, channel_url, progress):
            total_videos += batch_count
            yield message, batch_count

    yield f"✅ Finished syncing. Total channels: {total_channels}, total videos: {total_videos}", 0
40
+
41
+
42
def _refresh_single_channel(api_key, channel_url, progress):
    """Fetch and index every video of one channel, yielding progress tuples.

    Yields (message, videos_indexed_increment). Batches are indexed in
    parallel; a stop request aborts between completed batches.

    Bug fix: when index_videos returned None, the old fallback counted
    ``len(all_videos)`` (the WHOLE channel) for that one batch, wildly
    over-reporting progress. Futures are now mapped to their own batch
    sizes so the fallback is exact.
    """
    # Fetch all batches up front so we know the true total.
    fetched_batches = list(fetch_all_channel_videos(api_key, channel_url))
    all_videos = [
        v | {"channel_url": channel_url} for _, batch in fetched_batches for v in batch
    ]
    total_videos = len(all_videos)

    if total_videos == 0:
        yield f"{channel_url}: No videos found", 0
        return

    with ThreadPoolExecutor(max_workers=4) as executor:
        # future -> size of the batch it is indexing
        future_sizes = {
            executor.submit(
                index_videos, batch, get_collection(), channel_url=channel_url
            ): len(batch)
            for _, batch in fetched_batches
        }

        completed_videos = 0
        for future in as_completed(future_sizes):
            if stop_event.is_set():
                yield "🛑 Stop requested during indexing stage", completed_videos
                break

            try:
                indexed_count = future.result()
                if indexed_count is None:
                    # Fall back to this batch's own size, not the whole channel.
                    indexed_count = future_sizes[future]
            except Exception as e:
                indexed_count = 0
                yield f"⚠️ Error indexing {channel_url}: {e}", completed_videos

            completed_videos += indexed_count
            pct = 100.0 * completed_videos / max(1, total_videos)

            if progress:
                progress(completed_videos / total_videos)

            yield f"{channel_url}: Indexed {completed_videos}/{total_videos} videos — {pct:.1f}%", completed_videos
modules/youtube_metadata/youtube_utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_channel_id(youtube, channel_url: str) -> str:
    """Resolve a YouTube channel URL or handle to its UC... channel id.

    Supported inputs:
      - https://www.youtube.com/channel/UCxxxx (id extracted from the URL)
      - https://www.youtube.com/@handle or bare @handle (resolved via the
        YouTube Data API ``channels.list(forHandle=...)``)
      - a bare UC... id (returned unchanged)

    Raises:
        ValueError: if the input matches none of the supported formats.
    """
    # Canonical /channel/UC... URL: the id is right there.
    if "channel/" in channel_url:
        tail = channel_url.split("channel/")[-1]
        return tail.split("/")[0]

    # Handle form (@xyz or a full URL containing one): ask the API.
    if "@" in channel_url:
        handle = channel_url.split("@")[-1]
        response = youtube.channels().list(part="id", forHandle=handle).execute()
        return response["items"][0]["id"]

    # Already a bare channel id.
    if channel_url.startswith("UC"):
        return channel_url

    raise ValueError(f"Unsupported channel URL format {channel_url}")
pyproject.toml CHANGED
@@ -8,10 +8,12 @@ dependencies = [
8
  "chromadb>=1.0.15",
9
  "dotenv>=0.9.9",
10
  "dropbox>=12.0.2",
 
11
  "google-api-python-client>=2.177.0",
12
  "google-auth-httplib2>=0.2.0",
13
  "google-auth-oauthlib>=1.2.2",
14
  "gradio>=5.39.0",
 
15
  "gspread>=6.2.1",
16
  "langchain>=0.3.27",
17
  "langchain-community>=0.3.27",
 
8
  "chromadb>=1.0.15",
9
  "dotenv>=0.9.9",
10
  "dropbox>=12.0.2",
11
+ "feedparser>=6.0.12",
12
  "google-api-python-client>=2.177.0",
13
  "google-auth-httplib2>=0.2.0",
14
  "google-auth-oauthlib>=1.2.2",
15
  "gradio>=5.39.0",
16
+ "gradio-modal>=0.0.4",
17
  "gspread>=6.2.1",
18
  "langchain>=0.3.27",
19
  "langchain-community>=0.3.27",
uv.lock CHANGED
@@ -502,6 +502,18 @@ wheels = [
502
  { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" },
503
  ]
504
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  [[package]]
506
  name = "ffmpy"
507
  version = "0.6.1"
@@ -740,6 +752,18 @@ wheels = [
740
  { url = "https://files.pythonhosted.org/packages/e0/38/7f50ae95de8fa419276742230f57a34e8c0f47231da0ad54479dd0088972/gradio_client-1.11.0-py3-none-any.whl", hash = "sha256:afb714aea50224f6f04679fe2ce79c1be75011012d0dc3b3ee575610a0dc8eb2", size = 324452, upload-time = "2025-07-17T02:02:44.542Z" },
741
  ]
742
 
 
 
 
 
 
 
 
 
 
 
 
 
743
  [[package]]
744
  name = "greenlet"
745
  version = "3.2.3"
@@ -2773,10 +2797,12 @@ dependencies = [
2773
  { name = "chromadb" },
2774
  { name = "dotenv" },
2775
  { name = "dropbox" },
 
2776
  { name = "google-api-python-client" },
2777
  { name = "google-auth-httplib2" },
2778
  { name = "google-auth-oauthlib" },
2779
  { name = "gradio" },
 
2780
  { name = "gspread" },
2781
  { name = "langchain" },
2782
  { name = "langchain-community" },
@@ -2792,10 +2818,12 @@ requires-dist = [
2792
  { name = "chromadb", specifier = ">=1.0.15" },
2793
  { name = "dotenv", specifier = ">=0.9.9" },
2794
  { name = "dropbox", specifier = ">=12.0.2" },
 
2795
  { name = "google-api-python-client", specifier = ">=2.177.0" },
2796
  { name = "google-auth-httplib2", specifier = ">=0.2.0" },
2797
  { name = "google-auth-oauthlib", specifier = ">=1.2.2" },
2798
  { name = "gradio", specifier = ">=5.39.0" },
 
2799
  { name = "gspread", specifier = ">=6.2.1" },
2800
  { name = "langchain", specifier = ">=0.3.27" },
2801
  { name = "langchain-community", specifier = ">=0.3.27" },
@@ -2928,6 +2956,12 @@ wheels = [
2928
  { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
2929
  ]
2930
 
 
 
 
 
 
 
2931
  [[package]]
2932
  name = "shellingham"
2933
  version = "1.5.4"
 
502
  { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" },
503
  ]
504
 
505
+ [[package]]
506
+ name = "feedparser"
507
+ version = "6.0.12"
508
+ source = { registry = "https://pypi.org/simple" }
509
+ dependencies = [
510
+ { name = "sgmllib3k" },
511
+ ]
512
+ sdist = { url = "https://files.pythonhosted.org/packages/dc/79/db7edb5e77d6dfbc54d7d9df72828be4318275b2e580549ff45a962f6461/feedparser-6.0.12.tar.gz", hash = "sha256:64f76ce90ae3e8ef5d1ede0f8d3b50ce26bcce71dd8ae5e82b1cd2d4a5f94228", size = 286579, upload-time = "2025-09-10T13:33:59.486Z" }
513
+ wheels = [
514
+ { url = "https://files.pythonhosted.org/packages/4e/eb/c96d64137e29ae17d83ad2552470bafe3a7a915e85434d9942077d7fd011/feedparser-6.0.12-py3-none-any.whl", hash = "sha256:6bbff10f5a52662c00a2e3f86a38928c37c48f77b3c511aedcd51de933549324", size = 81480, upload-time = "2025-09-10T13:33:58.022Z" },
515
+ ]
516
+
517
  [[package]]
518
  name = "ffmpy"
519
  version = "0.6.1"
 
752
  { url = "https://files.pythonhosted.org/packages/e0/38/7f50ae95de8fa419276742230f57a34e8c0f47231da0ad54479dd0088972/gradio_client-1.11.0-py3-none-any.whl", hash = "sha256:afb714aea50224f6f04679fe2ce79c1be75011012d0dc3b3ee575610a0dc8eb2", size = 324452, upload-time = "2025-07-17T02:02:44.542Z" },
753
  ]
754
 
755
+ [[package]]
756
+ name = "gradio-modal"
757
+ version = "0.0.4"
758
+ source = { registry = "https://pypi.org/simple" }
759
+ dependencies = [
760
+ { name = "gradio" },
761
+ ]
762
+ sdist = { url = "https://files.pythonhosted.org/packages/e2/fd/3b383f9ee8d60625e9e26871ba4adcacbedeab132041b94290758e02e543/gradio_modal-0.0.4.tar.gz", hash = "sha256:717ae699072a171648cfa1b84bc153be84e92d04e9ad58c1bc59af68ef332726", size = 1180812, upload-time = "2024-10-15T23:46:06.134Z" }
763
+ wheels = [
764
+ { url = "https://files.pythonhosted.org/packages/05/3d/76f454de84ae1dccbf2b7023e933afb8dde5fdd89e9476786726ef770737/gradio_modal-0.0.4-py3-none-any.whl", hash = "sha256:d96e817d2e934d9e1b835b06474f45fd349b5ccea499d1536bfb4bd38f62dedb", size = 1106241, upload-time = "2024-10-15T23:46:04.13Z" },
765
+ ]
766
+
767
  [[package]]
768
  name = "greenlet"
769
  version = "3.2.3"
 
2797
  { name = "chromadb" },
2798
  { name = "dotenv" },
2799
  { name = "dropbox" },
2800
+ { name = "feedparser" },
2801
  { name = "google-api-python-client" },
2802
  { name = "google-auth-httplib2" },
2803
  { name = "google-auth-oauthlib" },
2804
  { name = "gradio" },
2805
+ { name = "gradio-modal" },
2806
  { name = "gspread" },
2807
  { name = "langchain" },
2808
  { name = "langchain-community" },
 
2818
  { name = "chromadb", specifier = ">=1.0.15" },
2819
  { name = "dotenv", specifier = ">=0.9.9" },
2820
  { name = "dropbox", specifier = ">=12.0.2" },
2821
+ { name = "feedparser", specifier = ">=6.0.12" },
2822
  { name = "google-api-python-client", specifier = ">=2.177.0" },
2823
  { name = "google-auth-httplib2", specifier = ">=0.2.0" },
2824
  { name = "google-auth-oauthlib", specifier = ">=1.2.2" },
2825
  { name = "gradio", specifier = ">=5.39.0" },
2826
+ { name = "gradio-modal", specifier = ">=0.0.4" },
2827
  { name = "gspread", specifier = ">=6.2.1" },
2828
  { name = "langchain", specifier = ">=0.3.27" },
2829
  { name = "langchain-community", specifier = ">=0.3.27" },
 
2956
  { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
2957
  ]
2958
 
2959
+ [[package]]
2960
+ name = "sgmllib3k"
2961
+ version = "1.0.0"
2962
+ source = { registry = "https://pypi.org/simple" }
2963
+ sdist = { url = "https://files.pythonhosted.org/packages/9e/bd/3704a8c3e0942d711c1299ebf7b9091930adae6675d7c8f476a7ce48653c/sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9", size = 5750, upload-time = "2010-08-24T14:33:52.445Z" }
2964
+
2965
  [[package]]
2966
  name = "shellingham"
2967
  version = "1.5.4"