Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files- app.py +11 -4
- main.py +10 -4
- modules/home/app.py +7 -0
- modules/youtube_metadata/answerer.py +116 -0
- modules/youtube_metadata/app.py +500 -0
- modules/youtube_metadata/channel_utils.py +120 -0
- modules/youtube_metadata/collector.py +65 -0
- modules/youtube_metadata/db.py +65 -0
- modules/youtube_metadata/downloader.py +20 -0
- modules/youtube_metadata/embeddings.py +31 -0
- modules/youtube_metadata/indexer.py +71 -0
- modules/youtube_metadata/retriever.py +49 -0
- modules/youtube_metadata/youtube_poller.py +105 -0
- modules/youtube_metadata/youtube_sync.py +78 -0
- modules/youtube_metadata/youtube_utils.py +26 -0
- pyproject.toml +2 -0
- uv.lock +34 -0
app.py
CHANGED
|
@@ -23,16 +23,23 @@ from db import SanatanDatabase
|
|
| 23 |
from drive_downloader import ZipDownloader
|
| 24 |
from graph_helper import generate_graph
|
| 25 |
from nalayiram_helper import delete_taniyan
|
|
|
|
| 26 |
|
| 27 |
# Logging
|
| 28 |
logging.basicConfig()
|
| 29 |
logger = logging.getLogger()
|
| 30 |
-
logger.setLevel(logging.INFO)
|
| 31 |
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
|
|
|
| 35 |
|
|
|
|
| 36 |
|
| 37 |
def get_all_languages():
|
| 38 |
"""
|
|
@@ -89,7 +96,7 @@ message_textbox = gr.Textbox(
|
|
| 89 |
|
| 90 |
with gr.Blocks(
|
| 91 |
theme=gr.themes.Citrus(),
|
| 92 |
-
title="Sanatan-AI",
|
| 93 |
css="""
|
| 94 |
/* hide the additional inputs row under the textbox */
|
| 95 |
.gr-chat-interface .gr-form {
|
|
|
|
| 23 |
from drive_downloader import ZipDownloader
|
| 24 |
from graph_helper import generate_graph
|
| 25 |
from nalayiram_helper import delete_taniyan
|
| 26 |
+
import pycountry
|
| 27 |
|
| 28 |
# Logging
|
| 29 |
logging.basicConfig()
|
| 30 |
logger = logging.getLogger()
|
|
|
|
| 31 |
|
| 32 |
+
logger.setLevel(logging.INFO)
|
| 33 |
+
# Suppress OpenAI debug logs
|
| 34 |
+
logging.getLogger("openai").setLevel(logging.WARNING)
|
| 35 |
+
# Silence httpx + httpcore logs
|
| 36 |
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
| 37 |
+
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
| 38 |
|
| 39 |
+
# (Optional) Silence OpenAI logs too
|
| 40 |
+
logging.getLogger("openai").setLevel(logging.WARNING)
|
| 41 |
|
| 42 |
+
graph = generate_graph()
|
| 43 |
|
| 44 |
def get_all_languages():
|
| 45 |
"""
|
|
|
|
| 96 |
|
| 97 |
with gr.Blocks(
|
| 98 |
theme=gr.themes.Citrus(),
|
| 99 |
+
title="Sanatan-AI | Chat",
|
| 100 |
css="""
|
| 101 |
/* hide the additional inputs row under the textbox */
|
| 102 |
.gr-chat-interface .gr-form {
|
main.py
CHANGED
|
@@ -4,6 +4,8 @@ from fastapi.responses import RedirectResponse
|
|
| 4 |
import uvicorn
|
| 5 |
from fastapi import FastAPI
|
| 6 |
from modules.dropbox.audio import cleanup_audio_url_cache
|
|
|
|
|
|
|
| 7 |
from server import router as mobile_router
|
| 8 |
from app import gradio_app # your Blocks object
|
| 9 |
import gradio as gr
|
|
@@ -18,12 +20,16 @@ app = FastAPI(title="Sanatan AI Unified Server")
|
|
| 18 |
app.include_router(mobile_router, prefix="/api")
|
| 19 |
|
| 20 |
# Convert Gradio Blocks to ASGI app
|
| 21 |
-
app = gr.mount_gradio_app(app, gradio_app,"/
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
@app.get("/")
|
| 25 |
async def redirect_to_web():
|
| 26 |
-
return RedirectResponse(url="/
|
| 27 |
|
| 28 |
@app.middleware("http")
|
| 29 |
async def log_requests(request: Request, call_next):
|
|
@@ -40,4 +46,4 @@ async def lifespan(app: FastAPI):
|
|
| 40 |
# Shutdown code (optional) can go here
|
| 41 |
|
| 42 |
if __name__ == "__main__":
|
| 43 |
-
uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)
|
|
|
|
| 4 |
import uvicorn
|
| 5 |
from fastapi import FastAPI
|
| 6 |
from modules.dropbox.audio import cleanup_audio_url_cache
|
| 7 |
+
from modules.home.app import home_app
|
| 8 |
+
from modules.youtube_metadata.app import youtube_metadata_app
|
| 9 |
from server import router as mobile_router
|
| 10 |
from app import gradio_app # your Blocks object
|
| 11 |
import gradio as gr
|
|
|
|
| 20 |
app.include_router(mobile_router, prefix="/api")
|
| 21 |
|
| 22 |
# Convert Gradio Blocks to ASGI app
|
| 23 |
+
app = gr.mount_gradio_app(app, gradio_app,"/sanatan_ai_web")
|
| 24 |
|
| 25 |
+
app = gr.mount_gradio_app(app, youtube_metadata_app,"/yt_web")
|
| 26 |
+
|
| 27 |
+
app = gr.mount_gradio_app(app, home_app,"/home")
|
| 28 |
+
|
| 29 |
+
# Redirect root URL to /home/
|
| 30 |
@app.get("/")
|
| 31 |
async def redirect_to_web():
|
| 32 |
+
return RedirectResponse(url="/home/")
|
| 33 |
|
| 34 |
@app.middleware("http")
|
| 35 |
async def log_requests(request: Request, call_next):
|
|
|
|
| 46 |
# Shutdown code (optional) can go here
|
| 47 |
|
| 48 |
if __name__ == "__main__":
|
| 49 |
+
uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False, access_log=False)
|
modules/home/app.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
with gr.Blocks(title="Sanatana AI - Home") as home_app:
|
| 4 |
+
gr.Markdown("## Welcome to Sanatan AI!")
|
| 5 |
+
with gr.Row():
|
| 6 |
+
gr.Button("Go to Sanatan AI", link="/sanatan_ai_web") # link to /web
|
| 7 |
+
gr.Button("Manage Youtube Metadata", link="/yt_web") # link to /yt_web
|
modules/youtube_metadata/answerer.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -------------------------------
|
| 2 |
+
# 4. Answerer
|
| 3 |
+
# -------------------------------
|
| 4 |
+
from typing import List
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
from modules.youtube_metadata.retriever import retrieve_videos
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# -------------------------------
|
| 11 |
+
# Structured Output Classes
|
| 12 |
+
# -------------------------------
|
| 13 |
+
class VideoItem(BaseModel):
|
| 14 |
+
video_id: str
|
| 15 |
+
title: str
|
| 16 |
+
channel: str
|
| 17 |
+
description: str
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class LLMAnswer(BaseModel):
|
| 21 |
+
answer_text: str
|
| 22 |
+
top_videos: List[VideoItem]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# -------------------------------
|
| 26 |
+
# Main Function
|
| 27 |
+
# -------------------------------
|
| 28 |
+
def answer_query(
|
| 29 |
+
query: str, collection, top_k: int = 5, channel_id: str = None
|
| 30 |
+
) -> LLMAnswer:
|
| 31 |
+
"""
|
| 32 |
+
Answer a user query using YouTube video metadata.
|
| 33 |
+
Returns an LLMAnswer object with textual answer + list of videos.
|
| 34 |
+
"""
|
| 35 |
+
results = retrieve_videos(query, collection, top_k=top_k, channel_id=channel_id)
|
| 36 |
+
|
| 37 |
+
if not results:
|
| 38 |
+
return LLMAnswer(answer_text="No relevant videos found.", top_videos=[])
|
| 39 |
+
|
| 40 |
+
# Build context lines for the LLM
|
| 41 |
+
context_lines = []
|
| 42 |
+
for r in results:
|
| 43 |
+
if not isinstance(r, dict):
|
| 44 |
+
continue
|
| 45 |
+
vid_id = r.get("video_id", "")
|
| 46 |
+
title = r.get("video_title") or r.get("title", "")
|
| 47 |
+
channel = r.get("channel") or r.get("channel_title", "")
|
| 48 |
+
description = r.get("description", "")
|
| 49 |
+
context_lines.append(
|
| 50 |
+
f"- {title} ({channel}) (https://youtube.com/watch?v={vid_id})\n description: {description}"
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
context_text = "\n".join(context_lines)
|
| 54 |
+
|
| 55 |
+
# Call LLM with structured output
|
| 56 |
+
client = OpenAI()
|
| 57 |
+
response = client.chat.completions.parse(
|
| 58 |
+
model="gpt-4o-mini",
|
| 59 |
+
messages=[
|
| 60 |
+
{
|
| 61 |
+
"role": "system",
|
| 62 |
+
"content": (
|
| 63 |
+
"You are a helpful assistant that answers questions using YouTube video metadata. "
|
| 64 |
+
"Return your response strictly as the LLMAnswer class, including 'answer_text' and a list of **only the most relevant** 'top_videos'.\n"
|
| 65 |
+
"- `answer_text` MUST be very short and concise in natural language (max 100 words).\n"
|
| 66 |
+
"- Use `top_videos` to include only the top 3 most relevant items from context.\n"
|
| 67 |
+
"- Do not include all items unless all are clearly relevant.\n"
|
| 68 |
+
"- Do not makeup `description`. Use the exact descriptions as given in the context"
|
| 69 |
+
),
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"role": "user",
|
| 73 |
+
"content": f"Question: {query}\n\nCandidate videos:\n{context_text}\n\nPick only the relevant ones.",
|
| 74 |
+
},
|
| 75 |
+
],
|
| 76 |
+
response_format=LLMAnswer,
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
llm_answer = response.choices[0].message.parsed
|
| 80 |
+
answer_text = "\n## Answer : \n" + llm_answer.answer_text
|
| 81 |
+
video_html = build_video_html(llm_answer.top_videos)
|
| 82 |
+
return answer_text, video_html
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def build_video_html(videos: list[VideoItem]) -> str:
|
| 86 |
+
"""Build a clean HTML table from top_videos."""
|
| 87 |
+
if not videos:
|
| 88 |
+
return "<p>No relevant videos found.</p>"
|
| 89 |
+
|
| 90 |
+
html = """
|
| 91 |
+
<table border="1" style="border-collapse: collapse; width: 100%;">
|
| 92 |
+
<tr>
|
| 93 |
+
<th>Description</th>
|
| 94 |
+
<th>Watch</th>
|
| 95 |
+
</tr>
|
| 96 |
+
"""
|
| 97 |
+
for v in videos:
|
| 98 |
+
embed_html = f"""
|
| 99 |
+
<div style="margin-bottom: 20px;">
|
| 100 |
+
<strong>{v.title}</strong> ({v.channel})<br>
|
| 101 |
+
<iframe width="360" height="203"
|
| 102 |
+
src="https://www.youtube.com/embed/{v.video_id}"
|
| 103 |
+
frameborder="0"
|
| 104 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
|
| 105 |
+
allowfullscreen>
|
| 106 |
+
</iframe>
|
| 107 |
+
</div>
|
| 108 |
+
"""
|
| 109 |
+
html += f"""
|
| 110 |
+
<tr>
|
| 111 |
+
<td>{v.description}</td>
|
| 112 |
+
<td>{embed_html}</td>
|
| 113 |
+
</tr>
|
| 114 |
+
"""
|
| 115 |
+
html += "</table>"
|
| 116 |
+
return html
|
modules/youtube_metadata/app.py
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
import threading
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from gradio_modal import Modal
|
| 7 |
+
from modules.youtube_metadata.downloader import export_channel_json
|
| 8 |
+
from modules.youtube_metadata.channel_utils import fetch_channel_dataframe
|
| 9 |
+
from modules.youtube_metadata.db import (
|
| 10 |
+
delete_channel_from_collection,
|
| 11 |
+
get_collection,
|
| 12 |
+
get_indexed_channels,
|
| 13 |
+
)
|
| 14 |
+
from modules.youtube_metadata.answerer import answer_query
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
|
| 17 |
+
from modules.youtube_metadata.youtube_poller import start_poll
|
| 18 |
+
from modules.youtube_metadata.youtube_sync import sync_channels_from_youtube
|
| 19 |
+
|
| 20 |
+
load_dotenv()
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# -------------------------------
|
| 24 |
+
# Utility functions
|
| 25 |
+
# -------------------------------
|
| 26 |
+
def refresh_channel_list():
|
| 27 |
+
return gr.update(choices=list_channels_radio())
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def show_component():
|
| 31 |
+
return gr.update(visible=True)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def hide_component():
|
| 35 |
+
return gr.update(visible=False)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def open_component():
|
| 39 |
+
return gr.update(open=True)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def close_component():
|
| 43 |
+
return gr.update(open=False)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def enable_component():
|
| 47 |
+
return gr.update(interactive=True)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def disable_component():
|
| 51 |
+
return gr.update(interactive=False)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def clear_component():
|
| 55 |
+
return gr.update(value="")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def show_loading(question):
|
| 59 |
+
return gr.update(value=f"⏳Fetching details on [{question}]...")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def enable_if_not_none(question):
|
| 63 |
+
if question is None:
|
| 64 |
+
return disable_component()
|
| 65 |
+
else:
|
| 66 |
+
return enable_component()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def index_channels(channel_urls: str):
|
| 70 |
+
yield "saving ...", gr.update(), gr.update()
|
| 71 |
+
yt_api_key = os.environ["YOUTUBE_API_KEY"]
|
| 72 |
+
|
| 73 |
+
urls = [u.strip() for u in re.split(r"[\n,]+", channel_urls) if u.strip()]
|
| 74 |
+
total_videos = 0
|
| 75 |
+
|
| 76 |
+
# sync all channels, streaming progress
|
| 77 |
+
for message, videos_count in sync_channels_from_youtube(yt_api_key, urls):
|
| 78 |
+
total_videos = videos_count # accumulate actual number of videos indexed
|
| 79 |
+
yield message, gr.update(), gr.update()
|
| 80 |
+
|
| 81 |
+
# final UI update
|
| 82 |
+
yield (
|
| 83 |
+
f"✅ Indexed {total_videos} videos from {len(urls)} channels.",
|
| 84 |
+
refresh_channel_list(),
|
| 85 |
+
list_channels_radio(),
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def youtube_metadata_init(progress: gr.Progress = None):
|
| 90 |
+
channels = (
|
| 91 |
+
"https://www.youtube.com/@onedayonepasuram6126,"
|
| 92 |
+
"https://www.youtube.com/@srisookthi,"
|
| 93 |
+
"https://www.youtube.com/@learn-aksharam,"
|
| 94 |
+
"https://www.youtube.com/@SriYadugiriYathirajaMutt,"
|
| 95 |
+
"https://www.youtube.com/@akivasudev,"
|
| 96 |
+
"https://www.youtube.com/@Arulicheyal_Amutham"
|
| 97 |
+
)
|
| 98 |
+
for msg, upd, upd in index_channels(channels):
|
| 99 |
+
# print(resp)
|
| 100 |
+
yield msg
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def refresh_all_channels():
|
| 104 |
+
yt_api_key = os.environ["YOUTUBE_API_KEY"]
|
| 105 |
+
channels = get_indexed_channels(get_collection())
|
| 106 |
+
|
| 107 |
+
if not channels:
|
| 108 |
+
return "⚠️ No channels available to refresh.", refresh_channel_list()
|
| 109 |
+
|
| 110 |
+
# build list of URLs
|
| 111 |
+
urls = []
|
| 112 |
+
for key, val in channels.items():
|
| 113 |
+
url = val.get("channel_url") if isinstance(val, dict) else key
|
| 114 |
+
if url:
|
| 115 |
+
urls.append(url)
|
| 116 |
+
|
| 117 |
+
# re-index all at once
|
| 118 |
+
total_videos = sync_channels_from_youtube(yt_api_key, urls)
|
| 119 |
+
|
| 120 |
+
return (
|
| 121 |
+
f"🔄 Refreshed {len(urls)} channels, re-indexed {total_videos} videos.",
|
| 122 |
+
refresh_channel_list(),
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# -------------------------------
|
| 127 |
+
# Channel selection as radio
|
| 128 |
+
# -------------------------------
|
| 129 |
+
def list_channels_radio():
|
| 130 |
+
channels = get_indexed_channels(get_collection())
|
| 131 |
+
choices = []
|
| 132 |
+
for key, val in channels.items():
|
| 133 |
+
if isinstance(val, dict):
|
| 134 |
+
channel_display_name = val.get("channel_title", "Unknown")
|
| 135 |
+
channel_id = val.get("channel_url")
|
| 136 |
+
else:
|
| 137 |
+
channel_display_name = val
|
| 138 |
+
channel_id = key
|
| 139 |
+
if channel_id:
|
| 140 |
+
choices.append((channel_display_name, channel_id))
|
| 141 |
+
# print("choices= ", choices)
|
| 142 |
+
return choices
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
# Delete a channel
|
| 146 |
+
# -------------------------------
|
| 147 |
+
def delete_channel(channel_url: str):
|
| 148 |
+
delete_channel_from_collection(channel_url)
|
| 149 |
+
# Return updated radio choices
|
| 150 |
+
return refresh_channel_list()
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# -------------------------------
|
| 154 |
+
# LLM query
|
| 155 |
+
# -------------------------------
|
| 156 |
+
def handle_query(query: str, search_channel_id: str):
|
| 157 |
+
answer_text, video_html = answer_query(
|
| 158 |
+
query, get_collection(), channel_id=search_channel_id, top_k=10
|
| 159 |
+
)
|
| 160 |
+
if not answer_text:
|
| 161 |
+
answer_text = "No answer available."
|
| 162 |
+
if not video_html or not isinstance(video_html, str):
|
| 163 |
+
video_html = "" # ensure string for gr.HTML
|
| 164 |
+
return answer_text, video_html
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
# -------------------------------
|
| 168 |
+
# Gradio UI
|
| 169 |
+
# -------------------------------
|
| 170 |
+
with gr.Blocks(title="Sanatana AI - Youtube Metadata Surfer") as youtube_metadata_app:
|
| 171 |
+
gr.Markdown("### 📺 YouTube Channel Surfer")
|
| 172 |
+
|
| 173 |
+
with Modal(visible=False) as download_modal:
|
| 174 |
+
with gr.Row():
|
| 175 |
+
gr.Column()
|
| 176 |
+
download_status = gr.Markdown("## Preparing the file ...")
|
| 177 |
+
gr.Column()
|
| 178 |
+
with gr.Row():
|
| 179 |
+
gr.Column()
|
| 180 |
+
download_ready_btn = gr.DownloadButton(
|
| 181 |
+
label="Click to Download",
|
| 182 |
+
visible=False,
|
| 183 |
+
variant="primary",
|
| 184 |
+
scale=0,
|
| 185 |
+
)
|
| 186 |
+
gr.Column()
|
| 187 |
+
|
| 188 |
+
# Modal to show channel videos
|
| 189 |
+
with Modal(visible=False) as videos_list_modal:
|
| 190 |
+
gr.Markdown("### Videos List")
|
| 191 |
+
|
| 192 |
+
# the HTML table that shows one page of videos
|
| 193 |
+
# modal_html = gr.HTML()
|
| 194 |
+
channel_videos_df = gr.DataFrame(
|
| 195 |
+
show_search=True,
|
| 196 |
+
show_copy_button=True,
|
| 197 |
+
show_fullscreen_button=True,
|
| 198 |
+
datatype=[
|
| 199 |
+
"int",
|
| 200 |
+
"str",
|
| 201 |
+
"str",
|
| 202 |
+
"html",
|
| 203 |
+
],
|
| 204 |
+
headers=["#", "title", "description", "url"],
|
| 205 |
+
column_widths=["5%", "25%", "60%", "10%"],
|
| 206 |
+
wrap=True,
|
| 207 |
+
col_count=(4, "fixed"),
|
| 208 |
+
)
|
| 209 |
+
|
| 210 |
+
# Modal to add new channels
|
| 211 |
+
with Modal(visible=False) as add_channel_modal:
|
| 212 |
+
channel_input = gr.Textbox(
|
| 213 |
+
label="Channel URLs",
|
| 214 |
+
placeholder="Paste one or more YouTube channel URLs (comma or newline separated)",
|
| 215 |
+
)
|
| 216 |
+
examples = {
|
| 217 |
+
"Comma Separated Channels Example": "https://www.youtube.com/@onedayonepasuram6126,https://www.youtube.com/@srisookthi,https://www.youtube.com/@learn-aksharam,https://www.youtube.com/@SriYadugiriYathirajaMutt",
|
| 218 |
+
"Newline Separated Channels Example": "https://www.youtube.com/@onedayonepasuram6126\nhttps://www.youtube.com/@srisookthi\nhttps://www.youtube.com/@learn-aksharam\nhttps://www.youtube.com/@SriYadugiriYathirajaMutt",
|
| 219 |
+
"One Day One Pasuram": "https://www.youtube.com/@onedayonepasuram6126",
|
| 220 |
+
"Sri Sookthi": "https://www.youtube.com/@srisookthi",
|
| 221 |
+
"Aksharam": "https://www.youtube.com/@learn-aksharam",
|
| 222 |
+
"Cricinfo": "https://www.youtube.com/@espncricinfo",
|
| 223 |
+
"Chanakyaa": "https://www.youtube.com/@ChanakyaaTV",
|
| 224 |
+
"Aptitude Guru": "https://www.youtube.com/@AptitudeGuruHem",
|
| 225 |
+
"Universe Genius": "https://www.youtube.com/@UniverseGenius",
|
| 226 |
+
"Praveen Mohan": "https://www.youtube.com/@RealPraveenMohan",
|
| 227 |
+
"Yathiraja Mutt": "https://www.youtube.com/@SriYadugiriYathirajaMutt",
|
| 228 |
+
"Vasudevan Srinivasachariar": "https://www.youtube.com/@akivasudev",
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
def set_example(label):
|
| 232 |
+
return examples[label]
|
| 233 |
+
|
| 234 |
+
gr.Markdown("Click on any example below and then click on add channels button.")
|
| 235 |
+
with gr.Row():
|
| 236 |
+
for label in examples:
|
| 237 |
+
gr.Button(label, size="sm", variant="huggingface", scale=0).click(
|
| 238 |
+
fn=set_example,
|
| 239 |
+
inputs=gr.State(label),
|
| 240 |
+
outputs=channel_input,
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
with gr.Row():
|
| 244 |
+
gr.Column()
|
| 245 |
+
save_add_channels_btn = gr.Button(
|
| 246 |
+
"Add Channel(s)", scale=0, variant="primary"
|
| 247 |
+
)
|
| 248 |
+
gr.Column()
|
| 249 |
+
index_status = gr.Markdown(label="Index Status", container=False)
|
| 250 |
+
|
| 251 |
+
with gr.Row():
|
| 252 |
+
# Sidebar
|
| 253 |
+
with gr.Sidebar() as my_sidebar:
|
| 254 |
+
gr.Markdown("### 📺 Channels")
|
| 255 |
+
channel_list_values = list_channels_radio()
|
| 256 |
+
channel_list_state = gr.State(channel_list_values)
|
| 257 |
+
|
| 258 |
+
no_channels_message = gr.Markdown(
|
| 259 |
+
"⚠️ **No channels available.**",
|
| 260 |
+
visible=False if channel_list_values else True,
|
| 261 |
+
)
|
| 262 |
+
channel_radio = gr.Radio(
|
| 263 |
+
choices=channel_list_values,
|
| 264 |
+
label="Select a Channel",
|
| 265 |
+
visible=True if channel_list_values else False,
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
with gr.Row():
|
| 269 |
+
export_btn = gr.Button(
|
| 270 |
+
"⏬ Download",
|
| 271 |
+
size="sm",
|
| 272 |
+
scale=0,
|
| 273 |
+
variant="primary",
|
| 274 |
+
interactive=False,
|
| 275 |
+
)
|
| 276 |
+
show_videos_btn = gr.Button(
|
| 277 |
+
"🎬Videos",
|
| 278 |
+
size="sm",
|
| 279 |
+
scale=0,
|
| 280 |
+
variant="secondary",
|
| 281 |
+
interactive=False,
|
| 282 |
+
)
|
| 283 |
+
refresh_btn = gr.Button(
|
| 284 |
+
"⭮ Refresh",
|
| 285 |
+
size="sm",
|
| 286 |
+
scale=0,
|
| 287 |
+
variant="huggingface",
|
| 288 |
+
)
|
| 289 |
+
refresh_all_btn = gr.Button(
|
| 290 |
+
"🔄 Sync from YouTube",
|
| 291 |
+
size="sm",
|
| 292 |
+
scale=0,
|
| 293 |
+
variant="stop",
|
| 294 |
+
visible=False,
|
| 295 |
+
)
|
| 296 |
+
add_channels_btn = gr.Button(
|
| 297 |
+
"➕ Add", size="sm", scale=0, variant="primary"
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
delete_channel_btn = gr.Button(
|
| 301 |
+
"🗑️ Delete", size="sm", scale=0, variant="stop"
|
| 302 |
+
)
|
| 303 |
+
|
| 304 |
+
refresh_status = gr.Markdown(label="Refresh Status", container=False)
|
| 305 |
+
|
| 306 |
+
refresh_all_btn.click(
|
| 307 |
+
fn=refresh_all_channels,
|
| 308 |
+
inputs=None,
|
| 309 |
+
outputs=[refresh_status, channel_radio],
|
| 310 |
+
)
|
| 311 |
+
|
| 312 |
+
refresh_btn.click(fn=refresh_channel_list, outputs=[channel_radio]).then(
|
| 313 |
+
fn=list_channels_radio, outputs=[channel_list_state]
|
| 314 |
+
)
|
| 315 |
+
add_channels_btn.click(close_component, outputs=[my_sidebar]).then(
|
| 316 |
+
show_component, outputs=[add_channel_modal]
|
| 317 |
+
)
|
| 318 |
+
|
| 319 |
+
def toggle_no_data_found(channel_list):
|
| 320 |
+
if channel_list:
|
| 321 |
+
return show_component(), hide_component()
|
| 322 |
+
else:
|
| 323 |
+
return hide_component(), show_component()
|
| 324 |
+
|
| 325 |
+
save_add_channels_btn.click(
|
| 326 |
+
disable_component, outputs=[save_add_channels_btn]
|
| 327 |
+
).then(
|
| 328 |
+
index_channels,
|
| 329 |
+
inputs=[channel_input],
|
| 330 |
+
outputs=[index_status, channel_radio, channel_list_state],
|
| 331 |
+
).then(
|
| 332 |
+
hide_component, outputs=[add_channel_modal]
|
| 333 |
+
).then(
|
| 334 |
+
open_component, outputs=[my_sidebar]
|
| 335 |
+
).then(
|
| 336 |
+
enable_component, outputs=[save_add_channels_btn]
|
| 337 |
+
).then(
|
| 338 |
+
toggle_no_data_found,
|
| 339 |
+
inputs=[channel_list_state],
|
| 340 |
+
outputs=[channel_radio, no_channels_message],
|
| 341 |
+
)
|
| 342 |
+
## Onload refresh the channel list.
|
| 343 |
+
gr.on(fn=refresh_channel_list, outputs=[channel_radio]).then(
|
| 344 |
+
fn=list_channels_radio, outputs=[channel_list_state]
|
| 345 |
+
)
|
| 346 |
+
# Main Column
|
| 347 |
+
main_content_no_channels_html = gr.HTML(
|
| 348 |
+
"""
|
| 349 |
+
<div style="
|
| 350 |
+
display: flex;
|
| 351 |
+
justify-content: center;
|
| 352 |
+
align-items: center;
|
| 353 |
+
height: 150px;
|
| 354 |
+
">
|
| 355 |
+
<div style="
|
| 356 |
+
border: 2px solid #FFA500;
|
| 357 |
+
background-color: #FFF8E1;
|
| 358 |
+
color: #FF6F00;
|
| 359 |
+
padding: 20px 30px;
|
| 360 |
+
border-radius: 12px;
|
| 361 |
+
font-weight: bold;
|
| 362 |
+
font-size: 1.2rem;
|
| 363 |
+
text-align: center;
|
| 364 |
+
box-shadow: 0 4px 10px rgba(0,0,0,0.1);
|
| 365 |
+
">
|
| 366 |
+
⚠️ No channels added.<br>
|
| 367 |
+
Please add channels from the side bar
|
| 368 |
+
</div>
|
| 369 |
+
</div>
|
| 370 |
+
|
| 371 |
+
""",
|
| 372 |
+
visible=True if not channel_list_state.value else False,
|
| 373 |
+
)
|
| 374 |
+
with gr.Column(
|
| 375 |
+
scale=3, visible=True if channel_list_state.value else False
|
| 376 |
+
) as main_content:
|
| 377 |
+
with gr.Row():
|
| 378 |
+
search_channel = gr.Dropdown(
|
| 379 |
+
label="Select a Channel",
|
| 380 |
+
choices=[("All Channels", None)] + channel_list_state.value,
|
| 381 |
+
value=None,
|
| 382 |
+
)
|
| 383 |
+
question = gr.Textbox(
|
| 384 |
+
label="Ask a Question",
|
| 385 |
+
placeholder="e.g., How to write the letter Aa in grantham?",
|
| 386 |
+
submit_btn=True,
|
| 387 |
+
)
|
| 388 |
+
gr.Column(scale=2)
|
| 389 |
+
|
| 390 |
+
gr.Examples(
|
| 391 |
+
[
|
| 392 |
+
"Srirangam",
|
| 393 |
+
"Gajendra moksham",
|
| 394 |
+
"Poorvikalyani",
|
| 395 |
+
"Virutham from chathusloki",
|
| 396 |
+
"Lesson 9.15 from Aksharam",
|
| 397 |
+
],
|
| 398 |
+
inputs=question,
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
+
submitted_question = gr.Markdown()
|
| 402 |
+
ask_status = gr.Markdown()
|
| 403 |
+
answer = gr.Markdown()
|
| 404 |
+
video_embed = gr.HTML() # iframe embeds
|
| 405 |
+
|
| 406 |
+
def get_question(q):
|
| 407 |
+
return f"## You asked : {q}\n---"
|
| 408 |
+
|
| 409 |
+
# question.change(enable_if_not_none, inputs=[question], outputs=[question])
|
| 410 |
+
question.submit(show_loading, inputs=[question], outputs=[ask_status]).then(
|
| 411 |
+
get_question, inputs=[question], outputs=[submitted_question]
|
| 412 |
+
).then(disable_component, outputs=[question]).then(
|
| 413 |
+
handle_query,
|
| 414 |
+
inputs=[question, search_channel],
|
| 415 |
+
outputs=[answer, video_embed],
|
| 416 |
+
).then(
|
| 417 |
+
enable_component, outputs=[question]
|
| 418 |
+
).then(
|
| 419 |
+
clear_component, outputs=[ask_status]
|
| 420 |
+
)
|
| 421 |
+
|
| 422 |
+
# Show videos modal when button clicked
|
| 423 |
+
def show_selected_channel_videos(selected_channel_id):
|
| 424 |
+
# print("selected_channel_id = ", selected_channel_id)
|
| 425 |
+
df = fetch_channel_dataframe(selected_channel_id)
|
| 426 |
+
return gr.update(value=df, label=f"{len(df)} videos")
|
| 427 |
+
|
| 428 |
+
channel_radio.change(
|
| 429 |
+
enable_if_not_none, inputs=[channel_radio], outputs=[show_videos_btn]
|
| 430 |
+
).then(enable_if_not_none, inputs=[channel_radio], outputs=[export_btn])
|
| 431 |
+
show_videos_btn.click(disable_component, outputs=[show_videos_btn]).then(
|
| 432 |
+
close_component, outputs=[my_sidebar]
|
| 433 |
+
).then(
|
| 434 |
+
show_selected_channel_videos,
|
| 435 |
+
inputs=[channel_radio],
|
| 436 |
+
outputs=[channel_videos_df],
|
| 437 |
+
).then(
|
| 438 |
+
show_component, outputs=[videos_list_modal]
|
| 439 |
+
).then(
|
| 440 |
+
enable_component, outputs=[show_videos_btn]
|
| 441 |
+
)
|
| 442 |
+
|
| 443 |
+
delete_channel_btn.click(
|
| 444 |
+
disable_component, outputs=[delete_channel_btn]
|
| 445 |
+
).then(
|
| 446 |
+
delete_channel, # function
|
| 447 |
+
inputs=[channel_radio], # selected channel name
|
| 448 |
+
outputs=[channel_radio], # update the radio choices
|
| 449 |
+
).then(
|
| 450 |
+
enable_component, outputs=[delete_channel_btn]
|
| 451 |
+
)
|
| 452 |
+
channel_list_state.change(
|
| 453 |
+
toggle_no_data_found,
|
| 454 |
+
inputs=[channel_list_state],
|
| 455 |
+
outputs=[main_content, main_content_no_channels_html],
|
| 456 |
+
).then(
|
| 457 |
+
toggle_no_data_found,
|
| 458 |
+
inputs=[channel_list_state],
|
| 459 |
+
outputs=[channel_radio, no_channels_message],
|
| 460 |
+
)
|
| 461 |
+
|
| 462 |
+
def get_channel_choices(channel_list):
|
| 463 |
+
return gr.update(choices=[("All Channels", None)] + channel_list)
|
| 464 |
+
|
| 465 |
+
channel_list_state.change(
|
| 466 |
+
get_channel_choices, inputs=[channel_list_state], outputs=[search_channel]
|
| 467 |
+
)
|
| 468 |
+
|
| 469 |
+
export_btn.click(close_component, outputs=[my_sidebar]).then(
|
| 470 |
+
show_component, outputs=[download_status]
|
| 471 |
+
).then(hide_component, outputs=[download_ready_btn]).then(
|
| 472 |
+
show_component, outputs=[download_modal]
|
| 473 |
+
).then(
|
| 474 |
+
export_channel_json, inputs=channel_radio, outputs=download_ready_btn
|
| 475 |
+
).then(
|
| 476 |
+
hide_component, outputs=[download_status]
|
| 477 |
+
).then(
|
| 478 |
+
show_component, outputs=[download_ready_btn]
|
| 479 |
+
)
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
def initialize_youtube_metadata_and_poll():
    """Run the one-off YouTube metadata initialization, then start polling.

    Note: start_poll() loops forever, so this call never returns; run it in
    a daemon thread when the caller must stay responsive.
    """
    # Step 1: drain the init generator, surfacing its progress messages.
    for msg in youtube_metadata_init():
        print(msg)

    # Step 2: begin the RSS polling loop (blocks this thread).
    start_poll()
|
| 492 |
+
|
| 493 |
+
# Launch the whole thing in a background thread
|
| 494 |
+
# Launch the whole thing in a background thread.
# NOTE(review): this thread starts at *import* time; under __main__ the same
# routine runs again below — confirm the double initialization is intended.
yt_init_thread = threading.Thread(target=initialize_youtube_metadata_and_poll, daemon=True)
yt_init_thread.start()

if __name__ == "__main__":
    # NOTE(review): start_poll() inside this call loops forever, so the
    # launch() below is never reached when run as a script — verify.
    initialize_youtube_metadata_and_poll()
    # Start polling in a background thread
    youtube_metadata_app.launch()
|
modules/youtube_metadata/channel_utils.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.youtube_metadata.db import get_collection
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
page_size = 10 # change if you like
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# -------------------------------
|
| 8 |
+
# Fetch channel videos as HTML table with pagination
|
| 9 |
+
# -------------------------------
|
| 10 |
+
def fetch_channel_html(channel_id: str, page: int = 1, page_size: int = 10):
    """Render one page of a channel's indexed videos as an HTML table string.

    Args:
        channel_id: Chroma metadata filter value for the channel.
        page: 1-based page number.
        page_size: rows per page.

    Returns:
        An HTML fragment (table plus total count, or a "no videos" notice).
    """
    collection = get_collection()
    offset = (page - 1) * page_size

    # Metadata-only fetch to learn the channel's total video count.
    everything = collection.get(
        where={"channel_id": channel_id}, include=["metadatas"]
    )
    total_count = (
        len(everything["metadatas"]) if everything and "metadatas" in everything else 0
    )

    # Page-sized slice with the rows actually displayed.
    results = collection.get(
        where={"channel_id": channel_id},
        include=["documents", "metadatas"],
        limit=page_size,
        offset=offset,
    )

    if not results or not results.get("metadatas"):
        return f"""
        <div style="display:flex;justify-content:center;align-items:center;
                    height:200px;flex-direction:column;color:#666;">
            ⚠️ No videos found for this channel (page {page}).
        </div>
        """

    rows = []
    for position, meta in enumerate(results["metadatas"], start=offset + 1):
        rows.append(
            f"""
        <tr>
            <td>{position}</td>
            <td>{meta.get('video_title','')}</td>
            <td><a href="https://youtube.com/watch?v={meta.get('video_id')}"
                target="_blank">Watch Video</a></td>
            <td>{meta.get('description','')}</td>
        </tr>
        """
        )

    header = (
        f"<div>Total: {total_count} videos</div>"
        + """
    <table border="1" style="border-collapse:collapse;width:100%;font-family:sans-serif;">
        <thead style="background:#f0f0f0;">
            <tr>
                <th>#</th>
                <th>Title</th>
                <th>Video URL</th>
                <th>Description</th>
            </tr>
        </thead>
        <tbody>
    """
    )
    return header + "".join(rows) + "</tbody></table>"
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# -------------------------------
|
| 73 |
+
# Fetch channel videos as HTML table with pagination
|
| 74 |
+
# -------------------------------
|
| 75 |
+
def fetch_channel_dataframe(channel_id: str):
    """Return all indexed videos for *channel_id* as a pandas DataFrame.

    Columns: "#", "title", "description", "url" (HTML anchor to the video).
    Returns an empty DataFrame when the channel has no indexed videos.

    Fix: removed the unused `total_count` local (it was computed from a
    second full metadata scan and never read).
    """
    collection = get_collection()

    results = collection.get(
        where={"channel_id": channel_id}, include=["documents", "metadatas"]
    )
    if not results or not results.get("metadatas"):
        return pd.DataFrame(data=[])

    items = []
    for idx, v in enumerate(results["metadatas"], start=1):
        items.append(
            {
                "#": idx,
                "title": v.get("video_title", "-"),
                "description": v.get("description", ""),
                # Rendered as raw HTML by the Dataframe component.
                "url": f"""<a style="color: blue" href="https://youtube.com/watch?v={v.get('video_id')}"
            target="_blank">▶️Watch Video</a>""",
            }
        )
    return pd.DataFrame(data=items)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def update_table(channel_id, page):
    """Re-render the table for *channel_id* at *page*; returns (html, page label)."""
    html = fetch_channel_html(channel_id, page, page_size)
    return html, f"Page {page}"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def prev_page(channel_id, page):
    """Step one page back (floored at 1); returns (html, page label, new page)."""
    target = page - 1 if page > 1 else 1
    html = fetch_channel_html(channel_id, target, page_size)
    return html, f"Page {target}", target
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def next_page(channel_id, page):
    """Step one page forward; returns (html, page label, new page)."""
    target = page + 1
    html = fetch_channel_html(channel_id, target, page_size)
    return html, f"Page {target}", target
|
modules/youtube_metadata/collector.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -------------------------------
|
| 2 |
+
# 1. Collector
|
| 3 |
+
# -------------------------------
|
| 4 |
+
from googleapiclient.discovery import build
|
| 5 |
+
from modules.youtube_metadata.youtube_utils import get_channel_id
|
| 6 |
+
import logging
|
| 7 |
+
|
| 8 |
+
logging.basicConfig()
|
| 9 |
+
logger=logging.getLogger(__name__)
|
| 10 |
+
logger.setLevel(logging.INFO)
|
| 11 |
+
|
| 12 |
+
def fetch_all_channel_videos(api_key: str, channel_url: str, max_results_per_call=50):
    """Yield (progress message, batch) pairs for every upload of *channel_url*.

    Resolves the channel URL/handle to an ID first, then streams pages from
    fetch_channel_videos_by_id. The final yield carries an empty batch as a
    summary marker.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    channel_id = get_channel_id(youtube, channel_url)

    fetched = []
    for batch in fetch_channel_videos_by_id(api_key, channel_id, max_results_per_call):
        fetched.extend(batch)
        logger.info("fetch_all_channel_videos: Fetched %d", len(fetched))
        yield (f"Fetched {len(fetched)}", batch)  # <-- only yield the *new* batch

    yield (f"Fetched {len(fetched)}", [])  # final "summary"
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def fetch_channel_videos_by_id(api_key: str, channel_id: str, max_results=50):
    """Generator yielding pages (lists of video dicts) of a channel's uploads.

    Each dict carries: video_id, title, description, channel_id, channel_title.
    Pages follow the YouTube API's nextPageToken until exhausted.
    """
    youtube = build("youtube", "v3", developerKey=api_key)

    # Resolve the channel's "uploads" playlist and display title.
    channel_response = (
        youtube.channels().list(part="contentDetails,snippet", id=channel_id).execute()
    )
    channel_item = channel_response["items"][0]
    channel_title = channel_item["snippet"]["title"]
    uploads_playlist_id = channel_item["contentDetails"]["relatedPlaylists"]["uploads"]

    page_token = None
    while True:
        response = (
            youtube.playlistItems()
            .list(
                part="snippet",
                playlistId=uploads_playlist_id,
                maxResults=max_results,
                pageToken=page_token,
            )
            .execute()
        )

        page = []
        for entry in response.get("items", []):
            snippet = entry["snippet"]
            page.append(
                {
                    "video_id": snippet["resourceId"]["videoId"],
                    "title": snippet["title"],
                    "description": snippet.get("description", ""),
                    "channel_id": channel_id,
                    "channel_title": channel_title,
                }
            )
        yield page  # one API page at a time

        page_token = response.get("nextPageToken")
        if not page_token:
            break
|
| 65 |
+
|
modules/youtube_metadata/db.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
|
| 3 |
+
from config import SanatanConfig
|
| 4 |
+
|
| 5 |
+
config = SanatanConfig()
|
| 6 |
+
YT_METADATA_COLLECTION_NAME = config.get_collection_name(scripture_name="yt_metadata")
|
| 7 |
+
|
| 8 |
+
def get_client():
    """Return a persistent Chroma client rooted at the configured store path."""
    return chromadb.PersistentClient(path=config.dbStorePath)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_collection():
    """Fetch the yt_metadata Chroma collection, creating it on first use."""
    client = get_client()
    try:
        return client.get_collection(YT_METADATA_COLLECTION_NAME)
    except Exception:
        # First run (or missing collection): create a fresh one.
        return client.create_collection(YT_METADATA_COLLECTION_NAME)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# modules/db.py
|
| 35 |
+
def get_indexed_channels(collection=None):
    """Return {channel_id: channel_title} for every channel in the index.

    Args:
        collection: optional Chroma collection; defaults to the shared
            yt_metadata collection.

    Fix: the default was previously `collection=get_collection()`, which is
    evaluated once at import time — opening the DB on import and freezing a
    single collection instance for every call. Resolved lazily now.
    """
    if collection is None:
        collection = get_collection()

    results = collection.get(include=["metadatas"])
    channels = {}

    for meta in results["metadatas"]:
        cid = meta.get("channel_id")  # safe: metadata may lack the key
        cname = meta.get("channel_title", "Unknown Channel")
        if cid:  # only include if we have a channel_id
            channels[cid] = cname
    return channels
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# -------------------------------
|
| 50 |
+
# Delete a channel
|
| 51 |
+
# -------------------------------
|
| 52 |
+
def delete_channel_from_collection(channel_id: str):
    """Remove every indexed record belonging to *channel_id*."""
    collection = get_collection()
    collection.delete(where={"channel_id": channel_id})
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def fetch_channel_data(channel_id: str):
    """Return embeddings, metadatas and documents stored for one channel."""
    collection = get_collection()
    return collection.get(
        where={"channel_id": channel_id},
        include=["embeddings", "metadatas", "documents"],
    )
|
modules/youtube_metadata/downloader.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
from modules.youtube_metadata.db import fetch_channel_data
|
| 7 |
+
|
| 8 |
+
def json_serializer(obj):
    """json.dump fallback: NumPy-like arrays become lists, anything else a str."""
    to_list = getattr(obj, "tolist", None)
    if to_list is not None:
        return to_list()
    return str(obj)
|
| 12 |
+
|
| 13 |
+
def export_channel_json(channel_id):
    """Dump one channel's stored records to a temporary .json file.

    Returns the path of the written file (caller is responsible for cleanup).
    """
    payload = fetch_channel_data(channel_id)

    fd, path = tempfile.mkstemp(suffix=".json")
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2, ensure_ascii=False, default=json_serializer)
    return path
|
modules/youtube_metadata/embeddings.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
load_dotenv()
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Step 1: Load SentenceTransformer model
|
| 8 |
+
# Old MiniLM version:
|
| 9 |
+
# model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
| 10 |
+
|
| 11 |
+
# Better MPNet alternative:
|
| 12 |
+
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
| 13 |
+
client = OpenAI()
|
| 14 |
+
|
| 15 |
+
def _get_hf_embedding(text: str) -> list:
    """Embed *text* with the local SentenceTransformer model."""
    vector = model.encode(text)
    return vector.tolist()
|
| 17 |
+
|
| 18 |
+
def _get_openai_embedding(text: str) -> list:
    """Embed *text* via the OpenAI text-embedding-3-large model."""
    response = client.embeddings.create(
        model="text-embedding-3-large",
        input=text,
    )
    return response.data[0].embedding
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_embedding(text: str) -> list:
    """Single switch point for the embedding backend (currently OpenAI).

    Swap in _get_hf_embedding here to return to the local model.
    """
    return _get_openai_embedding(text)
|
modules/youtube_metadata/indexer.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/indexer.py
|
| 2 |
+
from typing import Dict, List
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from modules.youtube_metadata.embeddings import get_embedding
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logging.basicConfig()
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
logger.setLevel(logging.INFO)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def index_videos(
    videos: List[Dict], collection, channel_url: str, batch_size: int = 50
):
    """Embed and index *videos* into *collection* in batches.

    Args:
        videos: dicts with video_id/title/description (channel_id and
            channel_title optional).
        collection: Chroma collection receiving the records.
        channel_url: stored in each record's metadata.
        batch_size: videos per collection.add call.

    Returns:
        The total number of videos submitted.

    Fix: removed the per-call `client = OpenAI()` — it was never used
    (embeddings go through get_embedding), and constructing it required an
    API key for no reason.
    """
    total = len(videos)
    logger.info(
        "index_videos: [INDEX] Starting indexing for %d videos (channel=%s)",
        total,
        channel_url,
    )

    # Split into batches
    for start in range(0, total, batch_size):
        batch = videos[start : start + batch_size]
        end = start + len(batch)
        percent = round((end / total) * 100, 1)

        logger.info(
            "index_videos: [INDEX] Processing batch %d → %d of %d — %s%%",
            start + 1,
            end,
            total,
            percent,
        )

        # One searchable text per video: "title - description".
        texts = [
            f"{vid.get('title', '')} - {vid.get('description', '')}" for vid in batch
        ]
        embeddings = [get_embedding(text) for text in texts]

        # Build metadata + ids
        metadatas, ids = [], []
        for vid in batch:
            metadata = {
                "video_id": vid.get("video_id"),
                "video_title": vid.get("title", ""),
                "description": vid.get("description", ""),
                "channel_url": channel_url,
            }
            if "channel_id" in vid:
                metadata["channel_id"] = vid["channel_id"]
            if "channel_title" in vid:
                metadata["channel_title"] = vid["channel_title"]

            metadatas.append(metadata)
            ids.append(vid.get("video_id"))

        # Insert in bulk
        collection.add(
            documents=texts,
            embeddings=embeddings,
            metadatas=metadatas,
            ids=ids,
        )

        logger.info(
            "index_videos: [INDEX] ✅ Indexed %d videos (total so far: %d/%d — %s%%)",
            len(batch),
            end,
            total,
            percent,
        )

    logger.info(
        "index_videos: [INDEX] 🎉 Finished indexing %d videos for channel=%s",
        total,
        channel_url,
    )
    return total
|
modules/youtube_metadata/retriever.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modules/retriever.py
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
|
| 5 |
+
from modules.youtube_metadata.embeddings import get_embedding
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def retrieve_videos(
    query: str, collection, top_k: int = 3, channel_id: str = None
) -> List[Dict]:
    """Semantic search over the indexed videos.

    Args:
        query: free-text search query.
        collection: Chroma collection to query.
        top_k: number of results to return.
        channel_id: optional filter restricting results to one channel.

    Returns:
        List of dicts: video_id, video_title, channel, description, score
        (Chroma distance — lower is closer).

    Fixes: removed the unused `client = OpenAI()` (embedding goes through
    get_embedding), and collapsed the duplicated query call into one with
    an optional `where` filter.
    """
    embedding = get_embedding(query)

    query_kwargs = {
        "query_embeddings": [embedding],
        "n_results": top_k,
        "include": ["metadatas", "documents", "distances"],
    }
    if channel_id:
        query_kwargs["where"] = {"channel_id": channel_id}
    results = collection.query(**query_kwargs)

    # Chroma returns one nested list per query embedding; we sent exactly one.
    metadatas_list = results.get("metadatas", [[]])[0]
    documents_list = results.get("documents", [[]])[0]
    distances_list = results.get("distances", [[]])[0]

    videos = []
    for idx, meta in enumerate(metadatas_list):
        videos.append(
            {
                "video_id": meta.get("video_id", ""),
                "video_title": meta.get(
                    "video_title", meta.get("title", documents_list[idx])
                ),
                "channel": meta.get("channel", meta.get("channel_title", "")),
                "description": documents_list[idx] if idx < len(documents_list) else "",
                "score": distances_list[idx] if idx < len(distances_list) else None,
            }
        )

    return videos
|
modules/youtube_metadata/youtube_poller.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from chromadb import Collection
|
| 2 |
+
import feedparser
|
| 3 |
+
from modules.youtube_metadata.db import get_collection, get_indexed_channels
|
| 4 |
+
from modules.youtube_metadata.embeddings import get_embedding
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logging.basicConfig()
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
logger.setLevel(logging.INFO)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def fetch_channel_videos_rss(channel_id, max_results=50):
    """Fetch up to *max_results* recent videos for a channel via its RSS feed.

    Returns a list of dicts keyed video_id / video_title / description /
    published / video_url plus channel-level fields.
    """
    feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
    logger.info("fetch_channel_videos_rss: feed_url = %s", feed_url)
    feed = feedparser.parse(feed_url)

    # Channel-level fields live in the <feed> header.
    channel_title = getattr(feed.feed, "title", None)
    channel_url = getattr(feed.feed, "link", None)
    channel_author = getattr(feed.feed, "author", "")
    logger.info("fetch_channel_videos_rss: channel_title = %s", channel_title)

    videos = []
    for entry in feed.entries[:max_results]:
        # NOTE(review): title_detail is tried *first* as the description
        # source, before media_description/summary — confirm that is intended.
        description = (
            getattr(getattr(entry, "title_detail", None), "value", "")
            or getattr(entry, "media_description", None)
            or getattr(entry, "summary", None)
            or ""
        )

        videos.append(
            {
                "video_id": entry.yt_videoid,
                "video_title": entry.title,
                "description": description,
                "published": entry.published,
                "video_url": entry.link,
                "channel_url": channel_url,
                "channel_id": channel_id,
                "channel_title": channel_title,
                "channel_author": channel_author,
            }
        )

    return videos
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def get_existing_video_ids(collection, channel_id):
    """Return the set of video_ids already stored for *channel_id*."""
    stored = collection.get(where={"channel_id": channel_id})
    return {
        meta["video_id"]
        for meta in stored.get("metadatas", [])
        if meta and "video_id" in meta
    }
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def filter_new_videos(videos, existing_ids):
    """Keep only the videos whose video_id is not already indexed."""
    fresh = []
    for video in videos:
        if video["video_id"] not in existing_ids:
            fresh.append(video)
    return fresh
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def add_to_chroma(collection: Collection, new_videos):
    """Embed and store *new_videos* (dicts from fetch_channel_videos_rss).

    Fix: the RSS fetcher emits "video_title"/"video_url" keys, but this
    function previously read v["title"] and v["link"], raising KeyError on
    every incremental update. Both key spellings are now accepted, and the
    metadata schema is aligned with the indexer's (video_title/description/
    channel_title) so the UI views can render RSS-added records.
    """
    if not new_videos:
        return

    titles = [v.get("video_title") or v.get("title", "") for v in new_videos]
    collection.add(
        documents=titles,
        embeddings=[get_embedding(title) for title in titles],
        metadatas=[
            {
                "video_id": v["video_id"],
                "video_title": v.get("video_title") or v.get("title", ""),
                "description": v.get("description", ""),
                "channel_id": v["channel_id"],
                # Chroma rejects None metadata values; coerce to "".
                "channel_title": v.get("channel_title") or "",
                "link": v.get("video_url") or v.get("link", ""),
            }
            for v in new_videos
        ],
        ids=[v["video_id"] for v in new_videos],
    )
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def incremental_update(collection, channel_id):
    """Pull the channel's RSS feed and index any videos not yet stored.

    Args:
        collection: Chroma collection holding the channel's records.
        channel_id: UC... channel id whose feed is polled.
    """
    existing_ids = get_existing_video_ids(collection, channel_id)
    latest_videos = fetch_channel_videos_rss(channel_id)
    new_videos = filter_new_videos(latest_videos, existing_ids)

    if new_videos:
        add_to_chroma(collection, new_videos)
        logger.info(
            "incremental_update: Added %d new videos from %s",
            len(new_videos),
            channel_id,
        )
    else:
        # Fix: message previously misspelled the function as "incremental_uddate".
        logger.info("incremental_update: No new videos for %s", channel_id)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def start_poll():
    """Poll every indexed channel's RSS feed forever (10-minute cycle).

    Never returns — run in a daemon thread.

    Fix: the channel list was snapshotted once before the loop, so channels
    added (or deleted) after startup were never reflected; it is now
    refreshed at the start of every cycle.
    """
    import time

    while True:
        for channel_id in get_indexed_channels().keys():
            incremental_update(get_collection(), channel_id)
        time.sleep(600)  # 10 minutes
|
modules/youtube_metadata/youtube_sync.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import threading
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 4 |
+
|
| 5 |
+
from modules.youtube_metadata.collector import fetch_all_channel_videos
|
| 6 |
+
from modules.youtube_metadata.db import get_collection
|
| 7 |
+
from modules.youtube_metadata.indexer import index_videos
|
| 8 |
+
|
| 9 |
+
# global stop signal
|
| 10 |
+
stop_event = threading.Event()
|
| 11 |
+
MAX_BATCHES = 200 # safety cutoff
|
| 12 |
+
|
| 13 |
+
def stop_sync():
    """External call to stop the sync process.

    Sets the module-level stop_event; the sync generators check it between
    channels and between indexing futures.
    """
    stop_event.set()
|
| 16 |
+
|
| 17 |
+
def sync_channels_from_youtube(api_key, channel_urls: list, progress: gr.Progress = None):
    """Sync multiple channels, yielding (progress_message, videos_indexed_in_batch).

    Clears the shared stop_event on entry; a subsequent stop_sync() call
    aborts between channels.
    """
    global stop_event
    stop_event.clear()

    channel_total = len(channel_urls)
    video_total = 0

    for position, url in enumerate(channel_urls, 1):
        if stop_event.is_set():
            yield f"🛑 Stopped before processing channel: {url}", 0
            break

        yield f"🔄 Syncing {url} ({position}/{channel_total})", 0

        # Stream per-batch progress from the single-channel refresher.
        for message, batch_count in _refresh_single_channel(api_key, url, progress):
            video_total += batch_count
            yield message, batch_count

    yield f"✅ Finished syncing. Total channels: {channel_total}, total videos: {video_total}", 0
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _refresh_single_channel(api_key, channel_url, progress):
    """Fetch and index one channel's videos; yields (message, videos_indexed_delta)."""
    # fetch all batches first
    fetched_batches = list(fetch_all_channel_videos(api_key, channel_url))
    # Tag each video with its source channel URL before indexing.
    all_videos = [v | {"channel_url": channel_url} for _, batch in fetched_batches for v in batch]
    total_videos = len(all_videos)

    if total_videos == 0:
        yield f"{channel_url}: No videos found", 0
        return

    # Index batches concurrently; progress is reported as futures complete.
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [
            executor.submit(index_videos, batch, get_collection(), channel_url=channel_url)
            for _, batch in fetched_batches
        ]

        completed_videos = 0
        for f in as_completed(futures):
            if stop_event.is_set():
                yield "🛑 Stop requested during indexing stage", completed_videos
                break

            try:
                indexed_count = f.result()
                if indexed_count is None:
                    # NOTE(review): falls back to len(all_videos) — the whole
                    # channel, not this batch — which can overcount progress;
                    # confirm intended.
                    indexed_count = len(all_videos)  # fallback if index_videos doesn't return
            except Exception as e:
                indexed_count = 0
                yield f"⚠️ Error indexing {channel_url}: {e}", completed_videos

            completed_videos += indexed_count
            pct = 100.0 * completed_videos / max(1, total_videos)

            if progress:
                progress(completed_videos / total_videos)

            yield f"{channel_url}: Indexed {completed_videos}/{total_videos} videos — {pct:.1f}%", completed_videos
|
modules/youtube_metadata/youtube_utils.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def get_channel_id(youtube, channel_url: str) -> str:
|
| 2 |
+
"""
|
| 3 |
+
Extract channel ID from a YouTube URL or handle.
|
| 4 |
+
Supports:
|
| 5 |
+
- https://www.youtube.com/channel/UCxxxx
|
| 6 |
+
- https://www.youtube.com/@handle
|
| 7 |
+
- @handle
|
| 8 |
+
"""
|
| 9 |
+
# If already a UC... ID
|
| 10 |
+
if "channel/" in channel_url:
|
| 11 |
+
return channel_url.split("channel/")[-1].split("/")[0]
|
| 12 |
+
|
| 13 |
+
# If it's a handle (@xyz or full URL)
|
| 14 |
+
if "@" in channel_url:
|
| 15 |
+
handle = channel_url.split("@")[-1]
|
| 16 |
+
request = youtube.channels().list(
|
| 17 |
+
part="id",
|
| 18 |
+
forHandle=handle
|
| 19 |
+
)
|
| 20 |
+
response = request.execute()
|
| 21 |
+
return response["items"][0]["id"]
|
| 22 |
+
|
| 23 |
+
if channel_url.startswith("UC"):
|
| 24 |
+
return channel_url
|
| 25 |
+
|
| 26 |
+
raise ValueError(f"Unsupported channel URL format {channel_url}")
|
pyproject.toml
CHANGED
|
@@ -8,10 +8,12 @@ dependencies = [
|
|
| 8 |
"chromadb>=1.0.15",
|
| 9 |
"dotenv>=0.9.9",
|
| 10 |
"dropbox>=12.0.2",
|
|
|
|
| 11 |
"google-api-python-client>=2.177.0",
|
| 12 |
"google-auth-httplib2>=0.2.0",
|
| 13 |
"google-auth-oauthlib>=1.2.2",
|
| 14 |
"gradio>=5.39.0",
|
|
|
|
| 15 |
"gspread>=6.2.1",
|
| 16 |
"langchain>=0.3.27",
|
| 17 |
"langchain-community>=0.3.27",
|
|
|
|
| 8 |
"chromadb>=1.0.15",
|
| 9 |
"dotenv>=0.9.9",
|
| 10 |
"dropbox>=12.0.2",
|
| 11 |
+
"feedparser>=6.0.12",
|
| 12 |
"google-api-python-client>=2.177.0",
|
| 13 |
"google-auth-httplib2>=0.2.0",
|
| 14 |
"google-auth-oauthlib>=1.2.2",
|
| 15 |
"gradio>=5.39.0",
|
| 16 |
+
"gradio-modal>=0.0.4",
|
| 17 |
"gspread>=6.2.1",
|
| 18 |
"langchain>=0.3.27",
|
| 19 |
"langchain-community>=0.3.27",
|
uv.lock
CHANGED
|
@@ -502,6 +502,18 @@ wheels = [
|
|
| 502 |
{ url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" },
|
| 503 |
]
|
| 504 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
[[package]]
|
| 506 |
name = "ffmpy"
|
| 507 |
version = "0.6.1"
|
|
@@ -740,6 +752,18 @@ wheels = [
|
|
| 740 |
{ url = "https://files.pythonhosted.org/packages/e0/38/7f50ae95de8fa419276742230f57a34e8c0f47231da0ad54479dd0088972/gradio_client-1.11.0-py3-none-any.whl", hash = "sha256:afb714aea50224f6f04679fe2ce79c1be75011012d0dc3b3ee575610a0dc8eb2", size = 324452, upload-time = "2025-07-17T02:02:44.542Z" },
|
| 741 |
]
|
| 742 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
[[package]]
|
| 744 |
name = "greenlet"
|
| 745 |
version = "3.2.3"
|
|
@@ -2773,10 +2797,12 @@ dependencies = [
|
|
| 2773 |
{ name = "chromadb" },
|
| 2774 |
{ name = "dotenv" },
|
| 2775 |
{ name = "dropbox" },
|
|
|
|
| 2776 |
{ name = "google-api-python-client" },
|
| 2777 |
{ name = "google-auth-httplib2" },
|
| 2778 |
{ name = "google-auth-oauthlib" },
|
| 2779 |
{ name = "gradio" },
|
|
|
|
| 2780 |
{ name = "gspread" },
|
| 2781 |
{ name = "langchain" },
|
| 2782 |
{ name = "langchain-community" },
|
|
@@ -2792,10 +2818,12 @@ requires-dist = [
|
|
| 2792 |
{ name = "chromadb", specifier = ">=1.0.15" },
|
| 2793 |
{ name = "dotenv", specifier = ">=0.9.9" },
|
| 2794 |
{ name = "dropbox", specifier = ">=12.0.2" },
|
|
|
|
| 2795 |
{ name = "google-api-python-client", specifier = ">=2.177.0" },
|
| 2796 |
{ name = "google-auth-httplib2", specifier = ">=0.2.0" },
|
| 2797 |
{ name = "google-auth-oauthlib", specifier = ">=1.2.2" },
|
| 2798 |
{ name = "gradio", specifier = ">=5.39.0" },
|
|
|
|
| 2799 |
{ name = "gspread", specifier = ">=6.2.1" },
|
| 2800 |
{ name = "langchain", specifier = ">=0.3.27" },
|
| 2801 |
{ name = "langchain-community", specifier = ">=0.3.27" },
|
|
@@ -2928,6 +2956,12 @@ wheels = [
|
|
| 2928 |
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
|
| 2929 |
]
|
| 2930 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2931 |
[[package]]
|
| 2932 |
name = "shellingham"
|
| 2933 |
version = "1.5.4"
|
|
|
|
| 502 |
{ url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" },
|
| 503 |
]
|
| 504 |
|
| 505 |
+
[[package]]
|
| 506 |
+
name = "feedparser"
|
| 507 |
+
version = "6.0.12"
|
| 508 |
+
source = { registry = "https://pypi.org/simple" }
|
| 509 |
+
dependencies = [
|
| 510 |
+
{ name = "sgmllib3k" },
|
| 511 |
+
]
|
| 512 |
+
sdist = { url = "https://files.pythonhosted.org/packages/dc/79/db7edb5e77d6dfbc54d7d9df72828be4318275b2e580549ff45a962f6461/feedparser-6.0.12.tar.gz", hash = "sha256:64f76ce90ae3e8ef5d1ede0f8d3b50ce26bcce71dd8ae5e82b1cd2d4a5f94228", size = 286579, upload-time = "2025-09-10T13:33:59.486Z" }
|
| 513 |
+
wheels = [
|
| 514 |
+
{ url = "https://files.pythonhosted.org/packages/4e/eb/c96d64137e29ae17d83ad2552470bafe3a7a915e85434d9942077d7fd011/feedparser-6.0.12-py3-none-any.whl", hash = "sha256:6bbff10f5a52662c00a2e3f86a38928c37c48f77b3c511aedcd51de933549324", size = 81480, upload-time = "2025-09-10T13:33:58.022Z" },
|
| 515 |
+
]
|
| 516 |
+
|
| 517 |
[[package]]
|
| 518 |
name = "ffmpy"
|
| 519 |
version = "0.6.1"
|
|
|
|
| 752 |
{ url = "https://files.pythonhosted.org/packages/e0/38/7f50ae95de8fa419276742230f57a34e8c0f47231da0ad54479dd0088972/gradio_client-1.11.0-py3-none-any.whl", hash = "sha256:afb714aea50224f6f04679fe2ce79c1be75011012d0dc3b3ee575610a0dc8eb2", size = 324452, upload-time = "2025-07-17T02:02:44.542Z" },
|
| 753 |
]
|
| 754 |
|
| 755 |
+
[[package]]
|
| 756 |
+
name = "gradio-modal"
|
| 757 |
+
version = "0.0.4"
|
| 758 |
+
source = { registry = "https://pypi.org/simple" }
|
| 759 |
+
dependencies = [
|
| 760 |
+
{ name = "gradio" },
|
| 761 |
+
]
|
| 762 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e2/fd/3b383f9ee8d60625e9e26871ba4adcacbedeab132041b94290758e02e543/gradio_modal-0.0.4.tar.gz", hash = "sha256:717ae699072a171648cfa1b84bc153be84e92d04e9ad58c1bc59af68ef332726", size = 1180812, upload-time = "2024-10-15T23:46:06.134Z" }
|
| 763 |
+
wheels = [
|
| 764 |
+
{ url = "https://files.pythonhosted.org/packages/05/3d/76f454de84ae1dccbf2b7023e933afb8dde5fdd89e9476786726ef770737/gradio_modal-0.0.4-py3-none-any.whl", hash = "sha256:d96e817d2e934d9e1b835b06474f45fd349b5ccea499d1536bfb4bd38f62dedb", size = 1106241, upload-time = "2024-10-15T23:46:04.13Z" },
|
| 765 |
+
]
|
| 766 |
+
|
| 767 |
[[package]]
|
| 768 |
name = "greenlet"
|
| 769 |
version = "3.2.3"
|
|
|
|
| 2797 |
{ name = "chromadb" },
|
| 2798 |
{ name = "dotenv" },
|
| 2799 |
{ name = "dropbox" },
|
| 2800 |
+
{ name = "feedparser" },
|
| 2801 |
{ name = "google-api-python-client" },
|
| 2802 |
{ name = "google-auth-httplib2" },
|
| 2803 |
{ name = "google-auth-oauthlib" },
|
| 2804 |
{ name = "gradio" },
|
| 2805 |
+
{ name = "gradio-modal" },
|
| 2806 |
{ name = "gspread" },
|
| 2807 |
{ name = "langchain" },
|
| 2808 |
{ name = "langchain-community" },
|
|
|
|
| 2818 |
{ name = "chromadb", specifier = ">=1.0.15" },
|
| 2819 |
{ name = "dotenv", specifier = ">=0.9.9" },
|
| 2820 |
{ name = "dropbox", specifier = ">=12.0.2" },
|
| 2821 |
+
{ name = "feedparser", specifier = ">=6.0.12" },
|
| 2822 |
{ name = "google-api-python-client", specifier = ">=2.177.0" },
|
| 2823 |
{ name = "google-auth-httplib2", specifier = ">=0.2.0" },
|
| 2824 |
{ name = "google-auth-oauthlib", specifier = ">=1.2.2" },
|
| 2825 |
{ name = "gradio", specifier = ">=5.39.0" },
|
| 2826 |
+
{ name = "gradio-modal", specifier = ">=0.0.4" },
|
| 2827 |
{ name = "gspread", specifier = ">=6.2.1" },
|
| 2828 |
{ name = "langchain", specifier = ">=0.3.27" },
|
| 2829 |
{ name = "langchain-community", specifier = ">=0.3.27" },
|
|
|
|
| 2956 |
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
|
| 2957 |
]
|
| 2958 |
|
| 2959 |
+
[[package]]
|
| 2960 |
+
name = "sgmllib3k"
|
| 2961 |
+
version = "1.0.0"
|
| 2962 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2963 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9e/bd/3704a8c3e0942d711c1299ebf7b9091930adae6675d7c8f476a7ce48653c/sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9", size = 5750, upload-time = "2010-08-24T14:33:52.445Z" }
|
| 2964 |
+
|
| 2965 |
[[package]]
|
| 2966 |
name = "shellingham"
|
| 2967 |
version = "1.5.4"
|