Spaces:

taspol
/

PAN-SEA

Sleeping

App Files Files Community

taspol committed on Aug 12

Commit

1e81c91

1 Parent(s): 9027ae3

feat: init repo

Browse files

Files changed (7) hide show

.gitignore +203 -0
app/app.py +69 -0
data_importer.py +99 -0
interface.py +67 -0
plan_mock.json +129 -0
utils/llm_caller.py +196 -0
utils/youtube_extractor.py +32 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,203 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Streamlit
+.streamlit/secrets.toml

app/app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+from fastapi import FastAPI
+from interface import PlanRequest, PlanResponse, PlanStep, TransportInfo, TripPlan , YoutubeLinkRequest, YoutubeLinkResponse
+from data_importer import DataImporter
+import os
+import json
+# ASGI application object; the route decorators below register handlers on it.
+app = FastAPI()
+# Built once at import time, so DataImporter.__init__ runs when this module is
+# imported. The only handler using it in this file is the commented-out
+# /v1/addYoutubeLink endpoint.
+data_importer = DataImporter()
+def load_mock_data(path: str = "plan_mock.json") -> dict:
+    """Load mock data from plan_mock.json"""
+    try:
+        file_path = os.path.join(os.path.dirname(__file__), path)
+        with open(file_path, 'r', encoding='utf-8') as file:
+            return json.load(file)
+    except FileNotFoundError:
+        # Return default mock data if file not found
+        print("Mock data file not found. Using default mock data.")
+        return {"error": "Invalid JSON format"}
+@app.get("/v1")
+def greet_json():
+    return {"Hello": "World!"}
+@app.post("/v1/generateTripPlan", response_model=PlanResponse)
+def generate_trip_plan(request: PlanRequest):
+    mock_trip_plan = load_mock_data()
+    print(mock_trip_plan)
+    return PlanResponse(
+        tripOverview="Sample trip overview.",
+        query_params=request,
+        retrieved_data=[],
+        trip_plan=TripPlan(
+            overview="Sample trip overview",
+            total_estimated_cost=1000.0,
+            steps=[PlanStep(
+            day=1,
+            title="Arrival in New York",
+            description="Arrive at JFK Airport and check-in at the hotel.",
+            transport=TransportInfo(
+                mode="Plane",
+                departure="Your hometown airport",
+                arrival="JFK Airport",
+                duration_minutes=300,
+                price=300.0,
+                details="Non-stop flight"
+            ),
+            map_coordinates={"lat": 40.6413, "lon": -73.7781},
+            images=["https://example.com/images/jfk_airport.jpg"],
+            tips=["Bring a valid ID", "Confirm your hotel reservation"]
+        )]),
+        meta={"status": "success"}
+    )
+# @app.post("/v1/addYoutubeLink", response_model=YoutubeLinkResponse)
+# def add_youtube_link(request: YoutubeLinkRequest):
+#     try:
+#         data_importer.insert_from_youtube(request.video_id)
+#     except Exception as e:
+#         return YoutubeLinkResponse(
+#             message="Failed to add YouTube link",
+#             video_url=None
+#         )
+#     return YoutubeLinkResponse(
+#         message="add successfully",
+#         video_url=f"https://www.youtube.com/watch?v={request.video_id}"
+#     )

data_importer.py ADDED Viewed

	@@ -0,0 +1,99 @@

+from utils.youtube_extractor import YoutubeExtractor
+from sentence_transformers import SentenceTransformer
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+from typing import List, Dict, Optional, Union
+import uuid
+class DataImporter:
+    def __init__(self, qdrant_url: str = "http://localhost:6333", collection_name: str = "demo_bge_m3"):
+        self.model = SentenceTransformer("BAAI/bge-m3")
+        self.client = QdrantClient(url=qdrant_url)
+        self.collection_name = collection_name
+        self.youtube_extractor = YoutubeExtractor()
+        # Create collection if it doesn't exist
+        self._create_collection()
+    def _create_collection(self):
+        try:
+            self.client.recreate_collection(
+                collection_name=self.collection_name,
+                vectors_config=VectorParams(size=1024, distance=Distance.COSINE)
+            )
+            print(f"Collection '{self.collection_name}' created successfully")
+        except Exception as e:
+            print(f"Error creating collection: {e}")
+    def encode_text(self, texts: Union[str, List[str]]) -> List[List[float]]:
+        if isinstance(texts, str):
+            texts = [texts]
+        embeddings = self.model.encode(texts, normalize_embeddings=True)
+        return embeddings.tolist()
+    def insert_text(self, text: str, metadata: Optional[Dict] = None, custom_id: Optional[str] = None) -> str:
+        point_id = custom_id or str(uuid.uuid4())
+        embedding = self.encode_text(text)[0]
+        payload = {"text": text}
+        if metadata:
+            payload.update(metadata)
+        self.client.upsert(
+            collection_name=self.collection_name,
+            points=[PointStruct(id=point_id, vector=embedding, payload=payload)]
+        )
+        print(f"Inserted text with ID: {point_id}")
+        return point_id
+    def insert_texts(self, texts: List[str], metadata_list: Optional[List[Dict]] = None) -> List[str]:
+        embeddings = self.encode_text(texts)
+        point_ids = [str(uuid.uuid4()) for _ in texts]
+        points = []
+        for i, (text, embedding, point_id) in enumerate(zip(texts, embeddings, point_ids)):
+            payload = {"text": text}
+            if metadata_list and i < len(metadata_list):
+                payload.update(metadata_list[i])
+            points.append(PointStruct(id=point_id, vector=embedding, payload=payload))
+        self.client.upsert(collection_name=self.collection_name, points=points)
+        print(f"Inserted {len(texts)} texts")
+        return point_ids
+    def insert_from_youtube(self, video_id: str, metadata: Optional[Dict] = None) -> Optional[str]:
+        try:
+            # Extract text from YouTube (assuming your YoutubeExtractor has this method)
+            text = self.youtube_extractor.extract_transcript(video_id)
+            if text:
+                video_metadata = {"source": "youtube", "video_id": video_id}
+                if metadata:
+                    video_metadata.update(metadata)
+                return self.insert_text(text, video_metadata)
+            return None
+        except Exception as e:
+            print(f"Error extracting from YouTube: {e}")
+            return None
+    def search_similar(self, query: str, limit: int = 5) -> List[Dict]:
+        query_embedding = self.encode_text(query)[0]
+        results = self.client.search(
+            collection_name=self.collection_name,
+            query_vector=query_embedding,
+            limit=limit
+        )
+        return [
+            {
+                "id": result.id,
+                "score": result.score,
+                "text": result.payload.get("text", ""),
+                "metadata": {k: v for k, v in result.payload.items() if k != "text"}
+            }
+            for result in results
+        ]

interface.py ADDED Viewed

	@@ -0,0 +1,67 @@

+from pydantic import BaseModel, Field
+from typing import List, Optional, Any, Dict
+class TripPlanRequest(BaseModel):
+    """Trip request schema: destination, duration, budget and preference strings."""
+    destination: str
+    # NOTE(review): the unit of duration is not stated here (days?) — confirm.
+    duration: int
+    budget: float
+    # Defaults to an empty list when omitted.
+    preferences: list[str] = []
+class TripPlanResponse(BaseModel):
+    """Response schema holding a message string and an untyped plan dict."""
+    message: str
+    plan: dict
+class YoutubeLinkRequest(BaseModel):
+    """Request schema carrying the ID of a YouTube video."""
+    # presumably the value of the "v" query parameter of a watch URL — verify against caller
+    video_id: str
+class YoutubeLinkResponse(BaseModel):
+    message: str
+    video_url: str
+class PlanRequest(BaseModel):
+    """Request schema for trip-plan generation.
+    Only ``start_place`` and ``destination_place`` are required; every other
+    field has a default.
+    """
+    start_place: str
+    destination_place: str
+    trip_price: Optional[float] = Field(None, description="Total budget in local currency")
+    trip_context: Optional[str] = Field(None, description="e.g. adventure, rest, date")
+    # Defaults to a one-day trip.
+    trip_duration_days: Optional[int] = 1
+    # Defaults to a single traveller.
+    group_size: Optional[int] = 1
+    preferences: Optional[List[str]] = None
+    # NOTE(review): presumably the number of retrieved items to use — confirm against the retrieval code.
+    top_k: Optional[int] = 3
+class RetrievedItem(BaseModel):
+    place_id: str
+    place_name: str
+    description: Optional[str]
+    score: float
+    metadata: Optional[Dict[str, Any]] = None
+class TransportInfo(BaseModel):
+    mode: Optional[str]
+    departure: Optional[str]
+    arrival: Optional[str]
+    duration_minutes: Optional[int]
+    price: Optional[float]
+    details: Optional[str]
+class PlanStep(BaseModel):
+    day: Optional[int]
+    title: Optional[str]
+    description: Optional[str]
+    transport: Optional[TransportInfo]
+    map_coordinates: Optional[Dict[str, float]]
+    images: Optional[List[str]]
+    tips: Optional[List[str]]
+class TripPlan(BaseModel):
+    overview: str
+    total_estimated_cost: Optional[float]
+    steps: List[PlanStep]
+class PlanResponse(BaseModel):
+    """Response schema for trip-plan generation; all five fields are required."""
+    tripOverview: str
+    # The request this response was produced for.
+    query_params: PlanRequest
+    retrieved_data: List[RetrievedItem]
+    trip_plan: TripPlan
+    # Free-form key/value information about the response.
+    meta: Dict[str, Any]

plan_mock.json ADDED Viewed

	@@ -0,0 +1,129 @@

+{
+  "tripOverview": {
+    "summary": "Adventure trip from Bangkok to Chiang Mai with cultural exploration",
+    "regions": ["Thailand", "Chiang Mai"],
+    "destination": "Chiang Mai, Thailand",
+    "EstimatedCost": "Approximately 10,000 THB per person",
+    "durationDays": 5,
+    "purpose": "Adventure, cultural experience"
+  },
+  "milestones": [
+    "Bangkok Suvarnabhumi Airport",
+    "Chiang Mai International Airport",
+    "Doi Suthep-Pui National Park",
+    "Chiang Mai Old City",
+    "Elephant Nature Park",
+    "Chiang Rai Night Bazaar"
+  ],
+  "transportation": [
+    {
+      "mode": "Plane",
+      "from": "Bangkok Suvarnabhumi Airport",
+      "to": "Chiang Mai International Airport",
+      "schedule": "2025-09-15T08:00:00Z",
+      "price": "1500 THB per person"
+    },
+    {
+      "mode": "Songthaew (Red Truck)",
+      "from": "Chiang Mai International Airport",
+      "to": "Chiang Mai Old City",
+      "schedule": "2025-09-15T11:00:00Z",
+      "price": "40 THB per person"
+    }
+  ],
+  "accommodation": [
+    {
+      "type": "Guesthouse",
+      "location": "Chiang Mai Old City",
+      "contact": "053-211-111",
+      "notes": "Centrally located with air conditioning"
+    }
+  ],
+  "tripRoute": [
+    {
+      "day": 1,
+      "activities": [
+        "Arrival at Chiang Mai International Airport",
+        "Check-in at guesthouse",
+        "Explore Chiang Mai Old City",
+        "Visit Wat Phra That Doi Suthep"
+      ],
+      "walkingRoute": "Old City Moat",
+      "signage": "Follow city maps",
+      "suggestions": ["Wear comfortable shoes", "Try local street food"],
+      "precautions": ["Beware of traffic"]
+    },
+    {
+      "day": 2,
+      "activities": [
+        "Visit Elephant Nature Park",
+        "Participate in elephant conservation activities"
+      ],
+      "transport": {
+        "mode": "Van",
+        "from": "Chiang Mai Old City",
+        "to": "Elephant Nature Park",
+        "price": "800 THB per person",
+        "duration_minutes": 60
+      },
+      "suggestions": ["Wear long sleeves", "Bring sunscreen"],
+      "precautions": ["Follow park guidelines"]
+    },
+    {
+      "day": 3,
+      "activities": [
+        "Trekking in Doi Suthep-Pui National Park",
+        "Visit waterfalls and scenic viewpoints"
+      ],
+      "transport": {
+        "mode": "Songthaew (Red Truck)",
+        "from": "Chiang Mai Old City",
+        "to": "Doi Suthep-Pui National Park",
+        "price": "50 THB per person",
+        "duration_minutes": 45
+      },
+      "suggestions": ["Bring water and snacks", "Wear hiking boots"],
+      "precautions": ["Beware of slippery trails"]
+    },
+    {
+      "day": 4,
+      "activities": [
+        "Visit Chiang Rai Night Bazaar",
+        "Explore local markets and try street food"
+      ],
+      "transport": {
+        "mode": "Bus",
+        "from": "Chiang Mai Arcade Bus Station",
+        "to": "Chiang Rai Night Bazaar",
+        "price": "200 THB per person",
+        "duration_minutes": 180
+      },
+      "suggestions": ["Bring cash", "Try local delicacies"],
+      "precautions": ["Beware of pickpockets"]
+    },
+    {
+      "day": 5,
+      "activities": [
+        "Return to Bangkok",
+        "Departure from Chiang Mai International Airport"
+      ],
+      "transport": {
+        "mode": "Plane",
+        "from": "Chiang Mai International Airport",
+        "to": "Bangkok Suvarnabhumi Airport",
+        "price": "1500 THB per person",
+        "duration_minutes": 75
+      }
+    }
+  ],
+  "emergencyContacts": {
+    "localRangers": "053-211-111",
+    "hospital": "Chiang Mai Ram Hospital: 053-211-111",
+    "embassy": "Thai Embassy: 02-281-0141"
+  },
+  "tips": [
+    "Always carry a map or use a GPS app",
+    "Respect local customs and traditions",
+    "Stay hydrated and wear sunscreen"
+  ]
+}

utils/llm_caller.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import os
+import asyncio
+import httpx
+from typing import List, Optional, Dict, Any
+from dataclasses import dataclass
+from qdrant_client import QdrantClient
+# Default system prompt text for the travel assistant.
+SYSTEM_PROMPT = """You are a helpful travel assistant. Use the provided context to answer the user's question about travel destinations and places.
+If the context doesn't contain relevant information, say so politely and provide general advice if possible."""
+# NOTE(review): the two lines below form a bare string-literal expression with no effect.
+'''
+'''
+@dataclass
+class RetrievedItem:
+    """One search hit, as built by LLMCaller.query_qdrant."""
+    # String form of the hit's ID.
+    place_id: str
+    # Payload "name", else "title", else an empty string.
+    place_name: str
+    # Payload "description", else "summary", else None.
+    description: Optional[str]
+    # Similarity score of the hit; 0.0 when the hit carries no score.
+    score: float
+    # The hit's full payload dict (empty dict when the hit has no payload).
+    metadata: Dict[str, Any]
+class LLMCaller:
+    def __init__(self):
+        # Environment variables
+        self.qdrant_host = os.getenv("QDRANT_HOST", "localhost")
+        self.qdrant_api_key = os.getenv("QDRANT_API_KEY", None)
+        self.qdrant_collection = os.getenv("QDRANT_COLLECTION", "trip_places")
+        self.embedding_dim = int(os.getenv("EMBEDDING_DIM", "1024"))
+        self.top_k = int(os.getenv("TOP_K", "6"))
+        # LLM configuration
+        self.llm_api_url = os.getenv("LLM_API_URL", "https://api.openai.com/v1/chat/completions")
+        self.llm_api_key = os.getenv("LLM_API_KEY", "sk-REPLACE_ME")
+        # Initialize Qdrant client
+        self.qdrant = QdrantClient(
+            host=self.qdrant_host,
+            api_key=self.qdrant_api_key
+        )
+    async def call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 512, model: str = "sea-lion-7b-instruct") -> str:
+        """
+        Call LLM with system and user prompts
+        Args:
+            system_prompt (str): System message for the LLM
+            user_prompt (str): User message/question
+            max_tokens (int): Maximum tokens to generate
+            model (str): Model to use
+        Returns:
+            str: LLM response text
+        """
+        headers = {
+            "Authorization": f"Bearer {self.llm_api_key}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "model": model,
+            "messages": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
+            "max_tokens": max_tokens,
+            "temperature": 0.7,
+        }
+        try:
+            async with httpx.AsyncClient(timeout=30) as client:
+                resp = await client.post(self.llm_api_url, json=payload, headers=headers)
+                resp.raise_for_status()
+                data = resp.json()
+                # Handle OpenAI-like response format
+                if "choices" in data and len(data["choices"]) > 0:
+                    return data["choices"][0]["message"]["content"]
+                # Fallback for other formats
+                return data.get("text", "")
+        except Exception as e:
+            print(f"Error calling LLM: {e}")
+            return f"Error: Unable to get LLM response - {str(e)}"
+    async def query_qdrant(self, query_embedding: List[float], top_k: Optional[int] = None, collection_name: Optional[str] = None) -> List[RetrievedItem]:
+        """
+        Query Qdrant vector database
+        Args:
+            query_embedding (List[float]): Query vector embedding
+            top_k (int, optional): Number of results to return
+            collection_name (str, optional): Collection name to query
+        Returns:
+            List[RetrievedItem]: Retrieved items from Qdrant
+        """
+        top_k = top_k or self.top_k
+        collection_name = collection_name or self.qdrant_collection
+        def _search():
+            try:
+                hits = self.qdrant.search(
+                    collection_name=collection_name,
+                    query_vector=query_embedding,
+                    limit=top_k,
+                    with_payload=True,
+                )
+                items: List[RetrievedItem] = []
+                for h in hits:
+                    payload = h.payload or {}
+                    items.append(RetrievedItem(
+                        place_id=str(h.id),
+                        place_name=payload.get("name") or payload.get("title") or "",
+                        description=payload.get("description") or payload.get("summary") or None,
+                        score=float(h.score) if h.score is not None else 0.0,
+                        metadata=payload,
+                    ))
+                return items
+            except Exception as e:
+                print(f"Error querying Qdrant: {e}")
+                return []
+        return await asyncio.to_thread(_search)
+    async def rag_query(self, query: str, query_embedding: List[float], system_prompt: Optional[str] = None) -> Dict[str, Any]:
+        # Retrieve relevant items from Qdrant
+        retrieved_items = await self.query_qdrant(query_embedding)
+        # Build context from retrieved items
+        context_parts = []
+        for item in retrieved_items:
+            context_parts.append(f"- {item.place_name}: {item.description or 'No description available'}")
+        context = "\n".join(context_parts) if context_parts else "No relevant information found."
+        # Default system prompt if none provided
+        if not system_prompt:
+            system_prompt = """You are a helpful travel assistant. Use the provided context to answer the user's question about travel destinations and places.
+                               If the context doesn't contain relevant information, say so politely and provide general advice if possible."""
+        # Create user prompt with context
+        user_prompt = f"""Context:
+{context}
+Question: {query}
+Please provide a helpful response based on the context above."""
+        # Get LLM response
+        llm_response = await self.call_llm(system_prompt, user_prompt)
+        return {
+            "answer": llm_response,
+            "retrieved_items": retrieved_items,
+            "context": context,
+            "query": query
+        }
+    def update_config(self, **kwargs):
+        """
+        Update configuration parameters
+        Args:
+            **kwargs: Configuration parameters to update
+        """
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+            else:
+                print(f"Warning: Unknown configuration parameter: {key}")
+# Example usage
+if __name__ == "__main__":
+    async def main():
+        # Initialize LLM caller
+        llm_caller = LLMCaller()
+        # Example embedding (replace with actual embedding)
+        query_embedding = [0.1] * 1024  # Dummy embedding
+        # Perform RAG query
+        result = await llm_caller.rag_query(
+            query="What are the best places to visit in Thailand?",
+            query_embedding=query_embedding
+        )
+        print("Answer:", result["answer"])
+        print(f"Found {len(result['retrieved_items'])} relevant items")
+        # Direct LLM call
+        response = await llm_caller.call_llm(
+            system_prompt="You are a helpful assistant.",
+            user_prompt="What is the capital of Thailand?"
+        )
+        print("Direct LLM Response:", response)
+    asyncio.run(main())

utils/youtube_extractor.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from youtube_transcript_api import YouTubeTranscriptApi
+from typing import List, Dict, Optional
+class YoutubeExtractor:
+    def __init__(self):
+        self.ytt_api = YouTubeTranscriptApi()
+    def extract_transcript(self, video_id: str) -> Optional[List[Dict]]:
+        try:
+            transcript = self.ytt_api.fetch(video_id)
+            return transcript
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return None
+    def get_text_only(self, video_id: str) -> Optional[List[str]]:
+        transcript = self.extract_transcript(video_id)
+        if transcript:
+            return [entry['text'] for entry in transcript]
+        return None
+    def get_full_text(self, video_id: str) -> Optional[str]:
+        text_segments = self.get_text_only(video_id)
+        if text_segments:
+            return ' '.join(text_segments)
+        return None
+    def print_transcript(self, video_id: str) -> None:
+        transcript = self.extract_transcript(video_id)
+        if transcript:
+            print("--- Full Transcript ---")
+            for entry in transcript:
+                print(entry['text'])