Spaces:
Runtime error
Runtime error
| from langchain.llms import HuggingFacePipeline | |
| from langchain.embeddings import HuggingFaceInstructEmbeddings | |
| from langchain.chains import RetrievalQA | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForSeq2SeqLM, | |
| pipeline, | |
| GenerationConfig | |
| ) | |
| from textwrap import dedent | |
class lamini:
    """Factory for a LangChain LLM backed by the LaMini-Flan-T5 checkpoint."""

    def load_model(self, task="text2text-generation", **kwargs) -> "HuggingFacePipeline":
        """Build a LangChain-wrapped transformers pipeline for LaMini-Flan-T5.

        - model: MBZUAI/LaMini-Flan-T5-248M

        Args:
            task (str): transformers pipeline task to instantiate.
            **kwargs: optional generation overrides. Only these keys are read:
                ``max_length`` (default 512), ``temperature`` (default 0),
                ``top_p`` (default 0.95) and ``repetition_penalty``
                (default 1.15). Any other key is ignored.

        Returns:
            HuggingFacePipeline: LangChain LLM wrapping the pipeline.
        """
        model_id = "MBZUAI/LaMini-Flan-T5-248M"

        # Tokenizer, weights and generation defaults all come from the same
        # checkpoint so they stay consistent with each other.
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        gen_config = GenerationConfig.from_pretrained(model_id)

        # NOTE(review): temperature and top_p are sampling parameters; with
        # the default temperature of 0 and no do_sample flag they presumably
        # have no effect -- confirm the intended decoding mode.
        pipe = pipeline(
            task,  # honour the caller's task instead of a hard-coded literal
            model=model,
            tokenizer=tokenizer,
            generation_config=gen_config,
            max_length=kwargs.get("max_length", 512),
            top_p=kwargs.get("top_p", 0.95),
            temperature=kwargs.get("temperature", 0),
            repetition_penalty=kwargs.get("repetition_penalty", 1.15),
        )
        return HuggingFacePipeline(pipeline=pipe)
class templates:
    """Instruction-prefixed prompt helpers built on top of a callable LLM."""

    def __init__(self, llm: "HuggingFacePipeline"):
        """Store the LLM that every template method will call.

        Args:
            llm: callable invoked as ``llm(prompt, **kwargs)``.
        """
        self.llm = llm

    def _instruct(self, instruction, text, **kwargs):
        """Prefix ``text`` with ``instruction`` and pass the prompt to the LLM."""
        return self.llm(instruction + text, **kwargs)

    def summarize(self, text, **kwargs):
        """Summarize text

        Args:
            text (str): text to summarize
            **kwargs: forwarded unchanged to the LLM call

        Returns:
            the value returned by the LLM for the summarization prompt
        """
        return self._instruct("summarize for better understanding: ", text, **kwargs)

    def generate_title(self, text, **kwargs):
        """Generate a title for text

        Args:
            text (str): text to generate title for
            **kwargs: forwarded unchanged to the LLM call

        Returns:
            the value returned by the LLM for the title prompt
        """
        return self._instruct("generate a title for this text: ", text, **kwargs)

    def generate_tile(self, text, **kwargs):
        """Backward-compatible alias for :meth:`generate_title`.

        The original method name was misspelled; it is kept so existing
        callers continue to work.
        """
        return self.generate_title(text, **kwargs)
class qa_template:
    """Retrieval question answering over a text knowledge base, with a Gradio chat UI."""

    # Chat reply used when a question arrives before any knowledge base is loaded.
    NOT_READY_MESSAGE = (
        "No video is loaded yet. Enter a video ID and press return/enter first."
    )

    def __init__(self, llm):
        """Store the LLM; no QA chain exists until :meth:`load` is called.

        Args:
            llm: language model handed to ``RetrievalQA.from_chain_type``.
        """
        self.llm = llm
        # Assigned explicitly (not just annotated) so the attribute always
        # exists; it becomes a RetrievalQA chain once load() has run.
        self.qa_inf = None

    def load(self, knowledge_base):
        """Load knowledge base

        Splits the text into chunks, embeds them into a Chroma vector store
        and builds a "stuff" RetrievalQA chain over that store. The chain is
        also kept on ``self.qa_inf`` for later questions.

        Args:
            knowledge_base (str): knowledge base to load

        Returns:
            BaseRetrievalQA: (optional to use) returns QA interface
        """
        # Imported lazily, as in the original, so the class can be created
        # before these heavier modules are needed.
        from utils import LangChainChunker
        from langchain.vectorstores import Chroma
        from langchain.chains import RetrievalQA

        embeds = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
        # NOTE(review): whether size=512 counts characters or tokens depends
        # on LangChainChunker -- confirm against utils.
        chunks = LangChainChunker(knowledge_base).chunker(size=512)
        retriever = Chroma.from_texts(chunks, embeds).as_retriever()

        self.qa_inf = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff", retriever=retriever
        )
        return self.qa_inf

    def answer(self, msg, history):
        """Answer one chat message and record the exchange in the history.

        Args:
            msg (str): the user's question.
            history (list | None): chat history as ``(question, answer)``
                tuples; ``None`` is treated as an empty history.

        Returns:
            tuple: ``("", history)`` -- the empty string clears the question
            box, and ``history`` has the new exchange appended.
        """
        if history is None:
            history = []

        if self.qa_inf is None:
            # Nothing loaded yet: reply in the chat instead of raising.
            reply = self.NOT_READY_MESSAGE
        else:
            reply = self.qa_inf.run(msg)

        history.append((msg, reply))
        return "", history

    def start_gradio(self, title: str):
        """Start gradio interface

        Builds the Blocks layout (intro text, chatbot, video-ID and question
        boxes, clear button, usage notes), wires the submit events and
        launches the app.

        Args:
            title (str): currently unused; kept for interface compatibility.

        Returns:
            None
        """
        import gradio as gr

        intro_md = dedent("""
            # video to QA
            A test implementation to use vectorstores and mini llms to create
            a question answer chatbot interface for _youtube videos_
        """)

        getting_started_md = dedent("""
            ## Getting started
            to start up you need to enter the video ID of youtube video first
            Get a youtube video which has English dialog
            > ex: https://www.youtube.com/watch?v=BsnCpESUEqM
            in this `BsnCpESUEqM` is the video ID
            ```
            https://www.youtube.com/watch?v=BsnCpESUEqM
                                            ^^^^^^^^^^^
                                             video_id
            ```
            > in url paramets are seperated by `?` and for video id its `?v`
            copy-paste the video id to the textbox and press return/enter and wait ~5 seconds to fetch video information
            ---
            Now in the Question Box _box_/feild start typing the quesions and press return/enter to send to llm
        """)

        def reload(video_id):
            """Fetch the subtitles for ``video_id`` and rebuild the QA chain."""
            from utils import getSubsText

            print(f"Setting up {video_id}")
            self.load(getSubsText(video_id))

        with gr.Blocks() as demo:
            with gr.Column():
                gr.Markdown(intro_md)
                chatbot = gr.Chatbot()
                with gr.Row():
                    with gr.Column():
                        video_box = gr.Textbox(
                            label="Video ID", placeholder="Enter video ID here"
                        )
                        msg = gr.Textbox(
                            label="Question Box",
                            placeholder="Enter your question here",
                        )
                        gr.ClearButton([msg, video_box, chatbot])
                    gr.Markdown(getting_started_md)

            # Event listeners have to be registered inside the Blocks context.
            msg.submit(self.answer, [msg, chatbot], [msg, chatbot])
            video_box.submit(reload, [video_box])

        demo.launch()