Spaces:
Paused
Paused
| import subprocess | |
# Run ./setup.sh through bash before the remaining imports execute, and
# report its exit status. A failure is only reported; start-up continues.
script_path = './setup.sh'  # Adjust the path if needed
exit_code = subprocess.call(['bash', script_path])
print(
    "Script executed successfully."
    if exit_code == 0
    else f"Script failed with exit code {exit_code}."
)
| import gradio as gr | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.llms import HuggingFacePipeline | |
| from langchain.chains import RetrievalQA | |
| from transformers import AutoConfig, AutoTokenizer, pipeline, AutoModelForCausalLM | |
| from langchain_community.document_loaders import DirectoryLoader | |
| from torch import bfloat16 | |
| import torch | |
| import re | |
| import transformers | |
| import spaces | |
| import requests | |
| from urllib.parse import urlencode, urlparse, parse_qs | |
| from selenium import webdriver | |
# Embedding model: runs on the GPU when CUDA is available, otherwise on CPU.
model_name = "sentence-transformers/all-mpnet-base-v2"
device = "cpu" if not torch.cuda.is_available() else "cuda"
model_kwargs = dict(device=device)
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
)

# Load every PDF found (recursively) under ./example.
loader = DirectoryLoader(
    './example',
    glob="**/*.pdf",
    recursive=True,
    use_multithreading=True,
)
docs = loader.load()

# Index the loaded documents into a Chroma store persisted in companies_db.
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="companies_db",
)

# Open the persisted store and expose it as a retriever for the QA chain.
books_db = Chroma(
    persist_directory="./companies_db",
    embedding_function=embeddings,
)
books_db_client = books_db.as_retriever()
# Language model and tokenizer.
model_name = "stabilityai/stablelm-zephyr-3b"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    bnb_4bit_compute_dtype=bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

model_config = AutoConfig.from_pretrained(model_name, max_new_tokens=1024)

# Load the quantized causal LM onto the device selected earlier in the file.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device,
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Text-generation pipeline around the loaded model: sampling is enabled and
# each call may produce up to 256 new tokens. return_full_text=True is set so
# that test_rag can locate the "Helpful Answer:" marker in the output.
query_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map=device,
    torch_dtype=torch.float16,
    return_full_text=True,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.9,
)

# Wrap the pipeline for LangChain and build the retrieval QA chain that
# answers questions from the documents returned by the Chroma retriever.
llm = HuggingFacePipeline(pipeline=query_pipeline)
books_db_client_retriever = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=books_db_client,
    chain_type="stuff",
    verbose=True,
)
# OAuth configuration for the Microsoft identity platform (v2.0 endpoints).
import os

TENANT_ID = '2b093ced-2571-463f-bc3e-b4f8bcb427ee'
CLIENT_ID = '2a7c884c-942d-49e2-9e5d-7a29d8a0d3e5'

# SECURITY: the client secret must never be committed to source control.
# It is read from the AZURE_CLIENT_SECRET environment variable (configure it
# as a secret of the deployment). Any secret that was previously hard-coded
# here should be considered leaked and rotated in the identity provider.
CLIENT_SECRET = os.environ.get('AZURE_CLIENT_SECRET')
if not CLIENT_SECRET:
    print("Warning: AZURE_CLIENT_SECRET is not set; Microsoft login will fail.")

REDIRECT_URI = 'https://sanjeevbora-chatbot.hf.space/'  # Your redirect URI here
AUTH_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/authorize"
TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"

# Query parameters of the authorization request.
# NOTE(review): 'state' is a fixed value and is not verified when the redirect
# comes back, so it provides no CSRF protection — consider a per-session
# random value that the login handler checks.
params = {
    'client_id': CLIENT_ID,
    'response_type': 'code',
    'redirect_uri': REDIRECT_URI,
    'response_mode': 'query',
    'scope': 'User.Read',
    'state': '12345',
}

# Login URL the user opens to authenticate; it never contains the secret.
login_url = f"{AUTH_URL}?{urlencode(params)}"
def exchange_code_for_token(auth_code):
    """Exchange an OAuth authorization code for an access token.

    Sends an ``authorization_code`` grant to ``TOKEN_URL`` using the
    module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``REDIRECT_URI``.

    Args:
        auth_code: The ``code`` value received on the redirect URI.

    Returns:
        The ``access_token`` value from the token response, or ``None`` when
        no code was supplied, the request could not be completed, the
        endpoint answered with a status other than 200, or the response body
        was not valid JSON.
    """
    # Without a code the grant cannot succeed; skip the network round trip.
    if not auth_code:
        return None

    data = {
        'grant_type': 'authorization_code',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'code': auth_code,
        'redirect_uri': REDIRECT_URI,
    }
    try:
        # A timeout keeps the UI handler from hanging on an unresponsive endpoint.
        response = requests.post(TOKEN_URL, data=data, timeout=10)
    except requests.RequestException:
        # Network failures are reported like any other failed exchange.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json().get('access_token')
    except ValueError:
        # The body was not valid JSON.
        return None
def is_logged_in(token):
    """Report whether a token is present.

    Placeholder check (to be replaced with real token validation): every
    value other than ``None`` counts as logged in.
    """
    if token is None:
        return False
    return True
def test_rag(query):
    """Answer ``query`` with the retrieval QA chain.

    Runs the chain and returns the text that follows the first
    "Helpful Answer:" marker in its output, stripped of surrounding
    whitespace. When the marker is absent, a fixed fallback message is
    returned instead.
    """
    chain_output = books_db_client_retriever.run(query)
    _, marker, answer = chain_output.partition("Helpful Answer:")
    if not marker:
        return "No helpful answer found."
    return answer.strip()
def chat(query, history=None):
    """Handle one chat submission.

    Args:
        query: The user's question; a falsy value adds nothing to the history.
        history: List of ``(question, answer)`` pairs, or ``None`` to start a
            new list. The list is extended in place.

    Returns:
        A ``(history, "")`` tuple; the empty string clears the input box.
    """
    turns = [] if history is None else history
    if not query:
        return turns, ""  # Nothing to answer; still clear the input
    turns.append((query, test_rag(query)))
    return turns, ""  # Clear input after submission
# Gradio UI: a login section that is always visible, and chat components that
# are revealed after a successful Microsoft login.
with gr.Blocks() as interface:
    gr.Markdown("## RAG Chatbot")
    gr.Markdown("Please log in to continue.")

    # Per-session access token; stays None until a login succeeds. Keeping it
    # in session state lets the chat handler verify the login on the server
    # instead of relying only on the chat components being hidden.
    token_state = gr.State(None)

    # Step 1: Provide a link for the user to log in
    login_link = gr.HTML(f'<a href="{login_url}" target="_blank">Click here to login with Microsoft</a>')

    # Step 2: Ask the user to paste the redirect link (it carries the authorization code)
    auth_code_box = gr.Textbox(label="Copy the link you got after loging in to the website", placeholder="Paste your Website link", type="password")

    # Step 3: Button to handle token exchange after the user pastes the link
    login_button = gr.Button("Submit Authorization Code")

    def handle_login(auth_code):
        """Exchange the pasted redirect URL for a token and unlock the chat.

        Args:
            auth_code: The full redirect URL pasted by the user; its ``code``
                query parameter is the authorization code.

        Returns:
            A tuple of the access token (``None`` on failure) followed by
            three visibility updates for the input box, the submit button and
            the chat history.
        """
        pasted = (auth_code or "").strip()
        query_params = parse_qs(urlparse(pasted).query)
        code_value = query_params.get('code', [None])[0]

        # Only contact the token endpoint when a code was actually found.
        token = exchange_code_for_token(code_value) if code_value else None
        unlocked = bool(token)
        return (
            token if unlocked else None,
            gr.update(visible=unlocked),
            gr.update(visible=unlocked),
            gr.update(visible=unlocked),
        )

    def handle_chat(query, history, token):
        """Run ``chat`` only for sessions that hold a token."""
        if not is_logged_in(token):
            # Not logged in: leave the history untouched and clear the input.
            return history or [], ""
        return chat(query, history)

    # Components for chat (initially hidden)
    input_box = gr.Textbox(label="Enter your question", placeholder="Type your question here...", visible=False)
    submit_btn = gr.Button("Submit", visible=False)
    chat_history = gr.Chatbot(label="Chat History", visible=False)

    # Login: store the token and toggle the chat components.
    login_button.click(
        handle_login,
        inputs=[auth_code_box],
        outputs=[token_state, input_box, submit_btn, chat_history],
    )

    # Chat handling (gated on the session token)
    submit_btn.click(
        handle_chat,
        inputs=[input_box, chat_history, token_state],
        outputs=[chat_history, input_box],
    )

interface.launch()