alx-d committed on
Commit e8b305d · verified · 1 parent: 01330c2

Upload folder using huggingface_hub

Files changed (1)
  1. advanced_rag.py +731 -125
advanced_rag.py CHANGED
@@ -36,6 +36,189 @@ from langchain_community.document_loaders import PyMuPDFLoader # Updated loader
36
  import tempfile
37
  import mimetypes
38
 
39
  def get_mime_type(file_path):
40
  return mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
41
 
@@ -43,6 +226,8 @@ print("Pydantic Version: ")
43
  print(pydantic.__version__)
44
  # Add Mistral imports with fallback handling
45
 
 
 
46
  try:
47
  from mistralai import Mistral
48
  MISTRAL_AVAILABLE = True
@@ -107,11 +292,14 @@ def process_in_background(job_id, function, args):
107
  error_result = (f"Error processing job: {str(e)}", "", "", "")
108
  results_queue.put((job_id, error_result))
109
 
110
- def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p):
111
  """Asynchronous version of load_pdfs_updated to prevent timeouts"""
112
  global last_job_id
113
  if not file_links:
114
- return "Please enter non-empty URLs", "", "Model used: N/A", "", "", get_job_list()
115
 
116
  job_id = str(uuid.uuid4())
117
  debug_print(f"Starting async job {job_id} for file loading")
@@ -119,7 +307,7 @@ def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temp
119
  # Start background thread
120
  threading.Thread(
121
  target=process_in_background,
122
- args=(job_id, load_pdfs_updated, [file_links, model_choice, prompt_template, bm25_weight, temperature, top_p])
123
  ).start()
124
 
125
  job_query = f"Loading files: {file_links.split()[0]}..." if file_links else "No files"
@@ -132,6 +320,8 @@ def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temp
132
 
133
  last_job_id = job_id
134
 
 
 
135
  return (
136
  f"Files submitted and processing in the background (Job ID: {job_id}).\n\n"
137
  f"Use 'Check Job Status' tab with this ID to get results.",
@@ -139,14 +329,17 @@ def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temp
139
  f"Model requested: {model_choice}",
140
  job_id, # Return job_id to update the job_id_input component
141
  job_query, # Return job_query to update the job_query_display component
142
- get_job_list() # Return updated job list
 
143
  )
144
 
145
- def submit_query_async(query, model_choice=None):
146
  """Asynchronous version of submit_query_updated to prevent timeouts"""
147
  global last_job_id
148
  if not query:
149
  return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
 
 
150
 
151
  job_id = str(uuid.uuid4())
152
  debug_print(f"Starting async job {job_id} for query: {query}")
@@ -154,13 +347,13 @@ def submit_query_async(query, model_choice=None):
154
  # Update model if specified
155
  if model_choice and rag_chain and rag_chain.llm_choice != model_choice:
156
  debug_print(f"Updating model to {model_choice} for this query")
157
- rag_chain.update_llm_pipeline(model_choice, rag_chain.temperature, rag_chain.top_p,
158
- rag_chain.prompt_template, rag_chain.bm25_weight)
159
 
160
  # Start background thread
161
  threading.Thread(
162
  target=process_in_background,
163
- args=(job_id, submit_query_updated, [query])
164
  ).start()
165
 
166
  jobs[job_id] = {
@@ -550,7 +743,7 @@ def load_file_from_google_drive(link: str) -> list:
550
 
551
  class ElevatedRagChain:
552
  def __init__(self, llm_choice: str = "Meta-Llama-3", prompt_template: str = default_prompt,
553
- bm25_weight: float = 0.6, temperature: float = 0.5, top_p: float = 0.95) -> None:
554
  debug_print(f"Initializing ElevatedRagChain with model: {llm_choice}")
555
  self.embed_func = HuggingFaceEmbeddings(
556
  model_name="sentence-transformers/all-MiniLM-L6-v2",
@@ -558,7 +751,7 @@ class ElevatedRagChain:
558
  )
559
  self.bm25_weight = bm25_weight
560
  self.faiss_weight = 1.0 - bm25_weight
561
- self.top_k = 5
562
  self.llm_choice = llm_choice
563
  self.temperature = temperature
564
  self.top_p = top_p
@@ -587,9 +780,119 @@ class ElevatedRagChain:
587
  # Improve error handling in the ElevatedRagChain class
588
  def create_llm_pipeline(self):
589
  from langchain.llms.base import LLM # Import LLM here so it's always defined
590
- normalized = self.llm_choice.lower()
591
  try:
592
- if "remote" in normalized:
593
  debug_print("Creating remote Meta-Llama-3 pipeline via Hugging Face Inference API...")
594
  from huggingface_hub import InferenceClient
595
  repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
@@ -598,20 +901,19 @@ class ElevatedRagChain:
598
  raise ValueError("Please set the HF_API_TOKEN environment variable to use remote inference.")
599
 
600
  client = InferenceClient(token=hf_api_token, timeout=120)
601
-
602
- # We no longer use wait_for_model because it's unsupported
603
  def remote_generate(prompt: str) -> str:
604
  max_retries = 3
605
  backoff = 2 # start with 2 seconds
606
  for attempt in range(max_retries):
607
  try:
608
- debug_print(f"Remote generation attempt {attempt+1}")
609
  response = client.text_generation(
610
  prompt,
611
  model=repo_id,
612
  temperature=self.temperature,
613
  top_p=self.top_p,
614
- max_new_tokens=512 # Reduced token count for speed
615
  )
616
  return response
617
  except Exception as e:
@@ -623,6 +925,11 @@ class ElevatedRagChain:
623
  return "Failed to generate response after multiple attempts."
624
 
625
  class RemoteLLM(LLM):
626
  @property
627
  def _llm_type(self) -> str:
628
  return "remote_llm"
@@ -632,97 +939,74 @@ class ElevatedRagChain:
632
 
633
  @property
634
  def _identifying_params(self) -> dict:
635
- return {"model": repo_id}
636
 
637
  debug_print("Remote Meta-Llama-3 pipeline created successfully.")
638
  return RemoteLLM()
639
-
640
- elif "mistral-api" in normalized:
 
641
  debug_print("Creating Mistral API pipeline...")
642
  mistral_api_key = os.environ.get("MISTRAL_API_KEY")
643
  if not mistral_api_key:
644
  raise ValueError("Please set the MISTRAL_API_KEY environment variable to use Mistral API.")
 
645
  try:
646
- from mistralai import Mistral
647
  debug_print("Mistral library imported successfully")
648
  except ImportError:
649
- debug_print("Mistral client library not installed. Falling back to Llama pipeline.")
650
- normalized = "llama"
651
- if normalized != "llama":
652
- # from pydantic import PrivateAttr
653
- # from langchain.llms.base import LLM
654
- # from typing import Any, Optional, List
655
- # import typing
656
-
657
- class MistralLLM(LLM):
658
- temperature: float = 0.7
659
- top_p: float = 0.95
660
- _client: Any = PrivateAttr(default=None)
661
-
662
- def __init__(self, api_key: str, temperature: float = 0.7, top_p: float = 0.95, **kwargs: Any):
663
- try:
664
- super().__init__(**kwargs)
665
- # Bypass Pydantic's __setattr__ to assign to _client
666
- object.__setattr__(self, '_client', Mistral(api_key=api_key))
667
- self.temperature = temperature
668
- self.top_p = top_p
669
- except Exception as e:
670
- debug_print(f"Init Mistral failed with error: {e}")
671
-
672
- @property
673
- def _llm_type(self) -> str:
674
- return "mistral_llm"
675
- def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
676
- try:
677
- debug_print("Calling Mistral API...")
678
- response = self._client.chat.complete(
679
- model="mistral-small-latest",
680
- messages=[{"role": "user", "content": prompt}],
681
- temperature=self.temperature,
682
- top_p=self.top_p
683
- )
684
- return response.choices[0].message.content
685
- except Exception as e:
686
- debug_print(f"Mistral API error: {str(e)}")
687
- return f"Error generating response: {str(e)}"
688
- @property
689
- def _identifying_params(self) -> dict:
690
- return {"model": "mistral-small-latest"}
691
- debug_print("Creating Mistral LLM instance")
692
- mistral_llm = MistralLLM(api_key=mistral_api_key, temperature=self.temperature, top_p=self.top_p)
693
- debug_print("Mistral API pipeline created successfully.")
694
- return mistral_llm
695
-
696
- else:
697
- # Default case - using a fallback model (or Llama)
698
- debug_print("Using local/fallback model pipeline")
699
- model_id = "facebook/opt-350m" # Use a smaller model as fallback
700
- pipe = pipeline(
701
- "text-generation",
702
- model=model_id,
703
- device=-1, # CPU
704
- max_length=1024
705
- )
706
 
707
- class LocalLLM(LLM):
708
  @property
709
  def _llm_type(self) -> str:
710
- return "local_llm"
 
711
  def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
712
- # For this fallback, truncate prompt if it exceeds limits
713
- reserved_gen = 128
714
- max_total = 1024
715
- max_prompt_tokens = max_total - reserved_gen
716
- truncated_prompt = truncate_prompt(prompt, max_tokens=max_prompt_tokens)
717
- generated = pipe(truncated_prompt, max_new_tokens=reserved_gen)[0]["generated_text"]
718
- return generated
719
  @property
720
  def _identifying_params(self) -> dict:
721
- return {"model": model_id, "max_length": 1024}
722
 
723
- debug_print("Local fallback pipeline created.")
724
- return LocalLLM()
725
-
726
  except Exception as e:
727
  debug_print(f"Error creating LLM pipeline: {str(e)}")
728
  # Return a dummy LLM that explains the error
@@ -741,11 +1025,12 @@ class ElevatedRagChain:
741
  return ErrorLLM()
742
 
743
 
744
- def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, prompt_template: str, bm25_weight: float):
745
  debug_print(f"Updating chain with new model: {new_model_choice}")
746
  self.llm_choice = new_model_choice
747
  self.temperature = temperature
748
  self.top_p = top_p
 
749
  self.prompt_template = prompt_template
750
  self.bm25_weight = bm25_weight
751
  self.faiss_weight = 1.0 - bm25_weight
@@ -753,7 +1038,14 @@ class ElevatedRagChain:
753
  def format_response(response: str) -> str:
754
  input_tokens = count_tokens(self.context + self.prompt_template)
755
  output_tokens = count_tokens(response)
756
- formatted = f"### Response\n\n{response}\n\n---\n"
757
  formatted += f"- **Input tokens:** {input_tokens}\n"
758
  formatted += f"- **Output tokens:** {output_tokens}\n"
759
  formatted += f"- **Generated using:** {self.llm_choice}\n"
@@ -836,7 +1128,14 @@ class ElevatedRagChain:
836
  def format_response(response: str) -> str:
837
  input_tokens = count_tokens(self.context + self.prompt_template)
838
  output_tokens = count_tokens(response)
839
- formatted = f"### Response\n\n{response}\n\n---\n"
840
  formatted += f"- **Input tokens:** {input_tokens}\n"
841
  formatted += f"- **Output tokens:** {output_tokens}\n"
842
  formatted += f"- **Generated using:** {self.llm_choice}\n"
@@ -863,7 +1162,7 @@ class ElevatedRagChain:
863
  global rag_chain
864
  rag_chain = ElevatedRagChain()
865
 
866
- def load_pdfs_updated(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p):
867
  debug_print("Inside load_pdfs function.")
868
  if not file_links:
869
  debug_print("Please enter non-empty URLs")
@@ -872,7 +1171,7 @@ def load_pdfs_updated(file_links, model_choice, prompt_template, bm25_weight, te
872
  links = [link.strip() for link in file_links.split("\n") if link.strip()]
873
  global rag_chain
874
  if rag_chain.raw_data:
875
- rag_chain.update_llm_pipeline(model_choice, temperature, top_p, prompt_template, bm25_weight)
876
  context_display = rag_chain.get_current_context()
877
  response_msg = f"Files already loaded. Chain updated with model: {model_choice}"
878
  return (
@@ -887,7 +1186,8 @@ def load_pdfs_updated(file_links, model_choice, prompt_template, bm25_weight, te
887
  prompt_template=prompt_template,
888
  bm25_weight=bm25_weight,
889
  temperature=temperature,
890
- top_p=top_p
 
891
  )
892
  rag_chain.add_pdfs_to_vectore_store(links)
893
  context_display = rag_chain.get_current_context()
@@ -911,7 +1211,7 @@ def load_pdfs_updated(file_links, model_choice, prompt_template, bm25_weight, te
911
  def update_model(new_model: str):
912
  global rag_chain
913
  if rag_chain and rag_chain.raw_data:
914
- rag_chain.update_llm_pipeline(new_model, rag_chain.temperature, rag_chain.top_p,
915
  rag_chain.prompt_template, rag_chain.bm25_weight)
916
  debug_print(f"Model updated to {rag_chain.llm_choice}")
917
  return f"Model updated to: {rag_chain.llm_choice}"
@@ -920,7 +1220,7 @@ def update_model(new_model: str):
920
 
921
 
922
  # Update submit_query_updated to better handle context limitation
923
- def submit_query_updated(query):
924
  debug_print(f"Processing query: {query}")
925
  if not query:
926
  debug_print("Empty query received")
@@ -931,6 +1231,19 @@ def submit_query_updated(query):
931
  return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0"
932
 
933
  try:
934
  # Determine max context size based on model
935
  model_name = rag_chain.llm_choice.lower()
936
  max_context_tokens = 32000 if "mistral" in model_name else 4096
@@ -1077,6 +1390,43 @@ document.addEventListener('DOMContentLoaded', function() {
1077
  clearInterval(jobListInterval);
1078
  }
1079
  }, 500);
1080
  });
1081
  """) as app:
1082
  gr.Markdown('''# PhiRAG - Async Version
@@ -1113,8 +1463,16 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
1113
  with gr.Row():
1114
  with gr.Column():
1115
  model_dropdown = gr.Dropdown(
1116
- choices=["🇺🇸 Remote Meta-Llama-3", "🇪🇺 Mistral-API"],
1117
- value="🇺🇸 Remote Meta-Llama-3",
1118
  label="Select Model"
1119
  )
1120
  temperature_slider = gr.Slider(
@@ -1125,6 +1483,10 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
1125
  minimum=0.1, maximum=0.99, value=0.95, step=0.05,
1126
  label="Word Variety (Top-p)"
1127
  )
1128
  with gr.Column():
1129
  pdf_input = gr.Textbox(
1130
  label="Enter your file URLs (one per line)",
@@ -1160,21 +1522,46 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
1160
  with gr.Row():
1161
  model_output = gr.Markdown("**Current Model**: Not selected")
1162
 
1163
- with gr.TabItem("Submit Query"):
1164
  with gr.Row():
1165
- # Add this line to define the query_model_dropdown
1166
- query_model_dropdown = gr.Dropdown(
1167
- choices=["🇺🇸 Remote Meta-Llama-3", "🇪🇺 Mistral-API"],
1168
- value="🇺🇸 Remote Meta-Llama-3",
1169
- label="Query Model"
1170
- )
1171
-
1172
- query_input = gr.Textbox(
1173
- label="Enter your query here",
1174
- placeholder="Type your query",
1175
- lines=4
1176
- )
1177
- submit_button = gr.Button("Submit Query (Async)")
1178
 
1179
  with gr.Row():
1180
  query_response = gr.Textbox(
@@ -1247,6 +1634,138 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
1247
  status_tokens1 = gr.Markdown("")
1248
  status_tokens2 = gr.Markdown("")
1249
 
1250
  with gr.TabItem("App Management"):
1251
  with gr.Row():
1252
  reset_button = gr.Button("Reset App")
@@ -1267,26 +1786,50 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
1267
  with gr.Row():
1268
  reset_model = gr.Markdown("")
1269
 
1270
- # Connect the buttons to their respective functions
1271
  load_button.click(
1272
  load_pdfs_async,
1273
- inputs=[pdf_input, model_dropdown, prompt_input, bm25_weight_slider, temperature_slider, top_p_slider],
1274
- outputs=[load_response, load_context, model_output, job_id_input, job_query_display, job_list]
1275
- )
1276
-
1277
- # Also sync in the other direction
1278
- query_model_dropdown.change(
1279
- fn=sync_model_dropdown,
1280
- inputs=query_model_dropdown,
1281
- outputs=model_dropdown
1282
  )
1283
 
 
1284
  submit_button.click(
1285
  submit_query_async,
1286
- inputs=[query_input, query_model_dropdown],
1287
  outputs=[query_response, query_context, input_tokens, output_tokens, job_id_input, job_query_display, job_list]
1288
  )
1289
 
1290
  check_button.click(
1291
  check_job_status,
1292
  inputs=[job_id_input],
@@ -1340,6 +1883,69 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
1340
  every=2 #if auto_refresh_checkbox.value else None # Directly set `every` based on the checkbox state
1341
  )
1342
 
1343
  if __name__ == "__main__":
1344
  debug_print("Launching Gradio interface.")
1345
  app.queue().launch(share=False)
 
36
  import tempfile
37
  import mimetypes
38
 
39
+ # Add batch processing helper functions
40
+ def generate_parameter_values(min_val, max_val, num_values):
41
+ """Generate evenly spaced values between min and max"""
42
+ if num_values == 1:
43
+ return [min_val]
44
+ step = (max_val - min_val) / (num_values - 1)
45
+ return [min_val + (step * i) for i in range(num_values)]
46
+
47
+ def process_batch_query(query, model_choice, max_tokens, param_configs, slider_values, job_id):
48
+ """Process a batch of queries with different parameter combinations"""
49
+ results = []
50
+
51
+ # Generate all parameter combinations
52
+ temp_values = [slider_values['temperature']] if param_configs['temperature'] == "Constant" else generate_parameter_values(0.1, 1.0, int(param_configs['temperature'].split()[2]))
53
+ top_p_values = [slider_values['top_p']] if param_configs['top_p'] == "Constant" else generate_parameter_values(0.1, 0.99, int(param_configs['top_p'].split()[2]))
54
+ top_k_values = [slider_values['top_k']] if param_configs['top_k'] == "Constant" else generate_parameter_values(1, 100, int(param_configs['top_k'].split()[2]))
55
+ bm25_values = [slider_values['bm25']] if param_configs['bm25'] == "Constant" else generate_parameter_values(0.0, 1.0, int(param_configs['bm25'].split()[2]))
56
+
57
+ total_combinations = len(temp_values) * len(top_p_values) * len(top_k_values) * len(bm25_values)
58
+ current = 0
59
+
60
+ for temp in temp_values:
61
+ for top_p in top_p_values:
62
+ for top_k in top_k_values:
63
+ for bm25 in bm25_values:
64
+ current += 1
65
+ try:
66
+ # Update parameters
67
+ rag_chain.temperature = temp
68
+ rag_chain.top_p = top_p
69
+ rag_chain.top_k = top_k
70
+ rag_chain.bm25_weight = bm25
71
+ rag_chain.faiss_weight = 1.0 - bm25
72
+
73
+ # Update ensemble retriever
74
+ rag_chain.ensemble_retriever = EnsembleRetriever(
75
+ retrievers=[rag_chain.bm25_retriever, rag_chain.faiss_retriever],
76
+ weights=[rag_chain.bm25_weight, rag_chain.faiss_weight]
77
+ )
78
+
79
+ # Process query
80
+ response = rag_chain.elevated_rag_chain.invoke({"question": query})
81
+
82
+ # Format result
83
+ result = {
84
+ "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
85
+ "Response": response,
86
+ "Progress": f"Query {current}/{total_combinations}"
87
+ }
88
+ results.append(result)
89
+
90
+ except Exception as e:
91
+ results.append({
92
+ "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
93
+ "Response": f"Error: {str(e)}",
94
+ "Progress": f"Query {current}/{total_combinations}"
95
+ })
96
+
97
+ # Format final results
98
+ formatted_results = "### Batch Query Results\n\n"
99
+ for result in results:
100
+ formatted_results += f"#### {result['Parameters']}\n"
101
+ formatted_results += f"**Progress:** {result['Progress']}\n\n"
102
+ formatted_results += f"{result['Response']}\n\n"
103
+ formatted_results += "---\n\n"
104
+
105
+ return (
106
+ formatted_results,
107
+ f"Job ID: {job_id}",
108
+ f"Input tokens: {count_tokens(query)}",
109
+ f"Output tokens: {sum(count_tokens(r['Response']) for r in results)}"
110
+ )
111
+
112
+ def process_batch_query_async(query, model_choice, max_tokens, param_configs, slider_values):
113
+ """Asynchronous version of batch query processing"""
114
+ global last_job_id
115
+ if not query:
116
+ return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
117
+
118
+ if not hasattr(rag_chain, 'elevated_rag_chain') or not rag_chain.raw_data:
119
+ return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
120
+
121
+ job_id = str(uuid.uuid4())
122
+ debug_print(f"Starting async batch job {job_id} for query: {query}")
123
+
124
+ # Get slider values
125
+ slider_values = {
126
+ 'temperature': slider_values['temperature'],
127
+ 'top_p': slider_values['top_p'],
128
+ 'top_k': slider_values['top_k'],
129
+ 'bm25': slider_values['bm25']
130
+ }
131
+
132
+ # Start background thread
133
+ threading.Thread(
134
+ target=process_in_background,
135
+ args=(job_id, process_batch_query, [query, model_choice, max_tokens, param_configs, slider_values, job_id])
136
+ ).start()
137
+
138
+ jobs[job_id] = {
139
+ "status": "processing",
140
+ "type": "batch_query",
141
+ "start_time": time.time(),
142
+ "query": query,
143
+ "model": model_choice,
144
+ "param_configs": param_configs
145
+ }
146
+
147
+ last_job_id = job_id
148
+
149
+ return (
150
+ f"Batch query submitted and processing in the background (Job ID: {job_id}).\n\n"
151
+ f"Use 'Check Job Status' tab with this ID to get results.",
152
+ f"Job ID: {job_id}",
153
+ f"Input tokens: {count_tokens(query)}",
154
+ "Output tokens: pending",
155
+ job_id, # Return job_id to update the job_id_input component
156
+ query, # Return query to update the job_query_display component
157
+ get_job_list() # Return updated job list
158
+ )
159
+
160
+ def submit_batch_query_async(query, model_choice, max_tokens, temp_config, top_p_config, top_k_config, bm25_config,
161
+ temp_slider, top_p_slider, top_k_slider, bm25_slider):
162
+ """Handle batch query submission with async processing"""
163
+ if not query:
164
+ return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
165
+
166
+ if not hasattr(rag_chain, 'elevated_rag_chain') or not rag_chain.raw_data:
167
+ return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
168
+
169
+ # Get slider values
170
+ slider_values = {
171
+ 'temperature': temp_slider,
172
+ 'top_p': top_p_slider,
173
+ 'top_k': top_k_slider,
174
+ 'bm25': bm25_slider
175
+ }
176
+
177
+ param_configs = {
178
+ 'temperature': temp_config,
179
+ 'top_p': top_p_config,
180
+ 'top_k': top_k_config,
181
+ 'bm25': bm25_config
182
+ }
183
+
184
+ return process_batch_query_async(query, model_choice, max_tokens, param_configs, slider_values)
185
+
186
+ def submit_batch_query(query, model_choice, max_tokens, temp_config, top_p_config, top_k_config, bm25_config,
187
+ temp_slider, top_p_slider, top_k_slider, bm25_slider):
188
+ """Handle batch query submission"""
189
+ if not query:
190
+ return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0"
191
+
192
+ if not hasattr(rag_chain, 'elevated_rag_chain') or not rag_chain.raw_data:
193
+ return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0"
194
+
195
+ # Get slider values
196
+ slider_values = {
197
+ 'temperature': temp_slider,
198
+ 'top_p': top_p_slider,
199
+ 'top_k': top_k_slider,
200
+ 'bm25': bm25_slider
201
+ }
202
+
203
+ try:
204
+ # process_batch_query expects a job_id and already returns
+ # (formatted_results, job_info, input_tokens, output_tokens)
+ job_id = str(uuid.uuid4())
+ formatted_results, _, input_tokens_str, output_tokens_str = process_batch_query(
+ query, model_choice, max_tokens,
+ {'temperature': temp_config, 'top_p': top_p_config,
+ 'top_k': top_k_config, 'bm25': bm25_config},
+ slider_values, job_id)
+
+ return formatted_results, "", input_tokens_str, output_tokens_str
218
+
219
+ except Exception as e:
220
+ return f"Error processing batch query: {str(e)}", "", "Input tokens: 0", "Output tokens: 0"
221
+
222
  def get_mime_type(file_path):
223
  return mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
224
 
 
226
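The batch helpers above turn each "Constant" / "Whole range N values" selection into a list of evenly spaced values and then sweep every combination of temperature, top-p, top-k and BM25 weight. A minimal, self-contained sketch of that grid expansion (the expand_grid helper and the demo values are illustrative additions, not part of the committed file):

from itertools import product

def generate_parameter_values(min_val, max_val, num_values):
    # Evenly spaced values from min_val to max_val, inclusive
    if num_values == 1:
        return [min_val]
    step = (max_val - min_val) / (num_values - 1)
    return [min_val + step * i for i in range(num_values)]

def expand_grid(param_configs, slider_values):
    # "Constant" keeps the slider value; "Whole range N values" sweeps the parameter's range
    ranges = {'temperature': (0.1, 1.0), 'top_p': (0.1, 0.99), 'top_k': (1, 100), 'bm25': (0.0, 1.0)}
    axes = {}
    for name, (low, high) in ranges.items():
        config = param_configs[name]
        if config == "Constant":
            axes[name] = [slider_values[name]]
        else:
            # e.g. "Whole range 5 values" -> 5
            axes[name] = generate_parameter_values(low, high, int(config.split()[2]))
    names = list(axes)
    return [dict(zip(names, combo)) for combo in product(*axes.values())]

grid = expand_grid(
    {'temperature': "Whole range 3 values", 'top_p': "Constant", 'top_k': "Constant", 'bm25': "Whole range 3 values"},
    {'temperature': 0.5, 'top_p': 0.95, 'top_k': 50, 'bm25': 0.6},
)
print(len(grid))  # 3 * 1 * 1 * 3 = 9 combinations, matching total_combinations in process_batch_query

process_batch_query walks the same grid with nested loops, rebuilding the EnsembleRetriever with the new BM25/FAISS weights on every iteration before invoking the chain.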
  print(pydantic.__version__)
227
  # Add Mistral imports with fallback handling
228
 
229
+ slider_max_tokens = None
230
+
231
  try:
232
  from mistralai import Mistral
233
  MISTRAL_AVAILABLE = True
 
292
  error_result = (f"Error processing job: {str(e)}", "", "", "")
293
  results_queue.put((job_id, error_result))
294
 
295
+ def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p, top_k, max_tokens_slider):
296
  """Asynchronous version of load_pdfs_updated to prevent timeouts"""
297
  global last_job_id
298
  if not file_links:
299
+ return "Please enter non-empty URLs", "", "Model used: N/A", "", "", get_job_list(), ""
300
+ global slider_max_tokens
301
+ slider_max_tokens = max_tokens_slider
302
+
303
 
304
  job_id = str(uuid.uuid4())
305
  debug_print(f"Starting async job {job_id} for file loading")
 
307
  # Start background thread
308
  threading.Thread(
309
  target=process_in_background,
310
+ args=(job_id, load_pdfs_updated, [file_links, model_choice, prompt_template, bm25_weight, temperature, top_p, top_k])
311
  ).start()
312
 
313
  job_query = f"Loading files: {file_links.split()[0]}..." if file_links else "No files"
 
320
 
321
  last_job_id = job_id
322
 
323
+ init_message = "Vector database initialized using the files.\nThe above parameters were used in the initialization of the RAG chain."
324
+
325
  return (
326
  f"Files submitted and processing in the background (Job ID: {job_id}).\n\n"
327
  f"Use 'Check Job Status' tab with this ID to get results.",
 
329
  f"Model requested: {model_choice}",
330
  job_id, # Return job_id to update the job_id_input component
331
  job_query, # Return job_query to update the job_query_display component
332
+ get_job_list(), # Return updated job list
333
+ init_message # Return initialization message
334
  )
335
 
336
+ def submit_query_async(query, model_choice, max_tokens_slider, temperature, top_p, top_k, bm25_weight):
337
  """Asynchronous version of submit_query_updated to prevent timeouts"""
338
  global last_job_id
339
  if not query:
340
  return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list()
341
+ global slider_max_tokens
342
+ slider_max_tokens = max_tokens_slider
343
 
344
  job_id = str(uuid.uuid4())
345
  debug_print(f"Starting async job {job_id} for query: {query}")
 
347
  # Update model if specified
348
  if model_choice and rag_chain and rag_chain.llm_choice != model_choice:
349
  debug_print(f"Updating model to {model_choice} for this query")
350
+ rag_chain.update_llm_pipeline(model_choice, temperature, top_p, top_k,
351
+ rag_chain.prompt_template, bm25_weight)
352
 
353
  # Start background thread
354
  threading.Thread(
355
  target=process_in_background,
356
+ args=(job_id, submit_query_updated, [query, temperature, top_p, top_k, bm25_weight])
357
  ).start()
358
 
359
  jobs[job_id] = {
 
743
 
744
  class ElevatedRagChain:
745
  def __init__(self, llm_choice: str = "Meta-Llama-3", prompt_template: str = default_prompt,
746
+ bm25_weight: float = 0.6, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 50) -> None:
747
  debug_print(f"Initializing ElevatedRagChain with model: {llm_choice}")
748
  self.embed_func = HuggingFaceEmbeddings(
749
  model_name="sentence-transformers/all-MiniLM-L6-v2",
 
751
  )
752
  self.bm25_weight = bm25_weight
753
  self.faiss_weight = 1.0 - bm25_weight
754
+ self.top_k = top_k
755
  self.llm_choice = llm_choice
756
  self.temperature = temperature
757
  self.top_p = top_p
 
780
  # Improve error handling in the ElevatedRagChain class
781
  def create_llm_pipeline(self):
782
  from langchain.llms.base import LLM # Import LLM here so it's always defined
783
+ from typing import Optional, List, Any
784
+ from pydantic import PrivateAttr
785
+ global slider_max_tokens
786
+
787
+ # Extract the model name without the flag emoji prefix
788
+ clean_llm_choice = self.llm_choice.split(" ", 1)[-1] if " " in self.llm_choice else self.llm_choice
789
+ normalized = clean_llm_choice.lower()
790
+ print(f"Normalized model name: {normalized}")
791
+
792
+ # Model configurations from the second file
793
+ model_token_limits = {
794
+ "gpt-3.5": 16385,
795
+ "gpt-4o": 128000,
796
+ "gpt-4o-mini": 128000,
797
+ "meta-llama-3": 4096,
798
+ "mistral-api": 128000,
799
+ "o1-mini": 128000,
800
+ "o3-mini": 128000
801
+ }
802
+
803
+ model_map = {
804
+ "gpt-3.5": "gpt-3.5-turbo",
805
+ "gpt-4o": "gpt-4o",
806
+ "gpt-4o mini": "gpt-4o-mini",
807
+ "o1-mini": "gpt-4o-mini",
808
+ "o3-mini": "gpt-4o-mini",
809
+ "mistral": "mistral-small-latest",
810
+ "mistral-api": "mistral-small-latest",
811
+ "meta-llama-3": "meta-llama/Meta-Llama-3-8B-Instruct",
812
+ "remote meta-llama-3": "meta-llama/Meta-Llama-3-8B-Instruct"
813
+ }
814
+
815
+ model_pricing = {
816
+ "gpt-3.5": {"USD": {"input": 0.0000005, "output": 0.0000015}, "RON": {"input": 0.0000023, "output": 0.0000069}},
817
+ "gpt-4o": {"USD": {"input": 0.0000025, "output": 0.00001}, "RON": {"input": 0.0000115, "output": 0.000046}},
818
+ "gpt-4o-mini": {"USD": {"input": 0.00000015, "output": 0.0000006}, "RON": {"input": 0.0000007, "output": 0.0000028}},
819
+ "o1-mini": {"USD": {"input": 0.0000011, "output": 0.0000044}, "RON": {"input": 0.0000051, "output": 0.0000204}},
820
+ "o3-mini": {"USD": {"input": 0.0000011, "output": 0.0000044}, "RON": {"input": 0.0000051, "output": 0.0000204}},
821
+ "meta-llama-3": {"USD": {"input": 0.00, "output": 0.00}, "RON": {"input": 0.00, "output": 0.00}},
822
+ "mistral": {"USD": {"input": 0.00, "output": 0.00}, "RON": {"input": 0.00, "output": 0.00}},
823
+ "mistral-api": {"USD": {"input": 0.00, "output": 0.00}, "RON": {"input": 0.00, "output": 0.00}}
824
+ }
825
+ pricing_info = ""
826
+
827
+ # Find the matching model
828
+ model_key = None
829
+ for key in model_map:
830
+ if key.lower() in normalized:
831
+ model_key = key
832
+ break
833
+
834
+ if not model_key:
835
+ raise ValueError(f"Unsupported model: {normalized}")
836
+ model = model_map[model_key]
837
+ max_tokens = model_token_limits.get(model_key, 4096) # limits are keyed by model_key, not by the mapped API model name
838
+ max_tokens = min(slider_max_tokens, max_tokens) if slider_max_tokens else max_tokens
839
+ pricing_info = model_pricing.get(model_key, {"USD": {"input": 0.00, "output": 0.00}, "RON": {"input": 0.00, "output": 0.00}})
840
+
841
  try:
842
+ # OpenAI models (GPT-3.5, GPT-4o, GPT-4o mini, o1-mini, o3-mini)
843
+ if any(model in normalized for model in ["gpt-3.5", "gpt-4o", "o1-mini", "o3-mini"]):
844
+ debug_print(f"Creating OpenAI API pipeline for {normalized}...")
845
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
846
+ if not openai_api_key:
847
+ raise ValueError("Please set the OPENAI_API_KEY environment variable to use OpenAI API.")
848
+
849
+ import openai
850
+
851
+ class OpenAILLM(LLM):
852
+ model_name: str = model
853
+ llm_choice: str = model
854
+ max_context_tokens: int = max_tokens
855
+ pricing: dict = pricing_info
856
+ temperature: float = 0.7
857
+ top_p: float = 0.95
858
+ top_k: int = 50
859
+
860
+
861
+ @property
862
+ def _llm_type(self) -> str:
863
+ return "openai_llm"
864
+
865
+ def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
866
+ try:
867
+ openai.api_key = openai_api_key
868
+ print(f" tokens: {max_tokens}")
869
+ response = openai.ChatCompletion.create(
870
+ model=self.model_name,
871
+ messages=[{"role": "user", "content": prompt}],
872
+ temperature=self.temperature,
873
+ top_p=self.top_p,
874
+ max_tokens=max_tokens
875
+ )
876
+ return response["choices"][0]["message"]["content"]
877
+ except Exception as e:
878
+ debug_print(f"OpenAI API error: {str(e)}")
879
+ return f"Error generating response: {str(e)}"
880
+
881
+ @property
882
+ def _identifying_params(self) -> dict:
883
+ return {
884
+ "model": self.model_name,
885
+ "max_tokens": self.max_context_tokens,
886
+ "temperature": self.temperature,
887
+ "top_p": self.top_p,
888
+ "top_k": self.top_k
889
+ }
890
+
891
+ debug_print(f"OpenAI {model} pipeline created successfully.")
892
+ return OpenAILLM()
893
+
894
+ # Meta-Llama-3 model
895
+ elif "meta-llama" in normalized or "llama" in normalized:
896
  debug_print("Creating remote Meta-Llama-3 pipeline via Hugging Face Inference API...")
897
  from huggingface_hub import InferenceClient
898
  repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
901
  raise ValueError("Please set the HF_API_TOKEN environment variable to use remote inference.")
902
 
903
  client = InferenceClient(token=hf_api_token, timeout=120)
904
+
 
905
  def remote_generate(prompt: str) -> str:
906
  max_retries = 3
907
  backoff = 2 # start with 2 seconds
908
  for attempt in range(max_retries):
909
  try:
910
+ debug_print(f"Remote generation attempt {attempt+1} tokens: {self.max_tokens}")
911
  response = client.text_generation(
912
  prompt,
913
  model=repo_id,
914
  temperature=self.temperature,
915
  top_p=self.top_p,
916
+ max_new_tokens=max_tokens # text_generation expects max_new_tokens; capped by the Max Tokens slider
917
  )
918
  return response
919
  except Exception as e:
 
925
  return "Failed to generate response after multiple attempts."
926
 
927
  class RemoteLLM(LLM):
928
+ model_name: str = repo_id
929
+ llm_choice: str = repo_id
930
+ max_context_tokens: int = max_tokens
931
+ pricing: dict = pricing_info
932
+
933
  @property
934
  def _llm_type(self) -> str:
935
  return "remote_llm"
 
939
 
940
  @property
941
  def _identifying_params(self) -> dict:
942
+ return {"model": self.model_name, "max_tokens": self.max_context_tokens}
943
 
944
  debug_print("Remote Meta-Llama-3 pipeline created successfully.")
945
  return RemoteLLM()
946
+
947
+ # Mistral API model
948
+ elif "mistral" in normalized:
949
  debug_print("Creating Mistral API pipeline...")
950
  mistral_api_key = os.environ.get("MISTRAL_API_KEY")
951
  if not mistral_api_key:
952
  raise ValueError("Please set the MISTRAL_API_KEY environment variable to use Mistral API.")
953
+
954
  try:
955
+ from mistralai import Mistral
956
  debug_print("Mistral library imported successfully")
957
  except ImportError:
958
+ raise ImportError("Mistral client library not installed. Please install with 'pip install mistralai'.")
959
 
960
+ class MistralLLM(LLM):
961
+ temperature: float = 0.7
962
+ top_p: float = 0.95
963
+ model_name: str = model
964
+ llm_choice: str = model
965
+
966
+ pricing: dict = pricing_info
967
+ _client: Any = PrivateAttr(default=None)
968
+
969
+ def __init__(self, api_key: str, temperature: float = 0.7, top_p: float = 0.95, **kwargs: Any):
970
+ try:
971
+ super().__init__(**kwargs)
972
+ # Bypass Pydantic's __setattr__ to assign to _client
973
+ object.__setattr__(self, '_client', Mistral(api_key=api_key))
974
+ self.temperature = temperature
975
+ self.top_p = top_p
976
+ except Exception as e:
977
+ debug_print(f"Init Mistral failed with error: {e}")
978
+
979
  @property
980
  def _llm_type(self) -> str:
981
+ return "mistral_llm"
982
+
983
  def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
984
+ try:
985
+ debug_print(f"Calling Mistral API... tokens: {max_tokens}")
986
+ response = self._client.chat.complete(
987
+ model=self.model_name,
988
+ messages=[{"role": "user", "content": prompt}],
989
+ temperature=self.temperature,
990
+ top_p=self.top_p,
991
+ max_tokens=max_tokens
992
+ )
993
+ return response.choices[0].message.content
994
+ except Exception as e:
995
+ debug_print(f"Mistral API error: {str(e)}")
996
+ return f"Error generating response: {str(e)}"
997
+
998
  @property
999
  def _identifying_params(self) -> dict:
1000
+ return {"model": self.model_name, "max_tokens": max_tokens}
1001
+
1002
+ debug_print("Creating Mistral LLM instance")
1003
+ mistral_llm = MistralLLM(api_key=mistral_api_key, temperature=self.temperature, top_p=self.top_p)
1004
+ debug_print("Mistral API pipeline created successfully.")
1005
+ return mistral_llm
1006
+
1007
+ else:
1008
+ raise ValueError(f"Unsupported model choice: {self.llm_choice}")
1009
 
 
 
 
1010
  except Exception as e:
1011
  debug_print(f"Error creating LLM pipeline: {str(e)}")
1012
  # Return a dummy LLM that explains the error
 
1025
  return ErrorLLM()
1026
 
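Each LLM wrapper created above stores the matching model_pricing entry (per-token prices in USD and RON) as its pricing attribute; no cost is computed anywhere in this diff, but a per-request estimate is just a weighted sum of the token counts. A hypothetical illustration using the gpt-4o-mini USD row from the table above:

# Assumed request size: 1200 prompt tokens, 300 completion tokens
pricing = {"input": 0.00000015, "output": 0.0000006}  # USD per token, gpt-4o-mini row
input_tokens, output_tokens = 1200, 300
cost_usd = input_tokens * pricing["input"] + output_tokens * pricing["output"]
print(f"${cost_usd:.5f}")  # $0.00036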
1027
 
1028
+ def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, top_k: int, prompt_template: str, bm25_weight: float):
1029
  debug_print(f"Updating chain with new model: {new_model_choice}")
1030
  self.llm_choice = new_model_choice
1031
  self.temperature = temperature
1032
  self.top_p = top_p
1033
+ self.top_k = top_k
1034
  self.prompt_template = prompt_template
1035
  self.bm25_weight = bm25_weight
1036
  self.faiss_weight = 1.0 - bm25_weight
 
1038
  def format_response(response: str) -> str:
1039
  input_tokens = count_tokens(self.context + self.prompt_template)
1040
  output_tokens = count_tokens(response)
1041
+ formatted = f" Response:\n\n"
1042
+ formatted += f"Model: {self.llm_choice}\n"
1043
+ formatted += f"Model Parameters:\n"
1044
+ formatted += f"- Temperature: {self.temperature}\n"
1045
+ formatted += f"- Top-p: {self.top_p}\n"
1046
+ formatted += f"- Top-k: {self.top_k}\n"
1047
+ formatted += f"- BM25 Weight: {self.bm25_weight}\n\n"
1048
+ formatted += f"{response}\n\n---\n"
1049
  formatted += f"- **Input tokens:** {input_tokens}\n"
1050
  formatted += f"- **Output tokens:** {output_tokens}\n"
1051
  formatted += f"- **Generated using:** {self.llm_choice}\n"
 
1128
  def format_response(response: str) -> str:
1129
  input_tokens = count_tokens(self.context + self.prompt_template)
1130
  output_tokens = count_tokens(response)
1131
+ formatted = f" Response:\n\n"
1132
+ formatted += f"Model: {self.llm_choice}\n"
1133
+ formatted += f"Model Parameters:\n"
1134
+ formatted += f"- Temperature: {self.temperature}\n"
1135
+ formatted += f"- Top-p: {self.top_p}\n"
1136
+ formatted += f"- Top-k: {self.top_k}\n"
1137
+ formatted += f"- BM25 Weight: {self.bm25_weight}\n\n"
1138
+ formatted += f"{response}\n\n---\n"
1139
  formatted += f"- **Input tokens:** {input_tokens}\n"
1140
  formatted += f"- **Output tokens:** {output_tokens}\n"
1141
  formatted += f"- **Generated using:** {self.llm_choice}\n"
 
1162
  global rag_chain
1163
  rag_chain = ElevatedRagChain()
1164
 
1165
+ def load_pdfs_updated(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p, top_k):
1166
  debug_print("Inside load_pdfs function.")
1167
  if not file_links:
1168
  debug_print("Please enter non-empty URLs")
 
1171
  links = [link.strip() for link in file_links.split("\n") if link.strip()]
1172
  global rag_chain
1173
  if rag_chain.raw_data:
1174
+ rag_chain.update_llm_pipeline(model_choice, temperature, top_p, top_k, prompt_template, bm25_weight)
1175
  context_display = rag_chain.get_current_context()
1176
  response_msg = f"Files already loaded. Chain updated with model: {model_choice}"
1177
  return (
 
1186
  prompt_template=prompt_template,
1187
  bm25_weight=bm25_weight,
1188
  temperature=temperature,
1189
+ top_p=top_p,
1190
+ top_k=top_k
1191
  )
1192
  rag_chain.add_pdfs_to_vectore_store(links)
1193
  context_display = rag_chain.get_current_context()
 
1211
  def update_model(new_model: str):
1212
  global rag_chain
1213
  if rag_chain and rag_chain.raw_data:
1214
+ rag_chain.update_llm_pipeline(new_model, rag_chain.temperature, rag_chain.top_p, rag_chain.top_k,
1215
  rag_chain.prompt_template, rag_chain.bm25_weight)
1216
  debug_print(f"Model updated to {rag_chain.llm_choice}")
1217
  return f"Model updated to: {rag_chain.llm_choice}"
 
1220
 
1221
 
1222
  # Update submit_query_updated to better handle context limitation
1223
+ def submit_query_updated(query, temperature, top_p, top_k, bm25_weight):
1224
  debug_print(f"Processing query: {query}")
1225
  if not query:
1226
  debug_print("Empty query received")
 
1231
  return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0"
1232
 
1233
  try:
1234
+ # Update all parameters for this query
1235
+ rag_chain.temperature = temperature
1236
+ rag_chain.top_p = top_p
1237
+ rag_chain.top_k = top_k
1238
+ rag_chain.bm25_weight = bm25_weight
1239
+ rag_chain.faiss_weight = 1.0 - bm25_weight
1240
+
1241
+ # Update the ensemble retriever weights
1242
+ rag_chain.ensemble_retriever = EnsembleRetriever(
1243
+ retrievers=[rag_chain.bm25_retriever, rag_chain.faiss_retriever],
1244
+ weights=[rag_chain.bm25_weight, rag_chain.faiss_weight]
1245
+ )
1246
+
1247
  # Determine max context size based on model
1248
  model_name = rag_chain.llm_choice.lower()
1249
  max_context_tokens = 32000 if "mistral" in model_name else 4096
 
1390
  clearInterval(jobListInterval);
1391
  }
1392
  }, 500);
1393
+
1394
+ // Function to disable sliders
1395
+ function disableSliders() {
1396
+ const sliders = document.querySelectorAll('input[type="range"]');
1397
+ sliders.forEach(slider => {
1398
+ if (!slider.closest('.query-tab')) { // Don't disable sliders in query tab
1399
+ slider.disabled = true;
1400
+ slider.style.opacity = '0.5';
1401
+ }
1402
+ });
1403
+ }
1404
+
1405
+ // Function to enable sliders
1406
+ function enableSliders() {
1407
+ const sliders = document.querySelectorAll('input[type="range"]');
1408
+ sliders.forEach(slider => {
1409
+ slider.disabled = false;
1410
+ slider.style.opacity = '1';
1411
+ });
1412
+ }
1413
+
1414
+ // Add event listener for load button
1415
+ const loadButton = document.querySelector('button:contains("Load Files (Async)")');
1416
+ if (loadButton) {
1417
+ loadButton.addEventListener('click', function() {
1418
+ // Wait for the response to come back
1419
+ setTimeout(disableSliders, 1000);
1420
+ });
1421
+ }
1422
+
1423
+ // Add event listener for reset button
1424
+ const resetButton = document.querySelector('button:contains("Reset App")');
1425
+ if (resetButton) {
1426
+ resetButton.addEventListener('click', function() {
1427
+ enableSliders();
1428
+ });
1429
+ }
1430
  });
1431
  """) as app:
1432
  gr.Markdown('''# PhiRAG - Async Version
 
1463
  with gr.Row():
1464
  with gr.Column():
1465
  model_dropdown = gr.Dropdown(
1466
+ choices=[
1467
+ "🇺🇸 GPT-3.5",
1468
+ "🇺🇸 GPT-4o",
1469
+ "🇺🇸 GPT-4o mini",
1470
+ "🇺🇸 o1-mini",
1471
+ "🇺🇸 o3-mini",
1472
+ "🇺🇸 Remote Meta-Llama-3",
1473
+ "🇪🇺 Mistral-API",
1474
+ ],
1475
+ value="🇪🇺 Mistral-API",
1476
  label="Select Model"
1477
  )
1478
  temperature_slider = gr.Slider(
 
1483
  minimum=0.1, maximum=0.99, value=0.95, step=0.05,
1484
  label="Word Variety (Top-p)"
1485
  )
1486
+ top_k_slider = gr.Slider(
1487
+ minimum=1, maximum=100, value=50, step=1,
1488
+ label="Token Selection (Top-k)"
1489
+ )
1490
  with gr.Column():
1491
  pdf_input = gr.Textbox(
1492
  label="Enter your file URLs (one per line)",
 
1522
  with gr.Row():
1523
  model_output = gr.Markdown("**Current Model**: Not selected")
1524
 
1525
+ with gr.TabItem("Submit Query", elem_classes=["query-tab"]):
1526
  with gr.Row():
1527
+ with gr.Column():
1528
+ query_model_dropdown = gr.Dropdown(
1529
+ choices=[
1530
+ "🇺🇸 GPT-3.5",
1531
+ "🇺🇸 GPT-4o",
1532
+ "🇺🇸 GPT-4o mini",
1533
+ "🇺🇸 o1-mini",
1534
+ "🇺🇸 o3-mini",
1535
+ "🇺🇸 Remote Meta-Llama-3",
1536
+ "🇪🇺 Mistral-API",
1537
+ ],
1538
+ value="🇪🇺 Mistral-API",
1539
+ label="Query Model"
1540
+ )
1541
+ query_temperature_slider = gr.Slider(
1542
+ minimum=0.1, maximum=1.0, value=0.5, step=0.1,
1543
+ label="Randomness (Temperature)"
1544
+ )
1545
+ query_top_p_slider = gr.Slider(
1546
+ minimum=0.1, maximum=0.99, value=0.95, step=0.05,
1547
+ label="Word Variety (Top-p)"
1548
+ )
1549
+ query_top_k_slider = gr.Slider(
1550
+ minimum=1, maximum=100, value=50, step=1,
1551
+ label="Token Selection (Top-k)"
1552
+ )
1553
+ query_bm25_weight_slider = gr.Slider(
1554
+ minimum=0.0, maximum=1.0, value=0.6, step=0.1,
1555
+ label="Lexical vs Semantics (BM25 Weight)"
1556
+ )
1557
+ with gr.Column():
1558
+ max_tokens_slider = gr.Slider(minimum=1000, maximum=128000, value=3000, label="🔢 Max Tokens", step=1000)
1559
+ query_input = gr.Textbox(
1560
+ label="Enter your query here",
1561
+ placeholder="Type your query",
1562
+ lines=4
1563
+ )
1564
+ submit_button = gr.Button("Submit Query (Async)")
1565
 
1566
  with gr.Row():
1567
  query_response = gr.Textbox(
 
1634
  status_tokens1 = gr.Markdown("")
1635
  status_tokens2 = gr.Markdown("")
1636
 
1637
+ with gr.TabItem("Batch Query"):
1638
+ with gr.Row():
1639
+ with gr.Column():
1640
+ batch_model_dropdown = gr.Dropdown(
1641
+ choices=[
1642
+ "🇺🇸 GPT-3.5",
1643
+ "🇺🇸 GPT-4o",
1644
+ "🇺🇸 GPT-4o mini",
1645
+ "🇺🇸 o1-mini",
1646
+ "🇺🇸 o3-mini",
1647
+ "🇺🇸 Remote Meta-Llama-3",
1648
+ "🇪🇺 Mistral-API",
1649
+ ],
1650
+ value="🇪🇺 Mistral-API",
1651
+ label="Query Model"
1652
+ )
1653
+ with gr.Row():
1654
+ temp_variation = gr.Dropdown(
1655
+ choices=["Constant", "Whole range 3 values", "Whole range 5 values", "Whole range 7 values", "Whole range 10 values"],
1656
+ value="Constant",
1657
+ label="Temperature Variation"
1658
+ )
1659
+ batch_temperature_slider = gr.Slider(
1660
+ minimum=0.1, maximum=1.0, value=0.5, step=0.1,
1661
+ label="Randomness (Temperature)"
1662
+ )
1663
+ with gr.Row():
1664
+ top_p_variation = gr.Dropdown(
1665
+ choices=["Constant", "Whole range 3 values", "Whole range 5 values", "Whole range 7 values", "Whole range 10 values"],
1666
+ value="Constant",
1667
+ label="Top-p Variation"
1668
+ )
1669
+ batch_top_p_slider = gr.Slider(
1670
+ minimum=0.1, maximum=0.99, value=0.95, step=0.05,
1671
+ label="Word Variety (Top-p)"
1672
+ )
1673
+ with gr.Row():
1674
+ top_k_variation = gr.Dropdown(
1675
+ choices=["Constant", "Whole range 3 values", "Whole range 5 values", "Whole range 7 values", "Whole range 10 values"],
1676
+ value="Constant",
1677
+ label="Top-k Variation"
1678
+ )
1679
+ batch_top_k_slider = gr.Slider(
1680
+ minimum=1, maximum=100, value=50, step=1,
1681
+ label="Token Selection (Top-k)"
1682
+ )
1683
+ with gr.Row():
1684
+ bm25_variation = gr.Dropdown(
1685
+ choices=["Constant", "Whole range 3 values", "Whole range 5 values", "Whole range 7 values", "Whole range 10 values"],
1686
+ value="Constant",
1687
+ label="BM25 Weight Variation"
1688
+ )
1689
+ batch_bm25_weight_slider = gr.Slider(
1690
+ minimum=0.0, maximum=1.0, value=0.6, step=0.1,
1691
+ label="Lexical vs Semantics (BM25 Weight)"
1692
+ )
1693
+ with gr.Column():
1694
+ batch_max_tokens_slider = gr.Slider(
1695
+ minimum=1000, maximum=128000, value=3000, label="🔢 Max Tokens", step=1000
1696
+ )
1697
+ batch_query_input = gr.Textbox(
1698
+ label="Enter your query here",
1699
+ placeholder="Type your query",
1700
+ lines=4
1701
+ )
1702
+ batch_submit_button = gr.Button("Submit Batch Query (Async)")
1703
+
1704
+ with gr.Row():
1705
+ batch_query_response = gr.Textbox(
1706
+ label="Batch Query Results",
1707
+ placeholder="Results will appear here (formatted as Markdown)",
1708
+ lines=10
1709
+ )
1710
+ batch_query_context = gr.Textbox(
1711
+ label="Context Information",
1712
+ placeholder="Retrieved context will appear here",
1713
+ lines=6
1714
+ )
1715
+
1716
+ with gr.Row():
1717
+ batch_input_tokens = gr.Markdown("Input tokens: 0")
1718
+ batch_output_tokens = gr.Markdown("Output tokens: 0")
1719
+
1720
+ with gr.Row():
1721
+ with gr.Column(scale=1):
1722
+ batch_job_list = gr.Markdown(
1723
+ value="No jobs yet",
1724
+ label="Job List (Click to select)"
1725
+ )
1726
+ batch_refresh_button = gr.Button("Refresh Job List")
1727
+ batch_auto_refresh_checkbox = gr.Checkbox(
1728
+ label="Enable Auto Refresh",
1729
+ value=False
1730
+ )
1731
+ batch_df = gr.DataFrame(
1732
+ value=run_query(10),
1733
+ headers=["Number", "Square"],
1734
+ label="Query Results",
1735
+ visible=False
1736
+ )
1737
+
1738
+ with gr.Column(scale=2):
1739
+ batch_job_id_input = gr.Textbox(
1740
+ label="Job ID",
1741
+ placeholder="Job ID will appear here when selected from the list",
1742
+ lines=1
1743
+ )
1744
+ batch_job_query_display = gr.Textbox(
1745
+ label="Job Query",
1746
+ placeholder="The query associated with this job will appear here",
1747
+ lines=2,
1748
+ interactive=False
1749
+ )
1750
+ batch_check_button = gr.Button("Check Status")
1751
+ batch_cleanup_button = gr.Button("Cleanup Old Jobs")
1752
+
1753
+ with gr.Row():
1754
+ batch_status_response = gr.Textbox(
1755
+ label="Job Result",
1756
+ placeholder="Job result will appear here",
1757
+ lines=6
1758
+ )
1759
+ batch_status_context = gr.Textbox(
1760
+ label="Context Information",
1761
+ placeholder="Context information will appear here",
1762
+ lines=6
1763
+ )
1764
+
1765
+ with gr.Row():
1766
+ batch_status_tokens1 = gr.Markdown("")
1767
+ batch_status_tokens2 = gr.Markdown("")
1768
+
1769
  with gr.TabItem("App Management"):
1770
  with gr.Row():
1771
  reset_button = gr.Button("Reset App")
 
1786
  with gr.Row():
1787
  reset_model = gr.Markdown("")
1788
 
1789
+ # Add initialization info display
1790
+ init_info = gr.Markdown("")
1791
+
1792
+ # Update load_button click to include top_k
1793
  load_button.click(
1794
  load_pdfs_async,
1795
+ inputs=[pdf_input, model_dropdown, prompt_input, bm25_weight_slider, temperature_slider, top_p_slider, top_k_slider, max_tokens_slider],
1796
+ outputs=[load_response, load_context, model_output, job_id_input, job_query_display, job_list, init_info]
1797
  )
1798
 
1799
+ # Update submit_button click to include top_k
1800
  submit_button.click(
1801
  submit_query_async,
1802
+ inputs=[query_input, query_model_dropdown, max_tokens_slider, query_temperature_slider, query_top_p_slider, query_top_k_slider, query_bm25_weight_slider],
1803
  outputs=[query_response, query_context, input_tokens, output_tokens, job_id_input, job_query_display, job_list]
1804
  )
1805
 
1806
+ # Add function to sync all parameters
1807
+ def sync_parameters(temperature, top_p, top_k, bm25_weight):
1808
+ return temperature, top_p, top_k, bm25_weight
1809
+
1810
+ # Sync parameters between tabs
1811
+ temperature_slider.change(
1812
+ fn=sync_parameters,
1813
+ inputs=[temperature_slider, top_p_slider, top_k_slider, bm25_weight_slider],
1814
+ outputs=[query_temperature_slider, query_top_p_slider, query_top_k_slider, query_bm25_weight_slider]
1815
+ )
1816
+ top_p_slider.change(
1817
+ fn=sync_parameters,
1818
+ inputs=[temperature_slider, top_p_slider, top_k_slider, bm25_weight_slider],
1819
+ outputs=[query_temperature_slider, query_top_p_slider, query_top_k_slider, query_bm25_weight_slider]
1820
+ )
1821
+ top_k_slider.change(
1822
+ fn=sync_parameters,
1823
+ inputs=[temperature_slider, top_p_slider, top_k_slider, bm25_weight_slider],
1824
+ outputs=[query_temperature_slider, query_top_p_slider, query_top_k_slider, query_bm25_weight_slider]
1825
+ )
1826
+ bm25_weight_slider.change(
1827
+ fn=sync_parameters,
1828
+ inputs=[temperature_slider, top_p_slider, top_k_slider, bm25_weight_slider],
1829
+ outputs=[query_temperature_slider, query_top_p_slider, query_top_k_slider, query_bm25_weight_slider]
1830
+ )
1831
+
1832
+ # Connect the buttons to their respective functions
1833
  check_button.click(
1834
  check_job_status,
1835
  inputs=[job_id_input],
 
1883
  every=2 #if auto_refresh_checkbox.value else None # Directly set `every` based on the checkbox state
1884
  )
1885
 
1886
+ # Add batch query button click handler
1887
+ batch_submit_button.click(
1888
+ submit_batch_query_async,
1889
+ inputs=[
1890
+ batch_query_input,
1891
+ batch_model_dropdown,
1892
+ batch_max_tokens_slider,
1893
+ temp_variation,
1894
+ top_p_variation,
1895
+ top_k_variation,
1896
+ bm25_variation,
1897
+ batch_temperature_slider,
1898
+ batch_top_p_slider,
1899
+ batch_top_k_slider,
1900
+ batch_bm25_weight_slider
1901
+ ],
1902
+ outputs=[
1903
+ batch_query_response,
1904
+ batch_query_context,
1905
+ batch_input_tokens,
1906
+ batch_output_tokens,
1907
+ batch_job_id_input,
1908
+ batch_job_query_display,
1909
+ batch_job_list
1910
+ ]
1911
+ )
1912
+
1913
+ # Add batch job status checking
1914
+ batch_check_button.click(
1915
+ check_job_status,
1916
+ inputs=[batch_job_id_input],
1917
+ outputs=[batch_status_response, batch_status_context, batch_status_tokens1, batch_status_tokens2, batch_job_query_display]
1918
+ )
1919
+
1920
+ # Add batch job list refresh
1921
+ batch_refresh_button.click(
1922
+ refresh_job_list,
1923
+ inputs=[],
1924
+ outputs=[batch_job_list]
1925
+ )
1926
+
1927
+ # Add batch job list selection
1928
+ batch_job_id_input.change(
1929
+ job_selected,
1930
+ inputs=[batch_job_id_input],
1931
+ outputs=[batch_job_id_input, batch_job_query_display]
1932
+ )
1933
+
1934
+ # Add batch cleanup
1935
+ batch_cleanup_button.click(
1936
+ cleanup_old_jobs,
1937
+ inputs=[],
1938
+ outputs=[batch_status_response, batch_status_context, batch_status_tokens1]
1939
+ )
1940
+
1941
+ # Add batch auto-refresh
1942
+ batch_auto_refresh_checkbox.change(
1943
+ fn=periodic_update,
1944
+ inputs=[batch_auto_refresh_checkbox],
1945
+ outputs=[batch_job_list, batch_status_response, batch_df, batch_status_context],
1946
+ every=2
1947
+ )
1948
+
1949
  if __name__ == "__main__":
1950
  debug_print("Launching Gradio interface.")
1951
  app.queue().launch(share=False)
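For reference, create_llm_pipeline resolves the dropdown label (flag-emoji prefix included) to an API model name and a token cap before building the wrapper. A standalone sketch of that resolution logic; resolve_model is a hypothetical helper name, the token limits are re-keyed by the same lookup keys for simplicity, and checking "gpt-4o mini" before "gpt-4o" is an assumption to keep the substring match unambiguous:

def resolve_model(llm_choice, slider_max_tokens=None):
    # Map a UI label such as "🇪🇺 Mistral-API" to an API model name and a max-token cap
    model_map = {
        "gpt-3.5": "gpt-3.5-turbo",
        "gpt-4o mini": "gpt-4o-mini",
        "gpt-4o": "gpt-4o",
        "o1-mini": "gpt-4o-mini",
        "o3-mini": "gpt-4o-mini",
        "mistral-api": "mistral-small-latest",
        "mistral": "mistral-small-latest",
        "meta-llama-3": "meta-llama/Meta-Llama-3-8B-Instruct",
    }
    model_token_limits = {
        "gpt-3.5": 16385, "gpt-4o": 128000, "gpt-4o mini": 128000,
        "o1-mini": 128000, "o3-mini": 128000,
        "meta-llama-3": 4096, "mistral-api": 128000, "mistral": 128000,
    }
    # Drop the flag-emoji prefix: "🇺🇸 Remote Meta-Llama-3" -> "remote meta-llama-3"
    normalized = (llm_choice.split(" ", 1)[-1] if " " in llm_choice else llm_choice).lower()
    model_key = next((key for key in model_map if key in normalized), None)
    if model_key is None:
        raise ValueError(f"Unsupported model: {normalized}")
    max_tokens = model_token_limits.get(model_key, 4096)
    if slider_max_tokens:  # cap by the Max Tokens slider when it is set
        max_tokens = min(slider_max_tokens, max_tokens)
    return model_map[model_key], max_tokens

print(resolve_model("🇪🇺 Mistral-API", slider_max_tokens=3000))  # ('mistral-small-latest', 3000)
print(resolve_model("🇺🇸 Remote Meta-Llama-3"))                  # ('meta-llama/Meta-Llama-3-8B-Instruct', 4096)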