import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import datetime
import functools
import traceback
from typing import List, Optional, Any, Dict, Tuple
import csv
import pandas as pd
import tempfile
import shutil
import glob

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline

# Other LangChain and community imports
from langchain_community.document_loaders import OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.embeddings.base import Embeddings
from langchain.retrievers import EnsembleRetriever
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser, Document
from langchain_core.runnables import RunnableParallel, RunnableLambda
from transformers.quantizers.auto import AutoQuantizationConfig
import gradio as gr
from pydantic import PrivateAttr
import pydantic

from langchain.llms.base import LLM
import typing
import time
import re
import requests
from langchain_community.document_loaders import PyMuPDFLoader
import mimetypes
import gc

# Batch processing helper functions
def generate_parameter_values(min_val, max_val, num_values):
    """Generate evenly spaced values between min and max"""
    if num_values == 1:
        return [min_val]
    step = (max_val - min_val) / (num_values - 1)
    return [min_val + (step * i) for i in range(num_values)]

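# Quick sanity check of generate_parameter_values (values shown up to float rounding):
#   generate_parameter_values(0.1, 1.0, 3)  -> [0.1, 0.55, 1.0]
#   generate_parameter_values(1, 100, 1)    -> [1]
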
def process_batch_query(query, model_choice, max_tokens, param_configs, slider_values, job_id, use_history=True):
    """Process a batch of queries with different parameter combinations"""
    results = []
    
    # Generate all parameter combinations
    temp_values = [slider_values['temperature']] if param_configs['temperature'] == "Constant" else generate_parameter_values(0.1, 1.0, int(param_configs['temperature'].split()[2]))
    top_p_values = [slider_values['top_p']] if param_configs['top_p'] == "Constant" else generate_parameter_values(0.1, 0.99, int(param_configs['top_p'].split()[2]))
    top_k_values = [slider_values['top_k']] if param_configs['top_k'] == "Constant" else generate_parameter_values(1, 100, int(param_configs['top_k'].split()[2]))
    bm25_values = [slider_values['bm25']] if param_configs['bm25'] == "Constant" else generate_parameter_values(0.0, 1.0, int(param_configs['bm25'].split()[2]))
    
    total_combinations = len(temp_values) * len(top_p_values) * len(top_k_values) * len(bm25_values)
    current = 0
    
    for temp in temp_values:
        for top_p in top_p_values:
            for top_k in top_k_values:
                for bm25 in bm25_values:
                    current += 1
                    try:
                        # Update parameters
                        rag_chain.temperature = temp
                        rag_chain.top_p = top_p
                        rag_chain.top_k = top_k
                        rag_chain.bm25_weight = bm25
                        rag_chain.faiss_weight = 1.0 - bm25
                        
                        # Update ensemble retriever
                        rag_chain.ensemble_retriever = EnsembleRetriever(
                            retrievers=[rag_chain.bm25_retriever, rag_chain.faiss_retriever],
                            weights=[rag_chain.bm25_weight, rag_chain.faiss_weight]
                        )
                        
                        # Process query
                        response = rag_chain.elevated_rag_chain.invoke({"question": query})
                        
                        # Store response in history if enabled
                        if use_history:
                            trimmed_response = response[:1000] + ("..." if len(response) > 1000 else "")
                            rag_chain.conversation_history.append({"query": query, "response": trimmed_response})
                        
                        # Format result
                        result = {
                            "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
                            "Response": response,
                            "Progress": f"Query {current}/{total_combinations}"
                        }
                        results.append(result)
                        
                    except Exception as e:
                        results.append({
                            "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
                            "Response": f"Error: {str(e)}",
                            "Progress": f"Query {current}/{total_combinations}"
                        })
    
    # Format the results and attach a link to the generated CSV file
    formatted_results, csv_path = format_batch_result_files(
        results, job_id, 
        embedding_model=getattr(rag_chain, 'embedding_model', 'unknown'),
        llm_model=model_choice,
        param_variations=param_configs
    )
    
    return (
        formatted_results,
        csv_path,
        f"Job ID: {job_id}",
        f"Input tokens: {count_tokens(query)}",
        f"Output tokens: {sum(count_tokens(r['Response']) for r in results)}"
    )

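# Hedged usage sketch for process_batch_query. The param_configs strings are an
# assumption inferred from how they are parsed above: either "Constant" or a
# string whose third token is the number of values (e.g. "Whole range 3 values").
# The model name and job id below are hypothetical placeholders.
#
#   example_param_configs = {
#       "temperature": "Whole range 3 values",   # varied over 3 values
#       "top_p": "Constant",
#       "top_k": "Constant",
#       "bm25": "Constant",
#   }
#   example_slider_values = {"temperature": 0.7, "top_p": 0.9, "top_k": 40, "bm25": 0.5}
#   formatted, csv_path, job_info, in_tok, out_tok = process_batch_query(
#       query="What does section 2 of the PDF cover?",
#       model_choice="some-llm-model",
#       max_tokens=512,
#       param_configs=example_param_configs,
#       slider_values=example_slider_values,
#       job_id="job-001",
#   )
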
# ... (rest of the file content omitted; the batch-result CSV helpers follow below)

def create_csv_from_batch_results(results: List[Dict], job_id: str,
                                  embedding_model: str = None, llm_model: str = None,
                                  param_variations: Dict = None) -> str:
    """Create a CSV file from batch query results and return the file path"""
    # Save CSV files in the current directory for HuggingFace Spaces compatibility
    
    # Create a descriptive filename
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Extract short names for filename
    def get_short_name(full_name):
        """Extract a short name from a full model name"""
        if not full_name:
            return "unknown"
        # Drop any emoji/prefix before the first space to get the actual model name
        clean_name = full_name.split(" ", 1)[-1] if " " in full_name else full_name
        # Keep the first four and last four characters of long names
        if len(clean_name) > 8:
            return clean_name[:4] + clean_name[-4:]
        return clean_name
    
    def get_param_variation_name(param_configs):
        """Get the parameter that was varied"""
        if not param_configs:
            return "const"
        
        varied_params = []
        for param, config in param_configs.items():
            if config != "Constant":
                # Extract the number from "Whole range X values"
                if "values" in config:
                    num_values = config.split()[2] if len(config.split()) > 2 else "X"
                    varied_params.append(f"{param}_{num_values}")
        
        if not varied_params:
            return "const"
        return "_".join(varied_params)
    
    # Build filename components
    embedding_short = get_short_name(embedding_model) if embedding_model else "emb"
    llm_short = get_short_name(llm_model) if llm_model else "llm"
    param_short = get_param_variation_name(param_variations) if param_variations else "const"
    
    # Create filename: batch_embedding_llm_params_timestamp.csv
    csv_filename = f"batch_{embedding_short}_{llm_short}_{param_short}_{timestamp}.csv"
    csv_path = os.path.abspath(csv_filename)
    
    # Extract parameters and responses
    data = []
    start_time = time.time()
    for result in results:
        params = result["Parameters"]
        response = result["Response"]
        progress = result["Progress"]
        
        # Cumulative time since CSV construction began (the queries themselves
        # were already executed in process_batch_query)
        current_time = time.time()
        elapsed_time = current_time - start_time
        
        # Extract individual parameter values
        temp = float(re.search(r"Temp: ([\d.]+)", params).group(1))
        top_p = float(re.search(r"Top-p: ([\d.]+)", params).group(1))
        top_k = int(re.search(r"Top-k: (\d+)", params).group(1))
        bm25 = float(re.search(r"BM25: ([\d.]+)", params).group(1))
        
        # Extract response components
        model_info = re.search(r"Model: (.*?)\n", response)
        model = model_info.group(1) if model_info else "Unknown"
        
        # Extract main answer (everything between the parameters and the token counts)
        answer_match = re.search(r"Model Parameters:.*?\n\n(.*?)\n\n---", response, re.DOTALL)
        main_answer = answer_match.group(1).strip() if answer_match else response
        
        # Extract token counts
        input_tokens = re.search(r"Input tokens: (\d+)", response)
        output_tokens = re.search(r"Output tokens: (\d+)", response)
        
        # Extract conversation history count
        conv_history = re.search(r"Conversation History: (\d+) conversation", response)
        
        data.append({
            "Temperature": temp,
            "Top-p": top_p,
            "Top-k": top_k,
            "BM25 Weight": bm25,
            "Model": model,
            "Main Answer": main_answer,
            "Input Tokens": input_tokens.group(1) if input_tokens else "N/A",
            "Output Tokens": output_tokens.group(1) if output_tokens else "N/A",
            "Conversation History": conv_history.group(1) if conv_history else "0",
            "Progress": progress,
            "Elapsed Time (s)": f"{elapsed_time:.2f}"
        })
    
    # Create DataFrame and save to CSV
    df = pd.DataFrame(data)
    df.to_csv(csv_path, index=False)
    
    return csv_path

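# Example filename produced by create_csv_from_batch_results, assuming the
# embedding model is "all-MiniLM-L6-v2", the LLM is "Mistral-7B", and temperature
# is varied over 3 values (the timestamp will differ):
#   batch_all-6-v2_Mistl-7B_temperature_3_20250101_120000.csv
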
def format_batch_result_files(results: List[Dict], job_id: str,
                              embedding_model: str = None, llm_model: str = None,
                              param_variations: Dict = None) -> Tuple[str, str]:
    """Format batch results with links to CSV files"""
    # Create CSV file with improved filename
    csv_path = create_csv_from_batch_results(results, job_id, embedding_model, llm_model, param_variations)
    
    # Format the results
    formatted_results = "### Batch Query Results\n\n"
    
    # Add the actual results
    for result in results:
        formatted_results += f"#### {result['Parameters']}\n"
        formatted_results += f"**Progress:** {result['Progress']}\n\n"
        formatted_results += f"{result['Response']}\n\n"
        formatted_results += "---\n\n"
    
    return formatted_results, csv_path