import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import datetime
import functools
import traceback
from typing import List, Optional, Any, Dict, Tuple
import csv
import pandas as pd
import tempfile
import shutil
import glob
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline
# Other LangChain and community imports
from langchain_community.document_loaders import OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.embeddings.base import Embeddings
from langchain.retrievers import EnsembleRetriever
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser, Document
from langchain_core.runnables import RunnableParallel, RunnableLambda
from transformers.quantizers.auto import AutoQuantizationConfig
import gradio as gr
from pydantic import PrivateAttr
import pydantic
from langchain.llms.base import LLM
import typing
import time
import re
import requests
from langchain_community.document_loaders import PyMuPDFLoader  # Updated loader
import mimetypes
import gc

# Batch processing helper functions
def generate_parameter_values(min_val, max_val, num_values):
    """Generate evenly spaced values between min and max (inclusive)."""
    if num_values == 1:
        return [min_val]
    step = (max_val - min_val) / (num_values - 1)
    return [min_val + (step * i) for i in range(num_values)]
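
# Illustrative usage sketch (not from the original file): the sweep includes both
# endpoints and is evenly spaced, e.g. (up to floating-point rounding):
#   generate_parameter_values(0.1, 1.0, 4)  # -> approximately [0.1, 0.4, 0.7, 1.0]
#   generate_parameter_values(1, 100, 3)    # -> [1.0, 50.5, 100.0]
# Note that integer-style parameters such as top_k can come back as floats; callers
# are assumed to cast or round them if an integer is required.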

def process_batch_query(query, model_choice, max_tokens, param_configs, slider_values, job_id, use_history=True):
    """Process a batch of queries with different parameter combinations."""
    results = []
    # Build the value sweep for each parameter: a single constant value, or an
    # evenly spaced range when the user asked for multiple values
    temp_values = [slider_values['temperature']] if param_configs['temperature'] == "Constant" else generate_parameter_values(0.1, 1.0, int(param_configs['temperature'].split()[2]))
    top_p_values = [slider_values['top_p']] if param_configs['top_p'] == "Constant" else generate_parameter_values(0.1, 0.99, int(param_configs['top_p'].split()[2]))
    top_k_values = [slider_values['top_k']] if param_configs['top_k'] == "Constant" else generate_parameter_values(1, 100, int(param_configs['top_k'].split()[2]))
    bm25_values = [slider_values['bm25']] if param_configs['bm25'] == "Constant" else generate_parameter_values(0.0, 1.0, int(param_configs['bm25'].split()[2]))

    total_combinations = len(temp_values) * len(top_p_values) * len(top_k_values) * len(bm25_values)
    current = 0

    for temp in temp_values:
        for top_p in top_p_values:
            for top_k in top_k_values:
                for bm25 in bm25_values:
                    current += 1
                    try:
                        # Update sampling and retrieval parameters on the global RAG chain
                        rag_chain.temperature = temp
                        rag_chain.top_p = top_p
                        rag_chain.top_k = top_k
                        rag_chain.bm25_weight = bm25
                        rag_chain.faiss_weight = 1.0 - bm25
                        # Rebuild the ensemble retriever with the new weights
                        rag_chain.ensemble_retriever = EnsembleRetriever(
                            retrievers=[rag_chain.bm25_retriever, rag_chain.faiss_retriever],
                            weights=[rag_chain.bm25_weight, rag_chain.faiss_weight]
                        )
                        # Process the query
                        response = rag_chain.elevated_rag_chain.invoke({"question": query})
                        # Store a trimmed copy of the response in the conversation history if enabled
                        if use_history:
                            trimmed_response = response[:1000] + ("..." if len(response) > 1000 else "")
                            rag_chain.conversation_history.append({"query": query, "response": trimmed_response})
                        # Format result
                        result = {
                            "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
                            "Response": response,
                            "Progress": f"Query {current}/{total_combinations}"
                        }
                        results.append(result)
                    except Exception as e:
                        results.append({
                            "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
                            "Response": f"Error: {str(e)}",
                            "Progress": f"Query {current}/{total_combinations}"
                        })

    # Format the results and write the accompanying CSV file
    formatted_results, csv_path = format_batch_result_files(
        results, job_id,
        embedding_model=getattr(rag_chain, 'embedding_model', 'unknown'),
        llm_model=model_choice,
        param_variations=param_configs
    )

    return (
        formatted_results,
        csv_path,
        f"Job ID: {job_id}",
        f"Input tokens: {count_tokens(query)}",
        f"Output tokens: {sum(count_tokens(r['Response']) for r in results)}"
    )
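
# Illustrative call sketch (not from the original file). It assumes the globals
# `rag_chain` and `count_tokens` defined elsewhere in this file are already
# initialised, and uses a hypothetical model label. The dict shapes are inferred
# from how the arguments are parsed above:
#
#   param_configs = {
#       "temperature": "Whole range 4 values",  # varied over 4 evenly spaced values
#       "top_p": "Constant",
#       "top_k": "Constant",
#       "bm25": "Constant",
#   }
#   slider_values = {"temperature": 0.7, "top_p": 0.95, "top_k": 40, "bm25": 0.5}
#   markdown, csv_path, job, in_tok, out_tok = process_batch_query(
#       "What are the key findings?", "Mistral-7B (example)", 512,
#       param_configs, slider_values, job_id="job-001",
#   )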
# ... (rest of the file content omitted here)

def create_csv_from_batch_results(results: List[Dict], job_id: str,
                                  embedding_model: str = None, llm_model: str = None,
                                  param_variations: Dict = None) -> str:
    """Create a CSV file from batch query results and return the file path."""
    # Save CSV files in the current directory for HuggingFace Spaces compatibility,
    # using a descriptive filename
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    def get_short_name(full_name):
        """Extract a short name from a full model name."""
        if not full_name:
            return "unknown"
        # Drop a leading emoji/prefix token and keep the actual model name
        clean_name = full_name.split(" ", 1)[-1] if " " in full_name else full_name
        # Abbreviate long names to their first and last four characters
        if len(clean_name) > 8:
            return clean_name[:4] + clean_name[-4:]
        return clean_name

    def get_param_variation_name(param_configs):
        """Summarise which parameters were varied and over how many values."""
        if not param_configs:
            return "const"
        varied_params = []
        for param, config in param_configs.items():
            if config != "Constant":
                # Extract the number of values from a config such as "Whole range X values"
                if "values" in config:
                    num_values = config.split()[2] if len(config.split()) > 2 else "X"
                    varied_params.append(f"{param}_{num_values}")
        if not varied_params:
            return "const"
        return "_".join(varied_params)

    # Build filename components
    embedding_short = get_short_name(embedding_model) if embedding_model else "emb"
    llm_short = get_short_name(llm_model) if llm_model else "llm"
    param_short = get_param_variation_name(param_variations) if param_variations else "const"
    # Create filename: batch_<embedding>_<llm>_<params>_<timestamp>.csv
    csv_filename = f"batch_{embedding_short}_{llm_short}_{param_short}_{timestamp}.csv"
    csv_path = os.path.abspath(csv_filename)

    # Extract parameters and responses
    data = []
    start_time = time.time()
    for result in results:
        params = result["Parameters"]
        response = result["Response"]
        progress = result["Progress"]
        # Elapsed time since CSV assembly started (not the per-query inference time)
        current_time = time.time()
        elapsed_time = current_time - start_time
        # Extract individual parameter values
        temp = float(re.search(r"Temp: ([\d.]+)", params).group(1))
        top_p = float(re.search(r"Top-p: ([\d.]+)", params).group(1))
        top_k = int(re.search(r"Top-k: (\d+)", params).group(1))
        bm25 = float(re.search(r"BM25: ([\d.]+)", params).group(1))
        # Extract response components
        model_info = re.search(r"Model: (.*?)\n", response)
        model = model_info.group(1) if model_info else "Unknown"
        # Extract the main answer (everything between the parameter block and the token counts)
        answer_match = re.search(r"Model Parameters:.*?\n\n(.*?)\n\n---", response, re.DOTALL)
        main_answer = answer_match.group(1).strip() if answer_match else response
        # Extract token counts
        input_tokens = re.search(r"Input tokens: (\d+)", response)
        output_tokens = re.search(r"Output tokens: (\d+)", response)
        # Extract conversation history count
        conv_history = re.search(r"Conversation History: (\d+) conversation", response)
        data.append({
            "Temperature": temp,
            "Top-p": top_p,
            "Top-k": top_k,
            "BM25 Weight": bm25,
            "Model": model,
            "Main Answer": main_answer,
            "Input Tokens": input_tokens.group(1) if input_tokens else "N/A",
            "Output Tokens": output_tokens.group(1) if output_tokens else "N/A",
            "Conversation History": conv_history.group(1) if conv_history else "0",
            "Progress": progress,
            "Elapsed Time (s)": f"{elapsed_time:.2f}"
        })

    # Create a DataFrame and save it to CSV
    df = pd.DataFrame(data)
    df.to_csv(csv_path, index=False)
    return csv_path
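
# Illustrative filename sketch (hypothetical model names, not from the original file):
# with embedding_model="🧠 sentence-transformers/all-MiniLM-L6-v2",
# llm_model="🤖 mistral-7b-instruct", and only temperature varied over 4 values
# ("Whole range 4 values"), the helpers above yield
#   get_short_name(...)            -> "sent6-v2" and "mistruct"
#   get_param_variation_name(...)  -> "temperature_4"
# so the CSV is written to the current working directory as e.g.
#   batch_sent6-v2_mistruct_temperature_4_20250101_120000.csv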

def format_batch_result_files(results: List[Dict], job_id: str,
                              embedding_model: str = None, llm_model: str = None,
                              param_variations: Dict = None) -> Tuple[str, str]:
    """Format batch results as Markdown and create the accompanying CSV file."""
    # Create the CSV file with a descriptive filename
    csv_path = create_csv_from_batch_results(results, job_id, embedding_model, llm_model, param_variations)

    # Format the results as Markdown
    formatted_results = "### Batch Query Results\n\n"
    for result in results:
        formatted_results += f"#### {result['Parameters']}\n"
        formatted_results += f"**Progress:** {result['Progress']}\n\n"
        formatted_results += f"{result['Response']}\n\n"
        formatted_results += "---\n\n"
    return formatted_results, csv_path
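
# Minimal end-to-end sketch (not from the original file): the result dicts these
# formatters consume use the same keys that process_batch_query builds above.
#   results = [{
#       "Parameters": "Temp: 0.70, Top-p: 0.95, Top-k: 40, BM25: 0.50",
#       "Response": "Model: example-model\n...full answer text...",
#       "Progress": "Query 1/1",
#   }]
#   markdown, csv_file = format_batch_result_files(results, job_id="job-001")
# `markdown` can be displayed (e.g. in the Gradio UI); `csv_file` is the path
# returned by create_csv_from_batch_results for download.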