# loggenix-moe-0.3B-A0.1B-demo / enhanced_app.py
# Author: kshitijthakkar
# Commit 02a42b8: "run via ollama quants gguf for faster inference speed"
import gradio as gr
import pandas as pd
from datasets import load_dataset
import plotly.graph_objects as go
import datetime
import json
import random
import os
#from model_handler import generate_response, get_inference_configs
#from enhanced_model_handler import generate_response, get_inference_configs
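# The Ollama-backed handler serves GGUF quantizations of the model for faster inference.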
from model_handler_ollama import generate_response, get_inference_configs
import torch
# Configuration for datasets
DATASET_CONFIGS = {
'Loggenix Synthetic AI Tasks Eval (with outputs)-small': {
'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval-with-outputs',
'split': 'train'
},
'Loggenix Synthetic AI Tasks Eval (with outputs) v5-large': {
'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs',
'split': 'train'
},
'Loggenix Synthetic AI Tasks Eval (with outputs) v6-large': {
'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v6-with-outputs',
'split': 'train'
}
}
# Load main dataset for inference tab
def load_inference_dataset():
"""Load the main dataset for inference use case"""
try:
print("Loading synthetic-ai-tasks-eval-v5 dataset...")
dataset = load_dataset(
'kshitijthakkar/synthetic-ai-tasks-eval-v5',
split='train',
trust_remote_code=True
)
df = dataset.to_pandas()
print(f"✓ Successfully loaded: {len(df)} rows, {len(df.columns)} columns")
return df
except Exception as e:
print(f"✗ Error loading dataset: {str(e)}")
return pd.DataFrame({'Error': [f'Failed to load: {str(e)}']})
# Load dataset for eval samples tab
def load_eval_datasets():
"""Load all datasets for evaluation samples"""
datasets = {}
for display_name, config in DATASET_CONFIGS.items():
try:
print(f"Loading {display_name}...")
dataset = load_dataset(
config['repo_id'],
split=config['split'],
trust_remote_code=True
)
df = dataset.to_pandas()
datasets[display_name] = df
print(f"✓ Successfully loaded {display_name}: {len(df)} rows")
except Exception as e:
print(f"✗ Error loading {display_name}: {str(e)}")
datasets[display_name] = pd.DataFrame({
'Error': [f'Failed to load: {str(e)}'],
'Dataset': [config['repo_id']]
})
return datasets
# Load datasets
INFERENCE_DATASET = load_inference_dataset()
EVAL_DATASETS = load_eval_datasets()
# ===== TAB 1: INFERENCE USE CASE WITH INTEGRATED FLAGGING =====
def get_task_types():
"""Get unique task types from inference dataset"""
if 'task_type' in INFERENCE_DATASET.columns:
task_types = INFERENCE_DATASET['task_type'].unique().tolist()
return [str(t) for t in task_types if pd.notna(t)]
return ["No task types available"]
def get_task_by_type(task_type):
"""Get task content by task type"""
if 'task_type' in INFERENCE_DATASET.columns and 'task' in INFERENCE_DATASET.columns:
filtered = INFERENCE_DATASET[INFERENCE_DATASET['task_type'] == task_type]
if len(filtered) > 0:
return str(filtered.iloc[0]['task'])
return "No task found for this type"
def chat_interface_with_inference(prompt, history, system_prompt, inference_config):
"""Enhanced chat interface with model inference and history"""
if not prompt.strip():
return history, ""
# Add user message to history
history.append(("You", prompt))
try:
if not system_prompt.strip():
response = "Please select a task type to load system prompt first."
else:
            # generate_response resolves the configuration itself from config_name;
            # validate the selected name here and fall back to the default if it is unknown.
            configs = get_inference_configs()
            if inference_config not in configs:
                inference_config = "Optimized for Speed"
# Run inference using the model
response = generate_response(
system_prompt=system_prompt,
user_input=prompt,
config_name=inference_config
)
# Format and add AI response to history
formatted_response = f"**AI Assistant:**\n{response}"
history.append(("AI Assistant", formatted_response))
except Exception as e:
error_msg = f"**AI Assistant:**\nError during inference: {str(e)}"
history.append(("AI Assistant", error_msg))
return history, ""
def flag_response(history, flagged_message, flag_reason):
"""Flag a response"""
if not flagged_message or flagged_message == "No responses available":
return "Invalid message selection."
try:
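        # Dropdown labels have the form "Response {index}: {preview}..."; recover the
        # numeric index by taking the second token and stripping its trailing colon.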
flagged_index = int(flagged_message.split()[1][:-1])
if flagged_index >= len(history) or history[flagged_index][0] != "AI Assistant":
return "You can only flag assistant responses."
flagged_message_content = history[flagged_index][1]
log_entry = {
"timestamp": datetime.datetime.now().isoformat(),
"flag_reason": str(flag_reason),
"flagged_message": str(flagged_message_content),
"conversation_context": history,
}
os.makedirs("logs", exist_ok=True)
with open("logs/flagged_responses.log", "a") as f:
f.write(json.dumps(log_entry) + "\n")
return f"Response flagged successfully: {flag_reason}"
except Exception as e:
return f"Error flagging response: {str(e)}"
def get_assistant_responses(history):
"""Get dropdown options for assistant responses"""
responses = [
f"Response {i}: {str(msg[1])[:50]}..."
for i, msg in enumerate(history)
if msg[0] == "AI Assistant"
]
if not responses:
responses = ["No responses available"]
return gr.update(choices=responses, value=responses[0] if responses else "No responses available")
def display_selected_message(selected_index, history):
"""Display the selected flagged message"""
if selected_index == "No responses available":
return "No responses available"
try:
flagged_index = int(selected_index.split()[1][:-1])
if flagged_index < len(history) and history[flagged_index][0] == "AI Assistant":
return history[flagged_index][1]
else:
return "Invalid selection."
except Exception as e:
return f"Error: {str(e)}"
def clear_inference_history():
"""Clear chat history for inference tab"""
return [], gr.update(choices=["No responses available"], value="No responses available")
# ===== TAB 2: EVAL SAMPLES =====
def update_eval_table(dataset_name):
"""Update eval table based on selected dataset"""
if dataset_name in EVAL_DATASETS:
return EVAL_DATASETS[dataset_name].head(100)
return pd.DataFrame()
def get_eval_dataset_info(dataset_name):
"""Get info about selected eval dataset"""
if dataset_name in EVAL_DATASETS:
df = EVAL_DATASETS[dataset_name]
return f"""
**Dataset**: {dataset_name}
- **Rows**: {len(df):,}
- **Columns**: {len(df.columns)}
- **Column Names**: {', '.join(df.columns.tolist())}
"""
return "No dataset selected"
def get_task_types_for_eval(dataset_name):
"""Get unique task types from selected eval dataset"""
if dataset_name in EVAL_DATASETS and 'task_type' in EVAL_DATASETS[dataset_name].columns:
task_types = EVAL_DATASETS[dataset_name]['task_type'].unique().tolist()
return [str(t) for t in task_types if pd.notna(t)]
return ["No task types available"]
def get_tasks_by_type_eval(dataset_name, task_type):
"""Get tasks filtered by dataset and task type"""
if (dataset_name in EVAL_DATASETS and
'task_type' in EVAL_DATASETS[dataset_name].columns and
'task' in EVAL_DATASETS[dataset_name].columns):
filtered = EVAL_DATASETS[dataset_name][EVAL_DATASETS[dataset_name]['task_type'] == task_type]
if len(filtered) > 0:
# Create display options with index and truncated task content
tasks = []
for idx, row in filtered.iterrows():
task_preview = str(row['task'])[:100] + "..." if len(str(row['task'])) > 100 else str(row['task'])
tasks.append(f"Row {idx}: {task_preview}")
return tasks
return ["No tasks found"]
# def get_selected_row_data(dataset_name, task_type, selected_task):
# """Get all data for the selected row"""
# if not selected_task or selected_task == "No tasks found":
# return "", "", "", "", "", "",""
#
# try:
# # Extract row index from selected_task
# row_idx = int(selected_task.split("Row ")[1].split(":")[0])
#
# if dataset_name in EVAL_DATASETS:
# df = EVAL_DATASETS[dataset_name]
# if row_idx in df.index:
# row = df.loc[row_idx]
#
# # Extract all fields with safe handling for missing columns
# task = str(row.get('task', 'N/A'))
# task_type_val = str(row.get('task_type', 'N/A'))
# input_model = str(row.get('input_model', 'N/A'))
# expected_response = str(row.get('expected_response', 'N/A'))
# loggenix_output = str(row.get('loggenix_output', 'N/A'))
# output_model = str(row.get('output_model', 'N/A'))
# input_text = str(row.get('input', 'N/A'))
#
#
# return task_type_val, input_model, output_model, task, input_text, expected_response, loggenix_output
#
# except Exception as e:
# return f"Error: {str(e)}", "", "", "", "", "", "", ""
#
# return "", "", "", "", "", "", ""
def get_selected_row_data_by_type(dataset_name, task_type):
"""Get all data for the first row of a selected dataset and task type"""
if (dataset_name in EVAL_DATASETS and
'task_type' in EVAL_DATASETS[dataset_name].columns and
'task' in EVAL_DATASETS[dataset_name].columns):
filtered = EVAL_DATASETS[dataset_name][EVAL_DATASETS[dataset_name]['task_type'] == task_type]
if len(filtered) > 0:
row = filtered.iloc[0] # Get the first row
# Extract all fields with safe handling for missing columns
task = str(row.get('task', 'N/A'))
input_model = str(row.get('input_model', 'N/A'))
expected_response = str(row.get('expected_response', 'N/A'))
loggenix_output = str(row.get('loggenix_output', 'N/A'))
output_model = str(row.get('output_model', 'N/A'))
input_text = str(row.get('input', 'N/A'))
return input_model, output_model, task, input_text, expected_response, loggenix_output
return "", "", "", "", "", ""
# ===== TAB 3: VIEW FLAGGED RESPONSES =====
def read_flagged_messages():
"""Read flagged messages from log file"""
try:
if not os.path.exists("logs/flagged_responses.log"):
return pd.DataFrame()
with open("logs/flagged_responses.log", "r") as f:
flagged_messages = f.readlines()
if not flagged_messages:
return pd.DataFrame()
table_data = []
for entry in flagged_messages:
data = json.loads(entry)
table_data.append({
"Timestamp": data.get("timestamp", "N/A"),
"Flag Reason": data.get("flag_reason", "N/A"),
"Flagged Message": data.get("flagged_message", "N/A")[:100] + "...",
"Conversation Context": str(len(data.get("conversation_context", []))) + " messages"
})
return pd.DataFrame(table_data)
except Exception as e:
return pd.DataFrame({"Error": [f"Error reading flagged messages: {str(e)}"]})
def handle_row_select(evt: gr.SelectData):
"""Handle row selection in flagged messages table"""
try:
if not os.path.exists("logs/flagged_responses.log"):
return []
with open("logs/flagged_responses.log", "r") as f:
flagged_messages_log = f.readlines()
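        # evt.index is (row, column) for a Dataframe selection; the selected row number
        # lines up with the corresponding line in the log file.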
if evt.index[0] < len(flagged_messages_log):
selected_entry = json.loads(flagged_messages_log[evt.index[0]])
conversation_context = selected_entry.get("conversation_context", [])
return conversation_context
return []
except Exception as e:
return [("System", f"Error loading conversation: {str(e)}")]
# ===== MAIN INTERFACE =====
def create_interface():
with gr.Blocks(title="AI Tasks Evaluation Suite", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🤖 AI Tasks Evaluation Suite")
gr.Markdown("Comprehensive platform for AI model evaluation and testing")
with gr.Tabs():
# TAB 1: INFERENCE USE CASE WITH INTEGRATED FLAGGING
with gr.Tab("🚀 Inference Use Case"):
gr.Markdown("## Model Inference Testing with Response Flagging")
with gr.Row():
with gr.Column(scale=1):
# Task type dropdown
task_type_dropdown = gr.Dropdown(
choices=get_task_types(),
value=get_task_types()[0] if get_task_types() else None,
label="Task Type",
info="Select task type to load system prompt"
)
# Inference configuration
inference_config = gr.Dropdown(
choices=list(get_inference_configs().keys()),
value="Optimized for Speed",
label="Inference Configuration",
info="Select inference optimization level"
)
with gr.Column(scale=2):
# System prompt (editable)
system_prompt = gr.Textbox(
label="System Prompt (Editable)",
lines=6,
max_lines=10,
placeholder="Select a task type to load system prompt...",
interactive=True
)
# Chat interface section
gr.Markdown("### 💬 Chat Interface")
with gr.Row():
with gr.Column(scale=2):
# Chat display (replacing the old textbox)
chat_display = gr.Chatbot(label="Conversation History", height=400)
chat_history_state = gr.State([])
# Chat input
with gr.Row():
chat_input = gr.Textbox(
placeholder="Enter your message here...",
label="Your Message",
scale=4
)
send_btn = gr.Button("Send", variant="primary", scale=1)
with gr.Row():
clear_chat_btn = gr.Button("🗑️ Clear History", variant="secondary")
# Flagging section
with gr.Column(scale=1):
gr.Markdown("### 🚩 Flag Response")
flagged_message_index = gr.Dropdown(
label="Select a response to flag",
choices=["No responses available"],
value="No responses available",
interactive=True
)
selected_message_display = gr.Textbox(
label="Selected Response",
interactive=False,
lines=4,
max_lines=6
)
flag_reason = gr.Textbox(
placeholder="Enter reason for flagging...",
label="Reason for Flagging"
)
flag_btn = gr.Button("🚩 Flag Response", variant="stop")
flag_output = gr.Textbox(label="Flagging Status", visible=True, lines=2)
# Event handlers for Tab 1
task_type_dropdown.change(
fn=get_task_by_type,
inputs=[task_type_dropdown],
outputs=[system_prompt]
)
# Chat functionality
send_btn.click(
chat_interface_with_inference,
inputs=[chat_input, chat_history_state, system_prompt, inference_config],
outputs=[chat_display, chat_input]
).then(
lambda x: x, # Update state
inputs=[chat_display],
outputs=[chat_history_state]
).then(
get_assistant_responses,
inputs=[chat_history_state],
outputs=[flagged_message_index]
)
# Enter key support for chat input
chat_input.submit(
chat_interface_with_inference,
inputs=[chat_input, chat_history_state, system_prompt, inference_config],
outputs=[chat_display, chat_input]
).then(
lambda x: x, # Update state
inputs=[chat_display],
outputs=[chat_history_state]
).then(
get_assistant_responses,
inputs=[chat_history_state],
outputs=[flagged_message_index]
)
clear_chat_btn.click(
clear_inference_history,
outputs=[chat_display, flagged_message_index]
).then(
lambda: [],
outputs=[chat_history_state]
)
# Flagging functionality
flagged_message_index.change(
display_selected_message,
inputs=[flagged_message_index, chat_history_state],
outputs=[selected_message_display]
)
flag_btn.click(
flag_response,
inputs=[chat_history_state, flagged_message_index, flag_reason],
outputs=[flag_output]
)
# TAB 2: EVAL SAMPLES
# with gr.Tab("📊 Eval Samples"):
# gr.Markdown("## Dataset Evaluation Samples")
#
# with gr.Row():
# with gr.Column(scale=1):
# eval_dataset_dropdown = gr.Dropdown(
# choices=list(EVAL_DATASETS.keys()),
# value=list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else None,
# label="Select Dataset",
# info="Choose evaluation dataset to view"
# )
#
# eval_dataset_info = gr.Markdown(
# get_eval_dataset_info(list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else "")
# )
#
# with gr.Row():
# eval_table = gr.Dataframe(
# value=update_eval_table(list(EVAL_DATASETS.keys())[0]) if EVAL_DATASETS else pd.DataFrame(),
# label="Dataset Table",
# max_height=800,
# min_width=800,
# interactive=True,
# wrap=True,
# show_fullscreen_button=True,
# show_copy_button=True,
# show_row_numbers=True,
# show_search="search",
# column_widths=["80px","80px","80px","150px","250px","250px","250px"]
# )
#
# # Event handlers for Tab 2
# eval_dataset_dropdown.change(
# fn=lambda x: (update_eval_table(x), get_eval_dataset_info(x)),
# inputs=[eval_dataset_dropdown],
# outputs=[eval_table, eval_dataset_info]
# )
with gr.Tab("📊 Eval Samples"):
gr.Markdown("## Dataset Evaluation Samples")
gr.Markdown("Select dataset and task type to view detailed information")
with gr.Row():
with gr.Column(scale=1):
eval_dataset_dropdown = gr.Dropdown(
choices=list(EVAL_DATASETS.keys()),
value=list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else None,
label="Select Dataset",
info="Choose evaluation dataset to view"
)
eval_task_type_dropdown = gr.Dropdown(
choices=[],
label="Select Task Type",
info="Choose task type from selected dataset",
allow_custom_value=True
)
with gr.Column(scale=1):
eval_dataset_info = gr.Markdown(
get_eval_dataset_info(list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else "")
)
# Task details section
gr.Markdown("### Task Details")
with gr.Row():
input_model_field = gr.Textbox(
label="input_model",
lines=1,
interactive=False
)
output_model_field = gr.Textbox(
label="output_model",
lines=1,
interactive=False
)
with gr.Row():
task_field = gr.Textbox(
label="Task",
lines=2,
max_lines=5,
interactive=False
)
with gr.Row():
input_field = gr.Textbox(
label="input",
lines=12,
max_lines=20,
interactive=False
)
# Large text fields for outputs side by side
gr.Markdown("### Expected vs Actual Response Comparison")
                with gr.Row():
                    expected_response_field = gr.Textbox(
                        label="Expected Response",
                        lines=30,
                        max_lines=40,
                        interactive=False
                    )
                    loggenix_output_field = gr.Textbox(
                        label="Loggenix Output",
                        lines=30,
                        max_lines=40,
                        interactive=False
                    )
            # Event handlers for Tab 2
def update_eval_components(dataset_name):
info = get_eval_dataset_info(dataset_name)
task_types = get_task_types_for_eval(dataset_name)
return info, gr.update(choices=task_types,
value=task_types[0] if task_types else "No task types available")
eval_dataset_dropdown.change(
fn=update_eval_components,
inputs=[eval_dataset_dropdown],
outputs=[eval_dataset_info, eval_task_type_dropdown]
)
eval_task_type_dropdown.change(
fn=get_selected_row_data_by_type,
inputs=[eval_dataset_dropdown, eval_task_type_dropdown],
                outputs=[input_model_field, output_model_field, task_field, input_field,
                         expected_response_field, loggenix_output_field]
)
            # Note: this tab selects rows by task type only; get_tasks_by_type_eval is not wired to a dropdown here.
# TAB 3: VIEW FLAGGED RESPONSES (RENAMED FROM TAB 4)
with gr.Tab("👀 View Flagged Responses"):
gr.Markdown("## Review Flagged Responses")
with gr.Row():
with gr.Column():
flagged_messages_display = gr.Dataframe(
headers=["Timestamp", "Flag Reason", "Flagged Message", "Conversation Context"],
interactive=False,
max_height=400
)
refresh_btn = gr.Button("🔄 Refresh", variant="primary")
with gr.Column():
conversation_context_display = gr.Chatbot(
label="Conversation Context",
height=400
)
# Event handlers for Tab 3
flagged_messages_display.select(
handle_row_select,
outputs=[conversation_context_display]
)
refresh_btn.click(
read_flagged_messages,
outputs=[flagged_messages_display]
)
# TAB 4: MODEL EVAL RESULTS (MOVED FROM TAB 5)
with gr.Tab("📈 Model Eval Results"):
gr.Markdown("## Model Evaluation Results")
gr.Markdown("### 🚧 Coming Soon")
gr.Markdown(
"This section will display comprehensive model evaluation metrics, charts, and performance analysis.")
# Placeholder content
with gr.Row():
with gr.Column():
gr.Markdown("#### Evaluation Metrics")
gr.Markdown("- Accuracy scores")
gr.Markdown("- Performance benchmarks")
gr.Markdown("- Comparative analysis")
with gr.Column():
gr.Markdown("#### Visualization")
gr.Markdown("- Performance charts")
gr.Markdown("- Score distributions")
gr.Markdown("- Trend analysis")
# TAB 5: ABOUT (MOVED FROM TAB 6)
with gr.Tab("ℹ️ About"):
gr.Markdown("## About Loggenix MOE Model")
gr.Markdown("""
### Model: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool`
This is a fine-tuned Mixture of Experts (MOE) model designed for specialized AI tasks with tool calling capabilities.
#### Key Features:
- **Architecture**: MOE with 0.3B total parameters, 0.1B active parameters
- **Training**: Fine-tuned with learning rate 7e-5, batch size 16
- **Hardware**: Optimized for RTX 4090 GPU
- **Capabilities**: Tool calling, instruction following, task-specific responses
#### Model Specifications:
- **Total Parameters**: 0.3B
- **Active Parameters**: 0.1B
- **Context Length**: 4096 tokens
- **Precision**: FP16 for optimal performance
- **Flash Attention**: Supported for faster inference
#### Sample Inference Code:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load model and tokenizer
model_id = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
torch_dtype=torch.float16,
attn_implementation="flash_attention_2"
).eval()
# Prepare messages
messages = [
{"role": "system", "content": "You are a helpful AI assistant."},
{"role": "user", "content": "Calculate 25 + 37"}
]
# Format and generate
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=512,
do_sample=True,
temperature=0.7,
pad_token_id=tokenizer.pad_token_id
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```
#### Tool Calling Support:
The model supports structured tool calling for mathematical operations, data analysis, and other specialized tasks.
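A minimal sketch of how a tool-enabled request might be built. The `add` tool below is a hypothetical example, and the exact tool schema accepted by this model's chat template is an assumption based on the common OpenAI-style function format:
```python
# Hypothetical tool definition; the schema is assumed, not taken from the model card.
tools = [{
    "type": "function",
    "function": {
        "name": "add",
        "description": "Add two numbers",
        "parameters": {
            "type": "object",
            "properties": {
                "a": {"type": "number"},
                "b": {"type": "number"},
            },
            "required": ["a", "b"],
        },
    },
}]
# Recent transformers versions let apply_chat_template inject tool definitions directly.
prompt = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    tokenize=False,
    add_generation_prompt=True,
)
```
This reuses `tokenizer` and `messages` from the sample above; generation then proceeds exactly as shown there.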
#### Performance Optimizations:
- **Speed Mode**: Max 512 new tokens for fast responses
- **Balanced Mode**: Max 2048 new tokens for comprehensive answers
- **Full Capacity**: Dynamic token allocation up to context limit
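A rough sketch of how these modes could map to generation settings. The dictionary below is an illustration only; the real mapping lives in `get_inference_configs()` inside `model_handler_ollama.py`, and the mode names other than "Optimized for Speed" are assumed:
```python
# Illustrative only: assumed shape of the inference configuration mapping.
INFERENCE_MODES = {
    "Optimized for Speed": {"max_new_tokens": 512, "temperature": 0.7},
    "Balanced": {"max_new_tokens": 2048, "temperature": 0.7},
    "Full Capacity": {"max_new_tokens": None, "temperature": 0.7},  # None = fill remaining context
}

def resolve_mode(name: str) -> dict:
    # Fall back to the fastest mode when an unknown name is supplied.
    return INFERENCE_MODES.get(name, INFERENCE_MODES["Optimized for Speed"])
```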
---
**Developed by**: Kshitij Thakkar
**Version**: v6.2
**License**: Please check model repository for licensing details
""")
# Load initial data
demo.load(
fn=read_flagged_messages,
outputs=[flagged_messages_display]
)
return demo
# Launch the application
if __name__ == "__main__":
print("Starting AI Tasks Evaluation Suite...")
demo = create_interface()
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
debug=True,
mcp_server=True
)