|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
from datasets import load_dataset |
|
|
import plotly.graph_objects as go |
|
|
import datetime |
|
|
import json |
|
|
import random |
|
|
import os |
|
|
|
|
|
|
|
|
from model_handler_ollama import generate_response, get_inference_configs |
|
|
import torch |
|
|
|
|
|
|
|
|
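# Hugging Face Hub datasets rendered in the "Eval Samples" tab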
DATASET_CONFIGS = { |
|
|
'Loggenix Synthetic AI Tasks Eval (with outputs)-small': { |
|
|
'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval-with-outputs', |
|
|
'split': 'train' |
|
|
}, |
|
|
'Loggenix Synthetic AI Tasks Eval (with outputs) v5-large': { |
|
|
'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs', |
|
|
'split': 'train' |
|
|
}, |
|
|
'Loggenix Synthetic AI Tasks Eval (with outputs) v6-large': { |
|
|
'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v6-with-outputs', |
|
|
'split': 'train' |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
def load_inference_dataset(): |
|
|
"""Load the main dataset for inference use case""" |
|
|
try: |
|
|
print("Loading synthetic-ai-tasks-eval-v5 dataset...") |
|
|
dataset = load_dataset( |
|
|
'kshitijthakkar/synthetic-ai-tasks-eval-v5', |
|
|
split='train', |
|
|
trust_remote_code=True |
|
|
) |
|
|
df = dataset.to_pandas() |
|
|
print(f"✓ Successfully loaded: {len(df)} rows, {len(df.columns)} columns") |
|
|
return df |
|
|
except Exception as e: |
|
|
print(f"✗ Error loading dataset: {str(e)}") |
|
|
return pd.DataFrame({'Error': [f'Failed to load: {str(e)}']}) |
|
|
|
|
|
|
|
|
|
|
|
def load_eval_datasets(): |
|
|
"""Load all datasets for evaluation samples""" |
|
|
datasets = {} |
|
|
for display_name, config in DATASET_CONFIGS.items(): |
|
|
try: |
|
|
print(f"Loading {display_name}...") |
|
|
dataset = load_dataset( |
|
|
config['repo_id'], |
|
|
split=config['split'], |
|
|
trust_remote_code=True |
|
|
) |
|
|
df = dataset.to_pandas() |
|
|
datasets[display_name] = df |
|
|
print(f"✓ Successfully loaded {display_name}: {len(df)} rows") |
|
|
except Exception as e: |
|
|
print(f"✗ Error loading {display_name}: {str(e)}") |
|
|
datasets[display_name] = pd.DataFrame({ |
|
|
'Error': [f'Failed to load: {str(e)}'], |
|
|
'Dataset': [config['repo_id']] |
|
|
}) |
|
|
return datasets |
|
|
|
|
|
|
|
|
|
|
|
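# Load all datasets once at startup so tab interactions stay responsive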
INFERENCE_DATASET = load_inference_dataset() |
|
|
EVAL_DATASETS = load_eval_datasets() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_task_types(): |
|
|
"""Get unique task types from inference dataset""" |
|
|
if 'task_type' in INFERENCE_DATASET.columns: |
|
|
task_types = INFERENCE_DATASET['task_type'].unique().tolist() |
|
|
return [str(t) for t in task_types if pd.notna(t)] |
|
|
return ["No task types available"] |
|
|
|
|
|
|
|
|
def get_task_by_type(task_type): |
|
|
"""Get task content by task type""" |
|
|
if 'task_type' in INFERENCE_DATASET.columns and 'task' in INFERENCE_DATASET.columns: |
|
|
filtered = INFERENCE_DATASET[INFERENCE_DATASET['task_type'] == task_type] |
|
|
if len(filtered) > 0: |
|
|
return str(filtered.iloc[0]['task']) |
|
|
return "No task found for this type" |
|
|
|
|
|
|
|
|
def chat_interface_with_inference(prompt, history, system_prompt, inference_config): |
|
|
"""Enhanced chat interface with model inference and history""" |
|
|
if not prompt.strip(): |
|
|
return history, "" |
|
|
|
|
|
|
|
|
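    # History is a list of (speaker, message) tuples; the flagging helpers key off the "AI Assistant" label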
history.append(("You", prompt)) |
|
|
|
|
|
try: |
|
|
if not system_prompt.strip(): |
|
|
response = "Please select a task type to load system prompt first." |
|
|
else: |
|
|
|
|
|
            configs = get_inference_configs()
            # Fall back to the default preset if an unknown config name is supplied
            config_name = inference_config if inference_config in configs else "Optimized for Speed"
|
|
|
|
|
|
|
|
response = generate_response( |
|
|
system_prompt=system_prompt, |
|
|
user_input=prompt, |
|
|
                config_name=config_name
|
|
) |
|
|
|
|
|
|
|
|
formatted_response = f"**AI Assistant:**\n{response}" |
|
|
history.append(("AI Assistant", formatted_response)) |
|
|
|
|
|
except Exception as e: |
|
|
error_msg = f"**AI Assistant:**\nError during inference: {str(e)}" |
|
|
history.append(("AI Assistant", error_msg)) |
|
|
|
|
|
return history, "" |
|
|
|
|
|
|
|
|
def flag_response(history, flagged_message, flag_reason): |
|
|
"""Flag a response""" |
|
|
if not flagged_message or flagged_message == "No responses available": |
|
|
return "Invalid message selection." |
|
|
|
|
|
try: |
|
|
flagged_index = int(flagged_message.split()[1][:-1]) |
|
|
if flagged_index >= len(history) or history[flagged_index][0] != "AI Assistant": |
|
|
return "You can only flag assistant responses." |
|
|
|
|
|
flagged_message_content = history[flagged_index][1] |
|
|
|
|
|
log_entry = { |
|
|
"timestamp": datetime.datetime.now().isoformat(), |
|
|
"flag_reason": str(flag_reason), |
|
|
"flagged_message": str(flagged_message_content), |
|
|
"conversation_context": history, |
|
|
} |
|
|
|
|
|
os.makedirs("logs", exist_ok=True) |
|
|
with open("logs/flagged_responses.log", "a") as f: |
|
|
f.write(json.dumps(log_entry) + "\n") |
|
|
|
|
|
return f"Response flagged successfully: {flag_reason}" |
|
|
except Exception as e: |
|
|
return f"Error flagging response: {str(e)}" |
|
|
|
|
|
|
|
|
def get_assistant_responses(history): |
|
|
"""Get dropdown options for assistant responses""" |
|
|
responses = [ |
|
|
f"Response {i}: {str(msg[1])[:50]}..." |
|
|
for i, msg in enumerate(history) |
|
|
if msg[0] == "AI Assistant" |
|
|
] |
|
|
|
|
|
if not responses: |
|
|
responses = ["No responses available"] |
|
|
|
|
|
return gr.update(choices=responses, value=responses[0] if responses else "No responses available") |
|
|
|
|
|
|
|
|
def display_selected_message(selected_index, history): |
|
|
"""Display the selected flagged message""" |
|
|
if selected_index == "No responses available": |
|
|
return "No responses available" |
|
|
|
|
|
try: |
|
|
flagged_index = int(selected_index.split()[1][:-1]) |
|
|
if flagged_index < len(history) and history[flagged_index][0] == "AI Assistant": |
|
|
return history[flagged_index][1] |
|
|
else: |
|
|
return "Invalid selection." |
|
|
except Exception as e: |
|
|
return f"Error: {str(e)}" |
|
|
|
|
|
|
|
|
def clear_inference_history(): |
|
|
"""Clear chat history for inference tab""" |
|
|
return [], gr.update(choices=["No responses available"], value="No responses available") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_eval_table(dataset_name): |
|
|
"""Update eval table based on selected dataset""" |
|
|
if dataset_name in EVAL_DATASETS: |
|
|
return EVAL_DATASETS[dataset_name].head(100) |
|
|
return pd.DataFrame() |
|
|
|
|
|
|
|
|
def get_eval_dataset_info(dataset_name): |
|
|
"""Get info about selected eval dataset""" |
|
|
if dataset_name in EVAL_DATASETS: |
|
|
df = EVAL_DATASETS[dataset_name] |
|
|
return f""" |
|
|
**Dataset**: {dataset_name} |
|
|
- **Rows**: {len(df):,} |
|
|
- **Columns**: {len(df.columns)} |
|
|
- **Column Names**: {', '.join(df.columns.tolist())} |
|
|
""" |
|
|
return "No dataset selected" |
|
|
|
def get_task_types_for_eval(dataset_name): |
|
|
"""Get unique task types from selected eval dataset""" |
|
|
if dataset_name in EVAL_DATASETS and 'task_type' in EVAL_DATASETS[dataset_name].columns: |
|
|
task_types = EVAL_DATASETS[dataset_name]['task_type'].unique().tolist() |
|
|
|
|
|
|
|
|
return [str(t) for t in task_types if pd.notna(t)] |
|
|
return ["No task types available"] |
|
|
|
|
|
|
|
|
def get_tasks_by_type_eval(dataset_name, task_type): |
|
|
"""Get tasks filtered by dataset and task type""" |
|
|
if (dataset_name in EVAL_DATASETS and |
|
|
'task_type' in EVAL_DATASETS[dataset_name].columns and |
|
|
'task' in EVAL_DATASETS[dataset_name].columns): |
|
|
|
|
|
filtered = EVAL_DATASETS[dataset_name][EVAL_DATASETS[dataset_name]['task_type'] == task_type] |
|
|
if len(filtered) > 0: |
|
|
|
|
|
tasks = [] |
|
|
for idx, row in filtered.iterrows(): |
|
|
task_preview = str(row['task'])[:100] + "..." if len(str(row['task'])) > 100 else str(row['task']) |
|
|
tasks.append(f"Row {idx}: {task_preview}") |
|
|
return tasks |
|
|
return ["No tasks found"] |
|
|
|
|
def get_selected_row_data_by_type(dataset_name, task_type): |
|
|
"""Get all data for the first row of a selected dataset and task type""" |
|
|
if (dataset_name in EVAL_DATASETS and |
|
|
'task_type' in EVAL_DATASETS[dataset_name].columns and |
|
|
'task' in EVAL_DATASETS[dataset_name].columns): |
|
|
|
|
|
filtered = EVAL_DATASETS[dataset_name][EVAL_DATASETS[dataset_name]['task_type'] == task_type] |
|
|
if len(filtered) > 0: |
|
|
row = filtered.iloc[0] |
|
|
|
|
|
|
|
|
task = str(row.get('task', 'N/A')) |
|
|
input_model = str(row.get('input_model', 'N/A')) |
|
|
expected_response = str(row.get('expected_response', 'N/A')) |
|
|
loggenix_output = str(row.get('loggenix_output', 'N/A')) |
|
|
output_model = str(row.get('output_model', 'N/A')) |
|
|
input_text = str(row.get('input', 'N/A')) |
|
|
|
|
|
return input_model, output_model, task, input_text, expected_response, loggenix_output |
|
|
|
|
|
return "", "", "", "", "", "" |
|
|
|
|
|
|
|
|
|
|
|
def read_flagged_messages(): |
|
|
"""Read flagged messages from log file""" |
|
|
try: |
|
|
if not os.path.exists("logs/flagged_responses.log"): |
|
|
return pd.DataFrame() |
|
|
|
|
|
with open("logs/flagged_responses.log", "r") as f: |
|
|
flagged_messages = f.readlines() |
|
|
|
|
|
if not flagged_messages: |
|
|
return pd.DataFrame() |
|
|
|
|
|
table_data = [] |
|
|
for entry in flagged_messages: |
|
|
data = json.loads(entry) |
|
|
table_data.append({ |
|
|
"Timestamp": data.get("timestamp", "N/A"), |
|
|
"Flag Reason": data.get("flag_reason", "N/A"), |
|
|
"Flagged Message": data.get("flagged_message", "N/A")[:100] + "...", |
|
|
"Conversation Context": str(len(data.get("conversation_context", []))) + " messages" |
|
|
}) |
|
|
return pd.DataFrame(table_data) |
|
|
except Exception as e: |
|
|
return pd.DataFrame({"Error": [f"Error reading flagged messages: {str(e)}"]}) |
|
|
|
|
|
|
|
|
def handle_row_select(evt: gr.SelectData): |
|
|
"""Handle row selection in flagged messages table""" |
|
|
try: |
|
|
if not os.path.exists("logs/flagged_responses.log"): |
|
|
return [] |
|
|
|
|
|
with open("logs/flagged_responses.log", "r") as f: |
|
|
flagged_messages_log = f.readlines() |
|
|
|
|
|
if evt.index[0] < len(flagged_messages_log): |
|
|
selected_entry = json.loads(flagged_messages_log[evt.index[0]]) |
|
|
conversation_context = selected_entry.get("conversation_context", []) |
|
|
return conversation_context |
|
|
return [] |
|
|
except Exception as e: |
|
|
return [("System", f"Error loading conversation: {str(e)}")] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_interface(): |
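    """Build the Gradio UI: inference chat, eval samples, flagged responses, eval results, and about tabs."""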
|
|
with gr.Blocks(title="AI Tasks Evaluation Suite", theme=gr.themes.Soft()) as demo: |
|
|
gr.Markdown("# 🤖 AI Tasks Evaluation Suite") |
|
|
gr.Markdown("Comprehensive platform for AI model evaluation and testing") |
|
|
|
|
|
with gr.Tabs(): |
|
|
|
|
|
with gr.Tab("🚀 Inference Use Case"): |
|
|
gr.Markdown("## Model Inference Testing with Response Flagging") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
|
|
|
task_type_dropdown = gr.Dropdown( |
|
|
choices=get_task_types(), |
|
|
value=get_task_types()[0] if get_task_types() else None, |
|
|
label="Task Type", |
|
|
info="Select task type to load system prompt" |
|
|
) |
|
|
|
|
|
|
|
|
inference_config = gr.Dropdown( |
|
|
choices=list(get_inference_configs().keys()), |
|
|
value="Optimized for Speed", |
|
|
label="Inference Configuration", |
|
|
info="Select inference optimization level" |
|
|
) |
|
|
|
|
|
with gr.Column(scale=2): |
|
|
|
|
|
system_prompt = gr.Textbox( |
|
|
label="System Prompt (Editable)", |
|
|
lines=6, |
|
|
max_lines=10, |
|
|
placeholder="Select a task type to load system prompt...", |
|
|
interactive=True |
|
|
) |
|
|
|
|
|
|
|
|
gr.Markdown("### 💬 Chat Interface") |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=2): |
|
|
|
|
|
chat_display = gr.Chatbot(label="Conversation History", height=400) |
|
|
chat_history_state = gr.State([]) |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
chat_input = gr.Textbox( |
|
|
placeholder="Enter your message here...", |
|
|
label="Your Message", |
|
|
scale=4 |
|
|
) |
|
|
send_btn = gr.Button("Send", variant="primary", scale=1) |
|
|
|
|
|
with gr.Row(): |
|
|
clear_chat_btn = gr.Button("🗑️ Clear History", variant="secondary") |
|
|
|
|
|
|
|
|
with gr.Column(scale=1): |
|
|
gr.Markdown("### 🚩 Flag Response") |
|
|
|
|
|
flagged_message_index = gr.Dropdown( |
|
|
label="Select a response to flag", |
|
|
choices=["No responses available"], |
|
|
value="No responses available", |
|
|
interactive=True |
|
|
) |
|
|
|
|
|
selected_message_display = gr.Textbox( |
|
|
label="Selected Response", |
|
|
interactive=False, |
|
|
lines=4, |
|
|
max_lines=6 |
|
|
) |
|
|
|
|
|
flag_reason = gr.Textbox( |
|
|
placeholder="Enter reason for flagging...", |
|
|
label="Reason for Flagging" |
|
|
) |
|
|
|
|
|
flag_btn = gr.Button("🚩 Flag Response", variant="stop") |
|
|
flag_output = gr.Textbox(label="Flagging Status", visible=True, lines=2) |
|
|
|
|
|
|
|
|
task_type_dropdown.change( |
|
|
fn=get_task_by_type, |
|
|
inputs=[task_type_dropdown], |
|
|
outputs=[system_prompt] |
|
|
) |
|
|
|
|
|
|
|
|
send_btn.click( |
|
|
chat_interface_with_inference, |
|
|
inputs=[chat_input, chat_history_state, system_prompt, inference_config], |
|
|
outputs=[chat_display, chat_input] |
|
|
).then( |
|
|
lambda x: x, |
|
|
inputs=[chat_display], |
|
|
outputs=[chat_history_state] |
|
|
).then( |
|
|
get_assistant_responses, |
|
|
inputs=[chat_history_state], |
|
|
outputs=[flagged_message_index] |
|
|
) |
|
|
|
|
|
|
|
|
chat_input.submit( |
|
|
chat_interface_with_inference, |
|
|
inputs=[chat_input, chat_history_state, system_prompt, inference_config], |
|
|
outputs=[chat_display, chat_input] |
|
|
).then( |
|
|
lambda x: x, |
|
|
inputs=[chat_display], |
|
|
outputs=[chat_history_state] |
|
|
).then( |
|
|
get_assistant_responses, |
|
|
inputs=[chat_history_state], |
|
|
outputs=[flagged_message_index] |
|
|
) |
|
|
|
|
|
clear_chat_btn.click( |
|
|
clear_inference_history, |
|
|
outputs=[chat_display, flagged_message_index] |
|
|
).then( |
|
|
lambda: [], |
|
|
outputs=[chat_history_state] |
|
|
) |
|
|
|
|
|
|
|
|
flagged_message_index.change( |
|
|
display_selected_message, |
|
|
inputs=[flagged_message_index, chat_history_state], |
|
|
outputs=[selected_message_display] |
|
|
) |
|
|
|
|
|
flag_btn.click( |
|
|
flag_response, |
|
|
inputs=[chat_history_state, flagged_message_index, flag_reason], |
|
|
outputs=[flag_output] |
|
|
) |
|
|
with gr.Tab("📊 Eval Samples"): |
|
|
gr.Markdown("## Dataset Evaluation Samples") |
|
|
gr.Markdown("Select dataset and task type to view detailed information") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
eval_dataset_dropdown = gr.Dropdown( |
|
|
choices=list(EVAL_DATASETS.keys()), |
|
|
value=list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else None, |
|
|
label="Select Dataset", |
|
|
info="Choose evaluation dataset to view" |
|
|
) |
|
|
|
|
|
eval_task_type_dropdown = gr.Dropdown( |
|
|
choices=[], |
|
|
label="Select Task Type", |
|
|
info="Choose task type from selected dataset", |
|
|
allow_custom_value=True |
|
|
) |
|
|
|
|
|
with gr.Column(scale=1): |
|
|
eval_dataset_info = gr.Markdown( |
|
|
get_eval_dataset_info(list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else "") |
|
|
) |
|
|
|
|
|
|
|
|
gr.Markdown("### Task Details") |
|
|
with gr.Row(): |
|
|
input_model_field = gr.Textbox( |
|
|
label="input_model", |
|
|
lines=1, |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
output_model_field = gr.Textbox( |
|
|
label="output_model", |
|
|
lines=1, |
|
|
interactive=False |
|
|
) |
|
|
with gr.Row(): |
|
|
task_field = gr.Textbox( |
|
|
label="Task", |
|
|
lines=2, |
|
|
max_lines=5, |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
input_field = gr.Textbox( |
|
|
label="input", |
|
|
lines=12, |
|
|
max_lines=20, |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
|
|
|
gr.Markdown("### Expected vs Actual Response Comparison") |
|
|
|
|
|
with gr.Row(): |
|
|
                    expected_response_field = gr.Textbox(
                        label="Expected Response",
                        lines=30,
                        max_lines=40,
                        interactive=False
                    )
                    loggenix_output_field = gr.Textbox(
                        label="Loggenix Output",
                        lines=30,
                        max_lines=40,
                        interactive=False
                    )
|
|
def update_eval_components(dataset_name): |
|
|
info = get_eval_dataset_info(dataset_name) |
|
|
task_types = get_task_types_for_eval(dataset_name) |
|
|
return info, gr.update(choices=task_types, |
|
|
value=task_types[0] if task_types else "No task types available") |
|
|
|
|
|
|
|
|
eval_dataset_dropdown.change( |
|
|
fn=update_eval_components, |
|
|
inputs=[eval_dataset_dropdown], |
|
|
outputs=[eval_dataset_info, eval_task_type_dropdown] |
|
|
) |
|
|
eval_task_type_dropdown.change( |
|
|
fn=get_selected_row_data_by_type, |
|
|
inputs=[eval_dataset_dropdown, eval_task_type_dropdown], |
|
|
                    outputs=[input_model_field, output_model_field, task_field, input_field,
                             expected_response_field, loggenix_output_field]
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("👀 View Flagged Responses"): |
|
|
gr.Markdown("## Review Flagged Responses") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
flagged_messages_display = gr.Dataframe( |
|
|
headers=["Timestamp", "Flag Reason", "Flagged Message", "Conversation Context"], |
|
|
interactive=False, |
|
|
max_height=400 |
|
|
) |
|
|
refresh_btn = gr.Button("🔄 Refresh", variant="primary") |
|
|
|
|
|
with gr.Column(): |
|
|
conversation_context_display = gr.Chatbot( |
|
|
label="Conversation Context", |
|
|
height=400 |
|
|
) |
|
|
|
|
|
|
|
|
flagged_messages_display.select( |
|
|
handle_row_select, |
|
|
outputs=[conversation_context_display] |
|
|
) |
|
|
|
|
|
refresh_btn.click( |
|
|
read_flagged_messages, |
|
|
outputs=[flagged_messages_display] |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Tab("📈 Model Eval Results"): |
|
|
gr.Markdown("## Model Evaluation Results") |
|
|
gr.Markdown("### 🚧 Coming Soon") |
|
|
gr.Markdown( |
|
|
"This section will display comprehensive model evaluation metrics, charts, and performance analysis.") |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
gr.Markdown("#### Evaluation Metrics") |
|
|
gr.Markdown("- Accuracy scores") |
|
|
gr.Markdown("- Performance benchmarks") |
|
|
gr.Markdown("- Comparative analysis") |
|
|
|
|
|
with gr.Column(): |
|
|
gr.Markdown("#### Visualization") |
|
|
gr.Markdown("- Performance charts") |
|
|
gr.Markdown("- Score distributions") |
|
|
gr.Markdown("- Trend analysis") |
|
|
|
|
|
|
|
|
with gr.Tab("ℹ️ About"): |
|
|
gr.Markdown("## About Loggenix MOE Model") |
|
|
|
|
|
gr.Markdown(""" |
|
|
### Model: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool` |
|
|
|
|
|
This is a fine-tuned Mixture of Experts (MOE) model designed for specialized AI tasks with tool calling capabilities. |
|
|
|
|
|
#### Key Features: |
|
|
- **Architecture**: MOE with 0.3B total parameters, 0.1B active parameters |
|
|
- **Training**: Fine-tuned with learning rate 7e-5, batch size 16 |
|
|
- **Hardware**: Optimized for RTX 4090 GPU |
|
|
- **Capabilities**: Tool calling, instruction following, task-specific responses |
|
|
|
|
|
#### Model Specifications: |
|
|
- **Total Parameters**: 0.3B |
|
|
- **Active Parameters**: 0.1B |
|
|
- **Context Length**: 4096 tokens |
|
|
- **Precision**: FP16 for optimal performance |
|
|
- **Flash Attention**: Supported for faster inference |
|
|
|
|
|
#### Sample Inference Code: |
|
|
```python |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
import torch |
|
|
|
|
|
# Load model and tokenizer |
|
|
model_id = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool" |
|
|
tokenizer = AutoTokenizer.from_pretrained(model_id) |
|
|
model = AutoModelForCausalLM.from_pretrained( |
|
|
model_id, |
|
|
device_map="auto", |
|
|
torch_dtype=torch.float16, |
|
|
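    # flash_attention_2 requires the flash-attn package; remove this argument to use the default attention implementation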
attn_implementation="flash_attention_2" |
|
|
).eval() |
|
|
|
|
|
# Prepare messages |
|
|
messages = [ |
|
|
{"role": "system", "content": "You are a helpful AI assistant."}, |
|
|
{"role": "user", "content": "Calculate 25 + 37"} |
|
|
] |
|
|
|
|
|
# Format and generate |
|
|
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) |
|
|
inputs = tokenizer(prompt, return_tensors="pt").to("cuda") |
|
|
|
|
|
with torch.no_grad(): |
|
|
outputs = model.generate( |
|
|
**inputs, |
|
|
max_new_tokens=512, |
|
|
do_sample=True, |
|
|
temperature=0.7, |
|
|
pad_token_id=tokenizer.pad_token_id |
|
|
) |
|
|
|
|
|
response = tokenizer.decode(outputs[0], skip_special_tokens=True) |
|
|
print(response) |
|
|
``` |
|
|
|
|
|
#### Tool Calling Support: |
|
|
The model supports structured tool calling for mathematical operations, data analysis, and other specialized tasks. |
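
As an illustrative sketch (whether tools are accepted depends on the model's chat template, and `add_numbers` below is a placeholder rather than a built-in tool), a tool can be advertised to the model through `tokenizer.apply_chat_template`:

```python
# Describe a simple tool as a JSON-schema dict (a typed, documented Python function also works)
add_tool = {
    "type": "function",
    "function": {
        "name": "add_numbers",
        "description": "Add two integers.",
        "parameters": {
            "type": "object",
            "properties": {
                "a": {"type": "integer"},
                "b": {"type": "integer"},
            },
            "required": ["a", "b"],
        },
    },
}

# Advertise the tool via the chat template; `messages` and `tokenizer` come from the sample above
tool_prompt = tokenizer.apply_chat_template(
    messages,
    tools=[add_tool],
    tokenize=False,
    add_generation_prompt=True,
)
```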
|
|
|
|
|
#### Performance Optimizations: |
|
|
- **Speed Mode**: Max 512 new tokens for fast responses |
|
|
- **Balanced Mode**: Max 2048 new tokens for comprehensive answers |
|
|
- **Full Capacity**: Dynamic token allocation up to context limit |
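
A minimal sketch of how such presets could map to generation settings ("Optimized for Speed" is the app's default preset name; the other keys and all values here are illustrative, not the actual contents of `get_inference_configs()`):

```python
INFERENCE_PRESETS = {
    "Optimized for Speed": {"max_new_tokens": 512, "temperature": 0.7},
    "Balanced": {"max_new_tokens": 2048, "temperature": 0.7},
    "Full Capacity": {"max_new_tokens": None, "temperature": 0.7},  # None = fill remaining context
}
```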
|
|
|
|
|
--- |
|
|
|
|
|
**Developed by**: Kshitij Thakkar |
|
|
**Version**: v6.2 |
|
|
**License**: Please check model repository for licensing details |
|
|
""") |
|
|
|
|
|
|
|
|
demo.load( |
|
|
fn=read_flagged_messages, |
|
|
outputs=[flagged_messages_display] |
|
|
) |
|
|
|
|
|
return demo |
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
print("Starting AI Tasks Evaluation Suite...") |
|
|
demo = create_interface() |
|
|
demo.launch( |
|
|
server_name="0.0.0.0", |
|
|
server_port=7860, |
|
|
share=False, |
|
|
debug=True, |
|
|
mcp_server=True |
|
|
) |