kshitijthakkar committed
Commit 2bd7a23 · 1 Parent(s): 5c1925e

updated new model

Browse files:
- enhanced_app.py +22 -14
- enhanced_model_handler.py +5 -5
- entrypoint.sh +1 -1
- model_handler.py +7 -6
- model_handler_ollama.py +2 -2
enhanced_app.py
CHANGED
@@ -24,6 +24,10 @@ DATASET_CONFIGS = {
     'Loggenix Synthetic AI Tasks Eval (with outputs) v6-large': {
         'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v6-with-outputs',
         'split': 'train'
+    },
+    'Loggenix Synthetic AI Tasks Eval (with outputs) v7-large': {
+        'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs-v7-sft-v1',
+        'split': 'train'
     }
 }

@@ -390,7 +394,7 @@ def create_interface():
     3. Analyze patterns for model improvement

     ### 💡 **Tips:**
-    - Start with "
+    - Start with "Optimized for Speed" for general testing
     - Use specific task types to focus your evaluation
     - Flag responses immediately when issues are noticed
     - Regularly review flagged responses for patterns

@@ -406,8 +410,8 @@ def create_interface():
     1. **Select Task Type**: Choose from available task types (loaded from the inference dataset)
     2. **Configure Inference**: Select optimization level:
        - `Optimized for Speed`: Fast responses (max 512 tokens)
-       - `
-       - `Full Capacity`: Maximum context utilization
+       - `Middle-ground`: Comprehensive answers (max 2048 tokens)
+       - `Full Capacity`: Maximum context utilization (max 8192 tokens)
     3. **Review System Prompt**: The system prompt auto-loads based on task type (editable)
     4. **Chat Interface**:
        - Enter messages in the input field

@@ -433,8 +437,9 @@ def create_interface():

     ### 📚 **Available Datasets:**
     - **Loggenix Synthetic AI Tasks Eval (small)**: Compact evaluation set
-    - **Loggenix Synthetic AI Tasks Eval v5 (large)**: Extended evaluation dataset
-    - **Loggenix Synthetic AI Tasks Eval v6 (large)**:
+    - **Loggenix Synthetic AI Tasks Eval v5 (large)**: Extended evaluation dataset-large-models
+    - **Loggenix Synthetic AI Tasks Eval v6 (large)**: Extended evaluation dataset-small-models
+    - **Loggenix Synthetic AI Tasks Eval v7 (large)**: Latest evaluation dataset-large-models

     ### 📋 **How to Use:**
     1. **Select Dataset**: Choose from the dropdown (displays dataset info automatically)

@@ -497,12 +502,12 @@ def create_interface():
     with gr.Accordion("🛠️ Technical Specifications", open=False, elem_classes="panel"):
         gr.Markdown("""
     ### 🤖 **Model Details:**
-    **Primary Model**: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+    **Primary Model**: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1`

     - **Architecture**: Mixture of Experts (MOE)
     - **Total Parameters**: 330M (16 experts, 2 active)
     - **Active Parameters**: 185M
-    - **Context Length**:
+    - **Context Length**: 8192 tokens
     - **Precision**: FP16
     - **Flash Attention**: Supported
     - **Tool Calling**: Enabled

@@ -513,17 +518,20 @@ def create_interface():
         "Optimized for Speed": {
             "max_new_tokens": 512,
             "temperature": 0.7,
-            "do_sample": True
+            "do_sample": True,
+            "use_cache": False
         },
-        "
+        "Middle-ground": {
             "max_new_tokens": 2048,
             "temperature": 0.8,
-            "do_sample": True
+            "do_sample": True,
+            "use_cache": False
         },
         "Full Capacity": {
-            "max_new_tokens":
+            "max_new_tokens": 8192,
             "temperature": 0.9,
-            "do_sample": True
+            "do_sample": True,
+            "use_cache": False
         }
     }
     ```

@@ -555,7 +563,7 @@ def create_interface():
     ollama serve

     # Pull the quantized model (Q8_0 format)
-    ollama pull hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+    ollama pull hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0
     ```

     ### ✅ **Advantages:**

@@ -568,7 +576,7 @@ def create_interface():
     ### ⚙️ **Configuration:**
     ```python
     OLLAMA_BASE_URL = "http://localhost:11434"
-    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"
     ```

     ### 🛠️ **Tool Calling Support:**
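For context, the new `DATASET_CONFIGS` entry added above can be exercised end-to-end with the standard `datasets` API. A minimal sketch, assuming the repo is public; `load_eval_dataset` is an illustrative helper, not a function in `enhanced_app.py`:

```python
# Minimal sketch: loading the newly added v7 eval split with the standard
# `datasets` API. DATASET_CONFIGS mirrors the entry added in this commit;
# load_eval_dataset is a hypothetical helper for illustration only.
from datasets import load_dataset

DATASET_CONFIGS = {
    'Loggenix Synthetic AI Tasks Eval (with outputs) v7-large': {
        'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs-v7-sft-v1',
        'split': 'train',
    },
}

def load_eval_dataset(name: str):
    cfg = DATASET_CONFIGS[name]
    return load_dataset(cfg['repo_id'], split=cfg['split'])

ds = load_eval_dataset('Loggenix Synthetic AI Tasks Eval (with outputs) v7-large')
print(len(ds), ds.column_names)
```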
enhanced_model_handler.py
CHANGED
@@ -60,7 +60,7 @@ except Exception as e:
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1"

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -75,8 +75,8 @@ INFERENCE_CONFIGS = {
         "description": "Fast responses with limited output length"
     },
     "Middle-ground": {
-        "max_new_tokens_base":
-        "max_new_tokens_cap":
+        "max_new_tokens_base": 4096,
+        "max_new_tokens_cap": 4096,
         "min_tokens": 50,
         "temperature": 0.7,
         "top_p": 0.9,

@@ -85,8 +85,8 @@ INFERENCE_CONFIGS = {
         "description": "Balanced performance and output quality"
     },
     "Full Capacity": {
-        "max_new_tokens_base":
-        "max_new_tokens_cap":
+        "max_new_tokens_base": 8192,
+        "max_new_tokens_cap": 8192,
         "min_tokens": 1,
         "temperature": 0.7,
         "top_p": 0.9,
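A sketch of how the raised `Middle-ground` and `Full Capacity` budgets might be applied at generation time, assuming the handler clamps a requested budget between `min_tokens` and `max_new_tokens_cap`; the clamping rule and `build_generate_kwargs` are illustrative assumptions, not code from `enhanced_model_handler.py`:

```python
# Illustrative only: clamp a requested token budget against the new
# base/cap values from this commit before building generate() kwargs.
def build_generate_kwargs(config: dict, requested_tokens: int) -> dict:
    max_new = min(max(requested_tokens, config["min_tokens"]),
                  config["max_new_tokens_cap"])
    return {
        "max_new_tokens": max_new,
        "temperature": config["temperature"],
        "top_p": config["top_p"],
        "do_sample": True,
    }

full_capacity = {
    "max_new_tokens_base": 8192,
    "max_new_tokens_cap": 8192,
    "min_tokens": 1,
    "temperature": 0.7,
    "top_p": 0.9,
}
# A 10k request is capped at the new 8192 ceiling.
print(build_generate_kwargs(full_capacity, requested_tokens=10_000))
```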
entrypoint.sh
CHANGED
@@ -16,7 +16,7 @@ done
 echo "🟢 Ollama is live!"

 # Pull your lightweight model
-MODEL_NAME="hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+MODEL_NAME="hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"
 echo "🔽 Pulling model: $MODEL_NAME"
 /app/ollama pull "$MODEL_NAME" || {
     echo "❌ Failed to pull model. Check name and internet."
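Once `entrypoint.sh` has pulled the tag, the model can be smoke-tested against Ollama's documented `/api/generate` endpoint. A minimal sketch, assuming Ollama is reachable on its default port:

```python
# Smoke test for the model pulled in entrypoint.sh, via Ollama's
# /api/generate REST endpoint with streaming disabled.
import requests

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0",
        "prompt": "Say hello in one sentence.",
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])
```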
model_handler.py
CHANGED
@@ -14,7 +14,7 @@ torch.backends.cudnn.allow_tf32 = True
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1"

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -29,8 +29,8 @@ INFERENCE_CONFIGS = {
         "description": "Fast responses with limited output length"
     },
     "Middle-ground": {
-        "max_new_tokens_base":
-        "max_new_tokens_cap":
+        "max_new_tokens_base": 4096,
+        "max_new_tokens_cap": 4096,
         "min_tokens": 50,
         "temperature": 0.7,
         "top_p": 0.9,

@@ -39,8 +39,8 @@ INFERENCE_CONFIGS = {
         "description": "Balanced performance and output quality"
     },
     "Full Capacity": {
-        "max_new_tokens_base":
-        "max_new_tokens_cap":
+        "max_new_tokens_base": 8192,
+        "max_new_tokens_cap": 8192,
         "min_tokens": 1,
         "temperature": 0.7,
         "top_p": 0.9,

@@ -56,6 +56,7 @@ def get_inference_configs():
     return INFERENCE_CONFIGS


+
 def load_model():
     """Load model and tokenizer with optimizations"""
     global model, tokenizer

@@ -87,7 +88,7 @@ def load_model():
         dtype=torch.float16,  # Use half precision for speed
         attn_implementation="flash_attention_2" if hasattr(torch.nn, 'scaled_dot_product_attention') else None,
         use_cache=True,
-        quantization_config=quantization_config,
+        #quantization_config=quantization_config,
     ).eval()

     # Enable gradient checkpointing if available
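A minimal sketch of what the updated `model_handler.py` load path amounts to now that `quantization_config` is commented out: fp16 weights with the KV cache on. Assumes the checkpoint loads with stock `AutoModelForCausalLM`; it uses the long-standing `torch_dtype` argument (the handler passes `dtype`, which recent transformers releases also accept), and the prompt is illustrative:

```python
# Sketch, not the handler itself: load the v7 checkpoint in half precision
# with no quantization, mirroring this commit, then run one generation.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # half precision, matching the handler
    use_cache=True,             # KV cache stays enabled at load time
    # quantization_config omitted: this commit comments it out
).eval()

prompt = "Summarize: service restarted after OOM."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=64, do_sample=True, temperature=0.7)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```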
model_handler_ollama.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Dict, Any, Optional, List

 # Ollama configuration
 OLLAMA_BASE_URL = "http://localhost:11434"  # Default Ollama URL
-MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"  # Replace with your actual model name in Ollama

 # Inference configurations
 INFERENCE_CONFIGS = {

@@ -459,6 +459,6 @@ Available tools:

 if __name__ == "__main__":
     # Update MODEL_NAME to match your model in Ollama
-    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-
+    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"  # Change this!

     example_usage()
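The renamed Ollama tag can be driven the same way through the documented `/api/chat` endpoint. A short sketch; error handling is reduced to `raise_for_status`, which may differ from what `model_handler_ollama.py` actually does:

```python
# Exercise the renamed MODEL_NAME through Ollama's /api/chat endpoint.
# Payload shape follows the public Ollama API; the message is illustrative.
import requests

OLLAMA_BASE_URL = "http://localhost:11434"
MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"

resp = requests.post(
    f"{OLLAMA_BASE_URL}/api/chat",
    json={
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Summarize this log line: OOMKilled"}],
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["message"]["content"])
```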