kshitijthakkar committed
Commit 2bd7a23 · 1 Parent(s): 5c1925e

updated new model

enhanced_app.py CHANGED
@@ -24,6 +24,10 @@ DATASET_CONFIGS = {
     'Loggenix Synthetic AI Tasks Eval (with outputs) v6-large': {
         'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v6-with-outputs',
         'split': 'train'
+    },
+    'Loggenix Synthetic AI Tasks Eval (with outputs) v7-large': {
+        'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs-v7-sft-v1',
+        'split': 'train'
     }
 }
 
@@ -390,7 +394,7 @@ def create_interface():
     3. Analyze patterns for model improvement
 
     ### 💡 **Tips:**
-    - Start with "Balanced Mode" for general testing
+    - Start with "Optimized for Speed" for general testing
     - Use specific task types to focus your evaluation
     - Flag responses immediately when issues are noticed
     - Regularly review flagged responses for patterns
@@ -406,8 +410,8 @@ def create_interface():
     1. **Select Task Type**: Choose from available task types (loaded from the inference dataset)
     2. **Configure Inference**: Select optimization level:
        - `Optimized for Speed`: Fast responses (max 512 tokens)
-       - `Balanced Mode`: Comprehensive answers (max 2048 tokens)
-       - `Full Capacity`: Maximum context utilization
+       - `Middle-ground`: Comprehensive answers (max 2048 tokens)
+       - `Full Capacity`: Maximum context utilization (max 8192 tokens)
     3. **Review System Prompt**: The system prompt auto-loads based on task type (editable)
     4. **Chat Interface**:
        - Enter messages in the input field
@@ -433,8 +437,9 @@ def create_interface():
 
     ### 📚 **Available Datasets:**
     - **Loggenix Synthetic AI Tasks Eval (small)**: Compact evaluation set
-    - **Loggenix Synthetic AI Tasks Eval v5 (large)**: Extended evaluation dataset
-    - **Loggenix Synthetic AI Tasks Eval v6 (large)**: Latest evaluation dataset
+    - **Loggenix Synthetic AI Tasks Eval v5 (large)**: Extended evaluation dataset-large-models
+    - **Loggenix Synthetic AI Tasks Eval v6 (large)**: Extended evaluation dataset-small-models
+    - **Loggenix Synthetic AI Tasks Eval v7 (large)**: Latest evaluation dataset-large-models
 
     ### 📝 **How to Use:**
     1. **Select Dataset**: Choose from the dropdown (displays dataset info automatically)
@@ -497,12 +502,12 @@ def create_interface():
     with gr.Accordion("🛠️ Technical Specifications", open=False, elem_classes="panel"):
         gr.Markdown("""
     ### 🤖 **Model Details:**
-    **Primary Model**: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool`
+    **Primary Model**: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1`
 
     - **Architecture**: Mixture of Experts (MOE)
     - **Total Parameters**: 330M (16 experts, 2 active)
     - **Active Parameters**: 185M
-    - **Context Length**: 4096 tokens
+    - **Context Length**: 8192 tokens
     - **Precision**: FP16
     - **Flash Attention**: Supported
     - **Tool Calling**: Enabled
@@ -513,17 +518,20 @@ def create_interface():
     "Optimized for Speed": {
         "max_new_tokens": 512,
         "temperature": 0.7,
-        "do_sample": True
+        "do_sample": True,
+        "use_cache": False
     },
-    "Balanced Mode": {
+    "Middle-ground": {
         "max_new_tokens": 2048,
         "temperature": 0.8,
-        "do_sample": True
+        "do_sample": True,
+        "use_cache": False
     },
     "Full Capacity": {
-        "max_new_tokens": 4000,
+        "max_new_tokens": 8192,
         "temperature": 0.9,
-        "do_sample": True
+        "do_sample": True,
+        "use_cache": False
     }
 }
 ```
@@ -555,7 +563,7 @@ def create_interface():
     ollama serve
 
     # Pull the quantized model (Q8_0 format)
-    ollama pull hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool-Q8_0-GGUF:Q8_0
+    ollama pull hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0
     ```
 
     ### ✅ **Advantages:**
@@ -568,7 +576,7 @@ def create_interface():
     ### ⚙️ **Configuration:**
     ```python
     OLLAMA_BASE_URL = "http://localhost:11434"
-    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool-Q8_0-GGUF:Q8_0"
+    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"
     ```
 
     ### 🛠️ **Tool Calling Support:**
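The new v7 entry keeps the same `repo_id`/`split` shape as the existing configs, so the app's loading path should work unchanged. A minimal sketch of how such an entry is typically consumed with the `datasets` library; `load_eval_dataset` is a hypothetical helper, as the actual loader in `enhanced_app.py` is not shown in this diff:

```python
# Minimal sketch: loading one DATASET_CONFIGS entry with the `datasets` library.
# The entry shape mirrors this commit; load_eval_dataset itself is hypothetical.
from datasets import load_dataset

DATASET_CONFIGS = {
    'Loggenix Synthetic AI Tasks Eval (with outputs) v7-large': {
        'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs-v7-sft-v1',
        'split': 'train',
    },
}

def load_eval_dataset(name: str):
    """Resolve a display name to its repo/split and download the dataset."""
    cfg = DATASET_CONFIGS[name]
    return load_dataset(cfg['repo_id'], split=cfg['split'])

ds = load_eval_dataset('Loggenix Synthetic AI Tasks Eval (with outputs) v7-large')
print(len(ds), ds.column_names)
```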
enhanced_model_handler.py CHANGED
@@ -60,7 +60,7 @@ except Exception as e:
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool"
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1"
 
 # Inference configurations
 INFERENCE_CONFIGS = {
@@ -75,8 +75,8 @@ INFERENCE_CONFIGS = {
         "description": "Fast responses with limited output length"
     },
     "Middle-ground": {
-        "max_new_tokens_base": 2048,
-        "max_new_tokens_cap": 2048,
+        "max_new_tokens_base": 4096,
+        "max_new_tokens_cap": 4096,
         "min_tokens": 50,
         "temperature": 0.7,
         "top_p": 0.9,
@@ -85,8 +85,8 @@ INFERENCE_CONFIGS = {
         "description": "Balanced performance and output quality"
     },
     "Full Capacity": {
-        "max_new_tokens_base": 4096,
-        "max_new_tokens_cap": 4096,
+        "max_new_tokens_base": 8192,
+        "max_new_tokens_cap": 8192,
         "min_tokens": 1,
         "temperature": 0.7,
         "top_p": 0.9,
entrypoint.sh CHANGED
@@ -16,7 +16,7 @@ done
 echo "🟢 Ollama is live!"
 
 # Pull your lightweight model
-MODEL_NAME="hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool-Q8_0-GGUF:Q8_0"
+MODEL_NAME="hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"
 echo "🔽 Pulling model: $MODEL_NAME"
 /app/ollama pull "$MODEL_NAME" || {
     echo "❌ Failed to pull model. Check name and internet."
model_handler.py CHANGED
@@ -14,7 +14,7 @@ torch.backends.cudnn.allow_tf32 = True
 # Global model and tokenizer variables
 model = None
 tokenizer = None
-MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool"
+MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1"
 
 # Inference configurations
 INFERENCE_CONFIGS = {
@@ -29,8 +29,8 @@ INFERENCE_CONFIGS = {
         "description": "Fast responses with limited output length"
     },
     "Middle-ground": {
-        "max_new_tokens_base": 2048,
-        "max_new_tokens_cap": 2048,
+        "max_new_tokens_base": 4096,
+        "max_new_tokens_cap": 4096,
         "min_tokens": 50,
         "temperature": 0.7,
         "top_p": 0.9,
@@ -39,8 +39,8 @@ INFERENCE_CONFIGS = {
         "description": "Balanced performance and output quality"
     },
     "Full Capacity": {
-        "max_new_tokens_base": 4096,
-        "max_new_tokens_cap": 4096,
+        "max_new_tokens_base": 8192,
+        "max_new_tokens_cap": 8192,
         "min_tokens": 1,
         "temperature": 0.7,
         "top_p": 0.9,
@@ -56,6 +56,7 @@ def get_inference_configs():
     return INFERENCE_CONFIGS
 
 
+
 def load_model():
     """Load model and tokenizer with optimizations"""
     global model, tokenizer
@@ -87,7 +88,7 @@ def load_model():
         dtype=torch.float16,  # Use half precision for speed
         attn_implementation="flash_attention_2" if hasattr(torch.nn, 'scaled_dot_product_attention') else None,
         use_cache=True,
-        quantization_config=quantization_config,
+        #quantization_config=quantization_config,
     ).eval()
 
     # Enable gradient checkpointing if available
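With `quantization_config` commented out, the model now loads in plain FP16, and output length is governed by the raised `INFERENCE_CONFIGS` caps. A hedged sketch of a generation call wired to one of those modes; the chat-template usage and the function name are illustrative, since the handler's inference function is not part of this diff:

```python
# Illustrative generation call using one INFERENCE_CONFIGS mode.
# `model` and `tokenizer` are the globals populated by load_model(); the
# chat-template formatting is an assumption about how prompts are built.
import torch

def generate_reply(messages, config):
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=config["max_new_tokens_cap"],  # 8192 for "Full Capacity"
            temperature=config["temperature"],
            top_p=config["top_p"],
            do_sample=True,
        )
    # Strip the prompt tokens and decode only the newly generated text
    return tokenizer.decode(output[0, input_ids.shape[-1]:], skip_special_tokens=True)

reply = generate_reply(
    [{"role": "user", "content": "Classify this log line: OOMKilled in pod api-7f"}],
    INFERENCE_CONFIGS["Full Capacity"],
)
```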
model_handler_ollama.py CHANGED
@@ -6,7 +6,7 @@ from typing import Dict, Any, Optional, List
 
 # Ollama configuration
 OLLAMA_BASE_URL = "http://localhost:11434"  # Default Ollama URL
-MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool-Q8_0-GGUF:Q8_0"  # Replace with your actual model name in Ollama
+MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"  # Replace with your actual model name in Ollama
 
 # Inference configurations
 INFERENCE_CONFIGS = {
@@ -459,6 +459,6 @@ Available tools:
 
 if __name__ == "__main__":
     # Update MODEL_NAME to match your model in Ollama
-    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.3-finetuned-tool-Q8_0-GGUF:Q8_0"  # Change this!
+    MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"  # Change this!
 
     example_usage()
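On the Ollama path, the updated `MODEL_NAME` is sent verbatim with each request to the local server. A minimal non-streaming example against Ollama's standard `/api/chat` endpoint; the prompt and option values are illustrative:

```python
# Minimal non-streaming chat request to the local Ollama server.
# Endpoint and payload shape follow Ollama's public API; values are examples.
import requests

OLLAMA_BASE_URL = "http://localhost:11434"
MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"

resp = requests.post(
    f"{OLLAMA_BASE_URL}/api/chat",
    json={
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Summarize the root cause of an OOMKilled pod."}],
        "stream": False,
        "options": {"temperature": 0.7, "num_predict": 512},  # num_predict ~ max_new_tokens
    },
    timeout=120,
)
print(resp.json()["message"]["content"])
```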