KaiquanMah committed
Commit a6a8285 (verified) · Parent: 2f3cb3e

Upload 11 files

logs/logs_cpu_2025.05.22 round1.txt ADDED
The diff for this file is too large to render.
 
logs/logs_cpu_2025.05.22 round2.txt ADDED
The diff for this file is too large to render.
 
logs/logs_cpu_2025.05.22 round3 parallelise1.txt ADDED
@@ -0,0 +1,197 @@
+ https://www.kaggle.com/code/kaiquanmah/01a-kaggle-ollama-llama3-2?scriptVersionId=241139873
+
+ time=2025-05-22T03:18:37.534Z level=INFO source=server.go:135 msg="system memory" total="31.4 GiB" free="30.1 GiB" free_swap="0 B"
+ time=2025-05-22T03:18:37.534Z level=INFO source=server.go:168 msg=offload library=cpu layers.requested=-1 layers.model=29 layers.offload=0 layers.split="" memory.available="[30.1 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.5 GiB" memory.required.partial="0 B" memory.required.kv="896.0 MiB" memory.required.allocations="[3.5 GiB]" memory.weights.total="1.9 GiB" memory.weights.repeating="1.6 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="424.0 MiB" memory.graph.partial="570.7 MiB"
+ llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from /root/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
+ llama_model_loader: - kv 3: general.finetune str = Instruct
+ llama_model_loader: - kv 4: general.basename str = Llama-3.2
+ llama_model_loader: - kv 5: general.size_label str = 3B
+ llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
+ llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
+ llama_model_loader: - kv 8: llama.block_count u32 = 28
+ llama_model_loader: - kv 9: llama.context_length u32 = 131072
+ llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
+ llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
+ llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
+ llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
+ llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
+ llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
+ llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
+ llama_model_loader: - kv 18: general.file_type u32 = 15
+ llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
+ llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
+ llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
+ llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
+ llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+ llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
+ llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
+ llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
+ llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
+ llama_model_loader: - kv 29: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 58 tensors
+ llama_model_loader: - type q4_K: 168 tensors
+ llama_model_loader: - type q6_K: 29 tensors
+ print_info: file format = GGUF V3 (latest)
+ print_info: file type = Q4_K - Medium
+ print_info: file size = 1.87 GiB (5.01 BPW)
+ load: special tokens cache size = 256
+ load: token to piece cache size = 0.7999 MB
+ print_info: arch = llama
+ print_info: vocab_only = 1
+ print_info: model type = ?B
+ print_info: model params = 3.21 B
+ print_info: general.name = Llama 3.2 3B Instruct
+ print_info: vocab type = BPE
+ print_info: n_vocab = 128256
+ print_info: n_merges = 280147
+ print_info: BOS token = 128000 '<|begin_of_text|>'
+ print_info: EOS token = 128009 '<|eot_id|>'
+ print_info: EOT token = 128009 '<|eot_id|>'
+ print_info: EOM token = 128008 '<|eom_id|>'
+ print_info: LF token = 198 'Ċ'
+ print_info: EOG token = 128008 '<|eom_id|>'
+ print_info: EOG token = 128009 '<|eot_id|>'
+ print_info: max token length = 256
+ llama_model_load: vocab only - skipping tensors
+ time=2025-05-22T03:18:38.186Z level=INFO source=server.go:431 msg="starting llama server" cmd="/usr/local/bin/ollama runner --model /root/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff --ctx-size 8192 --batch-size 512 --threads 2 --no-mmap --parallel 2 --port 38637"
+ time=2025-05-22T03:18:38.187Z level=INFO source=sched.go:472 msg="loaded runners" count=1
+ time=2025-05-22T03:18:38.187Z level=INFO source=server.go:591 msg="waiting for llama runner to start responding"
+ time=2025-05-22T03:18:38.188Z level=INFO source=server.go:625 msg="waiting for server to become available" status="llm server not responding"
+ time=2025-05-22T03:18:38.217Z level=INFO source=runner.go:815 msg="starting go runner"
+ load_backend: loaded CPU backend from /usr/local/lib/ollama/libggml-cpu-haswell.so
+ time=2025-05-22T03:18:38.228Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
+ time=2025-05-22T03:18:38.234Z level=INFO source=runner.go:874 msg="Server listening on 127.0.0.1:38637"
+ llama_model_loader: loaded meta data with 30 key-value pairs and 255 tensors from /root/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff (version GGUF V3 (latest))
+ llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+ llama_model_loader: - kv 0: general.architecture str = llama
+ llama_model_loader: - kv 1: general.type str = model
+ llama_model_loader: - kv 2: general.name str = Llama 3.2 3B Instruct
+ llama_model_loader: - kv 3: general.finetune str = Instruct
+ llama_model_loader: - kv 4: general.basename str = Llama-3.2
+ llama_model_loader: - kv 5: general.size_label str = 3B
+ llama_model_loader: - kv 6: general.tags arr[str,6] = ["facebook", "meta", "pytorch", "llam...
+ llama_model_loader: - kv 7: general.languages arr[str,8] = ["en", "de", "fr", "it", "pt", "hi", ...
+ llama_model_loader: - kv 8: llama.block_count u32 = 28
+ llama_model_loader: - kv 9: llama.context_length u32 = 131072
+ llama_model_loader: - kv 10: llama.embedding_length u32 = 3072
+ llama_model_loader: - kv 11: llama.feed_forward_length u32 = 8192
+ llama_model_loader: - kv 12: llama.attention.head_count u32 = 24
+ llama_model_loader: - kv 13: llama.attention.head_count_kv u32 = 8
+ llama_model_loader: - kv 14: llama.rope.freq_base f32 = 500000.000000
+ llama_model_loader: - kv 15: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+ llama_model_loader: - kv 16: llama.attention.key_length u32 = 128
+ llama_model_loader: - kv 17: llama.attention.value_length u32 = 128
+ llama_model_loader: - kv 18: general.file_type u32 = 15
+ llama_model_loader: - kv 19: llama.vocab_size u32 = 128256
+ llama_model_loader: - kv 20: llama.rope.dimension_count u32 = 128
+ llama_model_loader: - kv 21: tokenizer.ggml.model str = gpt2
+ llama_model_loader: - kv 22: tokenizer.ggml.pre str = llama-bpe
+ llama_model_loader: - kv 23: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ...
+ llama_model_loader: - kv 24: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+ llama_model_loader: - kv 25: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
+ llama_model_loader: - kv 26: tokenizer.ggml.bos_token_id u32 = 128000
+ llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 128009
+ llama_model_loader: - kv 28: tokenizer.chat_template str = {{- bos_token }}\n{%- if custom_tools ...
+ llama_model_loader: - kv 29: general.quantization_version u32 = 2
+ llama_model_loader: - type f32: 58 tensors
+ llama_model_loader: - type q4_K: 168 tensors
+ llama_model_loader: - type q6_K: 29 tensors
+ print_info: file format = GGUF V3 (latest)
+ print_info: file type = Q4_K - Medium
+ print_info: file size = 1.87 GiB (5.01 BPW)
+ time=2025-05-22T03:18:38.440Z level=INFO source=server.go:625 msg="waiting for server to become available" status="llm server loading model"
+ load: special tokens cache size = 256
+ load: token to piece cache size = 0.7999 MB
+ print_info: arch = llama
+ print_info: vocab_only = 0
+ print_info: n_ctx_train = 131072
+ print_info: n_embd = 3072
+ print_info: n_layer = 28
+ print_info: n_head = 24
+ print_info: n_head_kv = 8
+ print_info: n_rot = 128
+ print_info: n_swa = 0
+ print_info: n_swa_pattern = 1
+ print_info: n_embd_head_k = 128
+ print_info: n_embd_head_v = 128
+ print_info: n_gqa = 3
+ print_info: n_embd_k_gqa = 1024
+ print_info: n_embd_v_gqa = 1024
+ print_info: f_norm_eps = 0.0e+00
+ print_info: f_norm_rms_eps = 1.0e-05
+ print_info: f_clamp_kqv = 0.0e+00
+ print_info: f_max_alibi_bias = 0.0e+00
+ print_info: f_logit_scale = 0.0e+00
+ print_info: f_attn_scale = 0.0e+00
+ print_info: n_ff = 8192
+ print_info: n_expert = 0
+ print_info: n_expert_used = 0
+ print_info: causal attn = 1
+ print_info: pooling type = 0
+ print_info: rope type = 0
+ print_info: rope scaling = linear
+ print_info: freq_base_train = 500000.0
+ print_info: freq_scale_train = 1
+ print_info: n_ctx_orig_yarn = 131072
+ print_info: rope_finetuned = unknown
+ print_info: ssm_d_conv = 0
+ print_info: ssm_d_inner = 0
+ print_info: ssm_d_state = 0
+ print_info: ssm_dt_rank = 0
+ print_info: ssm_dt_b_c_rms = 0
+ print_info: model type = 3B
+ print_info: model params = 3.21 B
+ print_info: general.name = Llama 3.2 3B Instruct
+ print_info: vocab type = BPE
+ print_info: n_vocab = 128256
+ print_info: n_merges = 280147
+ print_info: BOS token = 128000 '<|begin_of_text|>'
+ print_info: EOS token = 128009 '<|eot_id|>'
+ print_info: EOT token = 128009 '<|eot_id|>'
+ print_info: EOM token = 128008 '<|eom_id|>'
+ print_info: LF token = 198 'Ċ'
+ print_info: EOG token = 128008 '<|eom_id|>'
+ print_info: EOG token = 128009 '<|eot_id|>'
+ print_info: max token length = 256
+ load_tensors: loading model tensors, this can take a while... (mmap = false)
+ load_tensors: CPU model buffer size = 1918.35 MiB
+ llama_context: constructing llama_context
+ llama_context: n_seq_max = 2
+ llama_context: n_ctx = 8192
+ llama_context: n_ctx_per_seq = 4096
+ llama_context: n_batch = 1024
+ llama_context: n_ubatch = 512
+ llama_context: causal_attn = 1
+ llama_context: flash_attn = 0
+ llama_context: freq_base = 500000.0
+ llama_context: freq_scale = 1
+ llama_context: n_ctx_per_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
+ llama_context: CPU output buffer size = 1.00 MiB
+ llama_kv_cache_unified: kv_size = 8192, type_k = 'f16', type_v = 'f16', n_layer = 28, can_shift = 1, padding = 32
+ llama_kv_cache_unified: CPU KV buffer size = 896.00 MiB
+ llama_kv_cache_unified: KV self size = 896.00 MiB, K (f16): 448.00 MiB, V (f16): 448.00 MiB
+ llama_context: CPU compute buffer size = 424.01 MiB
+ llama_context: graph nodes = 958
+ llama_context: graph splits = 1
+ time=2025-05-22T03:18:44.220Z level=INFO source=server.go:630 msg="llama runner started in 6.03 seconds"
+ [GIN] 2025/05/22 - 03:21:18 | 200 | 2m41s | 127.0.0.1 | POST "/api/chat"
+ Processed 0 rows | Elapsed: 161.03s | ETA: 2106716.46s
+ [GIN] 2025/05/22 - 03:21:39 | 200 | 3m2s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:22:15 | 200 | 3m38s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:22:15 | 200 | 3m38s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:23:02 | 200 | 4m24s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:23:02 | 200 | 4m25s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:23:45 | 200 | 5m7s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:24:05 | 200 | 5m27s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:24:32 | 200 | 3m14s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:24:55 | 200 | 3m15s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:25:22 | 200 | 3m7s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:25:45 | 200 | 3m29s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:26:08 | 200 | 3m6s | 127.0.0.1 | POST "/api/chat"
+ [GIN] 2025/05/22 - 03:26:30 | 200 | 3m27s | 127.0.0.1 | POST "/api/chat"
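
Note on the log above: the startup command uses --parallel 2, and the paired [GIN] completions show two POST "/api/chat" requests being served concurrently, while the notebook prints its own "Processed N rows | Elapsed | ETA" progress line. Below is a minimal sketch of how such concurrent zero-shot requests could be issued from Python. The payload fields follow Ollama's documented /api/chat API, but classify_row, the prompt wording, and the sample rows are hypothetical, not the notebook's actual code.

    # Hypothetical sketch: concurrent zero-shot /api/chat requests against a
    # local Ollama server, matching the runner's --parallel 2 setting.
    import time
    from concurrent.futures import ThreadPoolExecutor

    import requests

    OLLAMA_CHAT_URL = "http://127.0.0.1:11434/api/chat"  # Ollama's default API port

    def classify_row(text: str) -> str:
        """Send one classification request and return the model's reply text."""
        payload = {
            "model": "llama3.2",
            "messages": [{"role": "user", "content": f"Classify the intent of this query: {text}"}],
            "stream": False,  # one JSON response instead of a token stream
        }
        resp = requests.post(OLLAMA_CHAT_URL, json=payload, timeout=600)
        resp.raise_for_status()
        return resp.json()["message"]["content"]

    rows = ["i want to check my card balance", "how do i reset my pin"]  # sample inputs
    start = time.time()
    # Two workers to match --parallel 2; extra requests would queue server-side.
    with ThreadPoolExecutor(max_workers=2) as pool:
        predictions = list(pool.map(classify_row, rows))
    print(f"Processed {len(predictions)} rows | Elapsed: {time.time() - start:.2f}s")

One trade-off visible in the log: with --parallel 2 the 8192-token context is split across sequences (n_ctx_per_seq = 4096), so parallelism buys throughput at the cost of per-request context.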
logs/logs_cpu_2025.05.22 round4 parallelise2.txt ADDED
The diff for this file is too large to render.
 
logs/logs_gpu_2025.05.22 round2 w start-end-idx.txt ADDED
The diff for this file is too large to render.
 
logs/logs_gpu_2025.05.22.txt ADDED
The diff for this file is too large to render.
 
predictions-zeroshot/banking/df_banking_with_predictions_llama3.2.csv ADDED
The diff for this file is too large to render.
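
This CSV pairs each banking input row with the model's zero-shot prediction. A hypothetical scoring sketch follows; only the file path comes from this commit, while the "category" (gold label) and "prediction" (model output) column names are assumptions about its layout.

    # Hypothetical sketch: score the zero-shot predictions saved in the CSV above.
    import pandas as pd

    df = pd.read_csv("predictions-zeroshot/banking/df_banking_with_predictions_llama3.2.csv")
    # Normalize case/whitespace before comparing assumed label columns.
    match = df["category"].str.strip().str.lower() == df["prediction"].str.strip().str.lower()
    print(f"Zero-shot accuracy on banking: {match.mean():.3f}")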
 
predictions-zeroshot/oos/results_llama3.2_0_11850.json ADDED
The diff for this file is too large to render.
 
predictions-zeroshot/oos/results_llama3.2_11850_23699.json ADDED
The diff for this file is too large to render.
 
predictions-zeroshot/stackoverflow/results_llama3.2_0_10000.json ADDED
The diff for this file is too large to render.
 
predictions-zeroshot/stackoverflow/results_llama3.2_10000_19999.json ADDED
The diff for this file is too large to render.
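
The OOS and StackOverflow results are written as row-range shards (0_11850 / 11850_23699 and 0_10000 / 10000_19999), matching the start-end-idx runs named in the GPU logs. A plausible recombination step is sketched below; the shard paths come from this commit, but the assumption that each JSON file holds a list of prediction records is hypothetical.

    # Hypothetical sketch: merge row-range shards such as results_llama3.2_0_11850.json
    # and results_llama3.2_11850_23699.json back into one table.
    import glob
    import json

    import pandas as pd

    def load_shards(pattern: str) -> pd.DataFrame:
        """Load every shard matching the pattern and concatenate in sorted order."""
        frames = []
        for path in sorted(glob.glob(pattern)):
            with open(path) as f:
                frames.append(pd.DataFrame(json.load(f)))  # assumed: list of records
        return pd.concat(frames, ignore_index=True)

    df_oos = load_shards("predictions-zeroshot/oos/results_llama3.2_*.json")
    print(df_oos.shape)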