KaiquanMah commited on
Commit
dac0ce2
·
verified ·
1 Parent(s): 0288ef9

Upload 45 files

Browse files
Files changed (20) hide show
  1. predictions-zeroshot/round2-force_oos/classification_report_llama3.2_3b_banking.txt +222 -0
  2. predictions-zeroshot/round2-force_oos/classification_report_llama3.2_3b_banking_open_vs_known.txt +8 -0
  3. predictions-zeroshot/round3-pydantic/classification_report_llama3.2_3b_banking.txt +83 -0
  4. predictions-zeroshot/round3-pydantic/classification_report_llama3.2_3b_banking_open_vs_known.txt +8 -0
  5. predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_banking.txt +65 -0
  6. predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_banking_open_vs_known.txt +8 -0
  7. predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_oos.txt +120 -0
  8. predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_oos_open_vs_known.txt +8 -0
  9. predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_stackoverflow.txt +22 -0
  10. predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_stackoverflow_open_vs_known.txt +8 -0
  11. predictions-zeroshot/round5-other-models/classification_report_deepseek-r1_7b_banking.txt +65 -0
  12. predictions-zeroshot/round5-other-models/classification_report_deepseek-r1_7b_banking_open_vs_known.txt +8 -0
  13. predictions-zeroshot/round5-other-models/classification_report_gemma3_4b-it-qa_banking.txt +65 -0
  14. predictions-zeroshot/round5-other-models/classification_report_gemma3_4b-it-qa_banking_open_vs_known.txt +8 -0
  15. predictions-zeroshot/round5-other-models/classification_report_mistral_7b_banking.txt +65 -0
  16. predictions-zeroshot/round5-other-models/classification_report_mistral_7b_banking_open_vs_known.txt +8 -0
  17. predictions-zeroshot/round5-other-models/classification_report_qwen3_8b_banking.txt +65 -0
  18. predictions-zeroshot/round5-other-models/classification_report_qwen3_8b_banking_open_vs_known.txt +8 -0
  19. predictions-zeroshot/round5-other-models/classification_report_tulu3_8b_banking.txt +65 -0
  20. predictions-zeroshot/round5-other-models/classification_report_tulu3_8b_banking_open_vs_known.txt +8 -0
predictions-zeroshot/round2-force_oos/classification_report_llama3.2_3b_banking.txt ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ declined_card_payment 0.00 nan 0.00 0
4
+ disposable_card_limits 0.00 nan 0.00 0
5
+ Refund_not_showing_up 0.38 0.02 0.05 202
6
+ activate_my_card 0.90 0.71 0.80 199
7
+ activation_requirements 0.00 nan 0.00 0
8
+ add_funds_to_account 0.00 nan 0.00 0
9
+ add_money 0.00 nan 0.00 0
10
+ add_money_into_account 0.00 nan 0.00 0
11
+ add_physical_money 0.00 nan 0.00 0
12
+ age_limit 0.96 0.47 0.63 150
13
+ apple_pay_or_google_pay 0.75 0.48 0.59 166
14
+ atm_not_working 0.00 nan 0.00 0
15
+ atm_support 0.49 0.31 0.38 127
16
+ auto_top_up 0.00 nan 0.00 0
17
+ auto_top_up_limits 0.00 nan 0.00 0
18
+ automatic_top_up 0.94 0.81 0.87 167
19
+ avoid_charges 0.00 nan 0.00 0
20
+ balance_not_updated_after_atm_retrieval 0.00 nan 0.00 0
21
+ balance_not_updated_after_atm_withdrawal 0.00 nan 0.00 0
22
+ balance_not_updated_after_bank_transfer 0.27 0.25 0.26 211
23
+ balance_not_updated_after_cash_deposit 0.00 nan 0.00 0
24
+ balance_not_updated_after_cash_or_cheque_deposit 0.00 nan 0.00 0
25
+ balance_not_updated_after_cash_withdrawal 0.00 nan 0.00 0
26
+ balance_not_updated_after_cheque_or_cash_deposit 0.71 0.38 0.50 221
27
+ balance_not_updated_after_top_up 0.00 nan 0.00 0
28
+ balance_not_updated_after_transfer 0.00 nan 0.00 0
29
+ beneficiary_not_allowed 0.89 0.17 0.29 196
30
+ cancel_transfer 0.67 0.48 0.56 197
31
+ cancelled_card_payment 0.00 nan 0.00 0
32
+ cancelled_order 0.00 nan 0.00 0
33
+ cancelled_transaction 0.00 nan 0.00 0
34
+ cancelled_transfer 0.00 nan 0.00 0
35
+ card_about_to_expire 0.82 0.50 0.62 169
36
+ card_acceptance 0.33 0.61 0.43 99
37
+ card_arrival 0.35 0.68 0.46 193
38
+ card_change_pin 0.00 nan 0.00 0
39
+ card_delivery_estimate 0.50 0.12 0.19 152
40
+ card_limits 0.00 nan 0.00 0
41
+ card_linking 0.51 0.45 0.48 179
42
+ card_not_found 0.00 nan 0.00 0
43
+ card_not_showing_up 0.00 nan 0.00 0
44
+ card_not_working 0.31 0.70 0.43 152
45
+ card_order_details 0.00 nan 0.00 0
46
+ card_payment 0.00 nan 0.00 0
47
+ card_payment_fee_charged 0.65 0.37 0.47 227
48
+ card_payment_not_recognised 0.28 0.41 0.33 208
49
+ card_payment_not_working 0.00 nan 0.00 0
50
+ card_payment_or_addition 0.00 nan 0.00 0
51
+ card_payment_or_top_up 0.00 nan 0.00 0
52
+ card_payment_wrong_exchange_rate 0.49 0.28 0.36 207
53
+ card_reactivation 0.00 nan 0.00 0
54
+ card_reinstatement 0.00 nan 0.00 0
55
+ card_relink 0.00 nan 0.00 0
56
+ card_status 0.00 nan 0.00 0
57
+ card_swallowed 0.71 0.39 0.50 101
58
+ card_top_up 0.00 nan 0.00 0
59
+ card_virtual 0.00 nan 0.00 0
60
+ cash_or_cheque 0.00 nan 0.00 0
61
+ cash_withdrawal 0.00 nan 0.00 0
62
+ cash_withdrawal_charge 0.63 0.69 0.66 217
63
+ cash_withdrawal_not_recognised 0.20 0.22 0.21 200
64
+ cash_withdrawal_not_updated_after_bank_transfer 0.00 nan 0.00 0
65
+ cash_withdrawal_not_updated_after_cheque_or_cash_deposit 0.00 nan 0.00 0
66
+ change_pin 0.58 0.86 0.69 162
67
+ children_account 0.00 nan 0.00 0
68
+ compromised_card 0.64 0.17 0.26 126
69
+ contactless_not_working 0.79 0.69 0.74 75
70
+ country_support 0.82 0.52 0.64 169
71
+ crypto_top_up 0.00 nan 0.00 0
72
+ declined_card_payment 0.23 0.41 0.29 193
73
+ declined_cash_withdrawal 0.41 0.14 0.20 213
74
+ declined_direct_debit_payment 0.00 nan 0.00 0
75
+ declined_transfer 0.45 0.50 0.47 173
76
+ direct_debit_payment_not_recognised 0.96 0.20 0.33 222
77
+ disposable_card_limits 0.90 0.64 0.75 161
78
+ disposeable_card_limits 0.00 nan 0.00 0
79
+ disposealble_card_limits 0.00 nan 0.00 0
80
+ disposeble_card_limits 0.00 nan 0.00 0
81
+ dispute_payment 0.00 nan 0.00 0
82
+ duplicate_cards 0.00 nan 0.00 0
83
+ duplicate_charges 0.00 nan 0.00 0
84
+ duplicate_payment 0.00 nan 0.00 0
85
+ duplicate_transactions 0.00 nan 0.00 0
86
+ edit_personal_details 0.87 0.73 0.79 161
87
+ error 0.00 nan 0.00 0
88
+ exchange_charge 0.70 0.46 0.55 161
89
+ exchange_currency 0.00 nan 0.00 0
90
+ exchange_currency_limits 0.00 nan 0.00 0
91
+ exchange_rate 0.26 0.95 0.40 152
92
+ exchange_rate_error 0.00 nan 0.00 0
93
+ exchange_rate_not_matches_withdrawal 0.00 nan 0.00 0
94
+ exchange_via_app 0.46 0.20 0.28 158
95
+ extra_cash 0.00 nan 0.00 0
96
+ extra_charge_on_statement 0.47 0.56 0.51 206
97
+ failed_transfer 0.83 0.08 0.15 177
98
+ family_support 0.00 nan 0.00 0
99
+ fiat_currency_support 1.00 0.19 0.31 166
100
+ get_additional_cards 0.00 nan 0.00 0
101
+ get_disposable_virtual_card 0.62 0.74 0.68 137
102
+ get_physical_card 0.01 0.01 0.01 146
103
+ get_top_up 0.00 nan 0.00 0
104
+ get_top_up_by_card_linking 0.00 nan 0.00 0
105
+ get_top_up_verification_code 0.00 nan 0.00 0
106
+ get_virtual_card 0.00 nan 0.00 0
107
+ getting_spare_card 0.81 0.10 0.18 169
108
+ getting_virtual_card 0.59 0.07 0.13 138
109
+ google_pay_or_google_pay 0.00 nan 0.00 0
110
+ google_pay_or_google_top_up 0.00 nan 0.00 0
111
+ google_pay_or_other_payment_method 0.00 nan 0.00 0
112
+ google_pay_or_top_up 0.00 nan 0.00 0
113
+ google_pay_or_virtual_card 0.00 nan 0.00 0
114
+ invalid_query 0.00 nan 0.00 0
115
+ lost_or_stolen_card 0.40 0.75 0.52 122
116
+ lost_or_stolen_phone 0.98 0.35 0.52 161
117
+ missing_money 0.00 nan 0.00 0
118
+ new_email 0.00 nan 0.00 0
119
+ oos 0.00 0.00 0.00 203
120
+ open_account 0.00 nan 0.00 0
121
+ open_account_for_child 0.00 nan 0.00 0
122
+ open_account_for_newborn 0.00 nan 0.00 0
123
+ order_not_received 0.00 nan 0.00 0
124
+ order_physical_card 0.11 0.05 0.07 160
125
+ passcode_forgotten 0.86 0.46 0.60 145
126
+ pending_card_payment 0.86 0.57 0.68 199
127
+ pending_cash_withdrawal 0.93 0.31 0.47 183
128
+ pending_top_up 0.93 0.22 0.35 189
129
+ pending_transaction 0.00 nan 0.00 0
130
+ pending_transfer 0.56 0.15 0.24 188
131
+ pin_attempts_exceeded 0.00 nan 0.00 0
132
+ pin_blocked 0.63 0.70 0.66 155
133
+ pin_change 0.00 nan 0.00 0
134
+ pin_not_received 0.00 nan 0.00 0
135
+ protect_account_from_fraud 0.00 nan 0.00 0
136
+ reactivate_my_card 0.00 nan 0.00 0
137
+ receive_money 0.00 nan 0.00 0
138
+ receive_my_salary 0.00 nan 0.00 0
139
+ receive_salary 0.00 nan 0.00 0
140
+ receiving_money 0.58 0.16 0.25 135
141
+ refund_not_showing_up 0.00 nan 0.00 0
142
+ request_refund 0.55 0.46 0.50 209
143
+ request_refund_not_updated 0.00 nan 0.00 0
144
+ return_an_item 0.00 nan 0.00 0
145
+ reverse_my_transfer 0.00 nan 0.00 0
146
+ reverse_transaction 0.00 nan 0.00 0
147
+ reverse_transfer 0.00 nan 0.00 0
148
+ revert_card_payment 0.00 nan 0.00 0
149
+ reverted_card_payment 0.00 nan 0.00 0
150
+ reverted_card_payment? nan 0.00 0.00 201
151
+ supported_cards_and_currencies 0.54 0.30 0.38 169
152
+ terminate_account 0.98 0.75 0.85 148
153
+ terminated_card_payment 0.00 nan 0.00 0
154
+ top_up 0.00 nan 0.00 0
155
+ top_upVerificationCode 0.00 nan 0.00 0
156
+ top_up_by_bank_transfer 0.00 nan 0.00 0
157
+ top_up_by_bank_transfer_charge 0.30 0.09 0.13 151
158
+ top_up_by_card 0.00 nan 0.00 0
159
+ top_up_by_card_and_cash_or_cheque 0.00 nan 0.00 0
160
+ top_up_by_card_charge 0.57 0.42 0.48 154
161
+ top_up_by_cash_or_cheque 0.74 0.58 0.65 154
162
+ top_up_by_cheque 0.00 nan 0.00 0
163
+ top_up_by_transfer 0.00 nan 0.00 0
164
+ top_up_by_transfer_charge 0.00 nan 0.00 0
165
+ top_up_cancelled 0.00 nan 0.00 0
166
+ top_up_failed 0.45 0.51 0.48 185
167
+ top_up_limits 0.29 0.85 0.44 137
168
+ top_up_not_showing_up 0.00 nan 0.00 0
169
+ top_up_not_updated_after_transaction 0.00 nan 0.00 0
170
+ top_up_not_working 0.00 nan 0.00 0
171
+ top_up_pending 0.00 nan 0.00 0
172
+ top_up_reverted 0.87 0.29 0.44 186
173
+ top_up_transfer_process 0.00 nan 0.00 0
174
+ top_up_verification 0.00 nan 0.00 0
175
+ top_up_verification_code 0.00 nan 0.00 0
176
+ top_up_verification_code_missing 0.00 nan 0.00 0
177
+ top_up_verification_code_not_found 0.00 nan 0.00 0
178
+ top_up_via_apple_pay 0.00 nan 0.00 0
179
+ topping_up_by_card 0.67 0.01 0.03 143
180
+ topping_up_by_card_charge 0.00 nan 0.00 0
181
+ topping_up_limits 0.00 nan 0.00 0
182
+ transaction_charged_twice 0.90 0.66 0.76 215
183
+ transaction_charging_issues 0.00 nan 0.00 0
184
+ transaction_not_completed 0.00 nan 0.00 0
185
+ transfer 0.00 nan 0.00 0
186
+ transfer_cost 0.00 nan 0.00 0
187
+ transfer_declined 0.00 nan 0.00 0
188
+ transfer_failed 0.00 nan 0.00 0
189
+ transfer_faster 0.00 nan 0.00 0
190
+ transfer_fee_charged 0.70 0.48 0.57 212
191
+ transfer_info 0.00 nan 0.00 0
192
+ transfer_into_account 0.41 0.55 0.47 153
193
+ transfer_money 0.00 nan 0.00 0
194
+ transfer_not_received_by_recipient 0.21 0.51 0.30 211
195
+ transfer_origin 0.00 nan 0.00 0
196
+ transfer_pending 0.00 nan 0.00 0
197
+ transfer_policy 0.00 nan 0.00 0
198
+ transfer_process 0.00 nan 0.00 0
199
+ transfer_stopped 0.00 nan 0.00 0
200
+ transfer_timing 0.48 0.54 0.50 168
201
+ transfertiming 0.00 nan 0.00 0
202
+ unable_to_verify_identity 0.44 0.54 0.49 142
203
+ unexpected_charge 0.00 nan 0.00 0
204
+ unknown 0.00 nan 0.00 0
205
+ unknown_payment_origin 0.00 nan 0.00 0
206
+ unsupported_cards_and_currencies 0.00 nan 0.00 0
207
+ unsupported_currency 0.00 nan 0.00 0
208
+ unsupported_query 0.00 nan 0.00 0
209
+ verify_identity 0.00 nan 0.00 0
210
+ verify_my_identity 0.57 0.83 0.67 144
211
+ verify_source_of_funds 1.00 0.59 0.74 153
212
+ verify_top_up 0.95 0.43 0.60 166
213
+ virtual_card_limits 0.00 nan 0.00 0
214
+ virtual_card_not_showing_up 0.00 nan 0.00 0
215
+ virtual_card_not_working 0.74 0.32 0.45 81
216
+ visa_or_mastercard 0.93 0.51 0.66 175
217
+ why_verify_identity 0.57 0.02 0.05 161
218
+ wrong_amount_of_cash_received 0.84 0.20 0.32 220
219
+
220
+ accuracy 0.41 13083
221
+ macro avg 0.22 0.41 0.16 13083
222
+ weighted avg 0.61 0.41 0.43 13083
predictions-zeroshot/round2-force_oos/classification_report_llama3.2_3b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.98 1.00 0.99 12880
4
+ open 0.00 0.00 0.00 203
5
+
6
+ accuracy 0.98 13083
7
+ macro avg 0.49 0.50 0.50 13083
8
+ weighted avg 0.97 0.98 0.97 13083
predictions-zeroshot/round3-pydantic/classification_report_llama3.2_3b_banking.txt ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ Refund_not_showing_up 0.76 0.22 0.34 202
4
+ activate_my_card 0.88 0.80 0.84 199
5
+ age_limit 0.95 0.63 0.76 150
6
+ apple_pay_or_google_pay 0.86 0.58 0.69 166
7
+ atm_support 0.51 0.37 0.43 127
8
+ automatic_top_up 0.93 0.89 0.91 167
9
+ balance_not_updated_after_bank_transfer 0.27 0.40 0.32 211
10
+ balance_not_updated_after_cheque_or_cash_deposit 0.79 0.62 0.69 221
11
+ beneficiary_not_allowed 0.93 0.21 0.35 196
12
+ cancel_transfer 0.69 0.61 0.65 197
13
+ card_about_to_expire 0.91 0.64 0.75 169
14
+ card_acceptance 0.35 0.89 0.50 99
15
+ card_arrival 0.37 0.86 0.52 193
16
+ card_delivery_estimate 0.49 0.12 0.20 152
17
+ card_linking 0.45 0.58 0.50 179
18
+ card_not_working 0.29 0.84 0.43 152
19
+ card_payment_fee_charged 0.67 0.54 0.60 227
20
+ card_payment_not_recognised 0.30 0.60 0.40 208
21
+ card_payment_wrong_exchange_rate 0.55 0.58 0.56 207
22
+ card_swallowed 0.71 0.50 0.59 101
23
+ cash_withdrawal_charge 0.68 0.86 0.76 217
24
+ cash_withdrawal_not_recognised 0.24 0.34 0.29 200
25
+ change_pin 0.55 0.93 0.69 162
26
+ compromised_card 0.73 0.19 0.30 126
27
+ contactless_not_working 0.87 0.80 0.83 75
28
+ country_support 0.84 0.60 0.70 169
29
+ declined_card_payment 0.20 0.44 0.27 193
30
+ declined_cash_withdrawal 0.51 0.19 0.28 213
31
+ declined_transfer 0.47 0.59 0.52 173
32
+ direct_debit_payment_not_recognised 0.97 0.29 0.45 222
33
+ disposable_card_limits 0.89 0.80 0.84 161
34
+ edit_personal_details 0.87 0.97 0.92 161
35
+ exchange_charge 0.67 0.62 0.65 161
36
+ exchange_rate 0.24 0.99 0.38 152
37
+ exchange_via_app 0.42 0.20 0.27 158
38
+ extra_charge_on_statement 0.50 0.80 0.62 206
39
+ failed_transfer 0.94 0.08 0.16 177
40
+ fiat_currency_support 1.00 0.22 0.36 166
41
+ get_disposable_virtual_card 0.47 0.86 0.61 137
42
+ get_physical_card 0.01 0.01 0.01 146
43
+ getting_spare_card 0.83 0.09 0.16 169
44
+ getting_virtual_card 0.92 0.08 0.15 138
45
+ lost_or_stolen_card 0.41 0.84 0.55 122
46
+ lost_or_stolen_phone 1.00 0.42 0.59 161
47
+ oos 0.00 0.00 0.00 203
48
+ order_physical_card 0.24 0.11 0.15 160
49
+ passcode_forgotten 0.92 0.53 0.67 145
50
+ pending_card_payment 0.83 0.72 0.77 199
51
+ pending_cash_withdrawal 0.99 0.37 0.54 183
52
+ pending_top_up 0.98 0.26 0.41 189
53
+ pending_transfer 0.59 0.15 0.24 188
54
+ pin_blocked 0.61 0.79 0.69 155
55
+ receiving_money 0.60 0.24 0.35 135
56
+ request_refund 0.51 0.67 0.58 209
57
+ reverted_card_payment? 0.81 0.28 0.41 201
58
+ supported_cards_and_currencies 0.55 0.38 0.45 169
59
+ terminate_account 0.98 1.00 0.99 148
60
+ top_up_by_bank_transfer_charge 0.33 0.13 0.19 151
61
+ top_up_by_card_charge 0.39 0.55 0.46 154
62
+ top_up_by_cash_or_cheque 0.77 0.76 0.76 154
63
+ top_up_failed 0.44 0.59 0.50 185
64
+ top_up_limits 0.29 0.99 0.45 137
65
+ top_up_reverted 0.92 0.32 0.48 186
66
+ topping_up_by_card 0.33 0.01 0.01 143
67
+ transaction_charged_twice 0.93 0.80 0.86 215
68
+ transfer_fee_charged 0.68 0.59 0.64 212
69
+ transfer_into_account 0.41 0.65 0.51 153
70
+ transfer_not_received_by_recipient 0.22 0.69 0.33 211
71
+ transfer_timing 0.47 0.77 0.58 168
72
+ unable_to_verify_identity 0.53 0.80 0.63 142
73
+ verify_my_identity 0.57 0.94 0.71 144
74
+ verify_source_of_funds 1.00 0.79 0.88 153
75
+ verify_top_up 0.96 0.46 0.63 166
76
+ virtual_card_not_working 0.73 0.33 0.46 81
77
+ visa_or_mastercard 0.96 0.62 0.75 175
78
+ why_verify_identity 0.60 0.02 0.04 161
79
+ wrong_amount_of_cash_received 0.90 0.32 0.47 220
80
+
81
+ accuracy 0.52 13083
82
+ macro avg 0.64 0.52 0.51 13083
83
+ weighted avg 0.64 0.52 0.51 13083
predictions-zeroshot/round3-pydantic/classification_report_llama3.2_3b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.98 1.00 0.99 12880
4
+ open 0.00 0.00 0.00 203
5
+
6
+ accuracy 0.98 13083
7
+ macro avg 0.49 0.50 0.50 13083
8
+ weighted avg 0.97 0.98 0.97 13083
predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_banking.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ activate_my_card 0.84 0.58 0.69 199
4
+ age_limit 0.97 0.60 0.74 150
5
+ apple_pay_or_google_pay 0.90 0.69 0.78 166
6
+ atm_support 0.68 0.57 0.62 127
7
+ automatic_top_up 0.89 0.91 0.90 167
8
+ beneficiary_not_allowed 0.98 0.23 0.38 196
9
+ card_about_to_expire 0.87 0.71 0.78 169
10
+ card_acceptance 0.40 0.88 0.55 99
11
+ card_arrival 0.35 0.82 0.49 193
12
+ card_delivery_estimate 0.49 0.12 0.19 152
13
+ card_linking 0.49 0.53 0.51 179
14
+ card_not_working 0.27 0.86 0.41 152
15
+ card_payment_fee_charged 0.52 0.54 0.53 227
16
+ card_payment_not_recognised 0.21 0.58 0.31 208
17
+ card_swallowed 0.75 0.39 0.51 101
18
+ cash_withdrawal_charge 0.76 0.82 0.79 217
19
+ cash_withdrawal_not_recognised 0.13 0.20 0.16 200
20
+ change_pin 0.40 0.95 0.56 162
21
+ compromised_card 0.65 0.17 0.28 126
22
+ contactless_not_working 0.83 0.80 0.82 75
23
+ country_support 0.81 0.73 0.77 169
24
+ declined_card_payment 0.07 0.27 0.11 193
25
+ declined_cash_withdrawal 0.14 0.27 0.18 213
26
+ direct_debit_payment_not_recognised 0.94 0.34 0.50 222
27
+ disposable_card_limits 0.84 0.77 0.80 161
28
+ edit_personal_details 0.81 0.96 0.88 161
29
+ exchange_charge 0.67 0.71 0.69 161
30
+ exchange_rate 0.24 0.97 0.38 152
31
+ extra_charge_on_statement 0.36 0.81 0.50 206
32
+ failed_transfer 0.47 0.27 0.34 177
33
+ get_disposable_virtual_card 0.54 0.74 0.63 137
34
+ getting_virtual_card 0.87 0.65 0.74 138
35
+ lost_or_stolen_card 0.36 0.84 0.51 122
36
+ lost_or_stolen_phone 1.00 0.20 0.33 161
37
+ oos 0.09 0.00 0.00 3440
38
+ order_physical_card 0.27 0.45 0.34 160
39
+ passcode_forgotten 0.84 0.64 0.73 145
40
+ pending_cash_withdrawal 0.40 0.40 0.40 183
41
+ pending_top_up 1.00 0.23 0.37 189
42
+ receiving_money 0.40 0.41 0.40 135
43
+ request_refund 0.51 0.68 0.58 209
44
+ supported_cards_and_currencies 0.37 0.48 0.42 169
45
+ terminate_account 0.94 1.00 0.97 148
46
+ top_up_by_bank_transfer_charge 0.30 0.23 0.26 151
47
+ top_up_by_card_charge 0.57 0.52 0.54 154
48
+ top_up_by_cash_or_cheque 0.67 0.79 0.72 154
49
+ top_up_failed 0.33 0.71 0.45 185
50
+ top_up_limits 0.34 0.99 0.51 137
51
+ transaction_charged_twice 0.72 0.83 0.77 215
52
+ transfer_not_received_by_recipient 0.15 0.68 0.25 211
53
+ transfer_timing 0.49 0.77 0.60 168
54
+ unable_to_verify_identity 0.40 0.87 0.55 142
55
+ verify_my_identity 0.54 0.75 0.63 144
56
+ verify_top_up 1.00 0.61 0.76 166
57
+ virtual_card_not_working 0.68 0.57 0.62 81
58
+ visa_or_mastercard 0.98 0.73 0.84 175
59
+ why_verify_identity 0.62 0.03 0.06 161
60
+ wrong_amount_of_cash_received 0.74 0.44 0.55 220
61
+ wrong_exchange_rate_for_cash_withdrawal 0.57 0.58 0.58 203
62
+
63
+ accuracy 0.44 13083
64
+ macro avg 0.58 0.59 0.53 13083
65
+ weighted avg 0.46 0.44 0.39 13083
predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.74 0.99 0.85 9643
4
+ open 0.09 0.00 0.00 3440
5
+
6
+ accuracy 0.73 13083
7
+ macro avg 0.41 0.50 0.42 13083
8
+ weighted avg 0.57 0.73 0.62 13083
predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_oos.txt ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ account_blocked 0.21 0.96 0.34 150
4
+ application_status 0.40 0.95 0.56 150
5
+ apr 0.73 0.57 0.64 150
6
+ are_you_a_bot 0.36 0.99 0.53 150
7
+ balance 0.23 0.97 0.37 150
8
+ bill_balance 0.51 0.73 0.60 150
9
+ bill_due 0.66 0.79 0.72 150
10
+ book_flight 0.09 1.00 0.16 150
11
+ book_hotel 0.88 0.81 0.85 150
12
+ calendar 0.54 0.40 0.46 150
13
+ calendar_update 0.46 0.87 0.60 150
14
+ cancel 0.45 0.63 0.52 150
15
+ cancel_reservation 0.80 0.96 0.87 150
16
+ carry_on 0.04 0.03 0.03 150
17
+ change_accent 0.86 0.16 0.27 150
18
+ change_language 0.45 0.93 0.60 150
19
+ change_speed 0.90 0.89 0.90 150
20
+ change_user_name 0.20 0.38 0.26 150
21
+ change_volume 0.91 0.88 0.89 150
22
+ confirm_reservation 0.94 0.31 0.46 150
23
+ cook_time 0.74 0.35 0.47 150
24
+ credit_limit_change 0.35 0.97 0.51 150
25
+ credit_score 0.56 0.95 0.70 150
26
+ damaged_card 0.29 0.79 0.42 150
27
+ date 0.90 0.86 0.88 150
28
+ definition 0.14 1.00 0.24 150
29
+ direct_deposit 0.91 0.97 0.94 150
30
+ distance 0.39 0.39 0.39 150
31
+ do_you_have_pets 0.95 0.78 0.86 150
32
+ exchange_rate 0.64 0.77 0.70 150
33
+ expiration_date 0.48 0.09 0.15 150
34
+ flight_status 0.98 0.80 0.88 150
35
+ food_last 0.92 0.37 0.52 150
36
+ freeze_account 0.98 0.30 0.46 150
37
+ fun_fact 0.31 0.86 0.46 150
38
+ gas_type 0.83 0.13 0.23 150
39
+ greeting 0.14 0.94 0.24 150
40
+ how_busy 0.57 0.05 0.10 150
41
+ improve_credit_score 0.90 0.79 0.84 150
42
+ income 1.00 0.56 0.72 150
43
+ ingredient_substitution 0.79 0.77 0.78 150
44
+ insurance 0.31 0.42 0.35 150
45
+ interest_rate 0.94 0.11 0.19 150
46
+ jump_start 0.98 0.83 0.90 150
47
+ lost_luggage 0.57 0.99 0.73 150
48
+ make_call 0.37 0.89 0.52 150
49
+ maybe 0.44 0.73 0.55 150
50
+ meal_suggestion 0.11 0.57 0.19 150
51
+ measurement_conversion 0.46 0.93 0.62 150
52
+ meeting_schedule 0.75 0.84 0.79 150
53
+ min_payment 0.96 0.30 0.46 150
54
+ mpg 0.43 0.88 0.58 150
55
+ next_holiday 0.92 0.74 0.82 150
56
+ next_song 0.41 0.60 0.49 150
57
+ no nan 0.00 0.00 150
58
+ oil_change_how 0.31 0.77 0.44 150
59
+ oil_change_when 1.00 0.01 0.01 150
60
+ oos 0.51 0.01 0.01 6750
61
+ order_checks 0.66 0.59 0.62 150
62
+ pay_bill 0.40 0.95 0.56 150
63
+ payday 0.87 0.94 0.90 150
64
+ pin_change 1.00 0.25 0.40 150
65
+ plug_type 0.98 0.54 0.70 150
66
+ pto_balance 0.60 0.81 0.69 150
67
+ pto_request 0.66 0.75 0.70 150
68
+ pto_used 1.00 0.40 0.57 150
69
+ redeem_rewards 0.77 0.52 0.62 150
70
+ reminder 0.16 0.17 0.17 150
71
+ reminder_update 0.27 0.20 0.23 150
72
+ repeat 0.96 0.44 0.60 150
73
+ replacement_card_duration 1.00 0.17 0.30 150
74
+ report_fraud 0.97 0.49 0.65 150
75
+ report_lost_card 0.79 0.71 0.75 150
76
+ reset_settings 0.98 0.74 0.84 150
77
+ restaurant_reservation 0.30 0.69 0.42 150
78
+ restaurant_reviews 0.88 0.92 0.90 150
79
+ roll_dice 0.64 1.00 0.78 150
80
+ rollover_401k 1.00 0.99 1.00 150
81
+ schedule_maintenance 0.86 0.34 0.49 150
82
+ schedule_meeting 0.90 0.66 0.76 150
83
+ shopping_list 0.52 0.31 0.38 150
84
+ shopping_list_update 0.48 0.89 0.62 150
85
+ smart_home 0.70 0.09 0.16 150
86
+ spending_history 0.67 0.77 0.71 150
87
+ sync_device 0.98 0.31 0.47 150
88
+ taxes 0.66 0.94 0.77 150
89
+ tell_joke 0.68 0.61 0.64 150
90
+ text 0.60 0.04 0.07 150
91
+ thank_you 0.59 0.69 0.64 150
92
+ time 0.92 0.72 0.81 150
93
+ timer 0.58 0.96 0.72 150
94
+ timezone 0.80 0.98 0.88 150
95
+ tire_change 0.69 0.91 0.79 150
96
+ tire_pressure 0.88 0.99 0.93 150
97
+ todo_list 0.49 0.25 0.33 150
98
+ todo_list_update 0.50 0.87 0.64 150
99
+ traffic 0.93 0.97 0.95 150
100
+ transactions 0.84 0.60 0.70 150
101
+ transfer 0.98 0.83 0.90 150
102
+ translate 0.93 0.17 0.29 150
103
+ travel_alert 0.95 0.67 0.78 150
104
+ travel_notification 0.38 0.02 0.04 150
105
+ travel_suggestion 0.12 0.99 0.21 150
106
+ update_playlist 0.62 0.95 0.75 150
107
+ vaccines 0.50 0.01 0.03 150
108
+ weather 0.91 0.98 0.94 150
109
+ what_are_your_hobbies 0.94 0.54 0.69 150
110
+ what_can_i_ask_you 0.91 0.94 0.92 150
111
+ what_is_your_name 0.45 0.59 0.51 150
112
+ what_song 0.89 0.39 0.55 150
113
+ where_are_you_from 0.61 0.77 0.68 150
114
+ whisper_mode 0.94 0.80 0.87 150
115
+ who_made_you 0.55 0.81 0.66 150
116
+ yes 1.00 0.05 0.09 150
117
+
118
+ accuracy 0.46 23700
119
+ macro avg 0.66 0.63 0.56 23700
120
+ weighted avg 0.62 0.46 0.41 23700
predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_oos_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.72 1.00 0.83 16950
4
+ open 0.51 0.01 0.01 6750
5
+
6
+ accuracy 0.72 23700
7
+ macro avg 0.61 0.50 0.42 23700
8
+ weighted avg 0.66 0.72 0.60 23700
predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_stackoverflow.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ ajax 0.64 0.90 0.75 1000
4
+ apache 0.87 0.84 0.85 1000
5
+ bash 0.66 0.93 0.78 1000
6
+ cocoa 0.45 0.75 0.57 1000
7
+ drupal 0.39 0.92 0.54 1000
8
+ excel 0.64 0.94 0.76 1000
9
+ hibernate 0.57 0.94 0.71 1000
10
+ linq 0.55 0.94 0.69 1000
11
+ magento 0.76 0.92 0.83 1000
12
+ matlab 0.68 0.88 0.76 1000
13
+ oos 0.58 0.02 0.05 5000
14
+ oracle 0.97 0.82 0.89 1000
15
+ qt 0.94 0.79 0.86 1000
16
+ spring 0.94 0.87 0.90 1000
17
+ svn 0.88 0.93 0.91 1000
18
+ visual-studio 0.86 0.83 0.84 1000
19
+
20
+ accuracy 0.67 20000
21
+ macro avg 0.71 0.83 0.73 20000
22
+ weighted avg 0.68 0.67 0.59 20000
predictions-zeroshot/round4-oos-out-of-prompt/classification_report_llama3.2_3b_stackoverflow_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.75 0.99 0.86 15000
4
+ open 0.58 0.02 0.05 5000
5
+
6
+ accuracy 0.75 20000
7
+ macro avg 0.66 0.51 0.45 20000
8
+ weighted avg 0.71 0.75 0.65 20000
predictions-zeroshot/round5-other-models/classification_report_deepseek-r1_7b_banking.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ activate_my_card 0.51 0.93 0.66 199
4
+ age_limit 0.99 0.57 0.73 150
5
+ apple_pay_or_google_pay 0.95 0.69 0.80 166
6
+ atm_support 0.58 0.66 0.62 127
7
+ automatic_top_up 0.94 0.73 0.82 167
8
+ beneficiary_not_allowed 0.69 0.45 0.54 196
9
+ card_about_to_expire 0.50 0.56 0.53 169
10
+ card_acceptance 0.09 0.66 0.16 99
11
+ card_arrival 0.31 0.91 0.46 193
12
+ card_delivery_estimate 0.00 0.00 0.00 152
13
+ card_linking 0.60 0.50 0.55 179
14
+ card_not_working 0.30 0.80 0.43 152
15
+ card_payment_fee_charged 0.63 0.51 0.56 227
16
+ card_payment_not_recognised 0.07 0.56 0.13 208
17
+ card_swallowed 0.65 0.28 0.39 101
18
+ cash_withdrawal_charge 0.32 0.44 0.37 217
19
+ cash_withdrawal_not_recognised 0.22 0.38 0.28 200
20
+ change_pin 0.28 0.93 0.43 162
21
+ compromised_card 0.47 0.06 0.11 126
22
+ contactless_not_working 1.00 0.79 0.88 75
23
+ country_support 0.56 0.76 0.65 169
24
+ declined_card_payment 0.24 0.61 0.34 193
25
+ declined_cash_withdrawal 0.40 0.02 0.04 213
26
+ direct_debit_payment_not_recognised 0.95 0.47 0.63 222
27
+ disposable_card_limits 0.70 0.37 0.48 161
28
+ edit_personal_details 0.78 0.63 0.69 161
29
+ exchange_charge 0.35 0.73 0.47 161
30
+ exchange_rate 0.40 0.95 0.57 152
31
+ extra_charge_on_statement 0.37 0.65 0.47 206
32
+ failed_transfer nan 0.00 0.00 177
33
+ get_disposable_virtual_card 0.54 0.66 0.60 137
34
+ getting_virtual_card 1.00 0.03 0.06 138
35
+ lost_or_stolen_card 0.62 0.71 0.66 122
36
+ lost_or_stolen_phone 1.00 0.24 0.38 161
37
+ oos 0.14 0.01 0.02 3440
38
+ order_physical_card 0.50 0.01 0.02 160
39
+ passcode_forgotten nan 0.00 0.00 145
40
+ pending_cash_withdrawal 0.74 0.45 0.56 183
41
+ pending_top_up 0.24 0.46 0.31 189
42
+ receiving_money 1.00 0.02 0.04 135
43
+ request_refund 0.49 0.55 0.52 209
44
+ supported_cards_and_currencies 0.56 0.16 0.25 169
45
+ terminate_account 1.00 0.84 0.91 148
46
+ top_up_by_bank_transfer_charge 0.17 0.62 0.26 151
47
+ top_up_by_card_charge 0.17 0.39 0.24 154
48
+ top_up_by_cash_or_cheque 0.44 0.27 0.34 154
49
+ top_up_failed 0.09 0.63 0.15 185
50
+ top_up_limits 0.46 0.78 0.58 137
51
+ transaction_charged_twice 1.00 0.03 0.06 215
52
+ transfer_not_received_by_recipient 0.02 0.00 0.01 211
53
+ transfer_timing 0.43 0.02 0.03 168
54
+ unable_to_verify_identity 0.00 0.00 0.00 142
55
+ verify_my_identity 0.30 0.96 0.45 144
56
+ verify_top_up 0.95 0.34 0.50 166
57
+ virtual_card_not_working 0.46 0.74 0.57 81
58
+ visa_or_mastercard 1.00 0.11 0.20 175
59
+ why_verify_identity 1.00 0.01 0.01 161
60
+ wrong_amount_of_cash_received 0.80 0.05 0.10 220
61
+ wrong_exchange_rate_for_cash_withdrawal 0.00 0.00 0.00 203
62
+
63
+ accuracy 0.32 13083
64
+ macro avg 0.53 0.44 0.37 13083
65
+ weighted avg 0.42 0.32 0.27 13083
predictions-zeroshot/round5-other-models/classification_report_deepseek-r1_7b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.73 0.97 0.84 9643
4
+ open 0.14 0.01 0.02 3440
5
+
6
+ accuracy 0.72 13083
7
+ macro avg 0.44 0.49 0.43 13083
8
+ weighted avg 0.58 0.72 0.62 13083
predictions-zeroshot/round5-other-models/classification_report_gemma3_4b-it-qa_banking.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ activate_my_card 0.87 0.86 0.87 199
4
+ age_limit 0.98 0.67 0.79 150
5
+ apple_pay_or_google_pay 0.97 0.60 0.74 166
6
+ atm_support 0.46 0.25 0.32 127
7
+ automatic_top_up 0.95 0.75 0.84 167
8
+ beneficiary_not_allowed 0.83 0.22 0.35 196
9
+ card_about_to_expire 0.96 0.68 0.80 169
10
+ card_acceptance 0.43 0.98 0.60 99
11
+ card_arrival 0.27 0.06 0.10 193
12
+ card_delivery_estimate 0.36 0.95 0.52 152
13
+ card_linking 0.93 0.55 0.69 179
14
+ card_not_working 0.43 0.84 0.57 152
15
+ card_payment_fee_charged 0.87 0.21 0.34 227
16
+ card_payment_not_recognised 0.31 0.56 0.40 208
17
+ card_swallowed 1.00 0.32 0.48 101
18
+ cash_withdrawal_charge 0.79 0.82 0.80 217
19
+ cash_withdrawal_not_recognised 0.29 0.62 0.39 200
20
+ change_pin 0.36 0.98 0.52 162
21
+ compromised_card 0.51 0.90 0.65 126
22
+ contactless_not_working 0.86 0.87 0.86 75
23
+ country_support 0.82 0.56 0.66 169
24
+ declined_card_payment 0.35 0.48 0.41 193
25
+ declined_cash_withdrawal 0.54 0.42 0.47 213
26
+ direct_debit_payment_not_recognised 1.00 0.07 0.13 222
27
+ disposable_card_limits 0.98 0.75 0.85 161
28
+ edit_personal_details 0.97 0.97 0.97 161
29
+ exchange_charge 0.74 0.50 0.60 161
30
+ exchange_rate 0.22 0.96 0.36 152
31
+ extra_charge_on_statement 0.32 0.92 0.47 206
32
+ failed_transfer 0.69 0.23 0.34 177
33
+ get_disposable_virtual_card 0.64 0.91 0.75 137
34
+ getting_virtual_card 0.88 0.59 0.71 138
35
+ lost_or_stolen_card 0.46 0.93 0.61 122
36
+ lost_or_stolen_phone 1.00 0.66 0.79 161
37
+ oos 0.35 0.03 0.05 3440
38
+ order_physical_card 0.32 0.70 0.44 160
39
+ passcode_forgotten 0.92 0.92 0.92 145
40
+ pending_cash_withdrawal 0.85 0.70 0.77 183
41
+ pending_top_up 0.28 0.76 0.40 189
42
+ receiving_money 0.32 0.52 0.40 135
43
+ request_refund 0.39 0.91 0.54 209
44
+ supported_cards_and_currencies 0.26 0.54 0.35 169
45
+ terminate_account 0.80 0.99 0.89 148
46
+ top_up_by_bank_transfer_charge 0.41 0.28 0.33 151
47
+ top_up_by_card_charge 0.67 0.58 0.62 154
48
+ top_up_by_cash_or_cheque 0.53 0.94 0.67 154
49
+ top_up_failed 0.40 0.87 0.55 185
50
+ top_up_limits 0.62 0.99 0.76 137
51
+ transaction_charged_twice 0.66 0.91 0.77 215
52
+ transfer_not_received_by_recipient 0.13 0.66 0.22 211
53
+ transfer_timing 0.47 0.86 0.61 168
54
+ unable_to_verify_identity 0.65 0.56 0.60 142
55
+ verify_my_identity 0.39 0.97 0.56 144
56
+ verify_top_up 1.00 0.80 0.89 166
57
+ virtual_card_not_working 0.88 0.80 0.84 81
58
+ visa_or_mastercard 0.96 0.72 0.82 175
59
+ why_verify_identity 0.86 0.19 0.31 161
60
+ wrong_amount_of_cash_received 0.84 0.37 0.51 220
61
+ wrong_exchange_rate_for_cash_withdrawal 0.69 0.20 0.31 203
62
+
63
+ accuracy 0.48 13083
64
+ macro avg 0.64 0.65 0.57 13083
65
+ weighted avg 0.56 0.48 0.43 13083
predictions-zeroshot/round5-other-models/classification_report_gemma3_4b-it-qa_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.74 0.98 0.84 9643
4
+ open 0.35 0.03 0.05 3440
5
+
6
+ accuracy 0.73 13083
7
+ macro avg 0.55 0.50 0.45 13083
8
+ weighted avg 0.64 0.73 0.63 13083
predictions-zeroshot/round5-other-models/classification_report_mistral_7b_banking.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ activate_my_card 0.96 0.41 0.58 199
4
+ age_limit 1.00 0.72 0.84 150
5
+ apple_pay_or_google_pay 0.97 0.55 0.70 166
6
+ atm_support 0.42 0.80 0.55 127
7
+ automatic_top_up 0.99 0.76 0.86 167
8
+ beneficiary_not_allowed 0.90 0.27 0.42 196
9
+ card_about_to_expire 0.72 0.51 0.60 169
10
+ card_acceptance 0.63 0.76 0.69 99
11
+ card_arrival 0.06 0.02 0.02 193
12
+ card_delivery_estimate 0.26 0.97 0.42 152
13
+ card_linking 0.49 0.84 0.61 179
14
+ card_not_working 0.40 0.82 0.54 152
15
+ card_payment_fee_charged 0.41 0.86 0.55 227
16
+ card_payment_not_recognised 0.41 0.66 0.50 208
17
+ card_swallowed 0.98 0.61 0.76 101
18
+ cash_withdrawal_charge 0.85 0.81 0.83 217
19
+ cash_withdrawal_not_recognised 0.23 0.22 0.22 200
20
+ change_pin 0.30 0.96 0.45 162
21
+ compromised_card 0.49 0.86 0.62 126
22
+ contactless_not_working 0.92 0.80 0.86 75
23
+ country_support 0.69 0.88 0.77 169
24
+ declined_card_payment 0.29 0.62 0.40 193
25
+ declined_cash_withdrawal 0.30 0.34 0.32 213
26
+ direct_debit_payment_not_recognised 0.63 0.42 0.51 222
27
+ disposable_card_limits 0.71 0.93 0.80 161
28
+ edit_personal_details 0.76 0.98 0.85 161
29
+ exchange_charge 0.74 0.56 0.64 161
30
+ exchange_rate 0.34 0.99 0.51 152
31
+ extra_charge_on_statement 0.48 0.34 0.40 206
32
+ failed_transfer 0.63 0.10 0.17 177
33
+ get_disposable_virtual_card 0.94 0.57 0.71 137
34
+ getting_virtual_card 0.90 0.87 0.88 138
35
+ lost_or_stolen_card 0.70 0.81 0.75 122
36
+ lost_or_stolen_phone 0.98 0.80 0.88 161
37
+ oos 0.21 0.00 0.00 3440
38
+ order_physical_card 0.52 0.28 0.37 160
39
+ passcode_forgotten 0.97 0.21 0.35 145
40
+ pending_cash_withdrawal 0.48 0.73 0.58 183
41
+ pending_top_up 0.32 0.68 0.44 189
42
+ receiving_money 0.22 0.41 0.29 135
43
+ request_refund 0.31 0.91 0.46 209
44
+ supported_cards_and_currencies 0.26 0.71 0.38 169
45
+ terminate_account 0.77 1.00 0.87 148
46
+ top_up_by_bank_transfer_charge 0.23 0.42 0.30 151
47
+ top_up_by_card_charge 0.45 0.71 0.55 154
48
+ top_up_by_cash_or_cheque 0.91 0.86 0.88 154
49
+ top_up_failed 0.43 0.78 0.56 185
50
+ top_up_limits 0.76 0.99 0.86 137
51
+ transaction_charged_twice 0.64 0.88 0.74 215
52
+ transfer_not_received_by_recipient 0.18 0.37 0.24 211
53
+ transfer_timing 0.23 0.99 0.37 168
54
+ unable_to_verify_identity 0.21 0.05 0.08 142
55
+ verify_my_identity 0.28 1.00 0.44 144
56
+ verify_top_up 0.97 0.81 0.89 166
57
+ virtual_card_not_working 1.00 0.49 0.66 81
58
+ visa_or_mastercard 0.95 0.67 0.79 175
59
+ why_verify_identity 0.93 0.08 0.15 161
60
+ wrong_amount_of_cash_received 0.88 0.33 0.48 220
61
+ wrong_exchange_rate_for_cash_withdrawal 0.49 0.57 0.52 203
62
+
63
+ accuracy 0.47 13083
64
+ macro avg 0.59 0.63 0.55 13083
65
+ weighted avg 0.49 0.47 0.40 13083
predictions-zeroshot/round5-other-models/classification_report_mistral_7b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.74 1.00 0.85 9643
4
+ open 0.21 0.00 0.00 3440
5
+
6
+ accuracy 0.74 13083
7
+ macro avg 0.48 0.50 0.42 13083
8
+ weighted avg 0.60 0.74 0.63 13083
predictions-zeroshot/round5-other-models/classification_report_qwen3_8b_banking.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ activate_my_card 0.84 0.90 0.87 199
4
+ age_limit 1.00 0.61 0.76 150
5
+ apple_pay_or_google_pay 1.00 0.60 0.75 166
6
+ atm_support 0.66 0.69 0.67 127
7
+ automatic_top_up 0.99 0.87 0.93 167
8
+ beneficiary_not_allowed 0.89 0.25 0.39 196
9
+ card_about_to_expire 0.99 0.61 0.75 169
10
+ card_acceptance 0.75 0.86 0.80 99
11
+ card_arrival 0.54 0.63 0.59 193
12
+ card_delivery_estimate 0.50 0.66 0.57 152
13
+ card_linking 0.67 0.64 0.65 179
14
+ card_not_working 0.74 0.84 0.79 152
15
+ card_payment_fee_charged 0.93 0.44 0.59 227
16
+ card_payment_not_recognised 0.20 0.12 0.15 208
17
+ card_swallowed 0.96 0.92 0.94 101
18
+ cash_withdrawal_charge 0.83 0.95 0.89 217
19
+ cash_withdrawal_not_recognised 0.21 0.23 0.22 200
20
+ change_pin 0.39 0.93 0.55 162
21
+ compromised_card 0.44 0.63 0.52 126
22
+ contactless_not_working 1.00 0.85 0.92 75
23
+ country_support 0.82 0.88 0.85 169
24
+ declined_card_payment 0.31 0.68 0.43 193
25
+ declined_cash_withdrawal 0.80 0.39 0.52 213
26
+ direct_debit_payment_not_recognised 0.97 0.45 0.62 222
27
+ disposable_card_limits 0.84 0.83 0.83 161
28
+ edit_personal_details 0.94 0.97 0.95 161
29
+ exchange_charge 0.47 0.87 0.61 161
30
+ exchange_rate 0.34 0.99 0.51 152
31
+ extra_charge_on_statement 0.40 0.93 0.56 206
32
+ failed_transfer 0.37 0.42 0.39 177
33
+ get_disposable_virtual_card 0.58 0.80 0.67 137
34
+ getting_virtual_card 0.97 0.56 0.71 138
35
+ lost_or_stolen_card 0.54 0.93 0.68 122
36
+ lost_or_stolen_phone 0.94 0.93 0.93 161
37
+ oos 0.44 0.07 0.12 3440
38
+ order_physical_card 0.43 0.67 0.52 160
39
+ passcode_forgotten 1.00 0.88 0.93 145
40
+ pending_cash_withdrawal 0.67 0.76 0.71 183
41
+ pending_top_up 0.33 0.53 0.40 189
42
+ receiving_money 0.40 0.36 0.38 135
43
+ request_refund 0.36 0.81 0.50 209
44
+ supported_cards_and_currencies 0.35 0.66 0.46 169
45
+ terminate_account 0.82 1.00 0.90 148
46
+ top_up_by_bank_transfer_charge 0.33 0.26 0.29 151
47
+ top_up_by_card_charge 0.43 0.83 0.57 154
48
+ top_up_by_cash_or_cheque 0.38 0.91 0.53 154
49
+ top_up_failed 0.33 0.88 0.48 185
50
+ top_up_limits 0.90 0.98 0.94 137
51
+ transaction_charged_twice 0.59 0.91 0.72 215
52
+ transfer_not_received_by_recipient 0.19 0.62 0.29 211
53
+ transfer_timing 0.34 0.92 0.50 168
54
+ unable_to_verify_identity 0.51 0.82 0.63 142
55
+ verify_my_identity 0.64 0.93 0.76 144
56
+ verify_top_up 0.98 0.87 0.92 166
57
+ virtual_card_not_working 0.95 0.85 0.90 81
58
+ visa_or_mastercard 0.95 0.72 0.82 175
59
+ why_verify_identity 0.92 0.41 0.57 161
60
+ wrong_amount_of_cash_received 0.88 0.75 0.81 220
61
+ wrong_exchange_rate_for_cash_withdrawal 0.47 0.60 0.53 203
62
+
63
+ accuracy 0.54 13083
64
+ macro avg 0.65 0.71 0.64 13083
65
+ weighted avg 0.59 0.54 0.50 13083
predictions-zeroshot/round5-other-models/classification_report_qwen3_8b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.74 0.97 0.84 9643
4
+ open 0.44 0.07 0.12 3440
5
+
6
+ accuracy 0.73 13083
7
+ macro avg 0.59 0.52 0.48 13083
8
+ weighted avg 0.67 0.73 0.65 13083
predictions-zeroshot/round5-other-models/classification_report_tulu3_8b_banking.txt ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ activate_my_card 0.83 0.83 0.83 199
4
+ age_limit 0.96 0.71 0.82 150
5
+ apple_pay_or_google_pay 0.96 0.52 0.67 166
6
+ atm_support 0.93 0.73 0.82 127
7
+ automatic_top_up 0.84 0.89 0.86 167
8
+ beneficiary_not_allowed 0.97 0.19 0.32 196
9
+ card_about_to_expire 0.96 0.41 0.58 169
10
+ card_acceptance 0.59 0.97 0.73 99
11
+ card_arrival 0.36 0.89 0.51 193
12
+ card_delivery_estimate 0.68 0.43 0.52 152
13
+ card_linking 0.92 0.46 0.61 179
14
+ card_not_working 0.57 0.88 0.69 152
15
+ card_payment_fee_charged 0.96 0.52 0.67 227
16
+ card_payment_not_recognised 0.31 0.12 0.17 208
17
+ card_swallowed 0.90 0.77 0.83 101
18
+ cash_withdrawal_charge 0.88 0.93 0.90 217
19
+ cash_withdrawal_not_recognised 0.17 0.32 0.22 200
20
+ change_pin 0.28 0.99 0.44 162
21
+ compromised_card 0.64 0.85 0.73 126
22
+ contactless_not_working 1.00 0.83 0.91 75
23
+ country_support 0.92 0.75 0.82 169
24
+ declined_card_payment 0.34 0.69 0.46 193
25
+ declined_cash_withdrawal 0.31 0.31 0.31 213
26
+ direct_debit_payment_not_recognised 0.97 0.14 0.24 222
27
+ disposable_card_limits 1.00 0.19 0.32 161
28
+ edit_personal_details 0.74 0.96 0.84 161
29
+ exchange_charge 0.75 0.47 0.58 161
30
+ exchange_rate 0.19 0.99 0.32 152
31
+ extra_charge_on_statement 0.57 0.78 0.66 206
32
+ failed_transfer 0.15 0.07 0.10 177
33
+ get_disposable_virtual_card 0.31 0.98 0.47 137
34
+ getting_virtual_card 0.85 0.36 0.51 138
35
+ lost_or_stolen_card 0.58 0.90 0.71 122
36
+ lost_or_stolen_phone 0.99 0.84 0.91 161
37
+ oos 0.00 0.00 0.00 3440
38
+ order_physical_card 0.48 0.54 0.51 160
39
+ passcode_forgotten 0.82 0.22 0.35 145
40
+ pending_cash_withdrawal 0.35 0.82 0.50 183
41
+ pending_top_up 0.43 0.68 0.53 189
42
+ receiving_money 0.44 0.47 0.46 135
43
+ request_refund 0.31 0.93 0.46 209
44
+ supported_cards_and_currencies 0.27 0.32 0.29 169
45
+ terminate_account 0.94 1.00 0.97 148
46
+ top_up_by_bank_transfer_charge 0.24 0.23 0.24 151
47
+ top_up_by_card_charge 0.58 0.48 0.53 154
48
+ top_up_by_cash_or_cheque 0.34 0.96 0.50 154
49
+ top_up_failed 0.33 0.84 0.47 185
50
+ top_up_limits 0.61 0.99 0.75 137
51
+ transaction_charged_twice 0.39 0.91 0.54 215
52
+ transfer_not_received_by_recipient 0.10 0.38 0.15 211
53
+ transfer_timing 0.26 0.97 0.41 168
54
+ unable_to_verify_identity 0.00 0.00 0.00 142
55
+ verify_my_identity 0.28 1.00 0.44 144
56
+ verify_top_up 0.80 0.93 0.86 166
57
+ virtual_card_not_working 0.98 0.59 0.74 81
58
+ visa_or_mastercard 0.99 0.63 0.77 175
59
+ why_verify_identity nan 0.00 0.00 161
60
+ wrong_amount_of_cash_received 0.84 0.42 0.56 220
61
+ wrong_exchange_rate_for_cash_withdrawal nan 0.00 0.00 203
62
+
63
+ accuracy 0.45 13083
64
+ macro avg 0.60 0.61 0.53 13083
65
+ weighted avg 0.44 0.45 0.38 13083
predictions-zeroshot/round5-other-models/classification_report_tulu3_8b_banking_open_vs_known.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ known 0.74 1.00 0.85 9643
4
+ open 0.00 0.00 0.00 3440
5
+
6
+ accuracy 0.74 13083
7
+ macro avg 0.37 0.50 0.42 13083
8
+ weighted avg 0.54 0.74 0.63 13083