djsull/sent_classification

Browse files

Files changed (9) hide show

README.md +19 -18
added_tokens.json +3 -0
config.json +32 -15
model.safetensors +2 -2
special_tokens_map.json +7 -7
tokenizer.json +0 -0
tokenizer_config.json +278 -16
training_args.bin +1 -1
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,7 @@
 ---
 library_name: transformers
-base_model: klue/roberta-base
 tags:
 - generated_from_trainer
 metrics:
@@ -15,14 +16,14 @@ should probably proofread and complete it, then remove this comment. -->
 # aha_sentence_classification
-This model is a fine-tuned version of [klue/roberta-base](https://huggingface.co/klue/roberta-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8977
-- Accuracy: 0.6701
-- F1 Micro: 0.6701
-- F1 Macro: 0.6301
-- Precision Macro: 0.5833
-- Recall Macro: 0.7331
 ## Model description
@@ -54,16 +55,16 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step  | Validation Loss | Accuracy | F1 Micro | F1 Macro | Precision Macro | Recall Macro |
 |:-------------:|:------:|:-----:|:---------------:|:--------:|:--------:|:--------:|:---------------:|:------------:|
-| 1.3115        | 0.5949 | 1000  | 1.3492          | 0.5285   | 0.5285   | 0.5233   | 0.5079          | 0.6638       |
-| 0.9152        | 1.1898 | 2000  | 1.0651          | 0.6243   | 0.6243   | 0.6065   | 0.5681          | 0.7156       |
-| 0.8716        | 1.7847 | 3000  | 1.0069          | 0.6374   | 0.6374   | 0.6044   | 0.5635          | 0.7218       |
-| 0.7678        | 2.3795 | 4000  | 0.9808          | 0.6293   | 0.6293   | 0.6154   | 0.5790          | 0.7316       |
-| 0.7341        | 2.9744 | 5000  | 0.8977          | 0.6701   | 0.6701   | 0.6301   | 0.5833          | 0.7331       |
-| 0.6194        | 3.5693 | 6000  | 0.9595          | 0.6499   | 0.6499   | 0.6221   | 0.5894          | 0.7091       |
-| 0.4718        | 4.1642 | 7000  | 0.9216          | 0.6731   | 0.6731   | 0.6361   | 0.6053          | 0.6995       |
-| 0.4739        | 4.7591 | 8000  | 0.9148          | 0.6739   | 0.6739   | 0.6484   | 0.6171          | 0.7059       |
-| 0.433         | 5.3540 | 9000  | 1.0006          | 0.6616   | 0.6616   | 0.6298   | 0.5953          | 0.7092       |
-| 0.3989        | 5.9488 | 10000 | 0.9988          | 0.6626   | 0.6626   | 0.6412   | 0.6157          | 0.7024       |
 ### Framework versions

 ---
 library_name: transformers
+license: apache-2.0
+base_model: skt/A.X-Encoder-base
 tags:
 - generated_from_trainer
 metrics:
 # aha_sentence_classification
+This model is a fine-tuned version of [skt/A.X-Encoder-base](https://huggingface.co/skt/A.X-Encoder-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8454
+- Accuracy: 0.6900
+- F1 Micro: 0.6900
+- F1 Macro: 0.6503
+- Precision Macro: 0.6078
+- Recall Macro: 0.7221
 ## Model description
 | Training Loss | Epoch  | Step  | Validation Loss | Accuracy | F1 Micro | F1 Macro | Precision Macro | Recall Macro |
 |:-------------:|:------:|:-----:|:---------------:|:--------:|:--------:|:--------:|:---------------:|:------------:|
+| 0.9702        | 0.5949 | 1000  | 1.1520          | 0.5590   | 0.5590   | 0.5444   | 0.5142          | 0.6791       |
+| 0.7293        | 1.1898 | 2000  | 1.0469          | 0.5992   | 0.5992   | 0.5966   | 0.5599          | 0.7238       |
+| 0.7779        | 1.7847 | 3000  | 0.9977          | 0.6278   | 0.6278   | 0.5964   | 0.5646          | 0.7274       |
+| 0.5545        | 2.3795 | 4000  | 0.9847          | 0.6290   | 0.6290   | 0.6208   | 0.5849          | 0.7236       |
+| 0.5692        | 2.9744 | 5000  | 0.8454          | 0.6900   | 0.6900   | 0.6503   | 0.6078          | 0.7221       |
+| 0.3962        | 3.5693 | 6000  | 1.0074          | 0.6488   | 0.6488   | 0.6316   | 0.6093          | 0.7081       |
+| 0.1624        | 4.1642 | 7000  | 1.1059          | 0.6732   | 0.6732   | 0.6533   | 0.6322          | 0.6930       |
+| 0.1816        | 4.7591 | 8000  | 1.1277          | 0.6872   | 0.6872   | 0.6513   | 0.6429          | 0.6690       |
+| 0.0934        | 5.3540 | 9000  | 1.4084          | 0.6882   | 0.6882   | 0.6468   | 0.6380          | 0.6649       |
+| 0.0882        | 5.9488 | 10000 | 1.4941          | 0.6918   | 0.6918   | 0.6450   | 0.6428          | 0.6606       |
 ### Framework versions

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<pad>": 49999
+}

config.json CHANGED Viewed

@@ -1,15 +1,24 @@
 {
   "architectures": [
-    "RobertaForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,
-  "classifier_dropout": null,
   "dtype": "float32",
-  "eos_token_id": 2,
   "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "\ud575\uc2ec\uc9c8\ubb38",
@@ -29,8 +38,9 @@
     "14": "\uc608\uc2dc\ub370\uc774\ud130/\uc99d\ube59",
     "15": "\uc758\ubbf8/\ud574\ub2f9\uc0ac\ud56d\uc5c6\uc74c"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "\uadfc\uac70/\uc124\uba85": 7,
     "\ub300\uc548/\ube44\uad50": 10,
@@ -50,16 +60,23 @@
     "\ud575\uc2ec\uc9c8\ubb38": 0
   },
   "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "roberta",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "tokenizer_class": "BertTokenizer",
   "transformers_version": "4.56.1",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 32000
 }

 {
   "architectures": [
+    "ModernBertForSequenceClassification"
   ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
   "bos_token_id": 0,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "mean",
+  "cls_token_id": 0,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
   "dtype": "float32",
+  "embedding_dropout": 0.0,
+  "eos_token_id": 1,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000,
   "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
   "hidden_size": 768,
   "id2label": {
     "0": "\ud575\uc2ec\uc9c8\ubb38",
     "14": "\uc608\uc2dc\ub370\uc774\ud130/\uc99d\ube59",
     "15": "\uc758\ubbf8/\ud574\ub2f9\uc0ac\ud56d\uc5c6\uc74c"
   },
+  "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
+  "intermediate_size": 1152,
   "label2id": {
     "\uadfc\uac70/\uc124\uba85": 7,
     "\ub300\uc548/\ube44\uad50": 10,
     "\ud575\uc2ec\uc9c8\ubb38": 0
   },
   "layer_norm_eps": 1e-05,
+  "local_attention": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 16384,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
   "num_attention_heads": 12,
+  "num_hidden_layers": 22,
+  "pad_token_id": 49999,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
+  "repad_logits_with_grad": false,
+  "sep_token_id": 1,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
   "transformers_version": "4.56.1",
+  "vocab_size": 50000
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5029cc5e5c4f78dbe3082b65b81081564ee6b3cb0782b23de8341e0360a0ff23
-size 442545808

 version https://git-lfs.github.com/spec/v1
+oid sha256:5163bed608f7349e76f6422df8f7c1262a06bb926993bf328bfba2d806281f12
+size 597352360

special_tokens_map.json CHANGED Viewed

@@ -1,48 +1,48 @@
 {
   "bos_token": {
-    "content": "[CLS]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "cls_token": {
-    "content": "[CLS]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "[SEP]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "mask_token": {
-    "content": "[MASK]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "sep_token": {
-    "content": "[SEP]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
-    "content": "[UNK]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
   "bos_token": {
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "cls_token": {
+    "content": "<cls>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
+    "content": "<\\s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "mask_token": {
+    "content": "<mask>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "<pad>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "sep_token": {
+    "content": "<sep>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "added_tokens_decoder": {
     "0": {
-      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -9,7 +9,7 @@
       "special": true
     },
     "1": {
-      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -17,7 +17,7 @@
       "special": true
     },
     "2": {
-      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -25,7 +25,7 @@
       "special": true
     },
     "3": {
-      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -33,7 +33,271 @@
       "special": true
     },
     "4": {
-      "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -41,20 +305,18 @@
       "special": true
     }
   },
-  "bos_token": "[CLS]",
-  "clean_up_tokenization_spaces": false,
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
   "do_lower_case": false,
-  "eos_token": "[SEP]",
   "extra_special_tokens": {},
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "never_split": null,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
 }

 {
   "added_tokens_decoder": {
     "0": {
+      "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "1": {
+      "content": "<\\s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "2": {
+      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "3": {
+      "content": "<sep>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "4": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<cls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<unused0>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<unused1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<unused2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<unused3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<unused4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<unused5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<unused6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<unused7>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<unused8>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<unused9>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<unused10>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<unused11>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<unused12>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "<unused13>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "<unused14>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "<unused15>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "<unused16>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "23": {
+      "content": "<unused17>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "24": {
+      "content": "<unused18>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "25": {
+      "content": "<unused19>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "26": {
+      "content": "<unused20>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "27": {
+      "content": "<unused21>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "28": {
+      "content": "<unused22>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "29": {
+      "content": "<unused23>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "<unused24>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "31": {
+      "content": "<unused25>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32": {
+      "content": "<unused26>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "33": {
+      "content": "<unused27>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "34": {
+      "content": "<unused28>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "35": {
+      "content": "<unused29>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "36": {
+      "content": "<unused30>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49999": {
+      "content": "<pad>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     }
   },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<cls>",
   "do_lower_case": false,
+  "eos_token": "<\\s>",
   "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 1024,
+  "pad_token": "<pad>",
+  "sep_token": "<sep>",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89a1c4e17ad5d25dbf2f8b28524d6251bdac9277288d757f1766e38116fae9a5
 size 5905

 version https://git-lfs.github.com/spec/v1
+oid sha256:91f53086e3bb6cbe98433b44baa7f92d9e808cae3146fd2be289265614a28a4e
 size 5905

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff