Replace whole-word `LossKwargs` with `TransformersKwargs` in modeling*.py
Browse files
- modeling_opensci.py +2 -2
modeling_opensci.py
CHANGED
|
@@ -40,7 +40,7 @@ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
| 40 |
from transformers.processing_utils import Unpack
|
| 41 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
| 42 |
from transformers.utils import (
|
| 43 |
-
LossKwargs,
|
| 44 |
add_code_sample_docstrings,
|
| 45 |
add_start_docstrings,
|
| 46 |
add_start_docstrings_to_model_forward,
|
|
@@ -750,7 +750,7 @@ class OpensciModel(OpensciPreTrainedModel):
|
|
| 750 |
return causal_mask
|
| 751 |
|
| 752 |
|
| 753 |
-
class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
|
| 754 |
|
| 755 |
|
| 756 |
class OpensciForCausalLM(OpensciPreTrainedModel, GenerationMixin):
|
|
|
|
| 40 |
from transformers.processing_utils import Unpack
|
| 41 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
| 42 |
from transformers.utils import (
|
| 43 |
+
TransformersKwargs,
|
| 44 |
add_code_sample_docstrings,
|
| 45 |
add_start_docstrings,
|
| 46 |
add_start_docstrings_to_model_forward,
|
|
|
|
| 750 |
return causal_mask
|
| 751 |
|
| 752 |
|
| 753 |
+
class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...
|
| 754 |
|
| 755 |
|
| 756 |
class OpensciForCausalLM(OpensciPreTrainedModel, GenerationMixin):
|