Replace whole-word `LossKwargs` with `TransformersKwargs` in modeling*.py
Browse files
- modeling_opensci.py +2 -2
modeling_opensci.py
CHANGED
|
@@ -40,7 +40,7 @@ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
| 40 |
from transformers.processing_utils import Unpack
|
| 41 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
| 42 |
from transformers.utils import (
|
| 43 |
-
LossKwargs,
|
| 44 |
add_code_sample_docstrings,
|
| 45 |
add_start_docstrings,
|
| 46 |
add_start_docstrings_to_model_forward,
|
|
@@ -750,7 +750,7 @@ class OpensciModel(OpensciPreTrainedModel):
|
|
| 750 |
return causal_mask
|
| 751 |
|
| 752 |
|
| 753 |
-
class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
|
| 754 |
|
| 755 |
|
| 756 |
class OpensciForCausalLM(OpensciPreTrainedModel, GenerationMixin):
|
|
|
|
| 40 |
from transformers.processing_utils import Unpack
|
| 41 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
| 42 |
from transformers.utils import (
|
| 43 |
+
TransformersKwargs,
|
| 44 |
add_code_sample_docstrings,
|
| 45 |
add_start_docstrings,
|
| 46 |
add_start_docstrings_to_model_forward,
|
|
|
|
| 750 |
return causal_mask
|
| 751 |
|
| 752 |
|
| 753 |
+
class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...
|
| 754 |
|
| 755 |
|
| 756 |
class OpensciForCausalLM(OpensciPreTrainedModel, GenerationMixin):
|