Update preprocessing_molmo.py
Browse files - preprocessing_molmo.py (+9 -6)
    	
        preprocessing_molmo.py
    CHANGED
    
    | @@ -23,7 +23,7 @@ from transformers.processing_utils import ( | |
| 23 | 
             
                ProcessorMixin,
         | 
| 24 | 
             
            )
         | 
| 25 |  | 
| 26 | 
            -
            from transformers.tokenization_utils_base import TextInput
         | 
| 27 | 
             
            from transformers.utils import logging
         | 
| 28 |  | 
| 29 | 
             
            from transformers import AutoTokenizer
         | 
| @@ -116,6 +116,8 @@ class MolmoProcessor(ProcessorMixin): | |
| 116 | 
             
                    self,
         | 
| 117 | 
             
                    text: TextInput = None,
         | 
| 118 | 
             
                    images: ImageInput = None,
         | 
|  | |
|  | |
| 119 | 
             
                    **kwargs: Unpack[MolmoProcessorKwargs],
         | 
| 120 | 
             
                ):
         | 
| 121 | 
             
                    output_kwargs = self._merge_kwargs(
         | 
| @@ -124,11 +126,12 @@ class MolmoProcessor(ProcessorMixin): | |
| 124 | 
             
                        **kwargs,
         | 
| 125 | 
             
                    )
         | 
| 126 |  | 
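| 127 | 
            -
                    tokens = self.get_tokens_input(
         | 
| 128 | 
            -
                        text,
         | 
| 129 | 
            -
                        output_kwargs["text_kwargs"]["message_format"],
         | 
| 130 | 
            -
                        output_kwargs["text_kwargs"]["always_start_with_space"],
         | 
| 131 | 
            -
                    )
         | 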
|  | |
| 132 |  | 
| 133 | 
             
                    image_token_id = self.special_token_ids[IMAGE_PROMPT]
         | 
| 134 |  | 
|  | |
| 23 | 
             
                ProcessorMixin,
         | 
| 24 | 
             
            )
         | 
| 25 |  | 
| 26 | 
            +
            from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
         | 
| 27 | 
             
            from transformers.utils import logging
         | 
| 28 |  | 
| 29 | 
             
            from transformers import AutoTokenizer
         | 
|  | |
| 116 | 
             
                    self,
         | 
| 117 | 
             
                    text: TextInput = None,
         | 
| 118 | 
             
                    images: ImageInput = None,
         | 
| 119 | 
            +
                    *,
         | 
| 120 | 
            +
                    tokens: Optional[PreTokenizedInput] = None,
         | 
| 121 | 
             
                    **kwargs: Unpack[MolmoProcessorKwargs],
         | 
| 122 | 
             
                ):
         | 
| 123 | 
             
                    output_kwargs = self._merge_kwargs(
         | 
|  | |
| 126 | 
             
                        **kwargs,
         | 
| 127 | 
             
                    )
         | 
| 128 |  | 
| 129 | 
            +
                    if tokens is None:
         | 
| 130 | 
            +
                        tokens = self.get_tokens_input(
         | 
| 131 | 
            +
                            text,
         | 
| 132 | 
            +
                            output_kwargs["text_kwargs"]["message_format"],
         | 
| 133 | 
            +
                            output_kwargs["text_kwargs"]["always_start_with_space"],
         | 
| 134 | 
            +
                        )
         | 
| 135 |  | 
| 136 | 
             
                    image_token_id = self.special_token_ids[IMAGE_PROMPT]
         | 
| 137 |  | 
