Trailing space breaks tokenization
Browse files
    	
        README.md
    CHANGED
    
    | @@ -98,7 +98,7 @@ from hf_olmo import OLMoForCausalLM, OLMoTokenizerFast | |
| 98 |  | 
| 99 | 
             
            olmo = OLMoForCausalLM.from_pretrained("allenai/OLMo-1B")
         | 
| 100 | 
             
            tokenizer = OLMoTokenizerFast.from_pretrained("allenai/OLMo-1B")
         | 
| 101 | 
            -
            message = ["Language modeling is "]
         | 
| 102 | 
             
            inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
         | 
| 103 | 
             
            # optional verifying cuda
         | 
| 104 | 
             
            # inputs = {k: v.to('cuda') for k,v in inputs.items()}
         | 
|  | |
| 98 |  | 
| 99 | 
             
            olmo = OLMoForCausalLM.from_pretrained("allenai/OLMo-1B")
         | 
| 100 | 
             
            tokenizer = OLMoTokenizerFast.from_pretrained("allenai/OLMo-1B")
         | 
| 101 | 
            +
            message = ["Language modeling is"]
         | 
| 102 | 
             
            inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
         | 
| 103 | 
             
            # optional verifying cuda
         | 
| 104 | 
             
            # inputs = {k: v.to('cuda') for k,v in inputs.items()}
         | 

