import os

import ctranslate2
import gradio as gr
from huggingface_hub import snapshot_download
from sentencepiece import SentencePieceProcessor

title = "MADLAD-400 Translation Demo"
description = """
<p>
Translator using <a href='https://arxiv.org/abs/2309.04662' target='_blank'>MADLAD-400</a>, a multilingual machine translation model trained on 250 billion tokens covering over 450 languages using publicly available data. This demo application uses the <a href="https://huggingface.co/Heng666/madlad400-3b-mt-ct2-int8">Heng666/madlad400-3b-mt-ct2-int8</a> model, a CTranslate2-optimized (int8 quantized) version of <a href="https://huggingface.co/google/madlad400-3b-mt">google/madlad400-3b-mt</a>.
</p>
"""

# As per https://opennmt.net/CTranslate2/performance.html,
# CTranslate2 is compiled with Intel MKL by default, and enabling the
# experimental packed GEMM has a significant positive performance impact.
os.environ["CT2_USE_EXPERIMENTAL_PACKED_GEMM"] = "1"

# Download the converted model from the Hugging Face Hub and load the
# SentencePiece tokenizer and the CTranslate2 translator from it.
model_name = "Heng666/madlad400-3b-mt-ct2-int8"
model_path = snapshot_download(model_name)

tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/spiece.model")
translator = ctranslate2.Translator(model_path)

# MADLAD-400 selects the target language with a "<2xx>" prefix token
# (e.g. "<2en>"). Decode the first 460 vocabulary entries and keep the
# language codes from the tokens that follow this pattern.
tokens = [tokenizer.decode(i) for i in range(460)]
lang_codes = [token[2:-1] for token in tokens if token.startswith("<2")]
def translate(input_text, target_language):
    # Prepend the "<2xx>" target-language token, tokenize, and translate.
    input_tokens = tokenizer.encode(f"<2{target_language}> {input_text}", out_type=str)
    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        # max_batch_size=1024,
        beam_size=1,
        no_repeat_ngram_size=1,
        # repetition_penalty=2,
    )
    # Detokenize the best hypothesis back into plain text.
    translated_sentence = tokenizer.decode(results[0].hypotheses[0])
    return translated_sentence
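
# Illustrative direct call (a sketch, not executed here); it assumes "fr" is
# among the language codes discovered above:
#
#   translate("Hello, world!", "fr")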
def translate_interface(input_text, target_language):
    # Thin wrapper used as the Gradio callback.
    translated_text = translate(input_text, target_language)
    return translated_text


# Build the Gradio UI: a text box for the source text, a dropdown with the
# discovered target-language codes (default: "ml", Malayalam), and an output box.
input_text = gr.Textbox(
    label="Input Text",
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
)
target_language = gr.Dropdown(lang_codes, value="ml", label="Target Language")
output_text = gr.Textbox(label="Translated Text")

gr.Interface(
    title=title,
    description=description,
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
).launch()
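
# Minimal sketch of batch use without the web UI (assumes the snapshot above
# has already downloaded and that the chosen code, here "ml", is in
# lang_codes). Since launch() blocks, run this from a separate script or an
# interactive session rather than below this line:
#
#   sentences = [
#       "Imagine a world in which every single person on the planet",
#       "is given free access to the sum of all human knowledge.",
#   ]
#   for sentence in sentences:
#       print(translate(sentence, "ml"))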
 
			
