Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	added app file
Browse files
- app.py +49 -0
- languages.py +47 -0
- requirements.txt +3 -0
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,49 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from transformers import AutoModelForSequenceClassification, AutoTokenizer
         | 
| 2 | 
            +
            from languages import LANGUANGE_MAP
         | 
| 3 | 
            +
            import gradio as gr
         | 
| 4 | 
            +
            import torch
         | 
| 5 | 
            +
             | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Hub identifier of the fine-tuned XLM-RoBERTa language-detection checkpoint.
            model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"

            # The tokenizer and the classification head are loaded from the same
            # checkpoint; the two loads are independent of each other.
            tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
            model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            def detect_language(sentence):
                """Return the name of the language predicted for ``sentence``.

                Args:
                    sentence: The text to classify; it is passed straight to the
                        module-level ``tokenizer``.

                Returns:
                    The ``LANGUANGE_MAP`` entry for the highest-scoring class.
                """
                # truncation=True clips over-long input to the tokenizer's maximum
                # length instead of letting it overflow the model's position limit.
                encoded = tokenizer(sentence, return_tensors='pt', truncation=True)

                # Inference only: no autograd graph is needed.
                with torch.no_grad():
                    logits = model(**encoded).logits

                # Softmax is monotonic, so taking the argmax of the raw logits picks
                # the same class as softmax followed by max.
                best_class = logits.argmax(dim=-1).item()
                return LANGUANGE_MAP[best_class]
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            # Sample sentences offered as one-click examples in the UI.
            examples = [
                "I've been waiting for a HuggingFace course my whole life.",
                "恭喜发财!",
                "Jumpa lagi, saya pergi kerja.",
                "你食咗飯未呀?",
                "もう食べましたか?",
                "as-tu mangé"
            ]

            # Components come from the top-level gradio namespace: the gr.inputs /
            # gr.outputs sub-modules are deprecated and missing from current Gradio
            # releases, and requirements.txt does not pin a Gradio version.
            inputs = gr.Textbox(placeholder="Enter your text here", label="Text content", lines=5)
            outputs = gr.Label(num_top_classes=3, label="Language detected:")

            # Text shown with the interface; spellings corrected ('Mongolian',
            # 'Ukrainian').
            article = """
            Supported languages: 
                'Arabic', 'Basque', 'Breton', 'Catalan', 'Chinese_China', 'Chinese_Hongkong', 'Chinese_Taiwan', 'Chuvash', 'Czech', 
                'Dhivehi', 'Dutch', 'English', 'Esperanto', 'Estonian', 'French', 'Frisian', 'Georgian', 'German', 'Greek', 'Hakha_Chin', 
                'Indonesian', 'Interlingua', 'Italian', 'Japanese', 'Kabyle', 'Kinyarwanda', 'Kyrgyz', 'Latvian', 'Maltese', 
                'Mongolian', 'Persian', 'Polish', 'Portuguese', 'Romanian', 'Romansh_Sursilvan', 'Russian', 'Sakha', 'Slovenian', 
                'Spanish', 'Swedish', 'Tamil', 'Tatar', 'Turkish', 'Ukrainian', 'Welsh'
            """

            # NOTE(review): the deprecated `verbose=True` keyword is dropped; it
            # appears to be rejected by newer Gradio releases - confirm against the
            # installed version.
            gr.Interface(
                fn=detect_language,
                inputs=inputs,
                outputs=outputs,
                examples=examples,
                title="Language Detector",
                description="A simple language detector fine-tuned from xlm-roberta-base model which can detect 45 languages.",
                article=article,
                theme="huggingface"
            ).launch()
         | 
    	
        languages.py
    ADDED
    
    | @@ -0,0 +1,47 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Maps the classifier's output class index to a human-readable language
            # name. The constant's spelling ("LANGUANGE") is kept as-is because
            # app.py imports it under this exact name.
            LANGUANGE_MAP = {
                0: 'Arabic',
                1: 'Basque',
                2: 'Breton',
                3: 'Catalan',
                4: 'Chinese_China',
                5: 'Chinese_Hongkong',
                6: 'Chinese_Taiwan',
                7: 'Chuvash',
                8: 'Czech',
                9: 'Dhivehi',
                10: 'Dutch',
                11: 'English',
                12: 'Esperanto',
                13: 'Estonian',
                14: 'French',
                15: 'Frisian',
                16: 'Georgian',
                17: 'German',
                18: 'Greek',
                19: 'Hakha_Chin',
                20: 'Indonesian',
                21: 'Interlingua',
                22: 'Italian',
                23: 'Japanese',
                24: 'Kabyle',
                25: 'Kinyarwanda',
                26: 'Kyrgyz',
                27: 'Latvian',
                28: 'Maltese',
                29: 'Mongolian',
                30: 'Persian',
                31: 'Polish',
                32: 'Portuguese',
                33: 'Romanian',
                34: 'Romansh_Sursilvan',
                35: 'Russian',
                36: 'Sakha',
                37: 'Slovenian',
                38: 'Spanish',
                39: 'Swedish',
                40: 'Tamil',
                41: 'Tatar',
                42: 'Turkish',
                43: 'Ukrainian',  # spelling corrected from 'Ukranian'
                44: 'Welsh',
            }
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            gradio
         | 
| 2 | 
            +
            transformers
         | 
| 3 | 
            +
            torch
         |