Minte committed on
Commit
e61d7b5
·
1 Parent(s): d191a12

Fix Afan Oromo model processing and improve error handling

Browse files
Files changed (1) hide show
  1. app.py +21 -8
app.py CHANGED
@@ -63,7 +63,7 @@ except Exception as e:
63
  print("[ERROR] Failed to load SeamlessM4T model:", e)
64
  traceback.print_exc()
65
 
66
- # Load Afan Oromo model
67
  try:
68
  oromo_processor = AutoProcessor.from_pretrained("osanseviero/seamless-copy")
69
  oromo_model = AutoModelForSpeechSeq2Seq.from_pretrained("osanseviero/seamless-copy").to("cpu")
@@ -117,15 +117,28 @@ def transcribe_audio(audio_file, language):
117
  transcription = processor.batch_decode(predicted_ids)[0]
118
 
119
  elif language == "Afan Oromo":
120
- # Seamless-copy processing
121
- inputs = processor(audios=audio, sampling_rate=16000, return_tensors="pt")
122
- with torch.no_grad():
123
- generated_ids = model.generate(**inputs, tgt_lang=LANGUAGE_CONFIG[language]["code"])
124
- transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  else:
127
- # Standard SeamlessM4T processing
128
- inputs = processor(audios=audio, sampling_rate=16000, return_tensors="pt")
129
  with torch.no_grad():
130
  generated_ids = model.generate(**inputs, tgt_lang=LANGUAGE_CONFIG[language]["code"])
131
  transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
63
  print("[ERROR] Failed to load SeamlessM4T model:", e)
64
  traceback.print_exc()
65
 
66
+ # Load Afan Oromo model - FIXED IMPLEMENTATION
67
  try:
68
  oromo_processor = AutoProcessor.from_pretrained("osanseviero/seamless-copy")
69
  oromo_model = AutoModelForSpeechSeq2Seq.from_pretrained("osanseviero/seamless-copy").to("cpu")
 
117
  transcription = processor.batch_decode(predicted_ids)[0]
118
 
119
  elif language == "Afan Oromo":
120
+ # FIXED: Afan Oromo uses different processing
121
+ # The seamless-copy model might work differently
122
+ try:
123
+ # Try without tgt_lang first
124
+ inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt") # FIXED: audio instead of audios
125
+ with torch.no_grad():
126
+ generated_ids = model.generate(**inputs)
127
+ transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
128
+ except Exception as oromo_error:
129
+ print(f"[WARNING] Afan Oromo standard processing failed: {oromo_error}")
130
+ # Fallback: try with text generation
131
+ try:
132
+ inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
133
+ with torch.no_grad():
134
+ outputs = model(**inputs)
135
+ transcription = processor.decode(outputs.logits.argmax(dim=-1)[0])
136
+ except Exception as fallback_error:
137
+ transcription = f"Afan Oromo transcription failed: {str(fallback_error)[:100]}"
138
 
139
  else:
140
+ # Standard SeamlessM4T processing - FIXED: audio instead of audios
141
+ inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt") # FIXED HERE
142
  with torch.no_grad():
143
  generated_ids = model.generate(**inputs, tgt_lang=LANGUAGE_CONFIG[language]["code"])
144
  transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]