Minte
committed on
Commit
·
e61d7b5
1
Parent(s):
d191a12
Fix Afan Oromo model processing and improve error handling
Browse files
app.py
CHANGED
|
@@ -63,7 +63,7 @@ except Exception as e:
|
|
| 63 |
print("[ERROR] Failed to load SeamlessM4T model:", e)
|
| 64 |
traceback.print_exc()
|
| 65 |
|
| 66 |
-
# Load Afan Oromo model
|
| 67 |
try:
|
| 68 |
oromo_processor = AutoProcessor.from_pretrained("osanseviero/seamless-copy")
|
| 69 |
oromo_model = AutoModelForSpeechSeq2Seq.from_pretrained("osanseviero/seamless-copy").to("cpu")
|
|
@@ -117,15 +117,28 @@ def transcribe_audio(audio_file, language):
|
|
| 117 |
transcription = processor.batch_decode(predicted_ids)[0]
|
| 118 |
|
| 119 |
elif language == "Afan Oromo":
|
| 120 |
-
#
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
else:
|
| 127 |
-
# Standard SeamlessM4T processing
|
| 128 |
-
inputs = processor(
|
| 129 |
with torch.no_grad():
|
| 130 |
generated_ids = model.generate(**inputs, tgt_lang=LANGUAGE_CONFIG[language]["code"])
|
| 131 |
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
|
| 63 |
print("[ERROR] Failed to load SeamlessM4T model:", e)
|
| 64 |
traceback.print_exc()
|
| 65 |
|
| 66 |
+
# Load Afan Oromo model - FIXED IMPLEMENTATION
|
| 67 |
try:
|
| 68 |
oromo_processor = AutoProcessor.from_pretrained("osanseviero/seamless-copy")
|
| 69 |
oromo_model = AutoModelForSpeechSeq2Seq.from_pretrained("osanseviero/seamless-copy").to("cpu")
|
|
|
|
| 117 |
transcription = processor.batch_decode(predicted_ids)[0]
|
| 118 |
|
| 119 |
elif language == "Afan Oromo":
|
| 120 |
+
# FIXED: Afan Oromo uses different processing
|
| 121 |
+
# The seamless-copy model might work differently
|
| 122 |
+
try:
|
| 123 |
+
# Try without tgt_lang first
|
| 124 |
+
inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt") # FIXED: audio instead of audios
|
| 125 |
+
with torch.no_grad():
|
| 126 |
+
generated_ids = model.generate(**inputs)
|
| 127 |
+
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 128 |
+
except Exception as oromo_error:
|
| 129 |
+
print(f"[WARNING] Afan Oromo standard processing failed: {oromo_error}")
|
| 130 |
+
# Fallback: run a plain forward pass and decode the argmax of the logits
|
| 131 |
+
try:
|
| 132 |
+
inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
|
| 133 |
+
with torch.no_grad():
|
| 134 |
+
outputs = model(**inputs)
|
| 135 |
+
transcription = processor.decode(outputs.logits.argmax(dim=-1)[0])
|
| 136 |
+
except Exception as fallback_error:
|
| 137 |
+
transcription = f"Afan Oromo transcription failed: {str(fallback_error)[:100]}"
|
| 138 |
|
| 139 |
else:
|
| 140 |
+
# Standard SeamlessM4T processing - FIXED: audio instead of audios
|
| 141 |
+
inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt") # FIXED HERE
|
| 142 |
with torch.no_grad():
|
| 143 |
generated_ids = model.generate(**inputs, tgt_lang=LANGUAGE_CONFIG[language]["code"])
|
| 144 |
transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|