Spaces:
Running
on
Zero
Running
on
Zero
modify examples
Browse files
- .gradio/cached_examples/13/log.csv +5 -0
- app.py +31 -48
.gradio/cached_examples/13/log.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Predicted Dialect,timestamp
|
| 2 |
+
"{""label"": ""Gulf Arabic"", ""confidences"": [{""label"": ""Gulf Arabic"", ""confidence"": 0.9943048357963562}, {""label"": ""Levantine Arabic"", ""confidence"": 0.004683974664658308}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.0003852946974802762}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.0003597271570470184}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.0002661938196979463}]}",2025-03-04 14:57:07.478940
|
| 3 |
+
"{""label"": ""Levantine Arabic"", ""confidences"": [{""label"": ""Levantine Arabic"", ""confidence"": 0.8999205827713013}, {""label"": ""Gulf Arabic"", ""confidence"": 0.09826569259166718}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.001049569109454751}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.0004323236644268036}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.0003318020317237824}]}",2025-03-04 14:57:32.843399
|
| 4 |
+
"{""label"": ""Gulf Arabic"", ""confidences"": [{""label"": ""Gulf Arabic"", ""confidence"": 0.9867829084396362}, {""label"": ""Levantine Arabic"", ""confidence"": 0.011104526929557323}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.0016229108441621065}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.0003496674180496484}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.00014002238458488137}]}",2025-03-04 14:57:54.273625
|
| 5 |
+
"{""label"": ""Levantine Arabic"", ""confidences"": [{""label"": ""Levantine Arabic"", ""confidence"": 0.9568566083908081}, {""label"": ""Gulf Arabic"", ""confidence"": 0.03988657519221306}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.002475168788805604}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.0006239291978999972}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.00015768631419632584}]}",2025-03-04 14:58:14.103717
|
app.py
CHANGED
|
@@ -1,17 +1,13 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
-
import numpy as np
|
| 4 |
import os
|
| 5 |
|
| 6 |
# Load the model
|
| 7 |
print("Loading model...")
|
| 8 |
model_id = "badrex/mms-300m-arabic-dialect-identifier"
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
except Exception as e:
|
| 13 |
-
print(f"Error loading model: {e}")
|
| 14 |
-
|
| 15 |
# Define dialect mapping
|
| 16 |
dialect_mapping = {
|
| 17 |
"MSA": "Modern Standard Arabic",
|
|
@@ -22,54 +18,41 @@ dialect_mapping = {
|
|
| 22 |
}
|
| 23 |
|
| 24 |
def predict_dialect(audio):
|
| 25 |
-
|
| 26 |
-
# The audio input from Gradio is a tuple of (sample_rate, audio_array)
|
| 27 |
-
if audio is None:
|
| 28 |
-
return {"Error": 1.0}
|
| 29 |
-
|
| 30 |
-
sr, audio_array = audio
|
| 31 |
-
|
| 32 |
-
# Process the audio input
|
| 33 |
-
if len(audio_array.shape) > 1:
|
| 34 |
-
audio_array = audio_array.mean(axis=1) # Convert stereo to mono
|
| 35 |
-
|
| 36 |
-
print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")
|
| 37 |
-
|
| 38 |
-
# Classify the dialect
|
| 39 |
-
predictions = classifier({"sampling_rate": sr, "raw": audio_array})
|
| 40 |
-
|
| 41 |
-
# Format results for display
|
| 42 |
-
results = {}
|
| 43 |
-
for pred in predictions:
|
| 44 |
-
dialect_name = dialect_mapping.get(pred['label'], pred['label'])
|
| 45 |
-
results[dialect_name] = float(pred['score'])
|
| 46 |
-
|
| 47 |
-
return results
|
| 48 |
-
except Exception as e:
|
| 49 |
-
print(f"Error in prediction: {e}")
|
| 50 |
return {"Error": 1.0}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
#
|
| 53 |
-
|
| 54 |
examples_dir = "examples"
|
| 55 |
if os.path.exists(examples_dir):
|
| 56 |
for filename in os.listdir(examples_dir):
|
| 57 |
if filename.endswith((".wav", ".mp3", ".ogg")):
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
print(f"Found {len(
|
| 61 |
else:
|
| 62 |
print("Examples directory not found")
|
| 63 |
|
| 64 |
-
# Examples with labels
|
| 65 |
-
examples = []
|
| 66 |
-
if example_files:
|
| 67 |
-
for file in example_files:
|
| 68 |
-
basename = os.path.basename(file)
|
| 69 |
-
dialect = basename.split("_")[0] if "_" in basename else basename.split(".")[0]
|
| 70 |
-
label = dialect_mapping.get(dialect, dialect.capitalize())
|
| 71 |
-
examples.append([file, f"{label} Sample"])
|
| 72 |
-
|
| 73 |
# Create the Gradio interface
|
| 74 |
demo = gr.Interface(
|
| 75 |
fn=predict_dialect,
|
|
@@ -80,8 +63,8 @@ demo = gr.Interface(
|
|
| 80 |
Upload an audio file or record your voice speaking Arabic to see which dialect it matches.
|
| 81 |
The model identifies: Modern Standard Arabic (MSA), Egyptian, Gulf, Levantine, and Maghrebi dialects.""",
|
| 82 |
examples=examples if examples else None,
|
| 83 |
-
|
| 84 |
-
flagging_mode=None
|
| 85 |
)
|
| 86 |
|
| 87 |
# Launch the app
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
|
|
|
| 3 |
import os
|
| 4 |
|
| 5 |
# Load the model
|
| 6 |
print("Loading model...")
|
| 7 |
model_id = "badrex/mms-300m-arabic-dialect-identifier"
|
| 8 |
+
classifier = pipeline("audio-classification", model=model_id)
|
| 9 |
+
print("Model loaded successfully")
|
| 10 |
+
|
|
|
|
|
|
|
|
|
|
| 11 |
# Define dialect mapping
|
| 12 |
dialect_mapping = {
|
| 13 |
"MSA": "Modern Standard Arabic",
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
def predict_dialect(audio):
    """Classify the Arabic dialect spoken in a Gradio audio input.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the Gradio audio
            component, or ``None`` when nothing was submitted. ``samples``
            is treated as a NumPy-style array that is either 1-D (mono) or
            2-D with the channels on axis 1.

    Returns:
        A dict mapping each dialect's display name to its score as a
        ``float``. ``{"Error": 1.0}`` is returned when there is no audio to
        classify (``None`` input or a zero-length recording).
    """
    if audio is None:
        return {"Error": 1.0}

    # The audio input from Gradio is a tuple of (sample_rate, audio_array)
    sr, audio_array = audio

    # A zero-length recording has nothing to classify; report it the same
    # way as a missing input instead of handing an empty array to the model.
    if audio_array is None or audio_array.size == 0:
        return {"Error": 1.0}

    # Remember whether the samples arrived as integer PCM before casting.
    # NOTE(review): Gradio's numpy audio is typically int16 -- confirm
    # against the installed Gradio version.
    is_integer_pcm = audio_array.dtype.kind in "iu"

    # Work in float32 throughout: averaging integer channels would otherwise
    # yield float64, and the downstream pipeline presumably needs a
    # floating-point waveform (e.g. for resampling) -- TODO confirm.
    audio_array = audio_array.astype("float32")

    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1)  # Convert stereo to mono

    if is_integer_pcm:
        # Peak-normalise integer PCM into [-1.0, 1.0]. The cast above has
        # already happened, so abs() cannot overflow on the most negative
        # integer sample. Silent input (peak == 0) is left untouched.
        peak = float(abs(audio_array).max())
        if peak > 0:
            audio_array = audio_array / peak

    print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")

    # Classify the dialect
    predictions = classifier({"sampling_rate": sr, "raw": audio_array})

    # Translate model labels to display names; labels missing from the
    # mapping are shown unchanged.
    return {
        dialect_mapping.get(pred["label"], pred["label"]): float(pred["score"])
        for pred in predictions
    }
|
| 43 |
|
| 44 |
+
# Collect example audio clips for the demo. Each entry is a one-element list
# holding only the file path (no label metadata), one row per example.
examples_dir = "examples"
examples = []
if os.path.isdir(examples_dir):
    # os.listdir() returns names in arbitrary order, so sort them to keep the
    # examples in the same order on every start. Extensions are matched
    # case-insensitively so files such as "clip.WAV" are picked up too.
    examples = [
        [os.path.join(examples_dir, filename)]
        for filename in sorted(os.listdir(examples_dir))
        if filename.lower().endswith((".wav", ".mp3", ".ogg"))
    ]
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
# Create the Gradio interface
|
| 57 |
demo = gr.Interface(
|
| 58 |
fn=predict_dialect,
|
|
|
|
| 63 |
Upload an audio file or record your voice speaking Arabic to see which dialect it matches.
|
| 64 |
The model identifies: Modern Standard Arabic (MSA), Egyptian, Gulf, Levantine, and Maghrebi dialects.""",
|
| 65 |
examples=examples if examples else None,
|
| 66 |
+
cache_examples=False, # Disable caching to avoid issues
|
| 67 |
+
flagging_mode=None
|
| 68 |
)
|
| 69 |
|
| 70 |
# Launch the app
|