| import streamlit as st | |
| from tabs import comparison, leaderboard, about | |
| import pandas as pd | |
# Path to the leaderboard data, read as JSON Lines (one record per line).
dataframe_path = "darija_tokenizers_leaderboard.jsonl"

# Columns the leaderboard must provide; also the schema of the empty fallback.
LEADERBOARD_COLUMNS = [
    "Tokenizer",
    "Vocabulary Size",
    "Token Count",
    "Tokens/Character Ratio",
    "Latin Support",
    "Tokenizer Class",
]

try:
    df = pd.read_json(dataframe_path, lines=True)
    # Validate explicitly rather than with `assert`, which is stripped when
    # Python runs with -O and would let a malformed leaderboard through.
    missing_columns = [col for col in LEADERBOARD_COLUMNS if col not in df.columns]
    if missing_columns:
        raise ValueError(
            f"Invalid columns in leaderboard: missing {missing_columns}"
        )
except Exception:
    # Best-effort load: a missing, unreadable, or malformed file falls back to
    # an empty leaderboard with the expected columns. `Exception` (not a bare
    # `except`) lets KeyboardInterrupt and SystemExit propagate.
    df = pd.DataFrame(columns=LEADERBOARD_COLUMNS)
def main():
    """Draw the page title and the three app tabs.

    Each tab's content is delegated to the matching module from ``tabs``;
    the leaderboard and comparison tabs receive the module-level ``df``.
    """
    st.title("Darija Tokenizer Explorer 🧭")

    tab_labels = ["Leaderboard", "Comparison", "About"]
    leaderboard_pane, comparison_pane, about_pane = st.tabs(tab_labels)

    with leaderboard_pane:
        leaderboard.leaderboard_tab(df)

    with comparison_pane:
        comparison.comparison_tab(df)

    with about_pane:
        about.about_tab()
# Build the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()