Spaces:
Build error
Build error
| import streamlit as st | |
| import os | |
| import pandas as pd | |
| import json | |
| from html import escape | |
| import difflib | |
def generate_diff_html_word_level(text1, text2):
    """Render a word-level diff of two texts as an HTML fragment.

    Both texts are tokenized with ``str.split()``, so any run of whitespace
    (including newlines) separates words. The original spacing is therefore
    not preserved: words are re-joined with single spaces in the output.

    Args:
        text1: The "before" text.
        text2: The "after" text.

    Returns:
        A ``<pre style="white-space: pre-wrap;">`` element. Words present only
        in ``text1`` are wrapped in a ``<del>`` tag, words present only in
        ``text2`` are wrapped in an ``<ins>`` tag, and unchanged words are
        emitted as plain text. Every word is HTML-escaped.
    """
    words1 = text1.split()
    words2 = text2.split()

    def removed(words):
        # Markup for a run of words that exists only in text1.
        return '<del style="background-color: #fbb6ce;">' + escape(' '.join(words)) + '</del>'

    def added(words):
        # Markup for a run of words that exists only in text2.
        return '<ins style="background-color: #b7e4c7;">' + escape(' '.join(words)) + '</ins>'

    pieces = []
    matcher = difflib.SequenceMatcher(None, words1, words2)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            pieces.append(escape(' '.join(words1[i1:i2])))
            continue
        # 'replace' contributes both sides (old first, then new); 'delete'
        # contributes only the old side and 'insert' only the new side.
        if tag in ('replace', 'delete'):
            pieces.append(removed(words1[i1:i2]))
        if tag in ('replace', 'insert'):
            pieces.append(added(words2[j1:j2]))

    final_html = ' '.join(pieces)
    return f'<pre style="white-space: pre-wrap;">{final_html}</pre>'
# Request the pure-Python protobuf backend through its environment switch.
# NOTE(review): protobuf presumably reads this variable when it is first
# imported. If the `import streamlit` at the top of the file already pulls
# protobuf in, this assignment happens too late to have any effect; confirm,
# and move it above that import if so.
os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})

# Lay the page out across the full browser width.
st.set_page_config(layout="wide")
def convert_df(df):
    """Serialize a DataFrame to CSV and return it as UTF-8 bytes.

    The index is omitted and fields that need quoting are quoted with ``"``.
    """
    csv_text = df.to_csv(index=False, quotechar='"')
    return csv_text.encode('utf-8')
def load_narratives_data(path="narratives.jsonl"):
    """Load narrative records from a JSON Lines file into a DataFrame.

    Args:
        path: Location of the JSON Lines file (one JSON object per line).
            Defaults to ``narratives.jsonl`` in the working directory.

    Returns:
        A ``pandas.DataFrame`` with one row per non-blank line of the file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8: without it the platform's default encoding
    # is used, which can mis-decode or reject non-ASCII text.
    with open(path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline at end of file) instead of
        # letting json.loads fail on an empty string.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
# Read all narrative records for this script run.
narratives_df = load_narratives_data()

# Derive a 'language' column from the language code embedded in each id
# (the code sits between two hyphens).
language_pattern = '-(rus|zho|fas)-'
narratives_df['language'] = narratives_df['id'].str.extract(language_pattern)

# Two-column page body: a narrow column on the left, a wide one on the right.
page_columns = st.columns([1, 3], gap="large")
col1, col2 = page_columns

with st.sidebar:
    st.title("Options")
with col1:
    st.title("Narratives")

    def reset_selection():
        """Return to the overview whenever the language filter changes."""
        # Positions index into the filtered id list, so a position chosen
        # under one filter would point at a different narrative (or past the
        # end of a shorter list) under another.
        st.session_state.narrative_index = -1
        st.session_state.narrative_number = -1
        st.session_state.selectbox_narrative = "Overview"

    # Add language filter
    selected_language = st.selectbox(
        "Select language",
        ["All", "rus", "zho", "fas"],
        on_change=reset_selection,
    )
    if selected_language != "All":
        filtered_df = narratives_df[narratives_df['language'] == selected_language]
    else:
        filtered_df = narratives_df

    # Ids of the narratives that survive the filter; a narrative's position in
    # this list is the index used by the widgets below (-1 means "Overview").
    narrative_ids = filtered_df["id"].tolist()
    container_for_nav = st.container()

    def sync_from_drop():
        """Mirror the ID dropdown's selection into the index input."""
        chosen = st.session_state.selectbox_narrative
        position = -1 if chosen == "Overview" else narrative_ids.index(chosen)
        st.session_state.narrative_index = position
        # The page is rendered from the number input's value, so the dropdown
        # has to update that widget's state as well; otherwise picking an id
        # here would not change which narrative is displayed.
        st.session_state.narrative_number = position

    def sync_from_number():
        """Mirror the index input's value into the ID dropdown."""
        position = st.session_state.narrative_number
        st.session_state.narrative_index = position
        if position == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[position]

    narrative_number = container_for_nav.number_input(
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        min_value=-1,
        max_value=len(narrative_ids) - 1,
        step=1,
        on_change=sync_from_number,
        key="narrative_number",
    )
    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative",
    )
    st.divider()
with col2:
    # A negative index selects the overview page; any other value is a row
    # position within filtered_df.
    narrative_index = narrative_number

    if narrative_index < 0:
        # Overview: summary of the current (possibly filtered) collection.
        st.title("Overview")
        st.write(f"Total number of narratives: {len(filtered_df)}")
        if selected_language != "All":
            st.write(f"Selected language: {selected_language}")
        st.write("Select a narrative from the sidebar to view and edit its details.")
    else:
        # Editor: details of the selected narrative.
        narrative = filtered_df.iloc[narrative_index]
        st.markdown(
            "<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>",
            unsafe_allow_html=True,
        )
        container = st.container()
        container.subheader(f"Narrative ID: {narrative['id']}")
        container.divider()

        # Word-level diff between the two English versions, shown in a
        # bordered box.
        container.subheader("Diff: Original English vs Altered English")
        before_text = narrative['original_english'].strip()
        after_text = narrative['altered_english'].strip()
        processed_diff = generate_diff_html_word_level(before_text, after_text)
        with container.container(border=True):
            st.markdown(processed_diff, unsafe_allow_html=True)
        container.divider()

        # Editable copy of the narrative's original text.
        container.subheader("Original Text")
        original_input = container.text_area(
            "Edit the original text",
            value=narrative['original'].strip(),
            height=300,
        )