import re

import pandas as pd
import streamlit as st

from utils.prediction import predict_sentence
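
# Note: predict_sentence (from this project's utils) returns a
# (label, confidence) tuple per the call sites below; confidence is assumed
# to be a float in [0.0, 1.0] so it can be passed straight to st.progress().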


def split_sentences_regex(text):
    # Clean the text: collapse newlines, drop quotes, normalize whitespace
    text = re.sub(r'[\n\r]', ' ', text)  # Replace newlines with spaces
    text = re.sub(r'["\']', '', text)    # Remove quotes
    text = re.sub(r'\s+', ' ', text)     # Normalize whitespace

    # A more aggressive pattern that also catches "!" and "?" endings:
    # pattern = r'[.!?]+[\s]+|[.!?]+$'
    pattern = r'[.]'  # Split on periods only

    # Split, strip, and drop empty strings
    sentences = [s.strip() for s in re.split(pattern, text)]
    return [s for s in sentences if len(s) > 0]
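
# Illustrative example: because the active pattern splits on periods only,
# "!" and "?" endings stay attached to the preceding chunk:
#   split_sentences_regex("First sentence. Second one! Still second")
#   -> ['First sentence', 'Second one! Still second']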


def split_sentences_with_abbrev(text):
    # Common abbreviations that should not end a sentence
    abbreviations = {'mr.', 'mrs.', 'dr.', 'sr.', 'jr.', 'vs.', 'e.g.', 'i.e.', 'etc.'}

    # Split on potential sentence boundaries. Note: str.split removes the
    # '. ' separator, so the trailing period must be re-added before testing
    # against the abbreviation set (which includes the dot).
    parts = text.split('. ')
    sentences = []
    current = parts[0]

    for part in parts[1:]:
        # If the previous chunk ends with an abbreviation, the split was a
        # false boundary: glue the next chunk back on instead of starting a
        # new sentence.
        ends_with_abbrev = any(
            (current.lower() + '.').endswith(abbr) for abbr in abbreviations
        )
        if ends_with_abbrev:
            current = current + '. ' + part
        else:
            sentences.append(current)
            current = part

    sentences.append(current)
    return sentences
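
# Illustrative example: the abbreviation guard keeps "Mr. Smith" in one
# sentence instead of splitting after "Mr.":
#   split_sentences_with_abbrev("I met Mr. Smith. He waved.")
#   -> ['I met Mr. Smith', 'He waved.']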


def show_analysis():
    st.title("Text Analysis")
    st.write("Use this section to analyze the logical structure of your text.")

    try:
        if 'model' not in st.session_state:
            st.error("Please initialize the model from the home page first.")
            return

        model = st.session_state.model
        label_encoder = st.session_state.label_encoder
        tokenizer = st.session_state.tokenizer

        # Text input section
        st.header("Analyze Your Text")
        user_text = st.text_area(
            "Enter your text here (multiple sentences allowed):", height=150
        )
        if st.button("Analyze"):
            if user_text:
                # Split the input into sentences and classify each one
                sentences = split_sentences_regex(user_text)
                st.subheader("Analysis Results:")
                for i, sentence in enumerate(sentences, 1):
                    with st.container():
                        label, confidence = predict_sentence(
                            model, sentence, tokenizer, label_encoder
                        )
                        if label not in ("Unknown", "Error"):
                            st.write("---")
                            st.write(f"**Sentence {i}:** {sentence}")
                            st.write(f"**Predicted:** {label}")
                            st.progress(confidence)
            else:
                st.warning("Please enter some text to analyze.")

        # Example Analysis Section
        st.header("Example Analysis")
        show_examples = st.checkbox("Show example analysis", key='show_examples')
        if show_examples:
            try:
                df = pd.read_csv('data/raw/history_01.csv')
                for sentence in df['Sentence'].head(5):  # Limit to 5 examples
                    with st.container():
                        label, confidence = predict_sentence(
                            model, sentence, tokenizer, label_encoder
                        )
                        if label not in ("Unknown", "Error"):
                            st.write("---")
                            st.write(f"**Sentence:** {sentence}")
                            st.write(f"**Predicted:** {label}")
                            st.progress(confidence)
            except FileNotFoundError:
                st.warning("Example file not found. Please check the data path.")
    except Exception as e:
        st.error(f"Error: {str(e)}")


if __name__ == "__main__":
    show_analysis()
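
# When launched with `streamlit run <this file>`, Streamlit executes the
# script with __name__ == "__main__", so the guard above fires and the
# analysis page renders.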