# ViewerVoice — YouTube comment analyser (Streamlit app).
# NOTE(review): the original dump carried a Hugging Face Spaces "Runtime error"
# page header here; replaced with this comment so the file parses.
import os
import time
import base64

import numpy as np
import spacy
import streamlit as st
from git import Repo
from PIL import Image
from matplotlib import colormaps
from matplotlib.colors import ListedColormap
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Secrets injected by the hosting environment; a missing key fails fast with KeyError.
GITHUB_PAT = os.environ['GITHUB']
SENTIMENT = os.environ['SENTIMENT']
EMBEDDING = os.environ['EMBEDDING']

# Clone the private analysis repo once per container; later re-runs reuse it.
if not os.path.exists('repo_directory'):
    try:
        Repo.clone_from(f'https://marcus-t-s:{GITHUB_PAT}@github.com/marcus-t-s/yt-comment-analyser.git',
                        'repo_directory')
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt stay fatal.
        st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
        st.stop()

# Helpers from the cloned repo — these imports must run AFTER the clone above.
from repo_directory.utils.chart_utils import *
from repo_directory.youtube_comment_class import *
# Streamlit configuration: wide layout plus a custom browser-tab title and icon.
_page_icon = Image.open('images/page_icon.png')
st.set_page_config(
    page_title="ViewerVoice | YouTube Comment Analyser",
    page_icon=_page_icon,
    layout="wide",
)
# Define and load cached resources
@st.cache_resource
def load_models():
    """Load the sentiment pipeline, sentence embedder and spaCy model.

    Decorated with ``st.cache_resource`` so the (large) model objects are
    created once per container and reused across Streamlit re-runs instead of
    being reloaded on every user interaction.

    Returns:
        tuple: (sentiment_pipeline, embedding_model, spacy_nlp)
    """
    sentiment_pipeline = pipeline("sentiment-analysis", model=r"cardiffnlp/twitter-roberta-base-sentiment")
    embedding_model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')
    spacy_nlp = spacy.load("en_core_web_sm")
    # Domain-specific stopwords that otherwise dominate the wordcloud/topics.
    add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
    return sentiment_pipeline, embedding_model, spacy_nlp
@st.cache_resource
def load_colors_image():
    """Load the wordcloud mask, its colormap, and the base64-encoded logo.

    Decorated with ``st.cache_resource`` so the image files are read from disk
    only once per container rather than on every re-run.

    Returns:
        tuple: (mask ndarray, ListedColormap, base64 logo string)
    """
    mask = np.array(Image.open('images/youtube_icon.jpg'))
    # Use only the 0.4–0.8 slice of 'Reds' so the wordcloud avoids the
    # near-white and near-black extremes of the colormap.
    reds = colormaps['Reds']
    colors = ListedColormap(reds(np.linspace(0.4, 0.8, 256)))
    with open("images/viewervoice_logo_crop.png", "rb") as img_file:
        logo_image = base64.b64encode(img_file.read()).decode("utf-8")
    return mask, colors, logo_image
# Materialise the cached resources for this run.
sentiment_pipeline, embedding_model, spacy_nlp = load_models()
mask, colors, logo_image = load_colors_image()

# Hide line at the top and "made with streamlit" text
_hide_chrome_css = """
<style>
header {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(_hide_chrome_css, unsafe_allow_html=True)
# ---- Session-state bootstrap: each key is seeded only on its first absence ----
if 'YouTubeParser' not in st.session_state:
    st.session_state['YouTubeParser'] = YoutubeCommentParser()

# The four figure slots are created together, keyed off 'comment_fig' alone.
if 'comment_fig' not in st.session_state:
    for _fig_key in ("comment_fig", "wordcloud_fig", "topic_fig", "sentiment_fig"):
        st.session_state[_fig_key] = None

for _key, _default in (
    ('rerun_button', "INIT"),
    ('topic_filter', False),
    ('sentiment_filter', False),
    ('filter_state', "INIT"),
    ('video_link', None),
    ('num_comments', None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Set reference to YouTubeParser object for more concise code
yt_parser = st.session_state['YouTubeParser']

main_page = st.container()
def query_comments_button():
    """Button callback: wipe derived state so a fresh query starts cleanly."""
    # Delete the large objects first so they can be garbage-collected before
    # their replacements are allocated below.
    heavy_keys = ("comment_fig", "wordcloud_fig", "topic_fig",
                  "sentiment_fig", "YouTubeParser")
    for heavy_key in heavy_keys:
        del st.session_state[heavy_key]
    # Reset flags back to their placeholder values for the querying pass.
    st.session_state.rerun_button = "QUERYING"
    st.session_state['filter_state'] = "INIT"
    for flag_key in ("topic_filter", "sentiment_filter",
                     "semantic_filter", "figures_built"):
        st.session_state[flag_key] = False
    for fig_key in ("comment_fig", "wordcloud_fig", "topic_fig", "sentiment_fig"):
        st.session_state[fig_key] = None
    st.session_state["YouTubeParser"] = YoutubeCommentParser()
def filter_visuals_button():
    """Button callback: mark the next re-run as a filtering pass."""
    st.session_state.filter_state = "FILTERING"
with st.sidebar:
    # Query controls: the video URL plus caps on the fetch size and topic count.
    st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
    st.session_state["max_comments"] = st.slider(
        label="Maximum number of comments to query",
        min_value=100,
        max_value=2000,
        step=100,
    )
    st.session_state["max_topics"] = st.slider(
        label="Maximum number of topics",
        min_value=5,
        max_value=20,
        step=1,
    )
    st.button('Query comments :left_speech_bubble:', on_click=query_comments_button)
with main_page:
    # Static header area of the main page: CSS tweaks, logo, credits and notes.
    # Reduce space at the top
    reduce_header_height_style = """
        <style>
            div.block-container {padding-top:0rem;}
            div.block-container {padding-bottom:1rem;}
            div.block-container {padding-left:1.5rem;}
        </style>
    """
    st.markdown(reduce_header_height_style, unsafe_allow_html=True)
    # Title and intro section: centred logo rendered from the cached base64 string.
    markdown_content = f"""
    <div style='display: flex; align-items: center; justify-content: center;'>
        <img src='data:image/png;base64,{logo_image}' height='135px';/>
    </div>
    """
    st.markdown(markdown_content, unsafe_allow_html=True)
    # LinkedIn links (Font Awesome stylesheet supplies the linkedin icon glyphs).
    lnk = '<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">'
    st.markdown(lnk + """
    <div style="display: flex; justify-content: center; align-items: center; flex-direction: column;">
        <br>
        <p style="text-align: center;"><b>Made by</b>
        <b>
        <a href='https://www.linkedin.com/in/afiba-7715ab166/' style="text-decoration: none">
        <i class='fa fa-linkedin-square'></i> <span style='color: #000000'>Afiba Annor</span></a>
        <a href='https://www.linkedin.com/in/marcus-singh-305927172/' style="text-decoration: none">
        <i class='fa fa-linkedin-square'></i> <span style='color: #000000'>Marcus Singh</span></a>
        </b></p>
    </div>
    """, unsafe_allow_html=True)
    st.markdown("<hr>", unsafe_allow_html=True)
    # Notes section
    st.markdown("<p style='font-size: 1.3rem;'><b>📝 Notes</b></p>", unsafe_allow_html=True)
    html_content = """
    <ul>
        <li style='font-size: 0.95rem;'>This dashboard is still under development; further updates will be implemented
        in due course.</li>
        <li style='font-size: 0.95rem;'>Currently, the dashboard exclusively caters to comments in English and does not
        include comment replies.</li>
        <li style='font-size: 0.95rem;'>Comments undergo cleaning and pre-processing to optimise modelling. As a result,
        the returned comment count may fall short of the maximum queried amount.</li>
        <li style='font-size: 0.95rem;'>Please note that the sentiment analysis currently does not take emojis into
        account.</li>
        <li style='font-size: 0.95rem;'>For optimal performance of the current topic model, we recommend retrieving
        thousands of comments.</li>
        <li style='font-size: 0.95rem;'>Please anticipate that querying comments and running the models may require
        a few minutes to complete.</li>
    </ul>
    <hr>
    """
    # Display the HTML content using st.markdown()
    st.markdown(html_content, unsafe_allow_html=True)
# Query comments section: runs once per button press while state is "QUERYING".
if (st.session_state.rerun_button == "QUERYING") and (st.session_state["video_link"] is not None):
    with st.spinner('Querying comments and running models'):
        yt_parser = st.session_state["YouTubeParser"]
        try:
            yt_parser.get_comments(st.session_state['video_link'], st.session_state['max_comments'])
            yt_parser.get_video_title(st.session_state['video_link'])
        except Exception:
            # Narrowed from a bare `except:`; a malformed/unknown URL lands here.
            st.error("Error: Unable to query comments, incorrect YouTube URL.")
            st.stop()
        # Run formatting and models
        try:
            yt_parser.format_comments()
            yt_parser.clean_comments()
            yt_parser.run_sentiment_pipeline(sentiment_pipeline)
            yt_parser.run_topic_modelling_pipeline(embedding_model,
                                                   nlp=spacy_nlp,
                                                   max_topics=st.session_state['max_topics'])
        except ValueError:
            # Presumably raised downstream when too few comments survive
            # cleaning for the topic model — TODO confirm in yt_parser.
            st.error("Error: Oops there are not enough comments to analyse, please try a different video.")
            st.stop()
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt stay fatal.
            st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
            st.stop()
        # Set "QUERY COMPLETE" to bypass running this section on script re-run
        st.session_state.rerun_button = "QUERY COMPLETE"
# Once comments are queried, build charts ready to visualise.
if st.session_state.rerun_button == "QUERY COMPLETE":
    # (Re)build figures on the first pass after a query, or whenever the user
    # has just pressed the filter button.
    if (not st.session_state["figures_built"]) or (st.session_state.filter_state == "FILTERING"):
        # If filtering button pressed
        if st.session_state.filter_state == "FILTERING":
            df_filtered = yt_parser.df_comments.copy()
            if st.session_state["topic_filter"]:
                # A non-empty list on the RHS of `==` acts as membership in pandas.query.
                df_filtered = df_filtered.query(f"Topic == {st.session_state.topic_filter}")
            if st.session_state["sentiment_filter"]:
                df_filtered = df_filtered.query(f"Sentiment == {st.session_state.sentiment_filter}")
            if st.session_state["semantic_filter"]:
                df_filtered = semantic_search(df=df_filtered, query=st.session_state["semantic_filter"],
                                              embedding_model=embedding_model,
                                              text_col='Comment_Clean')
            # len() is already 0 for an empty frame; the old 0-vs-len branch was redundant.
            st.session_state['num_comments'] = len(df_filtered)
            # NOTE(review): figures below are built even when df_filtered is
            # empty — presumably the chart helpers tolerate that; verify.
            # Build filtered table figure
            st.session_state["table_fig"] = comments_table(df_filtered,
                                                           ['publishedAt', 'Comment_Formatted', 'Likes',
                                                            'Sentiment', 'Topic'],
                                                           {'publishedAt': 'Date', 'Comment_Formatted': 'Comment'})
            # Build filtered wordcloud figure
            st.session_state["wordcloud_fig"] = comment_wordcloud(df_filtered, mask, colors)
            # Build filtered topic figure
            st.session_state["topic_fig"] = topic_treemap(df_filtered, "Topic")
            # Build filtered sentiment figure
            st.session_state["sentiment_fig"] = sentiment_chart(df_filtered, "Sentiment")
            st.session_state["figures_built"] = True
            st.session_state.filter_state = "FILTERED"
        # No filtering selected
        else:
            st.session_state['num_comments'] = len(yt_parser.df_comments)
            # Can only build graphs if we have comments
            if st.session_state['num_comments'] > 0:
                try:
                    # Build unfiltered table figure
                    st.session_state["table_fig"] = comments_table(yt_parser.df_comments,
                                                                   ['publishedAt', 'Comment_Formatted', 'Likes',
                                                                    'Sentiment', 'Topic'],
                                                                   {'publishedAt': 'Date',
                                                                    'Comment_Formatted': 'Comment'})
                    # Build unfiltered wordcloud figure
                    st.session_state["wordcloud_fig"] = comment_wordcloud(yt_parser.df_comments,
                                                                          mask, colors)
                    # Build unfiltered topic figure
                    st.session_state["topic_fig"] = topic_treemap(yt_parser.df_comments, "Topic")
                    # Build unfiltered sentiment figure
                    st.session_state["sentiment_fig"] = sentiment_chart(yt_parser.df_comments, "Sentiment")
                    st.session_state["figures_built"] = True
                except Exception:
                    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt stay fatal.
                    st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
                    st.stop()
with main_page:
    # Visualisation area: only rendered once a query has finished.
    if st.session_state.rerun_button == "QUERY COMPLETE":
        st.subheader(f"{yt_parser.title}")
        st.markdown("<hr><br>", unsafe_allow_html=True)
        if st.session_state['num_comments'] > 0:
            # Top row: comments table (left) and wordcloud (right).
            table_col, word_cloud_col = st.columns([0.55, 0.45])
            with table_col:
                st.markdown(f"""<p style='font-size: 1.3rem;
                display: flex; align-items: center; justify-content: center;'><b>
                Comments</b></p>""", unsafe_allow_html=True)
                st.plotly_chart(st.session_state["table_fig"], use_container_width=True)
            with word_cloud_col:
                st.markdown(f"""<p style='font-size: 1.3rem;
                display: flex; align-items: center; justify-content: center;'><b>
                Word Cloud</b></p>""", unsafe_allow_html=True)
                st.pyplot(st.session_state["wordcloud_fig"], use_container_width=True)
            # Bottom row: topic treemap (left) and sentiment donut (right).
            treemap_col, sentiment_donut_col = st.columns([0.55, 0.45])
            with treemap_col:
                st.markdown(f"""<p style='font-size: 1.3rem;
                display: flex; align-items: center; justify-content: center;'><b>
                Topic Proportions</b></p>""", unsafe_allow_html=True)
                st.plotly_chart(st.session_state["topic_fig"], use_container_width=True)
            with sentiment_donut_col:
                st.markdown(f"""<p style='font-size: 1.3rem;
                display: flex; align-items: center; justify-content: center;'><b>
                Sentiment Distribution</b></p>""", unsafe_allow_html=True)
                st.plotly_chart(st.session_state["sentiment_fig"], use_container_width=True)
            # st.table(yt_parser.df_comments.head())
        else:
            # Filtering removed every comment — prompt the user to relax filters.
            st.write("Unfortunately we couldn't find any comments for this set of filters, please try "
                     "editing the filters and try again")

with st.sidebar:
    # Define the HTML and CSS for the button-style container
    # showing the current comment count (0 until a query has run).
    if st.session_state['num_comments'] is not None:
        num_comments = st.session_state['num_comments']
    else:
        num_comments = 0
    htmlstr = f"""
    <p style='background-color: rgb(255, 255, 255, 0.75);
              color: rgb(0, 0, 0, 0.75);
              font-size: 40px;
              border-radius: 7px;
              padding-top: 25px;
              padding-bottom: 25px;
              padding-right: 25px;
              padding-left: 25px;
              line-height:25px;
              display: flex;
              align-items: center;
              justify-content: center;
              box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);'>
        {num_comments}</p>
    """
    # Display the button-style container with number of comments
    st.subheader("Number of comments")
    st.markdown(htmlstr, unsafe_allow_html=True)
    # Filters section: the widgets write directly into session state; the
    # actual filtering happens in the figure-building section on re-run.
    st.subheader("Filters")
    if yt_parser.df_comments is not None:
        st.session_state["topic_filter"] = st.multiselect("Topic",
                                                          options=sorted(list(yt_parser.df_comments['Topic'].unique())))
        st.session_state["sentiment_filter"] = st.multiselect("Sentiment",
                                                              options=list(yt_parser.df_comments['Sentiment'].unique()))
        st.session_state["semantic_filter"] = st.text_input("Keyword search",
                                                            max_chars=30)
        st.button('Filter visualisations :sleuth_or_spy:', on_click=filter_visuals_button)
    else:
        # No data yet: show disabled placeholder widgets.
        st.multiselect("Topic",
                       options=["Please query comments from a video"],
                       disabled=True)
        st.multiselect("Sentiment",
                       options=["Please query comments from a video"],
                       disabled=True)
        st.text_input("Keyword search",
                      disabled=True)
        st.button('Please query comments before filtering',
                  disabled=True)