Spaces:

viewervoice-analytics
/

viewervoice

Runtime error

App Files Community

aannor committed on Mar 15, 2024

Commit

b64e04f

verified ·

1 Parent(s): ef2ba90

updated methodology

Browse files

Files changed (1) hide show

app.py +16 -37

app.py CHANGED Viewed

@@ -31,15 +31,15 @@ from repo_directory.youtube_comment_class import *
 st.set_page_config(
     page_title="ViewerVoice | YouTube Comment Analyser",
     layout="wide",
-    page_icon=Image.open('page_icon.png')
 )
 # Define and load cached resources
 @st.cache_resource
 def load_models():
-    sentiment_pipeline = pipeline("sentiment-analysis", model=f'{SENTIMENT}')
-    embedding_model = SentenceTransformer(f'{EMBEDDING}')
     spacy_nlp = spacy.load("en_core_web_sm")
     add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
     return sentiment_pipeline, embedding_model, spacy_nlp
@@ -47,10 +47,10 @@ def load_models():
 @st.cache_resource
 def load_colors_image():
-    mask = np.array(Image.open('youtube_icon.jpg'))
     Reds = colormaps['Reds']
     colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))
-    with open("viewervoice_logo_crop.png", "rb") as img_file:
         logo_image = base64.b64encode(img_file.read()).decode("utf-8")
     return mask, colors, logo_image
@@ -68,7 +68,6 @@ hide_decoration_bar_style = """
 """
 st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)
-main_page = st.container()
 if 'YouTubeParser' not in st.session_state:
     st.session_state['YouTubeParser'] = YoutubeCommentParser()
@@ -93,6 +92,7 @@ if 'num_comments' not in st.session_state:
 # Set reference to YouTubeParser object for more concise code
 yt_parser = st.session_state['YouTubeParser']
 def query_comments_button():
     # Delete larger objects from session state to later replace
@@ -121,7 +121,6 @@ def filter_visuals_button():
 with st.sidebar:
-    st.session_state["api_key"] = st.text_input('YouTube API key', value="", type='password')
     st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
     st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query",
                                                  min_value=100,
@@ -176,12 +175,7 @@ with main_page:
     <ul>
         <li style='font-size: 0.95rem;'>This dashboard is still under development; further updates will be implemented
         in due course.</li>
-        <li style='font-size: 0.95rem;'>Kindly refer to the instructions provided in this
-        <a href='https://medium.com/@afibannor/beyond-the-views-how-viewervoice-enriches-content-performance-analytics-3c46854db697?source=friends_link&sk=042267d3c0ed460c3ad0743ec4e16456'>link</a>.
-        This will guide you in acquiring your API key to retrieve comments.</li>
-        <li style='font-size: 0.95rem;'>Please be aware that each API key facilitates up to 10,000 API calls within
-        a 24-hour period.</li>
-        <li style='font-size: 0.95rem;'>Currently, the dashboard caters to comments in English and does not
         include comment replies.</li>
         <li style='font-size: 0.95rem;'>Comments undergo cleaning and pre-processing to optimise modelling. As a result,
         the returned comment count may fall short of the maximum queried amount.</li>
@@ -202,35 +196,20 @@ if (st.session_state.rerun_button == "QUERYING") and (st.session_state["video_li
     with st.spinner('Querying comments and running models'):
         yt_parser = st.session_state["YouTubeParser"]
         try:
-            yt_parser.build_youtube_api(st.session_state['api_key'])
-        except:
-            st.error("Error: Unable to query comments, please check your API key")
-            st.stop()
-        try:
-            yt_parser.query_comments(st.session_state['video_link'], st.session_state['max_comments'])
-        except googleapiclient.errors.HttpError:
-            st.error("Error: Unable to query comments, incorrect YouTube URL or API key.")
-            st.stop()
         except:
             st.error("Error: Unable to query comments, incorrect YouTube URL or maximum \
                               API call limit reached.")
             st.stop()
         # Run formatting and models
-        try:
-            yt_parser.format_comments()
-            yt_parser.clean_comments()
-            yt_parser.run_sentiment_pipeline(sentiment_pipeline)
-            yt_parser.run_topic_modelling_pipeline(embedding_model,
-                                                   nlp=spacy_nlp,
-                                                   max_topics=st.session_state['max_topics'])
-        except ValueError:
-            st.error("Error: Oops there are not enough comments to analyse, please try a different video.")
-            st.stop()
-        except:
-            st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
-            st.stop()
     # Set "QUERY COMPLETE" to bypass running this section on script re-run
     st.session_state.rerun_button = "QUERY COMPLETE"
@@ -389,4 +368,4 @@ with st.sidebar:
         st.text_input("Keyword search",
                       disabled=True)
         st.button('Please query comments before filtering',
-                  disabled=True)

 st.set_page_config(
     page_title="ViewerVoice | YouTube Comment Analyser",
     layout="wide",
+    page_icon=Image.open('images/page_icon.png')
 )
 # Define and load cached resources
 @st.cache_resource
 def load_models():
+    sentiment_pipeline = pipeline("sentiment-analysis", model=r"cardiffnlp/twitter-roberta-base-sentiment")
+    embedding_model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')
     spacy_nlp = spacy.load("en_core_web_sm")
     add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
     return sentiment_pipeline, embedding_model, spacy_nlp
 @st.cache_resource
 def load_colors_image():
+    mask = np.array(Image.open('images/youtube_icon.jpg'))
     Reds = colormaps['Reds']
     colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))
+    with open("images/viewervoice_logo_crop.png", "rb") as img_file:
         logo_image = base64.b64encode(img_file.read()).decode("utf-8")
     return mask, colors, logo_image
 """
 st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)
 if 'YouTubeParser' not in st.session_state:
     st.session_state['YouTubeParser'] = YoutubeCommentParser()
 # Set reference to YouTubeParser object for more concise code
 yt_parser = st.session_state['YouTubeParser']
+main_page = st.container()
 def query_comments_button():
     # Delete larger objects from session state to later replace
 with st.sidebar:
     st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
     st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query",
                                                  min_value=100,
     <ul>
         <li style='font-size: 0.95rem;'>This dashboard is still under development; further updates will be implemented
         in due course.</li>
+        <li style='font-size: 0.95rem;'>Currently, the dashboard exclusively caters to comments in English and does not
         include comment replies.</li>
         <li style='font-size: 0.95rem;'>Comments undergo cleaning and pre-processing to optimise modelling. As a result,
         the returned comment count may fall short of the maximum queried amount.</li>
     with st.spinner('Querying comments and running models'):
         yt_parser = st.session_state["YouTubeParser"]
         try:
+            yt_parser.scrape_comments(st.session_state['video_link'])
+            yt_parser.scrape_video_title()
         except:
             st.error("Error: Unable to query comments, incorrect YouTube URL or maximum \
                               API call limit reached.")
             st.stop()
         # Run formatting and models
+        yt_parser.format_comments()
+        yt_parser.clean_comments()
+        yt_parser.run_sentiment_pipeline(sentiment_pipeline)
+        yt_parser.run_topic_modelling_pipeline(embedding_model,
+                                               nlp=spacy_nlp,
+                                               max_topics=st.session_state['max_topics'])
     # Set "QUERY COMPLETE" to bypass running this section on script re-run
     st.session_state.rerun_button = "QUERY COMPLETE"
         st.text_input("Keyword search",
                       disabled=True)
         st.button('Please query comments before filtering',
+                  disabled=True)