Spaces:
Runtime error
Runtime error
updated methodology
Browse files
app.py
CHANGED
|
@@ -31,15 +31,15 @@ from repo_directory.youtube_comment_class import *
|
|
| 31 |
st.set_page_config(
|
| 32 |
page_title="ViewerVoice | YouTube Comment Analyser",
|
| 33 |
layout="wide",
|
| 34 |
-
page_icon=Image.open('page_icon.png')
|
| 35 |
)
|
| 36 |
|
| 37 |
|
| 38 |
# Define and load cached resources
|
| 39 |
@st.cache_resource
|
| 40 |
def load_models():
|
| 41 |
-
sentiment_pipeline = pipeline("sentiment-analysis", model=
|
| 42 |
-
embedding_model = SentenceTransformer(
|
| 43 |
spacy_nlp = spacy.load("en_core_web_sm")
|
| 44 |
add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
|
| 45 |
return sentiment_pipeline, embedding_model, spacy_nlp
|
|
@@ -47,10 +47,10 @@ def load_models():
|
|
| 47 |
|
| 48 |
@st.cache_resource
|
| 49 |
def load_colors_image():
|
| 50 |
-
mask = np.array(Image.open('youtube_icon.jpg'))
|
| 51 |
Reds = colormaps['Reds']
|
| 52 |
colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))
|
| 53 |
-
with open("viewervoice_logo_crop.png", "rb") as img_file:
|
| 54 |
logo_image = base64.b64encode(img_file.read()).decode("utf-8")
|
| 55 |
return mask, colors, logo_image
|
| 56 |
|
|
@@ -68,7 +68,6 @@ hide_decoration_bar_style = """
|
|
| 68 |
"""
|
| 69 |
st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)
|
| 70 |
|
| 71 |
-
main_page = st.container()
|
| 72 |
|
| 73 |
if 'YouTubeParser' not in st.session_state:
|
| 74 |
st.session_state['YouTubeParser'] = YoutubeCommentParser()
|
|
@@ -93,6 +92,7 @@ if 'num_comments' not in st.session_state:
|
|
| 93 |
# Set reference to YouTubeParser object for more concise code
|
| 94 |
yt_parser = st.session_state['YouTubeParser']
|
| 95 |
|
|
|
|
| 96 |
|
| 97 |
def query_comments_button():
|
| 98 |
# Delete larger objects from session state to later replace
|
|
@@ -121,7 +121,6 @@ def filter_visuals_button():
|
|
| 121 |
|
| 122 |
|
| 123 |
with st.sidebar:
|
| 124 |
-
st.session_state["api_key"] = st.text_input('YouTube API key', value="", type='password')
|
| 125 |
st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
|
| 126 |
st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query",
|
| 127 |
min_value=100,
|
|
@@ -176,12 +175,7 @@ with main_page:
|
|
| 176 |
<ul>
|
| 177 |
<li style='font-size: 0.95rem;'>This dashboard is still under development; further updates will be implemented
|
| 178 |
in due course.</li>
|
| 179 |
-
<li style='font-size: 0.95rem;'>
|
| 180 |
-
<a href='https://medium.com/@afibannor/beyond-the-views-how-viewervoice-enriches-content-performance-analytics-3c46854db697?source=friends_link&sk=042267d3c0ed460c3ad0743ec4e16456'>link</a>.
|
| 181 |
-
This will guide you in acquiring your API key to retrieve comments.</li>
|
| 182 |
-
<li style='font-size: 0.95rem;'>Please be aware that each API key facilitates up to 10,000 API calls within
|
| 183 |
-
a 24-hour period.</li>
|
| 184 |
-
<li style='font-size: 0.95rem;'>Currently, the dashboard caters to comments in English and does not
|
| 185 |
include comment replies.</li>
|
| 186 |
<li style='font-size: 0.95rem;'>Comments undergo cleaning and pre-processing to optimise modelling. As a result,
|
| 187 |
the returned comment count may fall short of the maximum queried amount.</li>
|
|
@@ -202,35 +196,20 @@ if (st.session_state.rerun_button == "QUERYING") and (st.session_state["video_li
|
|
| 202 |
with st.spinner('Querying comments and running models'):
|
| 203 |
yt_parser = st.session_state["YouTubeParser"]
|
| 204 |
try:
|
| 205 |
-
yt_parser.
|
| 206 |
-
|
| 207 |
-
st.error("Error: Unable to query comments, please check your API key")
|
| 208 |
-
st.stop()
|
| 209 |
-
try:
|
| 210 |
-
yt_parser.query_comments(st.session_state['video_link'], st.session_state['max_comments'])
|
| 211 |
-
except googleapiclient.errors.HttpError:
|
| 212 |
-
st.error("Error: Unable to query comments, incorrect YouTube URL or API key.")
|
| 213 |
-
st.stop()
|
| 214 |
except:
|
| 215 |
st.error("Error: Unable to query comments, incorrect YouTube URL or maximum \
|
| 216 |
API call limit reached.")
|
| 217 |
st.stop()
|
| 218 |
|
| 219 |
# Run formatting and models
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
max_topics=st.session_state['max_topics'])
|
| 227 |
-
except ValueError:
|
| 228 |
-
st.error("Error: Oops there are not enough comments to analyse, please try a different video.")
|
| 229 |
-
st.stop()
|
| 230 |
-
except:
|
| 231 |
-
st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
|
| 232 |
-
st.stop()
|
| 233 |
-
|
| 234 |
# Set "QUERY COMPLETE" to bypass running this section on script re-run
|
| 235 |
st.session_state.rerun_button = "QUERY COMPLETE"
|
| 236 |
|
|
@@ -389,4 +368,4 @@ with st.sidebar:
|
|
| 389 |
st.text_input("Keyword search",
|
| 390 |
disabled=True)
|
| 391 |
st.button('Please query comments before filtering',
|
| 392 |
-
disabled=True)
|
|
|
|
| 31 |
st.set_page_config(
|
| 32 |
page_title="ViewerVoice | YouTube Comment Analyser",
|
| 33 |
layout="wide",
|
| 34 |
+
page_icon=Image.open('images/page_icon.png')
|
| 35 |
)
|
| 36 |
|
| 37 |
|
| 38 |
# Define and load cached resources
|
| 39 |
@st.cache_resource
def load_models():
    """Load the sentiment, embedding and spaCy models used by the app.

    The function is wrapped in ``st.cache_resource``, so the objects it
    builds are handed to Streamlit's resource cache.

    Returns:
        A ``(sentiment_pipeline, embedding_model, spacy_nlp)`` tuple, in
        that order.
    """
    # Extra stopwords registered on the spaCy pipeline through the
    # project helper ``add_custom_stopwords``.
    extra_stopwords = {"bring", "know", "come"}

    classifier = pipeline(
        "sentiment-analysis",
        model=r"cardiffnlp/twitter-roberta-base-sentiment",
    )
    encoder = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')

    nlp = spacy.load("en_core_web_sm")
    add_custom_stopwords(nlp, extra_stopwords)

    return classifier, encoder, nlp
|
|
|
|
| 47 |
|
| 48 |
@st.cache_resource
def load_colors_image():
    """Load the static image and colour assets used by the dashboard.

    The function is wrapped in ``st.cache_resource``, so the objects it
    builds are handed to Streamlit's resource cache.

    Returns:
        A ``(mask, colors, logo_image)`` tuple:

        * ``mask`` - NumPy array holding the pixels of
          ``images/youtube_icon.jpg``.
        * ``colors`` - ``ListedColormap`` built from 256 samples of the
          ``'Reds'`` colormap taken between 0.4 and 0.8.
        * ``logo_image`` - base64 text (UTF-8 decoded) of the bytes of
          ``images/viewervoice_logo_crop.png``.
    """
    # Image.open is lazy and holds the file handle; the context manager
    # releases it as soon as the pixel data has been copied into the array.
    with Image.open('images/youtube_icon.jpg') as icon:
        mask = np.array(icon)

    Reds = colormaps['Reds']
    colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))

    with open("images/viewervoice_logo_crop.png", "rb") as img_file:
        logo_image = base64.b64encode(img_file.read()).decode("utf-8")

    return mask, colors, logo_image
|
| 56 |
|
|
|
|
| 68 |
"""
|
| 69 |
st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)
|
| 70 |
|
|
|
|
| 71 |
|
| 72 |
if 'YouTubeParser' not in st.session_state:
|
| 73 |
st.session_state['YouTubeParser'] = YoutubeCommentParser()
|
|
|
|
| 92 |
# Set reference to YouTubeParser object for more concise code
|
| 93 |
yt_parser = st.session_state['YouTubeParser']
|
| 94 |
|
| 95 |
+
main_page = st.container()
|
| 96 |
|
| 97 |
def query_comments_button():
|
| 98 |
# Delete larger objects from session state to later replace
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
with st.sidebar:
|
|
|
|
| 124 |
st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
|
| 125 |
st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query",
|
| 126 |
min_value=100,
|
|
|
|
| 175 |
<ul>
|
| 176 |
<li style='font-size: 0.95rem;'>This dashboard is still under development; further updates will be implemented
|
| 177 |
in due course.</li>
|
| 178 |
+
<li style='font-size: 0.95rem;'>Currently, the dashboard exclusively caters to comments in English and does not
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
include comment replies.</li>
|
| 180 |
<li style='font-size: 0.95rem;'>Comments undergo cleaning and pre-processing to optimise modelling. As a result,
|
| 181 |
the returned comment count may fall short of the maximum queried amount.</li>
|
|
|
|
| 196 |
with st.spinner('Querying comments and running models'):
|
| 197 |
yt_parser = st.session_state["YouTubeParser"]
|
| 198 |
try:
|
| 199 |
+
yt_parser.scrape_comments(st.session_state['video_link'])
|
| 200 |
+
yt_parser.scrape_video_title()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
except:
|
| 202 |
st.error("Error: Unable to query comments, incorrect YouTube URL or maximum \
|
| 203 |
API call limit reached.")
|
| 204 |
st.stop()
|
| 205 |
|
| 206 |
# Run formatting and models
|
| 207 |
+
yt_parser.format_comments()
|
| 208 |
+
yt_parser.clean_comments()
|
| 209 |
+
yt_parser.run_sentiment_pipeline(sentiment_pipeline)
|
| 210 |
+
yt_parser.run_topic_modelling_pipeline(embedding_model,
|
| 211 |
+
nlp=spacy_nlp,
|
| 212 |
+
max_topics=st.session_state['max_topics'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
# Set "QUERY COMPLETE" to bypass running this section on script re-run
|
| 214 |
st.session_state.rerun_button = "QUERY COMPLETE"
|
| 215 |
|
|
|
|
| 368 |
st.text_input("Keyword search",
|
| 369 |
disabled=True)
|
| 370 |
st.button('Please query comments before filtering',
|
| 371 |
+
disabled=True)
|