Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 # 學號:R09942097、姓名:陳建成、streamlit cloud link: https://jeffeuxmartin-assignment-1-jeffeuxmartin-twnlp-appsrcapp-8mil4y.streamlitapp.com/ | |
| import streamlit as st, pandas as pd, re, time | |
| from views.components.spinner import dowload_ckip_package, download_cwn_drivers | |
def load_corpus(path):
    """Load the corpus JSON and return a title/URL view plus the full table.

    Args:
        path: Location of the JSON file, passed straight to ``pd.read_json``.

    Returns:
        A ``(df, full_df)`` tuple. ``full_df`` is the whole corpus sorted by
        its ``index`` column in descending order; ``df`` holds only the
        ``title`` and ``web_url`` columns, in that same order.
    """
    # sort_values returns a new frame rather than sorting in place, so the
    # result has to be kept -- otherwise the sort is silently a no-op.
    full_df = pd.read_json(path).sort_values('index', ascending=False)
    df = full_df[['title', 'web_url']]
    return df, full_df
def make_clickable(url, text):
    """Return an HTML anchor that opens ``url`` in a new tab, labelled ``text``.

    Both values are interpolated as-is; no HTML escaping is applied.
    """
    # Ref.: https://discuss.streamlit.io/t/display-urls-in-dataframe-column-as-a-clickable-hyperlink/743/7
    anchor_template = '<a target="_blank" href="{}">{}</a>'
    return anchor_template.format(url, text)
# Title prefixes stripped before analysis. Raw strings avoid the invalid
# escape sequences ("\[", "\:") that plain literals would contain; the
# patterns are compiled once instead of on every title.
# NOTE(review): the first pattern matches a literal "R:" only; if corpus
# titles mark replies as "Re:", those are not stripped -- confirm against data.
_REPLY_PREFIX = re.compile(r'^R\: *')
_BRACKET_TAG = re.compile(r'^\[[^]]*\] *')


def _clean_titles(titles):
    """Strip the leading reply marker and ``[...]`` tag from each title, dropping duplicates in first-seen order."""
    stripped = (
        _BRACKET_TAG.sub('', _REPLY_PREFIX.sub('', title))
        for title in titles
    )
    # dict keys keep insertion order, so this is an order-preserving dedupe.
    return list(dict.fromkeys(stripped))


def run_app(path, ckip_nlp_models, cwn_upgrade) -> None:
    """Render the corpus search page and run the selected pipeline on the hits.

    Args:
        path: Corpus JSON file, forwarded to ``load_corpus``.
        ckip_nlp_models: Model names, forwarded to ``dowload_ckip_package``
            and ``visualize_side_bar``.
        cwn_upgrade: Forwarded to ``download_cwn_drivers``.

    Raises:
        KeyError: If the pipeline returned by the sidebar is neither
            ``"CKIP"`` nor ``"CWN"``.
    """
    # need to download first because CWN packages will first check whether
    # there is .cwn_graph folder in the root directory.
    download_cwn_drivers(cwn_upgrade)
    dowload_ckip_package(ckip_nlp_models)

    # Imported only after the downloads above have run.
    from views.components.sidebar import visualize_side_bar
    from views.containers import display_cwn, display_ckip, display_data_form

    st.title("PTT 語料庫搜尋分析工具 (ver. 0.1)")
    # Called for its rendering side effect; the queries themselves are read
    # from st.session_state["input_data"] below.
    display_data_form()
    max_articles = st.slider('最多標題數:', min_value=0, max_value=30, step=1, value=3)
    model, pipeline, active_visualizers = visualize_side_bar(ckip_nlp_models)
    display_factories = {
        "CKIP": display_ckip,
        "CWN": display_cwn,
    }
    df, _ = load_corpus(path)

    # NOTE(review): results are assumed to render only once "input_data" is
    # present in the session state -- confirm against the intended nesting.
    if "input_data" not in st.session_state:
        return

    # Every query must match: each pass narrows the previous result.
    # NOTE(review): str.contains interprets the query as a regular
    # expression; pass regex=False if literal matching is intended.
    for query in st.session_state["input_data"]:
        # na=False treats a missing title as a non-match instead of letting
        # NaN leak into the boolean mask (which would raise on indexing).
        df = df[df["title"].str.contains(query, na=False)]
    df = df.iloc[:max_articles]

    if df.empty:
        st.markdown("## No results match! Q_Q... ##")
        return

    st.markdown("#### 搜尋文章標題結果 ####")
    # Every item is written as "1."; Markdown renumbers ordered lists.
    st.markdown('\n'.join(
        f"1. [{it.title}]({it.web_url})"
        for it in df.itertuples()
    ))
    display_factories[pipeline](
        model, active_visualizers,
        _clean_titles(df['title']),
    )
if __name__ == "__main__":
    # Model names handed to run_app for the CKIP download step and sidebar.
    ckip_nlp_models = [
        "bert-base",
        "albert-tiny",
        "bert-tiny",
        "albert-base",
    ]
    run_app(
        path='../data/corpus.json',
        ckip_nlp_models=ckip_nlp_models,
        cwn_upgrade=False,
    )