Spaces:

amirhoseinsedaghati
/

multi-purpose-text-application

Build error

App Files Files Community

multi-purpose-text-application / pages /Find_Topic.py

amirhoseinsedaghati

Update pages/Find_Topic.py

afd2097 verified over 1 year ago

raw

history blame contribute delete

2.01 kB

	import streamlit as st
	import numpy as np
	from sklearn.feature_extraction.text import CountVectorizer
	from scipy import linalg
	import regex as re
	from configs.db_configs import add_one_item
	from streamlit.components.v1 import html
	from configs.html_features import set_image
	from configs.download_files import FileDownloader



	def preprocess_text(text):
	    """Vectorise a single document and factorise its term-count matrix.

	    The text is treated as a one-document corpus: English stop words are
	    dropped, the remaining terms are counted, and the resulting 1 x N
	    count matrix is decomposed with a reduced SVD.

	    Args:
	        text: The raw input document.

	    Returns:
	        A 4-tuple ``(vocab, U, s, Vh)`` where ``vocab`` is an array of the
	        extracted terms (column order of the count matrix) and ``U``,
	        ``s``, ``Vh`` are the factors returned by ``scipy.linalg.svd``
	        with ``full_matrices=False``.
	    """
	    counter = CountVectorizer(stop_words='english')
	    # One-element list: the whole input is the only "document".
	    counts = counter.fit_transform([text]).todense()
	    terms = np.array(counter.get_feature_names_out())
	    return (terms, *linalg.svd(counts, full_matrices=False))


	def show_topics(text, num_top_words):
	    """Return the highest-weighted keywords of ``text`` as one string.

	    Args:
	        text: The raw input document.
	        num_top_words: Maximum number of vocabulary terms to select.

	    Returns:
	        The selected terms, strongest first, with digit runs removed,
	        joined by single spaces. Terms that consist only of digits are
	        dropped, so the result may hold fewer than ``num_top_words`` words.

	    Raises:
	        Whatever ``preprocess_text`` raises for the given text.
	    """
	    vocab, _, _, Vh = preprocess_text(text)

	    # The sign of a singular vector is arbitrary (U and Vh can flip
	    # together), so rank terms by magnitude instead of by signed value.
	    weights = np.abs(Vh[0])
	    ranked = np.argsort(weights)[::-1][:num_top_words]

	    # Raw string avoids the invalid-escape warning that '\d+' triggers.
	    stripped = (re.sub(r'\d+', '', vocab[i]) for i in ranked)

	    # Skip tokens that were nothing but digits so the output has no
	    # empty entries (which previously showed up as double spaces).
	    return ' '.join(word for word in stripped if word)


	def main():
	    """Render the "Find Topic" Streamlit page.

	    Shows a title and banner image, reads the text to analyse from a text
	    area and the number of keywords from a sidebar slider, and, when the
	    'Find Topic' button is pressed, displays the original text, the
	    extracted keywords and a download widget for them.

	    Blank input, or input from which no keywords can be extracted, yields
	    a warning instead of an unhandled exception.
	    """
	    st.title('Topic Modeling by Top Keywords')

	    # Three columns; only the wide middle one is used, so the outer two
	    # act as empty margins around the image.
	    _left, middle, _right = st.columns([1, 5.3, 1])
	    with middle:
	        url = "https://i.postimg.cc/jdF1hPng/combined.png"
	        html(set_image(url), height=400, width=400)

	    text = st.text_area('Find Topic', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
	    num_top_words = st.sidebar.slider('Number of Top Keywords', min_value=5, max_value=20, step=1, value=10)

	    if not st.button('Find Topic'):
	        return

	    # Whitespace-only input carries no terms; reject it up front rather
	    # than letting the vectoriser fail on it.
	    if not text.strip():
	        st.warning('Please enter some text before searching for a topic.')
	        return

	    with st.expander('Original Text'):
	        st.write(text)
	    # NOTE(review): presumably stores the submission via configs.db_configs
	    # -- confirm against that module.
	    add_one_item(text, 'Topic Modeling')

	    try:
	        topic_words = show_topics(text, num_top_words)
	    except ValueError:
	        # scikit-learn's CountVectorizer raises ValueError when the
	        # vocabulary is empty (e.g. the text holds only stop words).
	        st.warning('No keywords could be extracted from this text.')
	        return

	    with st.expander(f'Show Topic by {num_top_words} Top Keywords'):
	        st.write(topic_words)

	    with st.expander('Download Topic words'):
	        FileDownloader(data=topic_words, file_ext='txt').download()


	# Build the page only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()