Update functions.py
functions.py CHANGED (+4 −5)
@@ -81,7 +81,7 @@ def inference(link, upload, _asr_model):
 def sentiment_pipe(earnings_text):
     '''Determine the sentiment of the text'''
 
-    earnings_sentences = chunk_long_text(earnings_text,
+    earnings_sentences = chunk_long_text(earnings_text,150,1,1)
     earnings_sentiment = sent_pipe(earnings_sentences)
 
     return earnings_sentiment, earnings_sentences
@@ -99,12 +99,11 @@ def clean_text(text):
     return text
 
 @st.experimental_memo(suppress_st_warning=True)
-def chunk_long_text(text,threshold,window_size=3):
+def chunk_long_text(text,threshold,window_size=3,stride=2):
     '''Preprocess text and chunk for semantic search and sentiment analysis'''
 
     #Convert cleaned text into sentences
     sentences = sent_tokenize(text)
-
     out = []
 
     #Limit the length of each sentence to a threshold
@@ -121,12 +120,12 @@ def chunk_long_text(text,threshold,window_size=3):
 
     #Combine sentences into a window of size window_size
     for paragraph in [out]:
-        for start_idx in range(0, len(paragraph),
+        for start_idx in range(0, len(paragraph), stride):
             end_idx = min(start_idx+window_size, len(paragraph))
             passages.append(" ".join(paragraph[start_idx:end_idx]))
 
     return passages
-
+
 @st.experimental_memo(suppress_st_warning=True)
 def chunk_and_preprocess_text(text,thresh=500):
 
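For orientation, here is a minimal, self-contained sketch of the sliding-window chunking that chunk_long_text performs after this commit. The sentence length-limiting step (old lines 108-120) is collapsed in the diff, so the character cap below is an assumption, and window_passages is an illustrative name rather than the repository's:

from nltk.tokenize import sent_tokenize  # needs nltk's 'punkt' data downloaded

def window_passages(text, threshold, window_size=3, stride=2):
    # Split the cleaned text into sentences.
    sentences = sent_tokenize(text)
    # Stand-in for the length-limiting step hidden in the diff:
    # cap each sentence at `threshold` characters (assumption).
    out = [s[:threshold] for s in sentences]
    passages = []
    # Slide a window of `window_size` sentences forward by `stride`;
    # with the defaults, consecutive passages overlap by one sentence.
    for start_idx in range(0, len(out), stride):
        end_idx = min(start_idx + window_size, len(out))
        passages.append(" ".join(out[start_idx:end_idx]))
    return passages

The stride parameter added in this commit decouples window overlap from window size: sentiment_pipe now calls chunk_long_text(earnings_text,150,1,1), i.e. window_size=1 and stride=1, so each passage is a single sentence and sent_pipe scores sentences one at a time, while semantic search can keep the overlapping defaults (window_size=3, stride=2).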