Dmitry Beresnev committed
Commit 3795647 · 1 Parent(s): 5016313

add Finnhub News Sentiment Analysis Module

Dockerfile CHANGED
@@ -58,6 +58,6 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
 
 # Run the application when the container starts HF
 #CMD ["uvicorn", "src.telegram_bot:app", "--host", "0.0.0.0", "--port", "7860"]
-CMD ["python", "main.py"]
+#CMD ["python", "main.py"]
 # Uncomment the line below to run the application locally
-#CMD ["python", "-m", "telegram_bot"]
+CMD ["python", "-m", "telegram_bot"]
src/{lexicon_based_sentiment_analyzer.py → api/__init__.py} RENAMED
File without changes
src/{financial_news_requester.py → api/financial_news_requester.py} RENAMED
@@ -18,6 +18,9 @@ def fetch_comp_financial_news(ticker: str = 'NVDA', date_from = "2025-07-31", da
     try:
         finnhub_client = finnhub.Client(api_key=api_key)
         news_feed = finnhub_client.company_news(ticker, _from=date_from, to=date_to)
+        if not news_feed:
+            logging.warning(f"No news found for ticker {ticker}")
+            return None
         logging.info(f'got total amount of news: {len(news_feed)} for ticker: {ticker}')
         return news_feed
     except Exception as e:
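With this change fetch_comp_financial_news can return None instead of a list when Finnhub yields no articles, so callers should guard for that. A minimal caller sketch (the import path assumes the post-rename layout above and that imports resolve from the repo root; ticker and dates are placeholders):

    from src.api.financial_news_requester import fetch_comp_financial_news

    news = fetch_comp_financial_news(ticker='NVDA', date_from='2025-07-31', date_to='2025-08-01')
    if news is None:
        print('no articles returned for this ticker/date range')
    else:
        print(f'fetched {len(news)} articles')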
src/core/__init__.py ADDED
File without changes
src/core/news_analysis/__init__.py ADDED
File without changes
src/core/news_analysis/data/__init__.py ADDED
File without changes
src/core/news_analysis/data/financial_lexicon.json ADDED
@@ -0,0 +1,59 @@
+{
+  "positive": {
+    "profit": 3, "profits": 3, "profitable": 3, "earnings": 2, "revenue": 2,
+    "growth": 2, "growing": 2, "grew": 2, "increase": 2, "increased": 2,
+    "rising": 2, "rise": 2, "surge": 3, "surged": 3, "surging": 3,
+    "rally": 2, "rallied": 2, "rallying": 2, "gain": 2, "gains": 2,
+    "gained": 2, "outperform": 2, "outperformed": 2, "outperforming": 2,
+    "beat": 2, "beats": 2, "exceeded": 2, "exceeds": 2, "exceed": 2,
+    "strong": 2, "stronger": 2, "strength": 2, "robust": 2, "solid": 2,
+    "positive": 2, "optimistic": 2, "bullish": 2, "bull": 2,
+    "breakthrough": 3, "innovation": 2, "innovative": 2, "launch": 1,
+    "launched": 1, "expansion": 2, "expand": 2, "expanding": 2,
+    "acquisition": 1, "merger": 1, "deal": 1, "partnership": 1,
+    "agreement": 1, "contract": 1, "approve": 1, "approved": 1,
+    "upgrade": 2, "upgraded": 2, "buy": 1, "recommend": 1,
+    "target": 1, "opportunity": 1, "opportunities": 1,
+    "record": 2, "milestone": 2, "achievement": 2, "success": 2,
+    "successful": 2, "win": 2, "winner": 2, "winning": 2,
+    "excellent": 3, "outstanding": 3, "exceptional": 3, "remarkable": 3,
+    "impressive": 2, "good": 1, "great": 2, "better": 1, "best": 2,
+    "high": 1, "higher": 1, "up": 1, "upward": 1, "improve": 2,
+    "improved": 2, "improvement": 2, "recovery": 2, "recovering": 2,
+    "momentum": 1, "confident": 2, "confidence": 2, "stable": 1,
+    "stability": 1, "steady": 1, "secure": 1, "attractive": 1
+  },
+  "negative": {
+    "loss": -2, "losses": -2, "losing": -2, "lost": -2, "decline": -2,
+    "declined": -2, "declining": -2, "decrease": -2, "decreased": -2,
+    "fall": -2, "fell": -2, "falling": -2, "drop": -2, "dropped": -2,
+    "dropping": -2, "plunge": -3, "plunged": -3, "plunging": -3,
+    "crash": -3, "crashed": -3, "crashing": -3, "collapse": -3,
+    "collapsed": -3, "collapsing": -3, "tumble": -2, "tumbled": -2,
+    "weak": -2, "weakness": -2, "weaker": -2, "poor": -2, "worse": -2,
+    "worst": -3, "bad": -2, "negative": -2, "pessimistic": -2,
+    "bearish": -2, "bear": -2, "concern": -1, "concerns": -1,
+    "worried": -2, "worry": -2, "worrying": -2, "risk": -1, "risks": -1,
+    "risky": -1, "threat": -2, "threaten": -2, "threatening": -2,
+    "problem": -2, "problems": -2, "issue": -1, "issues": -1,
+    "challenge": -1, "challenges": -1, "difficulty": -2, "difficulties": -2,
+    "struggle": -2, "struggling": -2, "struggled": -2, "fail": -2,
+    "failed": -2, "failing": -2, "failure": -2, "miss": -2, "missed": -2,
+    "disappointing": -2, "disappointed": -2, "disappointment": -2,
+    "underperform": -2, "underperformed": -2, "underperforming": -2,
+    "downgrade": -2, "downgraded": -2, "sell": -1, "cut": -1,
+    "cuts": -1, "cutting": -1, "reduce": -1, "reduced": -1, "reduction": -1,
+    "layoff": -2, "layoffs": -2, "fire": -2, "fired": -2, "firing": -2,
+    "eliminate": -2, "elimination": -2, "suspend": -2, "suspended": -2,
+    "delay": -1, "delayed": -1, "postpone": -1, "postponed": -1,
+    "cancel": -2, "cancelled": -2, "bankruptcy": -3, "bankrupt": -3,
+    "debt": -1, "debts": -1, "deficit": -2, "shortage": -2, "shortages": -2,
+    "crisis": -3, "recession": -3, "downturn": -2, "slump": -2,
+    "volatile": -1, "volatility": -1, "uncertain": -1, "uncertainty": -2,
+    "unstable": -2, "instability": -2, "warning": -2, "warn": -2,
+    "warned": -2, "alert": -1, "caution": -1, "cautious": -1,
+    "investigation": -2, "lawsuit": -2, "sue": -2, "sued": -2,
+    "fraud": -3, "scandal": -3, "controversy": -2, "dispute": -2,
+    "conflict": -2, "penalty": -2, "fine": -2, "violation": -2
+  }
+}
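For reference, these integer weights are summed per matched token, normalized by token count, and then blended 60/40 with TextBlob's polarity (see _analyze_text_sentiment in the analyzer module below). A worked sketch with a made-up token list:

    import json

    with open('src/core/news_analysis/data/financial_lexicon.json', encoding='utf-8') as f:
        lexicon = json.load(f)

    tokens = ['revenue', 'surged', 'despite', 'lawsuit']    # hypothetical preprocessed tokens
    raw = sum(lexicon['positive'].get(t, 0) + lexicon['negative'].get(t, 0) for t in tokens)
    normalized = raw / len(tokens)                          # (2 + 3 - 2) / 4 = 0.75
    textblob_polarity = 0.2                                 # stand-in for TextBlob's output
    combined = 0.6 * textblob_polarity + 0.4 * normalized   # 0.12 + 0.30 = 0.42 -> 'positive' (> 0.1)
    print(raw, normalized, combined)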
src/core/news_analysis/data/news_analysis.py ADDED
@@ -0,0 +1,21 @@
+from dataclasses import dataclass, asdict
+from typing import Any
+
+from src.core.news_analysis.data.news_item import NewsItem
+from src.core.news_analysis.data.sentiment_result import SentimentResult
+
+
+@dataclass
+class NewsAnalysis:
+    news_item: NewsItem
+    headline_sentiment: SentimentResult
+    summary_sentiment: SentimentResult
+    overall_sentiment: SentimentResult
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            'news_item': asdict(self.news_item),
+            'headline_sentiment': asdict(self.headline_sentiment),
+            'summary_sentiment': asdict(self.summary_sentiment),
+            'overall_sentiment': asdict(self.overall_sentiment)
+        }
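Because to_dict() goes through dataclasses.asdict, the nested dataclasses flatten to plain dicts and the result is JSON-ready. A hypothetical round-trip with placeholder values (NewsItem and SentimentResult are the dataclasses added just below):

    import json
    from src.core.news_analysis.data.news_item import NewsItem
    from src.core.news_analysis.data.sentiment_result import SentimentResult
    from src.core.news_analysis.data.news_analysis import NewsAnalysis

    neutral = SentimentResult(0.0, 'neutral', 0.0, [], [], 0)  # placeholder sentiment
    item = NewsItem(id=1, headline='h', category='company news', summary='s',
                    datetime=0, source='Reuters', related='NVDA', image='', url='')
    analysis = NewsAnalysis(item, neutral, neutral, neutral)
    print(json.dumps(analysis.to_dict(), indent=2))            # serializes cleanly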
src/core/news_analysis/data/news_item.py ADDED
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class NewsItem:
+    id: int
+    headline: str
+    category: str
+    summary: str
+    datetime: int
+    source: str
+    related: str
+    image: str
+    url: str
src/core/news_analysis/data/sentiment_result.py ADDED
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class SentimentResult:
+    score: float
+    polarity: str
+    confidence: float
+    positive_words: list[str]
+    negative_words: list[str]
+    word_count: int
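Here score is the blended value computed in the analyzer module below, and polarity is derived from it with +/-0.1 cut-offs. A small sketch of that mapping (thresholds copied from FinnhubSentimentAnalyzer; the field values are invented):

    from src.core.news_analysis.data.sentiment_result import SentimentResult

    def label(score: float) -> str:
        # Same +/-0.1 thresholds the analyzer uses
        if score > 0.1:
            return 'positive'
        if score < -0.1:
            return 'negative'
        return 'neutral'

    r = SentimentResult(score=0.42, polarity=label(0.42), confidence=0.5,
                        positive_words=['surged'], negative_words=['lawsuit'], word_count=4)
    print(r.polarity)  # positive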
src/core/news_analysis/lexicon_based_sentiment_analyzer.py ADDED
@@ -0,0 +1,530 @@
+"""
+Finnhub News Sentiment Analysis Module
+
+This module provides sentiment analysis for Finnhub company news API responses
+using NLTK for text processing and TextBlob for sentiment analysis.
+"""
+
+import re
+import logging
+from typing import Any
+import statistics
+import json
+import os
+
+import nltk
+from textblob import TextBlob
+
+import ssl
+try:
+    _create_unverified_https_context = ssl._create_unverified_context
+except AttributeError:
+    pass
+else:
+    ssl._create_default_https_context = _create_unverified_https_context
+
+def _download_nltk_requirements():
+    """Download required NLTK data."""
+    nltk_downloads = [
+        'punkt',
+        'stopwords',
+        'wordnet',
+        'vader_lexicon',
+        'averaged_perceptron_tagger',
+        'omw-1.4'
+    ]
+    for item in nltk_downloads:
+        nltk.download(item, quiet=True)
+
+_download_nltk_requirements()
+
+
+from nltk.corpus import stopwords
+
+from nltk.tokenize import word_tokenize
+from nltk.stem import WordNetLemmatizer
+
+from src.core.news_analysis.data.news_item import NewsItem
+from src.core.news_analysis.data.sentiment_result import SentimentResult
+from src.core.news_analysis.data.news_analysis import NewsAnalysis
+from src.api.financial_news_requester import fetch_comp_financial_news
+
+
+class FinnhubSentimentAnalyzer:
+    """
+    Sentiment analyzer for Finnhub news data using NLTK and TextBlob.
+
+    This analyzer combines a lexicon-based approach with TextBlob's machine learning
+    sentiment analysis for more accurate results.
+    """
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self._nltk_initialize()
+        self.financial_lexicon = self._get_financial_lexicon()
+
+    def _nltk_initialize(self) -> None:
+        self._download_nltk_requirements()
+        # Initialize NLTK components
+        try:
+            self.stop_words = set(stopwords.words('english'))
+            self.lemmatizer = WordNetLemmatizer()
+        except Exception as e:
+            self.logger.error(f"NLTK data not found: {e}")
+            self.logger.info("Please run nltk.download() to install required data")
+            raise
+
+    def _download_nltk_requirements(self):
+        """Download required NLTK data."""
+        nltk_downloads = [
+            'punkt',
+            'stopwords',
+            'wordnet',
+            'vader_lexicon',
+            'averaged_perceptron_tagger',
+            'omw-1.4'
+        ]
+        for item in nltk_downloads:
+            try:
+                nltk.download(item, quiet=True)
+            except Exception as e:
+                self.logger.warning(f"Failed to download {item}: {e}")
+
+    def _get_financial_lexicon(self) -> dict[str, dict[str, float]]:
+        """
+        Load the financial-specific sentiment lexicon from a JSON file.
+        """
+        lexicon_path = os.path.join(
+            os.path.dirname(__file__),
+            "./data/financial_lexicon.json"
+        )
+        with open(lexicon_path, "r", encoding="utf-8") as f:
+            lexicon = json.load(f)
+        return lexicon
+
+    def _preprocess_text(self, text: str) -> tuple[str, list[str]]:
+        """
+        Preprocess text using NLTK.
+
+        Args:
+            text: Raw text to preprocess
+
+        Returns:
+            Tuple of (cleaned_text, tokenized_words)
+        """
+        if not text:
+            return "", []
+        # Convert to lowercase
+        text = text.lower()
+        # Remove URLs, email addresses, and special characters
+        text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
+        text = re.sub(r'\S+@\S+', '', text)
+        text = re.sub(r'[^\w\s]', ' ', text)
+        # Tokenize
+        try:
+            tokens = word_tokenize(text)
+        except LookupError:
+            # Fallback if punkt tokenizer is not available
+            tokens = text.split()
+        # Remove stop words and lemmatize
+        processed_tokens = []
+        for token in tokens:
+            if token not in self.stop_words and len(token) > 2:
+                try:
+                    lemmatized = self.lemmatizer.lemmatize(token)
+                    processed_tokens.append(lemmatized)
+                except Exception:
+                    processed_tokens.append(token)
+        processed_text = ' '.join(processed_tokens)
+        return processed_text, processed_tokens
+
+    def _get_lexicon_sentiment(self, tokens: list[str]) -> tuple[float, list[str], list[str]]:
+        """
+        Calculate sentiment using the financial lexicon.
+
+        Args:
+            tokens: List of preprocessed tokens
+
+        Returns:
+            Tuple of (score, positive_words, negative_words)
+        """
+        score = 0.0
+        positive_words = []
+        negative_words = []
+        for token in tokens:
+            if token in self.financial_lexicon['positive']:
+                word_score = self.financial_lexicon['positive'][token]
+                score += word_score
+                positive_words.append(token)
+            elif token in self.financial_lexicon['negative']:
+                word_score = self.financial_lexicon['negative'][token]
+                score += word_score
+                negative_words.append(token)
+        return score, positive_words, negative_words
+
+    def _analyze_text_sentiment(self, text: str) -> SentimentResult:
+        """
+        Analyze sentiment of text using both TextBlob and the lexicon approach.
+
+        Args:
+            text: Text to analyze
+
+        Returns:
+            SentimentResult object
+        """
+        if not text:
+            return SentimentResult(
+                score=0.0,
+                polarity='neutral',
+                confidence=0.0,
+                positive_words=[],
+                negative_words=[],
+                word_count=0
+            )
+        processed_text, tokens = self._preprocess_text(text)
+        if not tokens:
+            return SentimentResult(
+                score=0.0,
+                polarity='neutral',
+                confidence=0.0,
+                positive_words=[],
+                negative_words=[],
+                word_count=0
+            )
+        blob = TextBlob(text)
+        textblob_polarity = blob.sentiment.polarity  # Range: -1 to 1
+        textblob_subjectivity = blob.sentiment.subjectivity  # Range: 0 to 1
+        lexicon_score, positive_words, negative_words = self._get_lexicon_sentiment(tokens)
+        # Normalize lexicon score
+        normalized_lexicon_score = lexicon_score / len(tokens) if tokens else 0.0
+        # Combine TextBlob and lexicon scores (weighted average)
+        # TextBlob: 60%, Lexicon: 40%
+        combined_score = (textblob_polarity * 0.6) + (normalized_lexicon_score * 0.4)
+        # Determine polarity
+        if combined_score > 0.1:
+            polarity = 'positive'
+        elif combined_score < -0.1:
+            polarity = 'negative'
+        else:
+            polarity = 'neutral'
+        # Calculate confidence
+        # Based on TextBlob subjectivity and number of sentiment words found
+        sentiment_word_ratio = (len(positive_words) + len(negative_words)) / len(tokens)
+        confidence = (textblob_subjectivity * 0.5) + (sentiment_word_ratio * 0.5)
+        confidence = min(confidence, 1.0)
+        return SentimentResult(
+            score=round(combined_score, 4),
+            polarity=polarity,
+            confidence=round(confidence, 4),
+            positive_words=positive_words,
+            negative_words=negative_words,
+            word_count=len(tokens)
+        )
+
+    def analyze_news_item(self, news_data: dict[str, Any]) -> NewsAnalysis:
+        """
+        Analyze sentiment of a single news item.
+
+        Args:
+            news_data: Dictionary containing news data from the Finnhub API
+
+        Returns:
+            NewsAnalysis object
+        """
+        news_item = NewsItem(**news_data)
+        headline_sentiment = self._analyze_text_sentiment(news_item.headline)
+        summary_sentiment = self._analyze_text_sentiment(news_item.summary)
+
+        # TODO: this snippet should be refactored :)
+        # Calculate overall sentiment (weighted: headline 30%, summary 70%)
+        if headline_sentiment.word_count > 0 and summary_sentiment.word_count > 0:
+            overall_score = (headline_sentiment.score * 0.3) + (summary_sentiment.score * 0.7)
+            overall_positive_words = list(set(headline_sentiment.positive_words + summary_sentiment.positive_words))
+            overall_negative_words = list(set(headline_sentiment.negative_words + summary_sentiment.negative_words))
+            total_words = headline_sentiment.word_count + summary_sentiment.word_count
+            overall_confidence = ((headline_sentiment.confidence * headline_sentiment.word_count) +
+                                  (summary_sentiment.confidence * summary_sentiment.word_count)) / total_words
+        elif headline_sentiment.word_count > 0:
+            overall_score = headline_sentiment.score
+            overall_positive_words = headline_sentiment.positive_words
+            overall_negative_words = headline_sentiment.negative_words
+            total_words = headline_sentiment.word_count
+            overall_confidence = headline_sentiment.confidence
+        elif summary_sentiment.word_count > 0:
+            overall_score = summary_sentiment.score
+            overall_positive_words = summary_sentiment.positive_words
+            overall_negative_words = summary_sentiment.negative_words
+            total_words = summary_sentiment.word_count
+            overall_confidence = summary_sentiment.confidence
+        else:
+            overall_score = 0.0
+            overall_positive_words = []
+            overall_negative_words = []
+            total_words = 0
+            overall_confidence = 0.0
+
+        # Determine overall polarity
+        if overall_score > 0.1:
+            overall_polarity = 'positive'
+        elif overall_score < -0.1:
+            overall_polarity = 'negative'
+        else:
+            overall_polarity = 'neutral'
+        overall_sentiment = SentimentResult(
+            score=round(overall_score, 4),
+            polarity=overall_polarity,
+            confidence=round(overall_confidence, 4),
+            positive_words=overall_positive_words,
+            negative_words=overall_negative_words,
+            word_count=total_words
+        )
+        return NewsAnalysis(
+            news_item=news_item,
+            headline_sentiment=headline_sentiment,
+            summary_sentiment=summary_sentiment,
+            overall_sentiment=overall_sentiment
+        )
+
+    def analyze_news_batch(self, news_data: list[dict[str, Any]]) -> list[NewsAnalysis]:
+        """
+        Analyze sentiment for a batch of news items.
+
+        Args:
+            news_data: List of news dictionaries from the Finnhub API
+
+        Returns:
+            List of NewsAnalysis objects
+        """
+        if not news_data:
+            self.logger.warning("No news data provided for analysis")
+            return []
+        analyses = []
+        for news_item in news_data:
+            try:
+                analysis = self.analyze_news_item(news_item)
+                analyses.append(analysis)
+            except Exception as e:
+                self.logger.error(f"Error analyzing news item {news_item.get('id', 'unknown')}: {e}")
+                continue
+        self.logger.info(f"Successfully analyzed {len(analyses)} out of {len(news_data)} news items")
+        return analyses
+
+    # TODO: refactor this method
+    def get_sentiment_summary(self, analyses: list[NewsAnalysis]) -> dict[str, Any]:
+        """
+        Generate summary statistics for sentiment analyses.
+
+        Args:
+            analyses: List of NewsAnalysis objects
+
+        Returns:
+            Dictionary containing summary statistics
+        """
+        if not analyses:
+            return {
+                'total_articles': 0,
+                'positive_count': 0,
+                'negative_count': 0,
+                'neutral_count': 0,
+                'average_sentiment': 0.0,
+                'sentiment_distribution': {'positive': 0.0, 'negative': 0.0, 'neutral': 0.0},
+                'average_confidence': 0.0,
+                'most_positive': None,
+                'most_negative': None,
+                'sentiment_trend': 'neutral'
+            }
+
+        # Count sentiment polarities
+        positive_count = sum(1 for a in analyses if a.overall_sentiment.polarity == 'positive')
+        negative_count = sum(1 for a in analyses if a.overall_sentiment.polarity == 'negative')
+        neutral_count = sum(1 for a in analyses if a.overall_sentiment.polarity == 'neutral')
+
+        # Calculate averages
+        scores = [a.overall_sentiment.score for a in analyses]
+        confidences = [a.overall_sentiment.confidence for a in analyses]
+
+        avg_sentiment = statistics.mean(scores) if scores else 0.0
+        avg_confidence = statistics.mean(confidences) if confidences else 0.0
+
+        # Determine overall sentiment trend
+        if avg_sentiment > 0.1:
+            sentiment_trend = 'positive'
+        elif avg_sentiment < -0.1:
+            sentiment_trend = 'negative'
+        else:
+            sentiment_trend = 'neutral'
+
+        # Find most positive and negative articles
+        most_positive = max(analyses, key=lambda x: x.overall_sentiment.score) if analyses else None
+        most_negative = min(analyses, key=lambda x: x.overall_sentiment.score) if analyses else None
+
+        total = len(analyses)
+
+        return {
+            'total_articles': total,
+            'positive_count': positive_count,
+            'negative_count': negative_count,
+            'neutral_count': neutral_count,
+            'average_sentiment': round(avg_sentiment, 4),
+            'sentiment_distribution': {
+                'positive': round(positive_count / total * 100, 2) if total > 0 else 0.0,
+                'negative': round(negative_count / total * 100, 2) if total > 0 else 0.0,
+                'neutral': round(neutral_count / total * 100, 2) if total > 0 else 0.0
+            },
+            'average_confidence': round(avg_confidence, 4),
+            'sentiment_trend': sentiment_trend,
+            'most_positive': {
+                'id': most_positive.news_item.id,
+                'headline': most_positive.news_item.headline,
+                'score': most_positive.overall_sentiment.score,
+                'source': most_positive.news_item.source
+            } if most_positive else None,
+            'most_negative': {
+                'id': most_negative.news_item.id,
+                'headline': most_negative.news_item.headline,
+                'score': most_negative.overall_sentiment.score,
+                'source': most_negative.news_item.source
+            } if most_negative else None
+        }
+
+
+# Integration function for the existing Finnhub API code
+def analyze_finnhub_news_sentiment(ticker: str = 'NVDA',
+                                   date_from: str = "2025-07-31",
+                                   date_to: str = "2025-08-01") -> dict[str, Any] | None:
+    """
+    Complete pipeline to fetch and analyze Finnhub news sentiment using NLTK and TextBlob.
+
+    Args:
+        ticker: Stock ticker symbol
+        date_from: Start date for news fetch
+        date_to: End date for news fetch
+
+    Returns:
+        Dictionary containing analysis results or None if error
+    """
+    import os
+    from dotenv import load_dotenv
+    import finnhub
+
+    load_dotenv()
+    api_key = os.getenv('FINNHUB_API_TOKEN')
+
+    if not api_key:
+        logging.error("FINNHUB_API_TOKEN not found in environment variables")
+        return None
+
+    try:
+        # Fetch news data directly from the Finnhub client
+        finnhub_client = finnhub.Client(api_key=api_key)
+        news_feed = finnhub_client.company_news(ticker, _from=date_from, to=date_to)
+        logging.info(f'Got total amount of news: {len(news_feed)} for ticker: {ticker}')
+
+        if not news_feed:
+            logging.warning(f"No news found for ticker {ticker}")
+            return None
+
+        # Initialize analyzer and analyze sentiment
+        analyzer = FinnhubSentimentAnalyzer()
+        analyses = analyzer.analyze_news_batch(news_feed)
+
+        if not analyses:
+            logging.warning("No successful sentiment analyses")
+            return None
+
+        # Generate summary
+        summary = analyzer.get_sentiment_summary(analyses)
+
+        return {
+            'ticker': ticker,
+            'date_range': {'from': date_from, 'to': date_to},
+            'summary': summary,
+            'detailed_analyses': [analysis.to_dict() for analysis in analyses]
+        }
+
+    except Exception as e:
+        logging.error(f"Error in sentiment analysis pipeline: {e}")
+        return None
+
+
+# TODO: this block should be refactored
+if __name__ == "__main__":
+    # Example usage
+    logging.basicConfig(level=logging.INFO)
+
+    '''
+    # Sample news data for testing
+    sample_news = [
+        {
+            "category": "company news",
+            "datetime": 1569550360,
+            "headline": "Apple reports record quarterly earnings with exceptional iPhone sales growth",
+            "id": 25286,
+            "image": "https://example.com/image1.jpg",
+            "related": "AAPL",
+            "source": "Reuters",
+            "summary": "Apple Inc. announced outstanding quarterly results, with revenue reaching new record highs driven by strong iPhone sales and robust performance across all product categories. The company exceeded analyst expectations and demonstrated excellent growth in services revenue, leading to optimistic investor sentiment.",
+            "url": "https://example.com/news1"
+        },
+        {
+            "category": "company news",
+            "datetime": 1569528720,
+            "headline": "Tesla struggles with production delays amid supply chain crisis",
+            "id": 25287,
+            "image": "https://example.com/image2.jpg",
+            "related": "TSLA",
+            "source": "Bloomberg",
+            "summary": "Tesla is experiencing significant production challenges due to severe supply chain disruptions and semiconductor shortages. The company warned investors about potential negative impact on delivery targets and profit margins, raising concerns about disappointing quarterly results.",
+            "url": "https://example.com/news2"
+        }
+    ]
+    '''
+
+    sample_news = fetch_comp_financial_news(ticker='NVDA', date_from='2025-08-02', date_to='2025-08-03') or []  # guard: fetch may return None
+    print(sample_news)
+    print(len(sample_news))
+
+    # Initialize analyzer
+    analyzer = FinnhubSentimentAnalyzer()
+
+    # Analyze sample news (limit to the first 3 items for testing)
+    results = analyzer.analyze_news_batch(sample_news[:3])
+
+    # Print results
+    for result in results:
+        print(f"\n{'=' * 60}")
+        print(f"News ID: {result.news_item.id}")
+        print(f"Headline: {result.news_item.headline}")
+        print(f"Source: {result.news_item.source}")
+        print(f"\nHeadline Sentiment: {result.headline_sentiment.polarity} "
+              f"(Score: {result.headline_sentiment.score}, "
+              f"Confidence: {result.headline_sentiment.confidence})")
+        print(f"Summary Sentiment: {result.summary_sentiment.polarity} "
+              f"(Score: {result.summary_sentiment.score}, "
+              f"Confidence: {result.summary_sentiment.confidence})")
+        print(f"Overall Sentiment: {result.overall_sentiment.polarity} "
+              f"(Score: {result.overall_sentiment.score}, "
+              f"Confidence: {result.overall_sentiment.confidence})")
+        print(f"Positive words: {result.overall_sentiment.positive_words}")
+        print(f"Negative words: {result.overall_sentiment.negative_words}")
+
+    # Generate and print summary
+    summary = analyzer.get_sentiment_summary(results)
+    print(f"\n{'=' * 60}")
+    print("SENTIMENT SUMMARY")
+    print(f"{'=' * 60}")
+    print(f"Total articles: {summary['total_articles']}")
+    print(f"Positive: {summary['positive_count']} ({summary['sentiment_distribution']['positive']}%)")
+    print(f"Negative: {summary['negative_count']} ({summary['sentiment_distribution']['negative']}%)")
+    print(f"Neutral: {summary['neutral_count']} ({summary['sentiment_distribution']['neutral']}%)")
+    print(f"Average sentiment: {summary['average_sentiment']}")
+    print(f"Average confidence: {summary['average_confidence']}")
+    print(f"Overall trend: {summary['sentiment_trend']}")
+
+    if summary['most_positive']:
+        print(f"\nMost positive article: {summary['most_positive']['headline']}")
+        print(f"Score: {summary['most_positive']['score']}")
+
+    if summary['most_negative']:
+        print(f"\nMost negative article: {summary['most_negative']['headline']}")
+        print(f"Score: {summary['most_negative']['score']}")
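A minimal end-to-end invocation of the new pipeline, assuming FINNHUB_API_TOKEN is set (e.g. via .env) and the package is importable from the repo root; the pipeline returns None on a missing token, an empty feed, or zero successful analyses, so the result should be checked:

    import logging
    from src.core.news_analysis.lexicon_based_sentiment_analyzer import analyze_finnhub_news_sentiment

    logging.basicConfig(level=logging.INFO)
    report = analyze_finnhub_news_sentiment(ticker='NVDA', date_from='2025-07-31', date_to='2025-08-01')
    if report is None:
        print('pipeline returned no result')
    else:
        s = report['summary']
        print(f"{report['ticker']}: trend={s['sentiment_trend']}, avg sentiment={s['average_sentiment']}")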
src/telegram_bot.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any
 from telegram import Update
 from telegram.ext import Application, CommandHandler, ContextTypes
 from dotenv import load_dotenv
-from src.financial_news_requester import fetch_comp_financial_news
+from api.financial_news_requester import fetch_comp_financial_news
 from fastapi import FastAPI, Request
 import uvicorn
 import httpx