fix data loading
Browse files
data.py
CHANGED
|
@@ -57,7 +57,10 @@ KEYS_TO_KEEP = [
|
|
| 57 |
"job_link_nvidia",
|
| 58 |
]
|
| 59 |
|
|
|
|
| 60 |
# HELPER FUNCTIONS
|
|
|
|
|
|
|
| 61 |
def generate_fake_dates(num_days: int = 7) -> List[str]:
|
| 62 |
"""Generate fake dates for the last N days."""
|
| 63 |
today = datetime.now()
|
|
@@ -85,7 +88,10 @@ def safe_extract(row: pd.Series, key: str) -> int:
|
|
| 85 |
"""Safely extract an integer value from a DataFrame row."""
|
| 86 |
return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
|
| 87 |
|
|
|
|
| 88 |
# DATA LOADING FUNCTIONS
|
|
|
|
|
|
|
| 89 |
def log_dataframe_link(link: str) -> str:
|
| 90 |
"""
|
| 91 |
Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
|
|
@@ -164,11 +170,17 @@ def get_available_dates() -> List[str]:
|
|
| 164 |
common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
|
| 165 |
logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
|
| 166 |
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
except Exception as e:
|
| 170 |
logger.error(f"Error getting available dates: {e}")
|
| 171 |
-
return
|
| 172 |
|
| 173 |
|
| 174 |
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
|
@@ -501,7 +513,8 @@ class CIResults:
|
|
| 501 |
self.sample_data = True
|
| 502 |
new_df, latest_update_msg = get_sample_data()
|
| 503 |
self.latest_update_msg = latest_update_msg
|
| 504 |
-
|
|
|
|
| 505 |
|
| 506 |
# Update attributes
|
| 507 |
self.df = new_df
|
|
@@ -531,8 +544,9 @@ class CIResults:
|
|
| 531 |
"""Load all available historical data at startup."""
|
| 532 |
try:
|
| 533 |
if not self.available_dates:
|
| 534 |
-
|
| 535 |
-
|
|
|
|
| 536 |
|
| 537 |
logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
|
| 538 |
start_date, end_date = self.available_dates[-1], self.available_dates[0]
|
|
|
|
| 57 |
"job_link_nvidia",
|
| 58 |
]
|
| 59 |
|
| 60 |
+
# ============================================================================
|
| 61 |
# HELPER FUNCTIONS
|
| 62 |
+
# ============================================================================
|
| 63 |
+
|
| 64 |
def generate_fake_dates(num_days: int = 7) -> List[str]:
|
| 65 |
"""Generate fake dates for the last N days."""
|
| 66 |
today = datetime.now()
|
|
|
|
| 88 |
"""Safely extract an integer value from a DataFrame row."""
|
| 89 |
return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
|
| 90 |
|
| 91 |
+
# ============================================================================
|
| 92 |
# DATA LOADING FUNCTIONS
|
| 93 |
+
# ============================================================================
|
| 94 |
+
|
| 95 |
def log_dataframe_link(link: str) -> str:
|
| 96 |
"""
|
| 97 |
Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
|
|
|
|
| 170 |
common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
|
| 171 |
logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
|
| 172 |
|
| 173 |
+
if common_dates:
|
| 174 |
+
return common_dates[:30] # Limit to last 30 days
|
| 175 |
+
|
| 176 |
+
# No real dates available - log warning and return empty list
|
| 177 |
+
# This will allow the system to fall back to sample data properly
|
| 178 |
+
logger.warning("No common dates found between AMD and NVIDIA datasets")
|
| 179 |
+
return []
|
| 180 |
|
| 181 |
except Exception as e:
|
| 182 |
logger.error(f"Error getting available dates: {e}")
|
| 183 |
+
return []
|
| 184 |
|
| 185 |
|
| 186 |
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
|
|
|
| 513 |
self.sample_data = True
|
| 514 |
new_df, latest_update_msg = get_sample_data()
|
| 515 |
self.latest_update_msg = latest_update_msg
|
| 516 |
+
# Generate fake dates for sample data historical functionality
|
| 517 |
+
self.available_dates = generate_fake_dates()
|
| 518 |
|
| 519 |
# Update attributes
|
| 520 |
self.df = new_df
|
|
|
|
| 544 |
"""Load all available historical data at startup."""
|
| 545 |
try:
|
| 546 |
if not self.available_dates:
|
| 547 |
+
logger.warning("No available dates found, skipping historical data load")
|
| 548 |
+
self.all_historical_data = pd.DataFrame()
|
| 549 |
+
return
|
| 550 |
|
| 551 |
logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
|
| 552 |
start_date, end_date = self.available_dates[-1], self.available_dates[0]
|