badaoui HF Staff committed on
Commit
646bbcb
·
1 Parent(s): 0f8d3a8

fix data loading

Browse files
Files changed (1) hide show
  1. data.py +19 -5
data.py CHANGED
@@ -57,7 +57,10 @@ KEYS_TO_KEEP = [
57
  "job_link_nvidia",
58
  ]
59
 
 
60
  # HELPER FUNCTIONS
 
 
61
  def generate_fake_dates(num_days: int = 7) -> List[str]:
62
  """Generate fake dates for the last N days."""
63
  today = datetime.now()
@@ -85,7 +88,10 @@ def safe_extract(row: pd.Series, key: str) -> int:
85
  """Safely extract an integer value from a DataFrame row."""
86
  return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
87
 
 
88
  # DATA LOADING FUNCTIONS
 
 
89
  def log_dataframe_link(link: str) -> str:
90
  """
91
  Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
@@ -164,11 +170,17 @@ def get_available_dates() -> List[str]:
164
  common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
165
  logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
166
 
167
- return common_dates[:30] if common_dates else generate_fake_dates()
 
 
 
 
 
 
168
 
169
  except Exception as e:
170
  logger.error(f"Error getting available dates: {e}")
171
- return generate_fake_dates()
172
 
173
 
174
  def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
@@ -501,7 +513,8 @@ class CIResults:
501
  self.sample_data = True
502
  new_df, latest_update_msg = get_sample_data()
503
  self.latest_update_msg = latest_update_msg
504
- self.available_dates = None
 
505
 
506
  # Update attributes
507
  self.df = new_df
@@ -531,8 +544,9 @@ class CIResults:
531
  """Load all available historical data at startup."""
532
  try:
533
  if not self.available_dates:
534
- self.available_dates = generate_fake_dates()
535
- logger.info(f"No available dates found, generated {len(self.available_dates)} sample dates.")
 
536
 
537
  logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
538
  start_date, end_date = self.available_dates[-1], self.available_dates[0]
 
57
  "job_link_nvidia",
58
  ]
59
 
60
+ # ============================================================================
61
  # HELPER FUNCTIONS
62
+ # ============================================================================
63
+
64
  def generate_fake_dates(num_days: int = 7) -> List[str]:
65
  """Generate fake dates for the last N days."""
66
  today = datetime.now()
 
88
  """Safely extract an integer value from a DataFrame row."""
89
  return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
90
 
91
+ # ============================================================================
92
  # DATA LOADING FUNCTIONS
93
+ # ============================================================================
94
+
95
  def log_dataframe_link(link: str) -> str:
96
  """
97
  Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
 
170
  common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
171
  logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
172
 
173
+ if common_dates:
174
+ return common_dates[:30] # Limit to last 30 days
175
+
176
+ # No real dates available - log warning and return empty list
177
+ # This will allow the system to fall back to sample data properly
178
+ logger.warning("No common dates found between AMD and NVIDIA datasets")
179
+ return []
180
 
181
  except Exception as e:
182
  logger.error(f"Error getting available dates: {e}")
183
+ return []
184
 
185
 
186
  def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
 
513
  self.sample_data = True
514
  new_df, latest_update_msg = get_sample_data()
515
  self.latest_update_msg = latest_update_msg
516
+ # Generate fake dates for sample data historical functionality
517
+ self.available_dates = generate_fake_dates()
518
 
519
  # Update attributes
520
  self.df = new_df
 
544
  """Load all available historical data at startup."""
545
  try:
546
  if not self.available_dates:
547
+ logger.warning("No available dates found, skipping historical data load")
548
+ self.all_historical_data = pd.DataFrame()
549
+ return
550
 
551
  logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
552
  start_date, end_date = self.available_dates[-1], self.available_dates[0]