# src/feature.py
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd

from src.utils import logger

# Number of consecutive per-node samples used by the rolling statistics.
_ROLLING_WINDOW = 5


def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Engineer features from raw metrics.

    Rows are ordered by ``node`` and ``timestamp`` and the following columns
    are added, each computed independently within a node:

    * ``cpu_trend``: difference between consecutive ``cpu_usage`` values.
    * ``cpu_rolling_mean``: rolling mean of ``cpu_usage`` (partial windows
      allowed, so the first rows of a node already get a value).
    * ``error_rate_lag1``: previous row's ``rpc_error_rate``.
    * ``latency_rolling_std``: rolling standard deviation of
      ``rpc_latency_ms`` (full windows only).

    Args:
        df (pd.DataFrame): Raw data. Must contain the columns ``timestamp``,
            ``node``, ``cpu_usage``, ``rpc_error_rate`` and
            ``rpc_latency_ms``. The input frame is not modified.

    Returns:
        pd.DataFrame: Sorted copy of the data with the added features.
        Every NaN in the frame (not only in the new columns) is replaced
        by 0.

    Raises:
        KeyError: If a required column is missing.
        Exception: Any other error raised during processing is logged and
            re-raised.
    """
    try:
        # assign() builds a new frame, so the caller's DataFrame keeps its
        # original (unparsed) timestamp column.
        df = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
        df = df.sort_values(["node", "timestamp"])
        grouped = df.groupby("node")

        df["cpu_trend"] = grouped["cpu_usage"].diff()
        df["cpu_rolling_mean"] = grouped["cpu_usage"].transform(
            lambda s: s.rolling(window=_ROLLING_WINDOW, min_periods=1).mean()
        )
        df["error_rate_lag1"] = grouped["rpc_error_rate"].shift(1)
        # No min_periods here: rows without a full window yield NaN and are
        # zero-filled below.
        df["latency_rolling_std"] = grouped["rpc_latency_ms"].transform(
            lambda s: s.rolling(window=_ROLLING_WINDOW).std()
        )

        # Leading rows of each node have no previous sample / full window.
        return df.fillna(0)
    except KeyError as e:
        logger.error(f"Missing Column in Data: {e}")
        raise
    except Exception as e:
        logger.error(f"Error engineering features: {e}")
        # Re-raise so callers never receive an implicit None instead of a
        # DataFrame.
        raise


def main(
    input_path: str = "data/raw/synthetic_rpc_metrics_realistic.csv",
    output_path: str = "data/processed/engineered_metrics.csv",
) -> None:
    """
    Main function to engineer features from raw data.

    Reads the raw CSV, runs ``engineer_features`` on it and writes the result
    (without the index) to ``output_path``. Any failure is logged and not
    propagated to the caller.

    Args:
        input_path (str): Path to raw data CSV.
        output_path (str): Path to save engineered features CSV. Missing
            parent directories are created.
    """
    try:
        df = pd.read_csv(input_path)
        df_engineered = engineer_features(df)
        # to_csv does not create directories; make sure the target exists.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        df_engineered.to_csv(output_path, index=False)
        logger.info(f"Engineered features saved to {output_path}")
    except Exception as e:
        logger.error(f"Error in main function: {e}")


if __name__ == "__main__":
    main()