Seyomi committed on
Commit
0b0933a
·
verified ·
1 Parent(s): 6bb5fac

Upload 3 files

pipeline/evaluate_pipeline.py ADDED
@@ -0,0 +1,91 @@
+ import sys
+ import os
+ import json
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import TensorDataset, DataLoader
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from models.forecasting.lstm import LSTMForecaster
+
+
+ def load_test_data(csv_path="../data/processed/merged_features.csv", seq_len=30, test_ratio=0.2):
+     """Build a test DataLoader from the last test_ratio portion of the processed features."""
+     df = pd.read_csv(csv_path)
+     df = df.select_dtypes(include=[np.number]).dropna()
+     data = df.values
+
+     split = int(len(data) * (1 - test_ratio))
+     test_data = data[split:]
+
+     # Sliding windows: each sample is seq_len rows; the target is the first column of the next row.
+     X_test, y_test = [], []
+     for i in range(len(test_data) - seq_len - 1):
+         X_test.append(test_data[i:i+seq_len])
+         y_test.append(test_data[i+seq_len][0])
+
+     X_test = torch.tensor(np.array(X_test), dtype=torch.float32)
+     y_test = torch.tensor(np.array(y_test), dtype=torch.float32).unsqueeze(1)
+
+     return DataLoader(TensorDataset(X_test, y_test), batch_size=32), X_test.shape[2]
+
+
+ def evaluate_model(test_loader, model_path="./trained_models/lstm_forecaster.pt", config_path="./trained_models/config.json"):
+     with open(config_path, "r") as f:
+         config = json.load(f)
+
+     # Rebuild the architecture from the saved config, then load the trained weights.
+     model = LSTMForecaster(
+         input_size=config["input_size"],
+         hidden_size=config["hidden_size"],
+         num_layers=config["num_layers"],
+         output_size=config["output_size"]
+     )
+
+     # map_location lets a checkpoint trained on GPU load on a CPU-only machine.
+     model.load_state_dict(torch.load(model_path, map_location="cpu"))
+     model.eval()
+
+     preds, targets = [], []
+
+     with torch.no_grad():
+         for X, y in test_loader:
+             out = model(X)
+             preds.append(out.numpy())
+             targets.append(y.numpy())
+
+     preds = np.concatenate(preds)
+     targets = np.concatenate(targets)
+
+     mse = mean_squared_error(targets, preds)
+     mae = mean_absolute_error(targets, preds)
+     r2 = r2_score(targets, preds)
+
+     print("\n📊 Evaluation Metrics:")
+     print(f"➡️ MSE: {mse:.4f}")
+     print(f"➡️ MAE: {mae:.4f}")
+     print(f"➡️ R2 Score: {r2:.4f}")
+
+     # Plot predictions against actual values and save the figure next to the metrics.
+     os.makedirs("outputs", exist_ok=True)
+     plt.figure(figsize=(12, 5))
+     plt.plot(targets, label='Actual', color='blue')
+     plt.plot(preds, label='Predicted', color='orange')
+     plt.title("📈 LSTM Forecast vs Actual")
+     plt.xlabel("Time Step")
+     plt.ylabel("Value")
+     plt.legend()
+     plt.grid(True)
+     plt.tight_layout()
+     plt.savefig("outputs/evaluation_plot.png")
+     plt.show()
+
+
+ if __name__ == "__main__":
+     test_loader, input_size = load_test_data()
+     print(f"🧪 Detected input feature size: {input_size}")
+     evaluate_model(test_loader)
pipeline/inference_pipeline.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import sys
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ import json
+ import torch
+ import numpy as np
+ import pandas as pd
+ import argparse
+ from models.forecasting.lstm import LSTMForecaster
+
+ # Use the GPU when available; the model and input tensors are moved to this device.
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ def load_model(model_path="pipeline/trained_models/lstm_forecaster.pt", config_path="pipeline/trained_models/config.json"):
+     if not os.path.exists(config_path):
+         raise FileNotFoundError(f"❌ Missing config file: {config_path}")
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"❌ Missing model file: {model_path}")
+
+     with open(config_path, "r") as f:
+         config = json.load(f)
+
+     model = LSTMForecaster(
+         input_size=config["input_size"],
+         hidden_size=config["hidden_size"],
+         num_layers=config["num_layers"],
+         output_size=config["output_size"]
+     ).to(device)
+
+     model.load_state_dict(torch.load(model_path, map_location=device))
+     model.eval()
+     return model
+
+ def predict_batch(input_data, model=None):
+     """
+     input_data: numpy array of shape [batch_size, seq_len, input_size]
+     returns: list of predictions
+     """
+     if model is None:
+         model = load_model()
+
+     if isinstance(input_data, list):
+         input_data = np.array(input_data)
+
+     input_tensor = torch.tensor(input_data, dtype=torch.float32).to(device)
+
+     with torch.no_grad():
+         outputs = model(input_tensor)
+
+     return outputs.cpu().numpy().flatten().tolist()
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Batch Inference for LSTM Forecasting")
+     parser.add_argument("--input", type=str, required=True, help="Path to .npy or .csv input file")
+     args = parser.parse_args()
+
+     if args.input.endswith(".npy"):
+         input_data = np.load(args.input)
+     elif args.input.endswith(".csv"):
+         df = pd.read_csv(args.input).dropna().select_dtypes(include=[np.number])
+         data = df.values
+         # Treat a 2-D CSV (rows x features) as a single sequence by adding a batch dimension.
+         if len(data.shape) == 2:
+             input_data = np.expand_dims(data, axis=0)
+         else:
+             input_data = data
+     else:
+         raise ValueError("Input must be a .npy or .csv file")
+
+     model = load_model()
+     predictions = predict_batch(input_data, model)
+     print("📈 Predictions:", predictions)
pipeline/train_pipeline.py ADDED
@@ -0,0 +1,72 @@
+ import sys
+ import os
+ import json
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ import torch
+ import torch.nn as nn
+ import pandas as pd
+ import numpy as np
+ from torch.utils.data import TensorDataset, DataLoader
+ from sklearn.model_selection import train_test_split
+ from models.forecasting.lstm import LSTMForecaster
+
+ df = pd.read_csv("../data/processed/merged_features.csv")
+ df = df.select_dtypes(include=[np.number]).dropna()
+ data = df.values
+
+ seq_len = 30
+ # Sliding windows: each sample is seq_len rows; the target is the first column of the next row.
+ X, y = [], []
+ for i in range(len(data) - seq_len - 1):
+     X.append(data[i:i+seq_len])
+     y.append(data[i+seq_len][0])
+
+ X = torch.tensor(np.array(X), dtype=torch.float32)
+ y = torch.tensor(np.array(y), dtype=torch.float32).unsqueeze(1)
+
+ X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
+ train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
+ val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=32)
+
+ input_size = X.shape[2]
+ hidden_size = 256
+ num_layers = 2
+ output_size = 1
+
+ model = LSTMForecaster(
+     input_size=input_size,
+     hidden_size=hidden_size,
+     num_layers=num_layers,
+     output_size=output_size
+ )
+
+ criterion = nn.MSELoss()
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+ for epoch in range(10):
+     model.train()
+     total_loss = 0
+     for xb, yb in train_loader:
+         optimizer.zero_grad()
+         loss = criterion(model(xb), yb)
+         loss.backward()
+         optimizer.step()
+         total_loss += loss.item()
+     avg_loss = total_loss / len(train_loader)
+     print(f"Epoch {epoch+1}: Train Loss = {avg_loss:.4f}")
+
+ # Save the weights plus the architecture config so evaluation/inference can rebuild the model.
+ os.makedirs("trained_models", exist_ok=True)
+ torch.save(model.state_dict(), "trained_models/lstm_forecaster.pt")
+
+ config = {
+     "input_size": input_size,
+     "hidden_size": hidden_size,
+     "num_layers": num_layers,
+     "output_size": output_size
+ }
+ with open("trained_models/config.json", "w") as f:
+     json.dump(config, f)
+
+ print("✅ Model trained and saved.")