Upload 3 files
Browse files- pipeline/evaluate_pipeline.py +86 -0
- pipeline/inference_pipeline.py +71 -0
- pipeline/train_pipeline.py +70 -0
pipeline/evaluate_pipeline.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.utils.data import TensorDataset, DataLoader
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import numpy as np
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
| 13 |
+
from models.forecasting.lstm import LSTMForecaster
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def load_test_data(csv_path="../data/processed/merged_features.csv", seq_len=30, test_ratio=0.2):
    """Build a DataLoader of sliding windows over the tail of a feature CSV.

    Only numeric columns are kept and rows containing NaN are dropped. The
    last ``test_ratio`` fraction of the remaining rows is cut into windows of
    ``seq_len`` consecutive rows; the target of each window is the first
    column of the row that immediately follows it.

    Args:
        csv_path: Path to the CSV file. A relative path is resolved against
            the current working directory.
        seq_len: Number of consecutive rows in each input window.
        test_ratio: Fraction of rows, taken from the end, used for testing.

    Returns:
        A tuple ``(loader, input_size)``. ``loader`` is an unshuffled
        DataLoader (batch size 32) over float32 tensors of shape
        ``[n, seq_len, input_size]`` and ``[n, 1]``; ``input_size`` is the
        number of numeric columns.

    Raises:
        ValueError: If there are no numeric columns or the test slice is too
            short to form a single window.
    """
    df = pd.read_csv(csv_path)
    df = df.select_dtypes(include=[np.number]).dropna()
    data = df.values

    split = int(len(data) * (1 - test_ratio))
    test_data = data[split:]

    if test_data.shape[1] == 0:
        raise ValueError(f"No numeric columns found in {csv_path}")

    # A window starting at row i uses rows i .. i+seq_len-1 and targets row
    # i+seq_len, so the last valid start is len(test_data) - seq_len - 1 and
    # there are exactly len(test_data) - seq_len windows.
    n_windows = len(test_data) - seq_len
    if n_windows <= 0:
        raise ValueError(
            f"Test split has {len(test_data)} rows; need more than seq_len={seq_len} "
            "rows to build at least one window"
        )

    windows = np.stack([test_data[i:i + seq_len] for i in range(n_windows)])
    # Target for window i is column 0 of row i + seq_len.
    next_values = test_data[seq_len:, 0]

    X_test = torch.tensor(windows, dtype=torch.float32)
    y_test = torch.tensor(next_values, dtype=torch.float32).unsqueeze(1)

    return DataLoader(TensorDataset(X_test, y_test), batch_size=32), X_test.shape[2]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def evaluate_model(test_loader, model_path="./trained_models/lstm_forecaster.pt", config_path="./trained_models/config.json"):
    """Score a saved LSTMForecaster on a test loader and plot its forecasts.

    The model is rebuilt from the JSON config, its weights are loaded from
    ``model_path``, and every batch of ``test_loader`` is run through it
    without gradients. MSE, MAE and R2 are printed, and an actual-vs-predicted
    line plot is written to ``outputs/evaluation_plot.png`` and then shown.

    Args:
        test_loader: Iterable of ``(X, y)`` tensor batches on the CPU.
        model_path: Path to the saved ``state_dict``.
        config_path: Path to a JSON file with the keys ``input_size``,
            ``hidden_size``, ``num_layers`` and ``output_size``.

    Returns:
        A dict with the float metrics ``"mse"``, ``"mae"`` and ``"r2"``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    with open(config_path, "r") as f:
        config = json.load(f)

    model = LSTMForecaster(
        input_size=config["input_size"],
        hidden_size=config["hidden_size"],
        num_layers=config["num_layers"],
        output_size=config["output_size"],
    )

    # The model and the batches stay on the CPU in this function, so remap
    # the checkpoint to the CPU; this also lets a GPU-saved checkpoint load
    # on a machine without CUDA.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()

    preds, targets = [], []

    with torch.no_grad():
        for X, y in test_loader:
            out = model(X)
            preds.append(out.numpy())
            targets.append(y.numpy())

    if not preds:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    preds = np.concatenate(preds)
    targets = np.concatenate(targets)

    mse = mean_squared_error(targets, preds)
    mae = mean_absolute_error(targets, preds)
    r2 = r2_score(targets, preds)

    print("\nEvaluation Metrics:")
    print(f"MSE: {mse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"R2 Score: {r2:.4f}")

    # The plot path is relative to the current working directory.
    os.makedirs("outputs", exist_ok=True)
    plt.figure(figsize=(12, 5))
    plt.plot(targets, label='Actual', color='blue')
    plt.plot(preds, label='Predicted', color='orange')
    plt.title("LSTM Forecast vs Actual")
    plt.xlabel("Time Step")
    plt.ylabel("Value")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig("outputs/evaluation_plot.png")
    plt.show()

    return {"mse": float(mse), "mae": float(mae), "r2": float(r2)}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
if __name__ == "__main__":
    # Script entry point: build the test loader with the default CSV path and
    # window settings, report the detected feature count, then evaluate the
    # saved model with its default paths.
    test_loader, input_size = load_test_data()
    print(f"Detected input feature size: {input_size}")
    evaluate_model(test_loader)
|
pipeline/inference_pipeline.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import argparse
|
| 10 |
+
from models.forecasting.lstm import LSTMForecaster
|
| 11 |
+
|
| 12 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 13 |
+
|
| 14 |
+
def load_model(model_path="pipeline/trained_models/lstm_forecaster.pt", config_path="pipeline/trained_models/config.json"):
    """Rebuild the LSTMForecaster from its config and load the saved weights.

    Args:
        model_path: Path to the saved ``state_dict``. A relative path is
            resolved against the current working directory.
        config_path: Path to a JSON file with the keys ``input_size``,
            ``hidden_size``, ``num_layers`` and ``output_size``.

    Returns:
        The model, moved to the module-level ``device`` and set to eval mode.

    Raises:
        FileNotFoundError: If the config file or the model file is missing.
    """
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Missing config file: {config_path}")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Missing model file: {model_path}")

    with open(config_path, "r") as f:
        config = json.load(f)

    model = LSTMForecaster(
        input_size=config["input_size"],
        hidden_size=config["hidden_size"],
        num_layers=config["num_layers"],
        output_size=config["output_size"],
    ).to(device)

    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU still loads on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model
|
| 33 |
+
|
| 34 |
+
def predict_batch(input_data, model=None):
    """
    Run the forecaster on a batch of sequences and return flat predictions.

    input_data: list or numpy array of shape [batch_size, seq_len, input_size]
    model: an already loaded model; when omitted, load_model() supplies one
    returns: list of predictions (the model outputs, flattened)
    """
    net = load_model() if model is None else model

    # Lists are converted to an array first; anything else is passed through.
    batch = np.array(input_data) if isinstance(input_data, list) else input_data
    batch_tensor = torch.tensor(batch, dtype=torch.float32).to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        raw = net(batch_tensor)

    flat = raw.cpu().numpy().flatten()
    return flat.tolist()
|
| 51 |
+
|
| 52 |
+
if __name__ == "__main__":
    # Command-line entry point: read a batch from a .npy or .csv file, run it
    # through the saved model and print the predictions.
    parser = argparse.ArgumentParser(description="Batch Inference for LSTM Forecasting")
    parser.add_argument("--input", type=str, required=True, help="Path to .npy or .csv input file")
    args = parser.parse_args()

    if args.input.endswith(".npy"):
        input_data = np.load(args.input)
    elif args.input.endswith(".csv"):
        # Keep numeric columns first and drop incomplete rows second, the same
        # order used by train_pipeline.py and evaluate_pipeline.py, so a NaN in
        # a non-numeric column does not remove a row here that training kept.
        df = pd.read_csv(args.input).select_dtypes(include=[np.number]).dropna()
        input_data = df.values
    else:
        raise ValueError("Input must be a .npy or .csv file")

    # A 2-D array is treated as one [seq_len, input_size] sequence, so add the
    # leading batch axis. CSV data is always 2-D; a .npy file may already be 3-D.
    if input_data.ndim == 2:
        input_data = np.expand_dims(input_data, axis=0)

    model = load_model()
    predictions = predict_batch(input_data, model)
    print("Predictions:", predictions)
|
pipeline/train_pipeline.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
from torch.utils.data import TensorDataset, DataLoader
|
| 11 |
+
from sklearn.model_selection import train_test_split
|
| 12 |
+
from models.forecasting.lstm import LSTMForecaster
|
| 13 |
+
|
| 14 |
+
# Training script: build sliding windows from the merged feature table, fit an
# LSTMForecaster for 10 epochs, then save the weights and the model config.
# All paths are relative to the current working directory.

# Keep numeric columns only and drop rows that contain NaN.
df = pd.read_csv("../data/processed/merged_features.csv")
df = df.select_dtypes(include=[np.number]).dropna()
data = df.values

seq_len = 30

# A window starting at row i uses rows i .. i+seq_len-1 and targets column 0 of
# row i+seq_len, so there are exactly len(data) - seq_len valid windows.
n_windows = len(data) - seq_len
if n_windows <= 0:
    raise ValueError(
        f"Dataset has {len(data)} usable rows; need more than seq_len={seq_len} "
        "rows to build at least one window"
    )

X = np.stack([data[i:i + seq_len] for i in range(n_windows)])
y = data[seq_len:, 0]

X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

# NOTE(review): this split shuffles overlapping windows, so validation windows
# share rows with training windows; a chronological split may be more honest.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=32)

input_size = X.shape[2]
hidden_size = 256
num_layers = 2
output_size = 1

model = LSTMForecaster(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(10):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Train Loss = {avg_loss:.4f}")

    # Report the held-out loss as well; without gradients and in eval mode this
    # does not affect the training updates.
    model.eval()
    val_total = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            val_total += criterion(model(xb), yb).item()
    avg_val_loss = val_total / len(val_loader)
    print(f"Epoch {epoch+1}: Val Loss = {avg_val_loss:.4f}")

os.makedirs("trained_models", exist_ok=True)
torch.save(model.state_dict(), "trained_models/lstm_forecaster.pt")

# The architecture is saved next to the weights so the evaluation and inference
# scripts can rebuild the same model.
config = {
    "input_size": input_size,
    "hidden_size": hidden_size,
    "num_layers": num_layers,
    "output_size": output_size,
}
with open("trained_models/config.json", "w") as f:
    json.dump(config, f)

print("Model trained and saved.")
|