File size: 8,049 Bytes
d9f5c15 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
"""
Demo Data for Portfolio Optimization
This module provides sample stock data for the portfolio optimization quickstart.
The data includes a SMALL set of 25 stocks across 4 sectors and a LARGE set of
51 stocks across 5 sectors, each stock carrying an ML-predicted return.
In a real application, these predictions would come from an ML model trained
on historical stock data. For this quickstart, we use hardcoded realistic values.
FINANCE CONCEPTS:
- predicted_return: Expected percentage gain (0.12 = 12% expected return)
- sector: Industry classification for diversification
- Equal weight: Each selected stock gets 100%/20 = 5% of the portfolio
"""
from enum import Enum
from dataclasses import dataclass
from .domain import StockSelection, PortfolioOptimizationPlan, PortfolioConfig
class DemoData(Enum):
    """Available demo datasets.

    Each member's value is its own name as a string. Members are used as
    keys into ``demo_data_configs`` and select which stock list
    ``generate_demo_data`` loads.
    """

    SMALL = 'SMALL'  # 25 candidate stocks in 4 sectors - good for learning
    LARGE = 'LARGE'  # 51 candidate stocks in 5 sectors - more realistic
@dataclass
class DemoDataConfig:
    """Solver targets that accompany one demo dataset.

    ``generate_demo_data`` reads both fields to build the constraint
    settings of the plan it returns.
    """

    # Number of stocks the finished portfolio should hold.
    target_position_count: int
    # Largest share of those positions a single sector may take,
    # expressed as a fraction (0.25 = 25%).
    max_sector_percentage: float
# Solver targets for each demo dataset, keyed by DemoData member.
# Both datasets currently share the same targets: 20 positions with a 25%
# sector cap, which generate_demo_data turns into at most 5 stocks per sector.
demo_data_configs = {
    DemoData.SMALL: DemoDataConfig(
        target_position_count=20,
        max_sector_percentage=0.25,
    ),
    DemoData.LARGE: DemoDataConfig(
        target_position_count=20,
        max_sector_percentage=0.25,
    ),
}
# Stock data with realistic ML predictions
# Format: (ticker, name, sector, predicted_return)
#
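# SMALL dataset: 25 stocks, of which the solver must select 20.
# This is FEASIBLE: the sector cap allows 5 stocks per sector, and each of the
# 4 sectors offers at least 6 candidates, so all 5 * 4 = 20 positions can be filled.
# The extra candidates in each sector give the solver a real choice to make.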
# Candidate stocks for DemoData.SMALL: 25 tuples of
# (ticker, name, sector, predicted_return) spread over 4 sectors.
# predicted_return is a fraction (0.12 = 12% expected return).
SMALL_DATASET_STOCKS = [
    # TECHNOLOGY (7 stocks) - typically higher predicted returns
    # Sector cap is 5 (25% of 20 positions), so at most 5 of these 7 can be selected
    ("AAPL", "Apple Inc.", "Technology", 0.12),
    ("GOOGL", "Alphabet (Google)", "Technology", 0.15),
    ("MSFT", "Microsoft Corp.", "Technology", 0.10),
    ("NVDA", "NVIDIA Corp.", "Technology", 0.18),
    ("META", "Meta Platforms", "Technology", 0.08),
    ("TSLA", "Tesla Inc.", "Technology", 0.20),
    ("AMD", "AMD Inc.", "Technology", 0.14),
    # HEALTHCARE (6 stocks) - moderate returns
    # Sector cap is 5, so at most 5 of these 6 can be selected
    ("JNJ", "Johnson & Johnson", "Healthcare", 0.09),
    ("UNH", "UnitedHealth Group", "Healthcare", 0.11),
    ("PFE", "Pfizer Inc.", "Healthcare", 0.07),
    ("ABBV", "AbbVie Inc.", "Healthcare", 0.10),
    ("TMO", "Thermo Fisher", "Healthcare", 0.13),
    ("DHR", "Danaher Corp.", "Healthcare", 0.12),
    # FINANCE (6 stocks) - stable returns
    # Sector cap is 5, so at most 5 of these 6 can be selected
    ("JPM", "JPMorgan Chase", "Finance", 0.08),
    ("BAC", "Bank of America", "Finance", 0.06),
    ("WFC", "Wells Fargo", "Finance", 0.07),
    ("GS", "Goldman Sachs", "Finance", 0.09),
    ("MS", "Morgan Stanley", "Finance", 0.08),
    ("C", "Citigroup", "Finance", 0.05),
    # ENERGY (6 stocks) - variable returns
    # Sector cap is 5, so at most 5 of these 6 can be selected
    ("XOM", "Exxon Mobil", "Energy", 0.04),
    ("CVX", "Chevron Corp.", "Energy", 0.05),
    ("COP", "ConocoPhillips", "Energy", 0.06),
    ("SLB", "Schlumberger", "Energy", 0.03),
    ("EOG", "EOG Resources", "Energy", 0.07),
    ("PXD", "Pioneer Natural", "Energy", 0.08),
]
# Candidate stocks for DemoData.LARGE: every SMALL stock plus 26 more,
# including a fifth sector (Consumer). List concatenation builds a new list,
# so SMALL_DATASET_STOCKS itself is left unchanged.
LARGE_DATASET_STOCKS = SMALL_DATASET_STOCKS + [
    # Additional TECHNOLOGY (6 more -> 13 total)
    ("CRM", "Salesforce", "Technology", 0.11),
    ("ADBE", "Adobe Inc.", "Technology", 0.09),
    ("ORCL", "Oracle Corp.", "Technology", 0.07),
    ("CSCO", "Cisco Systems", "Technology", 0.06),
    ("IBM", "IBM Corp.", "Technology", 0.04),
    ("QCOM", "Qualcomm", "Technology", 0.13),
    # Additional HEALTHCARE (6 more -> 12 total)
    ("MRK", "Merck & Co.", "Healthcare", 0.08),
    ("LLY", "Eli Lilly", "Healthcare", 0.16),
    ("BMY", "Bristol-Myers", "Healthcare", 0.06),
    ("AMGN", "Amgen Inc.", "Healthcare", 0.09),
    ("GILD", "Gilead Sciences", "Healthcare", 0.05),
    ("ISRG", "Intuitive Surgical", "Healthcare", 0.14),
    # Additional FINANCE (4 more -> 10 total, no duplicates)
    ("AXP", "American Express", "Finance", 0.10),
    ("BLK", "BlackRock", "Finance", 0.11),
    ("SCHW", "Charles Schwab", "Finance", 0.07),
    ("USB", "U.S. Bancorp", "Finance", 0.04),
    # Additional ENERGY (2 more -> 8 total, no duplicates)
    ("OXY", "Occidental Petroleum", "Energy", 0.06),
    ("HAL", "Halliburton", "Energy", 0.05),
    # CONSUMER (new sector - 8 stocks)
    ("AMZN", "Amazon.com", "Consumer", 0.14),
    ("WMT", "Walmart", "Consumer", 0.06),
    ("HD", "Home Depot", "Consumer", 0.08),
    ("MCD", "McDonald's", "Consumer", 0.07),
    ("NKE", "Nike Inc.", "Consumer", 0.09),
    ("SBUX", "Starbucks", "Consumer", 0.05),
    ("PG", "Procter & Gamble", "Consumer", 0.04),
    ("KO", "Coca-Cola", "Consumer", 0.05),
]
# LARGE total: 25 + 6 + 6 + 4 + 2 + 8 = 51 stocks
def generate_demo_data(demo_data: DemoData) -> PortfolioOptimizationPlan:
    """
    Generate demo data for portfolio optimization.

    Args:
        demo_data: Which demo dataset to generate (SMALL or LARGE)

    Returns:
        PortfolioOptimizationPlan with candidate stocks (all unselected initially)

    Raises:
        KeyError: If ``demo_data`` has no entry in ``demo_data_configs``.

    Example:
        >>> plan = generate_demo_data(DemoData.SMALL)
        >>> len(plan.stocks)
        25
        >>> plan.stocks[0].stock_id
        'AAPL'
    """
    config = demo_data_configs[demo_data]
    stock_data = SMALL_DATASET_STOCKS if demo_data == DemoData.SMALL else LARGE_DATASET_STOCKS

    stocks = [
        StockSelection(
            stock_id=ticker,
            stock_name=name,
            sector=sector,
            predicted_return=predicted_return,
            selection=None,  # To be decided by solver
        )
        for ticker, name, sector, predicted_return in stock_data
    ]

    # Calculate max_per_sector from percentage.
    # Round away binary floating-point noise before truncating: 0.57 * 100
    # evaluates to 56.99999999999999, which int() alone would cut down to 56.
    target_count = config.target_position_count
    raw_sector_cap = round(config.max_sector_percentage * target_count, 9)
    max_per_sector = max(1, int(raw_sector_cap))  # never allow a cap of zero

    # Create PortfolioConfig for constraints to access
    portfolio_config = PortfolioConfig(
        target_count=target_count,
        max_per_sector=max_per_sector,
        unselected_penalty=10000,
    )

    return PortfolioOptimizationPlan(
        stocks=stocks,
        target_position_count=target_count,
        max_sector_percentage=config.max_sector_percentage,
        portfolio_config=portfolio_config,
    )
def get_stock_summary(plan: PortfolioOptimizationPlan) -> str:
    """
    Build a plain-text report describing the plan's current selection.

    The report has a banner, the number of selected stocks with their
    per-stock weight and the expected return, a per-sector breakdown
    (sectors in sorted order, stocks by descending predicted return),
    and finally the plan's score. Intended for debugging and for reading
    a solution at a glance.

    Args:
        plan: Plan to summarize. Its ``get_selected_stocks()``,
            ``get_weight_per_stock()``, ``get_expected_return()`` and
            ``score`` are read.

    Returns:
        The report as one newline-joined string. When nothing is selected
        the report is just the banner plus a "No stocks selected yet." line.
    """
    rule = "=" * 60
    report = [rule, "PORTFOLIO SUMMARY", rule]

    picked = plan.get_selected_stocks()
    if not picked:
        report.append("No stocks selected yet.")
        return "\n".join(report)

    per_stock_weight = plan.get_weight_per_stock()
    portfolio_return = plan.get_expected_return()
    report += [
        f"Selected: {len(picked)} stocks @ {per_stock_weight*100:.1f}% each",
        f"Expected Return: {portfolio_return*100:.2f}%",
        "",
    ]

    # Bucket the selected stocks by sector, keeping their incoming order
    by_sector: dict[str, list[StockSelection]] = {}
    for pick in picked:
        by_sector.setdefault(pick.sector, []).append(pick)

    report.append("BY SECTOR:")
    for sector_name in sorted(by_sector):
        members = by_sector[sector_name]
        sector_share = len(members) * per_stock_weight * 100
        report.append(f" {sector_name}: {len(members)} stocks = {sector_share:.1f}%")
        # Highest predicted return first; ties keep their incoming order
        ranked = sorted(members, key=lambda item: item.predicted_return, reverse=True)
        report.extend(
            f" - {item.stock_id}: {item.stock_name} ({item.predicted_return*100:.1f}% pred)"
            for item in ranked
        )

    report += ["", f"Score: {plan.score}", rule]
    return "\n".join(report)
|