Spaces:
Runtime error
Runtime error
| """ | |
| Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html | |
| """ | |
| import sys | |
| import numpy as np | |
| import pandas as pd | |
# Ticker symbol -> display name for every stock used by the demo.
# The display names are shown verbatim in the UI and on the plot labels.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}

# Sort the (symbol, name) pairs by ticker, then split the resulting
# two-column array into two aligned 1-D arrays.
symbols, names = np.asarray(sorted(symbol_dict.items())).transpose()
# Template for the per-symbol CSV files; the ticker is substituted for "{}".
url = (
    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
    "master/financial-data/{}.csv"
)

# Download one quote table per symbol, reporting progress on stderr.
quotes = []
for symbol in symbols:
    print(f"Fetching quote history for {symbol!r}", file=sys.stderr)
    quotes.append(pd.read_csv(url.format(symbol)))

# Stack the "close" and "open" columns so each row belongs to one symbol.
close_prices = np.vstack([frame["close"] for frame in quotes])
open_prices = np.vstack([frame["open"] for frame in quotes])

# The daily variations of the quotes are what carry the most information.
variation = close_prices - open_prices
from sklearn import covariance

# Standardize the time series: using correlations rather than covariance
# is more efficient for structure recovery.
X = variation.copy().T
X /= X.std(axis=0)

# Candidate regularization strengths tried by the cross-validated estimator.
alphas = np.logspace(-1.5, 1, num=10)

# fit() returns the estimator itself, so construction and fitting are chained.
edge_model = covariance.GraphicalLassoCV(alphas=alphas).fit(X)
from sklearn import cluster
from sklearn import manifold

# Cluster the stocks with affinity propagation on the estimated covariance.
# Only the per-stock labels (second return value) are used afterwards.
_, labels = cluster.affinity_propagation(
    edge_model.covariance_,
    random_state=0,
)
# NOTE: this is the largest label value, not a count; code that iterates
# over the clusters uses ``range(n_labels + 1)``.
n_labels = labels.max()

# Find a low-dimensional embedding for visualization: the best position of
# the nodes (the stocks) on a 2D plane.
node_position_model = manifold.LocallyLinearEmbedding(
    n_components=2,
    n_neighbors=6,
    eigen_solver="dense",
)
# Transposed so that row 0 and row 1 hold the two coordinates of every stock.
embedding = node_position_model.fit_transform(X.T).T
| import matplotlib.pyplot as plt | |
| from matplotlib.collections import LineCollection | |
def visualize_stocks():
    """Draw the stock-market structure graph and return the figure.

    The function reads module-level state prepared earlier in the script:
    ``edge_model`` (its ``precision_`` matrix), ``embedding`` (row 0 is used
    as x, row 1 as y), ``labels`` / ``n_labels`` (cluster colouring) and
    ``names`` (text shown next to each node).

    Returns:
        The matplotlib figure numbered 1, cleared and redrawn on every call.
    """
    fig = plt.figure(1, facecolor="w", figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0.0, 0.0, 1.0, 1.0])
    plt.axis("off")

    # Plot the graph of partial correlations: rescale the precision matrix
    # by the inverse square root of its diagonal along both axes.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # Keep only the strict upper triangle so every edge is counted once,
    # and drop links weaker than the 0.02 threshold.
    non_zero = np.abs(np.triu(partial_correlations, k=1)) > 0.02

    # Plot the nodes using the coordinates of our embedding.
    plt.scatter(
        embedding[0], embedding[1], s=100 * d**2, c=labels, cmap=plt.cm.nipy_spectral
    )

    # Plot the edges.
    start_idx, end_idx = np.where(non_zero)
    values = np.abs(partial_correlations[non_zero])
    # values.max() raises on an empty array, so skip edge drawing entirely
    # when no link passes the threshold.
    if values.size:
        # A sequence of lines, each one ((x0, y0), (x1, y1)).
        segments = [
            [embedding[:, start], embedding[:, stop]]
            for start, stop in zip(start_idx, end_idx)
        ]
        lc = LineCollection(
            segments,
            zorder=0,
            cmap=plt.cm.hot_r,
            norm=plt.Normalize(0, 0.7 * values.max()),
        )
        lc.set_array(values)
        lc.set_linewidths(15 * values)
        ax.add_collection(lc)

    # With a single cluster n_labels is 0; fall back to 1.0 so the colour
    # lookup below does not divide by zero.
    color_scale = float(n_labels) if n_labels else 1.0

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(zip(names, labels, embedding.T)):
        # Offsets from this node to every node; the node's own entry is set
        # to 1 so it is never selected as its own nearest neighbour.
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        # Horizontal offset to the node closest in y, and vice versa.
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = "left"
            x = x + 0.002
        else:
            horizontalalignment = "right"
            x = x - 0.002
        if this_dy > 0:
            verticalalignment = "bottom"
            y = y + 0.002
        else:
            verticalalignment = "top"
            y = y - 0.002
        plt.text(
            x,
            y,
            name,
            size=10,
            horizontalalignment=horizontalalignment,
            verticalalignment=verticalalignment,
            bbox=dict(
                facecolor="w",
                edgecolor=plt.cm.nipy_spectral(label / color_scale),
                alpha=0.6,
            ),
        )

    # np.ptp() is used instead of the ndarray.ptp() method, which was removed
    # in NumPy 2.0 and raises AttributeError there.
    x_span = np.ptp(embedding[0])
    y_span = np.ptp(embedding[1])
    plt.xlim(
        embedding[0].min() - 0.15 * x_span,
        embedding[0].max() + 0.10 * x_span,
    )
    plt.ylim(
        embedding[1].min() - 0.03 * y_span,
        embedding[1].max() + 0.03 * y_span,
    )
    return fig
import gradio as gr

title = " π Visualizing the stock market structure π"

# Build the page top to bottom: heading, description, cluster listing,
# then the button that renders the plot.
with gr.Blocks(title=title) as demo:
    gr.Markdown("# " + title)
    gr.Markdown(" Data is of 56 stocks between the period of 2003 - 2008 <br>")
    gr.Markdown(" Stocks the move in together with each other are grouped together in a cluster <br>")
    gr.Markdown(" **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)**")

    # One line per cluster, listing the names assigned to that label.
    for i in range(n_labels + 1):
        members = ", ".join(names[labels == i])
        gr.Markdown(f"Cluster {i + 1}: {members}")

    btn = gr.Button(value="Visualize")
    # The plot component is created after the button so it renders below it.
    plot = gr.Plot(label="Visualizing stock into clusters")
    btn.click(visualize_stocks, outputs=plot)
    gr.Markdown("## In progress")

demo.launch()