Spaces:
Configuration error
Configuration error
| from io import BytesIO | |
| import streamlit as st | |
| import pandas as pd | |
| from scipy.cluster.hierarchy import linkage | |
| from datetime import date | |
| from dateutil.relativedelta import relativedelta | |
| import plotly.figure_factory as ff | |
| from fastdtw import fastdtw | |
| from scipy.spatial.distance import euclidean | |
| import numpy as np | |
| from modules import tables | |
| def data_request(country_to_request, start, currency='USD', end=date.today()): | |
| data = tables.EquityMaster(country=country_to_request, field='IQ_CLOSEPRICE_ADJ', currency=currency).query(rename=['asset'], | |
| start=start, end=str(end)) | |
| adtv = tables.EquityMaster(country=country_to_request, field='IQ_VALUE_TRADED', currency=currency).query(rename=['asset'], | |
| start=start, end=str(end)).median() | |
| marketcap = tables.EquityMaster(country=country_to_request, field='IQ_MARKETCAP', currency=currency).query(rename=['asset'], | |
| start=start, end=str(end)).median() | |
| return data, adtv, marketcap | |
| def data_filter(data, adtv, marketcap, adtv_threshold, mktcap_threshold, p): | |
| adtv_filter = (adtv >= adtv_threshold) | |
| adtv = adtv.loc[adtv_filter] | |
| marketcap_filter = (marketcap >= mktcap_threshold) | |
| marketcap = marketcap.loc[marketcap_filter] | |
| data = data.loc[:, data.columns.isin(adtv.index)] | |
| data = data.loc[:, data.columns.isin(marketcap.index)] | |
| file_to_read = 'Data/Company_Base_Definitivo.xlsx' | |
| company_base = pd.read_excel(file_to_read, sheet_name='Compilado') | |
| id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_base.iterrows()} | |
| data = data.loc[:, data.columns.isin(id_to_ticker.keys())] | |
| data.columns = [id_to_ticker[col] for col in data.columns] | |
| if isinstance(p, str): | |
| returns_final = data.resample(p).last().pct_change().fillna(0) | |
| else: | |
| returns_final = data.iloc[::p].pct_change().fillna(0) | |
| return returns_final | |
| def dist(correlation): | |
| return ((1-correlation)/2.)**.5 | |
| def to_excel(df_to_write): | |
| output = BytesIO() | |
| writer = pd.ExcelWriter(output, engine='xlsxwriter') | |
| df_to_write.to_excel(writer, index=False, sheet_name='Sheet1') | |
| workbook = writer.book | |
| worksheet = writer.sheets['Sheet1'] | |
| format1 = workbook.add_format({'num_format': '0.00'}) | |
| worksheet.set_column('A:A', None, format1) | |
| writer.save() | |
| processed_data = output.getvalue() | |
| return processed_data | |
| def get_dtw_distance(x, y): | |
| distance_dtw = fastdtw(x, y, dist=euclidean)[0] | |
| return distance_dtw | |
| def clustering_basado_en_correlacion(): | |
| form = st.form("Correlation Clustering") | |
| posible_countries = ('Todos', 'Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
| countries = form.multiselect('¿Qué países desea visualizar?', posible_countries) | |
| if 'Todos' in countries: | |
| countries = ('Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
| adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., format="%.2f") | |
| mktcap_thresh = form.number_input('Ingrese el mínimo Market Cap que desea considerar', value=200., format="%.2f") | |
| start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año')) | |
| period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1) | |
| accept = form.form_submit_button('Aceptar') | |
| if accept: | |
| start_date = str(date.today() - relativedelta(months=int(start_date[0]))) | |
| for country in countries: | |
| data_, adtv_, marketcap_ = data_request(country, start_date) | |
| # Filtramos para que se cumplan los filtros del usuario en los datos | |
| returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period) | |
| # Normalizamos | |
| base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1) | |
| base = base.sort_index(axis=1) | |
| # Procedemos a calcular correlación y covarianza | |
| corr, covs = base.corr(), base.cov() | |
| file = to_excel(corr) | |
| # Definimos la matriz de distancia | |
| dist_matrix = dist(corr) | |
| hierarchy = linkage(dist_matrix) | |
| ct = 0.54 * max(hierarchy[:, 2]) | |
| fig = ff.create_dendrogram(dist_matrix, orientation='left', labels=list(base.columns), | |
| color_threshold=ct, linkagefun=linkage) | |
| fig.update_layout(title='{} desde {} hasta {}'.format(country, | |
| returns.index[0].date(), | |
| returns.index[-1].date())) | |
| if country == 'Brazil': | |
| fig.update_layout(height=2000) | |
| else: | |
| fig.update_layout(height=900) | |
| st.plotly_chart(fig, use_container_width=True) | |
| st.download_button(label='Descargar Matriz de Correlación para {}'.format(country), data=file, | |
| file_name='{}_correlacion.xlsx'.format(country)) | |
| def clustering_con_dtw(): | |
| form = st.form("Dynamic Time Warping Clustering") | |
| posible_countries = ('Todos', 'Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
| countries = form.multiselect('¿Qué países desea visualizar?', posible_countries) | |
| if 'Todos' in countries: | |
| countries = ('Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
| adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., | |
| format="%.2f") | |
| mktcap_thresh = form.number_input('Ingrese el Mínimo Market Cap que desea considerar', value=200., format="%.2f") | |
| start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año')) | |
| period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1) | |
| accept = form.form_submit_button('Aceptar') | |
| if accept: | |
| start_date = str(date.today() - relativedelta(months=int(start_date[0]))) | |
| for country in countries: | |
| data_, adtv_, marketcap_ = data_request(country, start_date) | |
| # Filtramos para que se cumplan los filtros del usuario en los datos | |
| returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period) | |
| # Normalizamos returns | |
| base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1) | |
| base = base.sort_index(axis=1) | |
| # Procedemos a calcular correlación y covarianza | |
| N = len(base[:base.index[0]].T) | |
| # Creamos la Matriz de Distancias para DTW | |
| Dist = np.zeros((N, N)) | |
| place = st.empty() | |
| for i in range(N): | |
| place.write("Cargando: " + str(round(i*100/N)) + " %") | |
| for j in range(i - 1, N): | |
| company_1 = base.columns[i] | |
| company_2 = base.columns[j] | |
| Dist[i, j] = get_dtw_distance(base[company_1], base[company_2]) | |
| # La matriz es simétrica | |
| Dist[j, i] = Dist[i, j] | |
| # Creamos un DataFrame con la matriz de distancias | |
| df = pd.DataFrame(Dist) | |
| df.index = base.columns | |
| df.columns = base.columns | |
| # Pasamos el df a excel para descarga del usuario | |
| file = to_excel(df) | |
| hierarchy = linkage(Dist) | |
| ct = 0.54 * max(hierarchy[:, 2]) | |
| fig = ff.create_dendrogram(Dist, orientation='left', labels=list(base.columns), | |
| color_threshold=ct, linkagefun=linkage) | |
| fig.update_layout( | |
| title='{} desde {} hasta {}'.format(country, returns.index[0].date(), returns.index[-1].date())) | |
| if country == 'Brazil': | |
| fig.update_layout(height=2000) | |
| else: | |
| fig.update_layout(height=900) | |
| st.plotly_chart(fig, use_container_width=True) | |
| st.download_button(label='Descargar Matriz de distancias con DTW para {}'.format(country), data=file, | |
| file_name='{}_correlacion.xlsx'.format(country)) | |