Spaces:
Configuration error
Configuration error
| # Imports | |
| import pandas as pd | |
| import streamlit as st | |
| import pybase64 as base64 | |
| # from modules.tables import RealEstateMaste | |
| from sqlalchemy import create_engine | |
| import plotly.express as px | |
| import io | |
| import urllib.request | |
| import bs4 as bs | |
| import scipy.stats as stats | |
| import plotly.figure_factory as ff | |
| import scipy.stats as stats | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| from datetime import date | |
| from datetime import timedelta | |
| from Data.credentials import credentials_postgresql as credpost | |
| def formatnum(numero): | |
| ''' | |
| Esta función permite dar formato a los montos de saldo y valor cuota en | |
| las cartolas. | |
| ''' | |
| return '{:,.0f}'.format(numero).replace(",", "@").replace(".", ",").replace("@", ".") | |
| def get_UF(): | |
| link = "https://valoruf.cl/" | |
| req = urllib.request.Request(link) | |
| res = urllib.request.urlopen(req) | |
| resData = res.read() | |
| soup = bs.BeautifulSoup(resData) | |
| uf = soup.find(class_="vpr").contents[0] | |
| uf_value = uf.split(' ')[1] | |
| uf_value = uf_value.replace('.', '') | |
| uf_value = uf_value.replace(',', '.') | |
| return uf_value | |
| def button_style(): | |
| style_button = """ | |
| <style> | |
| button { | |
| display: inline-block; | |
| background-color: #e8e8e8; | |
| border-radius: 15px; | |
| border: 4px #cccccc; | |
| color: #4a4a4a; | |
| text-align: center; | |
| font-size: 18px; | |
| padding: 2px; | |
| width: 300px; | |
| transition: all 0.5s; | |
| cursor: pointer; | |
| margin: 5px; | |
| } | |
| button span { | |
| cursor: pointer; | |
| display: inline-block; | |
| position: relative; | |
| transition: 0.5s; | |
| } | |
| button span:after { | |
| content: '\00bb'; | |
| position: absolute; | |
| opacity: 0; | |
| top: 0; | |
| right: -20px; | |
| transition: 0.5s; | |
| } | |
| button:hover { | |
| background-color: #bb1114; | |
| color:#e8e8e8; | |
| } | |
| button:hover span { | |
| padding-right: 25px; | |
| } | |
| button:hover span:after { | |
| opacity: 1; | |
| right: 0; | |
| } | |
| </style> | |
| """ | |
| st.markdown(style_button, unsafe_allow_html=True) | |
| def plot_column(data, column, nom_var_inv, placeholder, sigmas=3, limite=None): | |
| data = data.rename(columns=nom_var_inv) | |
| column = nom_var_inv[column] | |
| limit = data[column].mean() + sigmas * data[column].std() | |
| data = data.loc[data[column] <= limit] | |
| if limite is not None: | |
| data = data.loc[data[column] <= limite] | |
| fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud', | |
| zoom=10, color=column, | |
| hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP'], | |
| color_continuous_scale=px.colors.diverging.RdYlBu, | |
| width=1000, height=800) | |
| placeholder.plotly_chart(fig, use_container_width=True) | |
| def plot_column2(data, column, nom_var_inv, placeholder, zoom, sigmas=10, limite=None): | |
| data = data.rename(columns=nom_var_inv) | |
| column = nom_var_inv[column] | |
| limit = data[column].mean() + sigmas * data[column].std() | |
| data = data.loc[data[column] <= limit] | |
| if limite is not None: | |
| data = data.loc[data[column] <= limite] | |
| fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud', | |
| zoom=zoom, color=column, | |
| hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP', 'Comuna'], | |
| color_continuous_scale=px.colors.diverging.RdYlBu, | |
| width=1000, height=800,size='Superficie',size_max=25) | |
| placeholder.plotly_chart(fig, use_container_width=True) | |
| def get_comuna(direction): | |
| values = direction.split(',') | |
| return values[-2] | |
| def unit_separator(string): | |
| values = string.split(" ") | |
| return values[0] | |
| def transform_df(scraping_df): | |
| # Extract numbers from string | |
| # Change datatype | |
| # Add 'Comuna' and 'Región' | |
| scraping_df[["Comuna"]] = scraping_df[["direccion"]].applymap(lambda x: get_comuna(x)) | |
| # Scrap UF value for date | |
| #scraping_df[["Valor UF"]] = get_UF() | |
| return scraping_df | |
| def get_table_excel_link(df, name): | |
| towrite = io.BytesIO() | |
| downloaded_file = df.to_excel(towrite, encoding='utf-8', index=False, | |
| header=True) | |
| towrite.seek(0) # reset pointer | |
| file_name = 'Data' + name + '.xlsx' | |
| style = 'style="color:black;text-decoration: none; font-size:18px;"' | |
| name_mark = "Descargar " + name + ".xlsx" | |
| b64 = base64.b64encode(towrite.read()).decode() # some strings | |
| linko= f'<center><a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" '+style+'download="'+file_name+'"><button>'+name_mark+'</button></a></center>' | |
| return linko | |
| def query_inmob(): | |
| url = credpost["POSTGRESQL"] | |
| engine = create_engine(url, echo=False) | |
| semana_pasada = date.today() - timedelta(7) | |
| semana_pasada = semana_pasada.strftime("%Y-%m-%d") | |
| data = pd.read_sql_query("""select * from scraping_inmob where Fecha > | |
| '{Fecha} 00:00:00' ORDER BY Fecha desc""".format(Fecha=semana_pasada) , con=engine) | |
| return data | |
| def run_scrapping(): | |
| button_style() | |
| px.set_mapbox_access_token('pk.eyJ1IjoibW9rc2VuYmVyZyIsImEiOiJja3QwOTc3dHgyNzBhMnFsczJ2Y2w3bWJlIn0.m8c3duvR5hQVjbjEByorWQ') | |
| data = query_inmob() | |
| precio_uf = get_UF() | |
| data["valor_uf"] = data["valor_peso"]/float(precio_uf) | |
| data['predicted_venta_por_m2_UF']=data['predicted_venta_por_m2']/float(precio_uf) | |
| nom_var = {"Precio en UF": "valor_uf", | |
| "Precio en CLP": "valor_peso", | |
| "Superficie": "superficie", | |
| "Predicted venta por M2 en CLP": "predicted_venta_por_m2", | |
| "Predicted venta por M2 en UF": "predicted_venta_por_m2_UF", | |
| "Predicted arriendo por M2": "predicted_arriendo_por_m2", | |
| "Predicted yield anual": "predicted_yield_anual" | |
| } | |
| nom_var_inv = {v: k for k, v in nom_var.items()} | |
| var = list(nom_var.keys()) | |
| with st.form(key='my_form'): | |
| col1, col2, col3, col4 = st.columns((9, 1, 9, 1)) | |
| cols = st.columns((9, 1, 5, 5)) | |
| pre_selection = col1.selectbox("Variable", var) | |
| selection = nom_var[pre_selection] | |
| mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"]) | |
| if mercado != "Todos": | |
| mercado = mercado.lower() | |
| data_mercado = data[data["mercado"] == mercado] | |
| else: | |
| data_mercado = data | |
| cifras = len(str(int(max(data_mercado[selection])))) | |
| tipo_prop = cols[0].selectbox("Tipo de propiedad", | |
| ["Todos", "Casa", "Departamento"]) | |
| if tipo_prop != "Todos": | |
| tipo_prop2 = tipo_prop.lower() | |
| data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2] | |
| if cifras > 3: | |
| corte = 10 ** (cifras - 3) | |
| cota_final = (int(max(data_mercado[selection].dropna()))//corte+1) *corte | |
| cota_inf = cols[2].number_input(label="Valor Mínimo", | |
| min_value=0, | |
| value=0, | |
| max_value=cota_final | |
| ) | |
| cota_sup = cols[3].number_input(label="Valor Máximo", | |
| min_value=0, | |
| value=cota_final, | |
| max_value=cota_final | |
| ) | |
| else: | |
| cota_final = max(data_mercado[selection].dropna())+1 | |
| cota_inf = cols[2].number_input(label="Valor Mínimo", | |
| min_value=0.0, | |
| value=0.0, | |
| step=0.1, | |
| max_value=cota_final | |
| ) | |
| cota_sup = cols[3].number_input(label="Valor Máximo", | |
| min_value=0.0, | |
| step=0.1, | |
| value=cota_final, | |
| max_value=cota_final | |
| ) | |
| data_final = data_mercado[data_mercado[selection] < cota_sup] | |
| data_final = data_final[data_final[selection] > cota_inf] | |
| submit_button = st.form_submit_button(label='Actualizar') | |
| col1, col2 = st.columns((0.65, 1)) | |
| placeholder = st.empty() | |
| placeholder2 = st.empty() | |
| placeholder2.markdown(get_table_excel_link(data, "Data Completa"), | |
| unsafe_allow_html=True) | |
| plot_column(data, selection, nom_var_inv, placeholder) | |
| if submit_button: | |
| plot_column(data_final, selection, nom_var_inv, placeholder) | |
| st.markdown(get_table_excel_link(data_final, " Data filtrada"), | |
| unsafe_allow_html=True) | |
| def scraping_localizado(): | |
| button_style() | |
| data = query_inmob() | |
| data = transform_df(data) | |
| precio_uf = get_UF() | |
| data["valor_uf"] = data["valor_peso"]/float(precio_uf) | |
| data['predicted_venta_por_m2_UF'] = data['predicted_venta_por_m2']/float(precio_uf) | |
| nom_var = {"Precio en UF": "valor_uf", | |
| "Precio en CLP": "valor_peso", | |
| "Superficie": "superficie", | |
| "Predicted venta por M2 en CLP": "predicted_venta_por_m2", | |
| "Predicted venta por M2 en UF": "predicted_venta_por_m2_UF", | |
| "Predicted arriendo por M2": "predicted_arriendo_por_m2", | |
| "Predicted yield anual": "predicted_yield_anual" | |
| } | |
| nom_var_inv = {v : k for k, v in nom_var.items()} | |
| var = list(nom_var.keys()) | |
| with st.form(key='my_form'): | |
| col1, col2, col3, col4 = st.columns((9, 1, 9, 1)) | |
| cols = st.columns((9, 1, 5, 5)) | |
| pre_selection = col1.selectbox("Variable", var) | |
| selection = nom_var[pre_selection] | |
| mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"]) | |
| banos = col1.slider("Baños", value=(1, 5), min_value=0, max_value=10) | |
| dormitorios = col3.slider("Dormitorios", value=(1, 5), min_value=0, | |
| max_value=10) | |
| com = ["Todas"] + sorted(list(dict.fromkeys(data["Comuna"]))) | |
| comuna = col1.selectbox("Comuna", com) | |
| zoom = col3.number_input("Zoom", value=12) | |
| if mercado != "Todos": | |
| mercado = mercado.lower() | |
| data_mercado = data[data["mercado"] == mercado] | |
| else: | |
| data_mercado = data | |
| if comuna != "Todas": | |
| data_mercado = data_mercado[data_mercado["Comuna"] == comuna] | |
| if len(data_mercado) > 0: | |
| cifras = len(str(int(max(data_mercado[selection].dropna())))) | |
| else: | |
| cifras = 0 | |
| tipo_prop = cols[0].selectbox("Tipo de Propiedad", | |
| ["Todos", "Casa", "Departamento"]) | |
| if tipo_prop != "Todos": | |
| tipo_prop2 = tipo_prop.lower() | |
| data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2] | |
| if cifras > 3: | |
| corte = 10 ** (cifras - 3) | |
| cota_final = (int(max(data_mercado[selection].dropna()))//corte+1)*corte | |
| cota_inf = cols[2].number_input(label="Valor Mínimo", | |
| min_value=0, | |
| value=0, | |
| max_value=cota_final | |
| ) | |
| cota_sup = cols[3].number_input(label="Valor Máximo", | |
| min_value=0, | |
| value=cota_final, | |
| max_value=cota_final | |
| ) | |
| else: | |
| cota_final = max(data_mercado[selection].dropna())+1 | |
| cota_inf = cols[2].number_input(label="Valor Mínimo", | |
| min_value=0.0, | |
| value=0.0, | |
| step=0.1, | |
| max_value=cota_final | |
| ) | |
| cota_sup = cols[3].number_input(label="Valor Máximo", | |
| min_value=0.0, | |
| step=0.1, | |
| value=cota_final, | |
| max_value=cota_final | |
| ) | |
| data_final = data_mercado[data_mercado[selection] < cota_sup] | |
| data_final = data_final[data_final[selection] > cota_inf] | |
| data_final = data_final[data_final["banos"] > banos[0]] | |
| data_final = data_final[data_final["banos"] < banos[1]] | |
| data_final = data_final[data_final["dormitorios"] > dormitorios[0]] | |
| data_final = data_final[data_final["dormitorios"] < dormitorios[1]] | |
| st.write('Stats principales del análisis') | |
| st.write('Valor promedio en UF por metro cuadrado: {:.2f} UF'.format((data_final['valor_uf']/data_final['superficie']).mean())) | |
| st.write('Superficie promedio: {:.0f} m2'.format(data_final['superficie'].mean())) | |
| st.write('N° de dormitorios promedio: {:.2f}'.format(data_final['dormitorios'].mean())) | |
| st.write('N° de baños promedio: {:.2f}'.format(data_final['banos'].mean())) | |
| submit_button = st.form_submit_button(label='Actualizar') | |
| col1, col2 = st.columns((0.65, 1)) | |
| placeholder = st.empty() | |
| placeholder2 = st.empty() | |
| placeholder3 = st.empty() | |
| placeholder2.markdown(get_table_excel_link(data, "Data Completa"), | |
| unsafe_allow_html=True) | |
| plot_column2(data, selection, nom_var_inv, placeholder, zoom) | |
| dist_gamma = stats.gamma.rvs(1, scale=250000, size=2965) | |
| if submit_button: | |
| plot_column2(data_final, selection, nom_var_inv, placeholder, zoom) | |
| st.markdown(get_table_excel_link(data_final, " Data filtrada"), | |
| unsafe_allow_html=True) | |
| data_hist = data_final[data_final["mercado"] == "venta"] | |
| data_hist = data_hist[data_hist["valor_peso"] > 40000000] | |
| data_hist = data_hist[data_hist["valor_peso"] < 1000000000]["valor_peso"] * 0.2 | |
| fig2 = px.histogram((data_hist), x='valor_peso') | |
| #placeholder3.plotly_chart(fig2) | |
| # streamlit run analisis_inmob.py | |
| # scraping_localizado() | |