NextPlace / StatisticalBaseModel.py
ckoozzzu's picture
Upload folder using huggingface_hub
dbacacd verified
from typing import Tuple, TypedDict, Optional
import datetime
class ProcessedSynapse(TypedDict):
    """
    Typed dictionary describing one listed property handed to the model.

    Every field is declared ``Optional``, so any value may be ``None``.
    StatisticalBaseModel reads only ``price``, ``market`` and
    ``days_on_market`` and tolerates those keys being absent.
    """

    # --- Identifiers ---
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # --- Location ---
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # --- Listing details ---
    price: Optional[float]  # listing price; scaled by the market multiplier to predict the sale price
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]  # presumably living area in square feet -- TODO confirm
    lot_size: Optional[int]  # unit is not shown in this file -- TODO confirm
    year_built: Optional[int]
    days_on_market: Optional[int]  # number of days this house has been on the market
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]
    last_sale_date: Optional[str]  # date string; format is not shown in this file -- TODO confirm
    hoa_dues: Optional[float]
    query_date: Optional[str]  # date string; format is not shown in this file -- TODO confirm
    market: Optional[str]  # housing market name, compared against names such as 'San Francisco'
class StatisticalBaseModel:
    """
    Baseline predictor of the sale price and sale date of a listed house.

    Predictions come from fixed per-market lookup tables: an average number of
    days on market and a multiplier applied to the listing price. Markets that
    are not in a table fall back to the defaults below.
    """

    # Fallbacks used when the market is unknown or not provided.
    _DEFAULT_AVERAGE_DAYS_ON_MARKET = 34
    _DEFAULT_PRICE_MULTIPLIER = 1.0
    # Listing price assumed when the input carries no usable price.
    _DEFAULT_LISTING_PRICE = 1.0

    # Average days on market per housing market.
    # You probably want to update this based on the current season. Houses sell faster in the summer.
    # Add more entries for other housing markets!
    _AVERAGE_DAYS_ON_MARKET = {
        'San Francisco': 23,
        'Los Angeles': 68,
        'Seattle': 27,
        'Austin': 78,
        'Houston': 73,
        'Chicago': 25,
        'New York': 20,
        'Denver': 24,
    }

    # Multiplier applied to the listing price per housing market.
    # You may want to add more logic to check zipcode for more precise price multipliers
    # Add more entries for other housing markets!
    _PRICE_MULTIPLIERS = {
        'San Francisco': 1.18,  # 18% above listing
        # NOTE(review): this entry was previously commented as "22% above
        # listing" while the value was 1.2; the value is kept -- confirm intent.
        'Los Angeles': 1.2,  # 20% above listing
        'Seattle': 1.13,  # 13% above listing
        'Austin': 1.11,  # 11% above listing
        'Houston': 1.15,  # 15% above listing
        'Chicago': 1.12,  # 12% above listing
        'New York': 1.05,  # 5% above listing
        'Denver': 1.11,  # 11% above listing
    }

    def __init__(self):
        self._load_model()

    def _load_model(self):
        """
        Perform any actions needed to load the model.
        EX: Establish API connections, download an ML model for inference, etc...
        """
        print("Loading model...")
        # Optional model loading
        print("Model loaded.")

    def _get_average_for_market(self, market: str) -> int:
        """
        Get the average days on market for a house in a given market
        :param market: the housing market
        :return: the average days on market, or the default of 34 for an unlisted market
        """
        return self._AVERAGE_DAYS_ON_MARKET.get(market, self._DEFAULT_AVERAGE_DAYS_ON_MARKET)

    def _sale_date_predictor(self, input_data: "ProcessedSynapse") -> datetime.date:
        """
        Calculate the expected sale date from the average days on market
        :param input_data: the listing; reads `days_on_market` and `market`
        :return: today plus the days remaining until the market average is reached,
            or tomorrow when `days_on_market` is missing/None or already at or past the average
        """
        # Read the clock once so every branch is based on the same day.
        today = datetime.date.today()
        tomorrow = today + datetime.timedelta(days=1)

        # A None value is treated exactly like a missing key; comparing None
        # with an int would raise TypeError.
        days_on_market = input_data.get('days_on_market')
        if days_on_market is None:
            return tomorrow

        market = input_data.get('market')
        if market is None:
            average = self._DEFAULT_AVERAGE_DAYS_ON_MARKET
        else:
            average = self._get_average_for_market(market)

        if days_on_market < average:
            return today + datetime.timedelta(days=average - days_on_market)
        return tomorrow

    def _get_price_multiplier(self, market: str) -> float:
        """
        Calculate the price multiplier based on the market
        :param market: the market the house is in
        :return: the multiplier for the predicted price, or 1.0 for an unlisted market
        """
        return self._PRICE_MULTIPLIERS.get(market, self._DEFAULT_PRICE_MULTIPLIER)

    def run_inference(self, input_data: "ProcessedSynapse") -> Tuple[float, str]:
        """
        Predict the sale price and sale date for the house represented by `input_data`
        :param input_data: a formatted Synapse from the validator, representing a currently listed house
        :return: the predicted sale price and the predicted sale date formatted as YYYY-MM-DD
        """
        # Missing and None values both fall back to the defaults; float(None)
        # would raise TypeError.
        price = input_data.get('price')
        listing_price = float(price) if price is not None else self._DEFAULT_LISTING_PRICE

        market = input_data.get('market')
        if market is None:
            sale_multiplier = self._DEFAULT_PRICE_MULTIPLIER
        else:
            sale_multiplier = self._get_price_multiplier(market)

        predicted_sale_price = listing_price * sale_multiplier
        predicted_sale_date = self._sale_date_predictor(input_data).strftime("%Y-%m-%d")
        return predicted_sale_price, predicted_sale_date