Spaces:

SustainabilityLabIITGN
/

VayuChat

Running

App Files Files Community

VayuChat / test_image.py

Nipun

Implement ultra-high DPI plots and fix UI responsiveness

95b3c75 2 months ago

raw

history blame contribute delete

5.46 kB

	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import uuid
	import calendar
	import numpy as np
	# Set professional matplotlib styling with high resolution
	#plt.style.use('vayuchat.mplstyle')
	# Air-quality / meteorology observations; the Timestamp column is parsed
	# into datetimes right after loading.
	df = pd.read_csv("AQ_met_data.csv").assign(
	    Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"])
	)
	# State-level reference table.
	states_df = pd.read_csv("states_data.csv")
	# NCAP funding table.
	ncap_df = pd.read_csv("ncap_funding_data.csv")
	# df is pandas DataFrame with air quality data from India. Data frequency is daily from 2017 to 2024. The data has the following columns and data types:
	# Unnamed: 0 int64
	# Timestamp datetime64[ns]
	# State object
	# City object
	# Station object
	# site_id object
	# Year int64
	# PM2.5 (µg/m³) float64
	# PM10 (µg/m³) float64
	# NO (µg/m³) float64
	# NO2 (µg/m³) float64
	# NOx (ppb) float64
	# NH3 (µg/m³) float64
	# SO2 (µg/m³) float64
	# CO (mg/m³) float64
	# Ozone (µg/m³) float64
	# AT (°C) float64
	# RH (%) float64
	# WS (m/s) float64
	# WD (deg) float64
	# RF (mm) float64
	# TOT-RF (mm) float64
	# SR (W/mt2) float64
	# BP (mmHg) float64
	# VWS (m/s) float64
	# dtype: object
	# states_df is a pandas DataFrame of state-wise population, area and whether state is union territory or not of India.
	# state object
	# population int64
	# area (km2) int64
	# isUnionTerritory bool
	# dtype: object
	# ncap_df is a pandas DataFrame of funding given to the cities of India from 2019-2022, under The National Clean Air Program (NCAP).
	# S. No. int64
	# state object
	# city object
	# Amount released during FY 2019-20 float64
	# Amount released during FY 2020-21 float64
	# Amount released during FY 2021-22 float64
	# Total fund released float64
	# Utilisation as on June 2022 float64
	# dtype: object
	# Question: Compare the wind speed and PM2.5 levels during Delhi’s most polluted week (highest PM2.5) in December 2024 with the previous 15 days and the following 15 days on a time series plot.
	# Generate code to answer the question and save result in 'answer' variable
	# If creating a plot, save it with a unique filename and store the filename in 'answer'
	# If returning text/numbers, store the result directly in 'answer'


	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import uuid
	import numpy as np

	# Compare Delhi's PM2.5 and wind speed around the most polluted week of
	# December 2024 (the 7 consecutive days with the highest mean PM2.5),
	# together with up to 15 days before and up to 15 days after that week.
	#
	# Result: `answer` holds the filename of the saved plot on success, or a
	# short explanatory message when the analysis cannot be carried out.
	if df.empty:
	    answer = "No data available"
	else:
	    try:
	        pm_col = 'PM2.5 (µg/m³)'
	        ws_col = 'WS (m/s)'

	        # Keep every Delhi row, not only December 2024: the 15 days on
	        # either side of the worst week may fall in neighbouring months.
	        is_delhi = df['City'].str.contains('Delhi', case=False, na=False)
	        delhi = df.loc[is_delhi]

	        # The frame has one row per station per day, so average across
	        # stations first. Without this, a 7-row rolling window would cover
	        # seven station readings rather than seven days.
	        day = delhi['Timestamp'].dt.normalize()
	        daily = (
	            delhi.groupby(day)[[pm_col, ws_col]]
	            .mean()
	            .dropna()
	            .sort_index()
	        )

	        if daily.empty:
	            answer = "Insufficient data"
	        else:
	            # Time-based window: each value is the mean of the 7 calendar
	            # days ending on the index date. min_periods=7 rejects windows
	            # that contain a missing day.
	            weekly_pm = daily[pm_col].rolling('7D', min_periods=7).mean()

	            # Only weeks lying entirely inside December 2024 qualify, i.e.
	            # windows whose last day is between 7 and 31 December.
	            ends_in_dec = (
	                (weekly_pm.index >= pd.Timestamp('2024-12-07'))
	                & (weekly_pm.index <= pd.Timestamp('2024-12-31'))
	            )
	            candidates = weekly_pm[ends_in_dec].dropna()

	            if candidates.empty:
	                answer = "Insufficient data"
	            else:
	                week_end = candidates.idxmax()
	                week_start = week_end - pd.Timedelta(days=6)

	                # Extended window: 15 days before the week starts and
	                # 15 days after it ends (as far as data is available).
	                span_start = week_start - pd.Timedelta(days=15)
	                span_end = week_end + pd.Timedelta(days=15)
	                plot_data = daily.loc[span_start:span_end].reset_index()

	                # The week itself contributes exactly 7 rows; without at
	                # least one surrounding day there is nothing to compare.
	                if len(plot_data) <= 7:
	                    answer = "Insufficient data"
	                else:
	                    fig, ax_pm = plt.subplots(figsize=(9, 6))
	                    try:
	                        sns.lineplot(data=plot_data, x='Timestamp', y=pm_col,
	                                     ax=ax_pm, label='PM2.5 (µg/m³)',
	                                     color='tab:red', legend=False)
	                        ax_pm.set_ylabel('PM2.5 (µg/m³)', color='tab:red')
	                        ax_pm.tick_params(axis='y', labelcolor='tab:red')
	                        # Label the primary axis: the twin's x-axis is
	                        # hidden, so a label set there would not show.
	                        ax_pm.set_xlabel('Date')

	                        # Shade the most polluted week so it can be told
	                        # apart from the days before and after it.
	                        ax_pm.axvspan(week_start, week_end,
	                                      color='tab:orange', alpha=0.2,
	                                      label='Most polluted week')

	                        ax_ws = ax_pm.twinx()
	                        sns.lineplot(data=plot_data, x='Timestamp', y=ws_col,
	                                     ax=ax_ws, label='Wind Speed (m/s)',
	                                     color='tab:blue', legend=False)
	                        ax_ws.set_ylabel('Wind Speed (m/s)', color='tab:blue')
	                        ax_ws.tick_params(axis='y', labelcolor='tab:blue')

	                        # One combined legend instead of two overlapping
	                        # per-axis legends.
	                        pm_handles, pm_labels = ax_pm.get_legend_handles_labels()
	                        ws_handles, ws_labels = ax_ws.get_legend_handles_labels()
	                        ax_ws.legend(pm_handles + ws_handles,
	                                     pm_labels + ws_labels,
	                                     loc='upper right')

	                        ax_pm.set_title('Delhi – PM2.5 and Wind Speed around Most Polluted Week (Dec 2024)')
	                        fig.autofmt_xdate()
	                        fig.tight_layout()

	                        # Unique name so repeated runs never overwrite an
	                        # earlier plot.
	                        filename = f"plot_{uuid.uuid4().hex}.png"
	                        fig.savefig(filename, dpi=1200, bbox_inches='tight',
	                                    facecolor='white')
	                    finally:
	                        # Release the figure even if drawing or saving fails.
	                        plt.close(fig)

	                    answer = filename
	    except Exception:
	        # Best-effort boundary: any failure is reported as a message
	        # rather than raised.
	        answer = "Unable to complete analysis with available data"