Fixed AQI Downloader Python Notebook (#3)
Browse files- Fixed AQI Downloader Python Notebook (a3111ddf6fbbef607fab3b4aa64d0482e96e2849)
Co-authored-by: Kalp Shah <kalpshah18@users.noreply.huggingface.co>
- aqi_downloader.ipynb +125 -84
aqi_downloader.ipynb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
@@ -20,7 +20,7 @@
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
-
"execution_count":
|
| 24 |
"metadata": {},
|
| 25 |
"outputs": [],
|
| 26 |
"source": [
|
|
@@ -34,7 +34,9 @@
|
|
| 34 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
| 35 |
" if file_exists:\n",
|
| 36 |
" return file_path\n",
|
| 37 |
-
"
|
|
|
|
|
|
|
| 38 |
" url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
|
| 39 |
" response = requests.get(url)\n",
|
| 40 |
" if response.status_code == 200:\n",
|
|
@@ -48,7 +50,7 @@
|
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"cell_type": "code",
|
| 51 |
-
"execution_count":
|
| 52 |
"metadata": {},
|
| 53 |
"outputs": [
|
| 54 |
{
|
|
@@ -59,19 +61,19 @@
|
|
| 59 |
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
|
| 60 |
" '2016-01-09', '2016-01-10',\n",
|
| 61 |
" ...\n",
|
| 62 |
-
" '
|
| 63 |
-
" '
|
| 64 |
-
" '
|
| 65 |
-
" dtype='datetime64[ns]', length=
|
| 66 |
]
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"data": {
|
| 70 |
"text/plain": [
|
| 71 |
-
"(None,
|
| 72 |
]
|
| 73 |
},
|
| 74 |
-
"execution_count":
|
| 75 |
"metadata": {},
|
| 76 |
"output_type": "execute_result"
|
| 77 |
}
|
|
@@ -84,7 +86,7 @@
|
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"cell_type": "code",
|
| 87 |
-
"execution_count":
|
| 88 |
"metadata": {},
|
| 89 |
"outputs": [
|
| 90 |
{
|
|
@@ -92,8 +94,9 @@
|
|
| 92 |
"output_type": "stream",
|
| 93 |
"text": [
|
| 94 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
|
|
|
|
| 95 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
|
| 96 |
-
"Failed to download https://cpcb.nic.in//upload/Downloads/
|
| 97 |
]
|
| 98 |
}
|
| 99 |
],
|
|
@@ -104,23 +107,23 @@
|
|
| 104 |
},
|
| 105 |
{
|
| 106 |
"cell_type": "code",
|
| 107 |
-
"execution_count":
|
| 108 |
"metadata": {},
|
| 109 |
"outputs": [
|
| 110 |
{
|
| 111 |
"name": "stdout",
|
| 112 |
"output_type": "stream",
|
| 113 |
"text": [
|
| 114 |
-
"
|
| 115 |
]
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"data": {
|
| 119 |
"text/plain": [
|
| 120 |
-
"
|
| 121 |
]
|
| 122 |
},
|
| 123 |
-
"execution_count":
|
| 124 |
"metadata": {},
|
| 125 |
"output_type": "execute_result"
|
| 126 |
}
|
|
@@ -133,35 +136,22 @@
|
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"cell_type": "code",
|
| 136 |
-
"execution_count":
|
| 137 |
"metadata": {},
|
| 138 |
"outputs": [
|
| 139 |
{
|
| 140 |
"data": {
|
| 141 |
"application/vnd.jupyter.widget-view+json": {
|
| 142 |
-
"model_id": "
|
| 143 |
"version_major": 2,
|
| 144 |
"version_minor": 0
|
| 145 |
},
|
| 146 |
"text/plain": [
|
| 147 |
-
" 0%| | 0/
|
| 148 |
]
|
| 149 |
},
|
| 150 |
"metadata": {},
|
| 151 |
"output_type": "display_data"
|
| 152 |
-
},
|
| 153 |
-
{
|
| 154 |
-
"name": "stdout",
|
| 155 |
-
"output_type": "stream",
|
| 156 |
-
"text": [
|
| 157 |
-
"File AQI_Bulletin_20160606.pdf does not exist\n",
|
| 158 |
-
"No tables found in AQI_data/AQI_Bulletin_20160704.pdf\n",
|
| 159 |
-
"No tables found in AQI_data/AQI_Bulletin_20160721.pdf\n",
|
| 160 |
-
"No tables found in AQI_data/AQI_Bulletin_20160723.pdf\n",
|
| 161 |
-
"No tables found in AQI_data/AQI_Bulletin_20160722.pdf\n",
|
| 162 |
-
"File AQI_Bulletin_20170618.pdf does not exist\n",
|
| 163 |
-
"File AQI_Bulletin_20171014.pdf does not exist\n"
|
| 164 |
-
]
|
| 165 |
}
|
| 166 |
],
|
| 167 |
"source": [
|
|
@@ -309,6 +299,11 @@
|
|
| 309 |
" raise ValueError(\"Table pattern not recognized\")\n",
|
| 310 |
"\n",
|
| 311 |
"def process_file(date):\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
| 313 |
" if not file_exists:\n",
|
| 314 |
" print(f\"File {file_name} does not exist\")\n",
|
|
@@ -364,6 +359,49 @@
|
|
| 364 |
"_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
|
| 365 |
]
|
| 366 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
{
|
| 368 |
"cell_type": "markdown",
|
| 369 |
"metadata": {},
|
|
@@ -373,16 +411,16 @@
|
|
| 373 |
},
|
| 374 |
{
|
| 375 |
"cell_type": "code",
|
| 376 |
-
"execution_count":
|
| 377 |
"metadata": {},
|
| 378 |
"outputs": [
|
| 379 |
{
|
| 380 |
"data": {
|
| 381 |
"text/plain": [
|
| 382 |
-
"
|
| 383 |
]
|
| 384 |
},
|
| 385 |
-
"execution_count":
|
| 386 |
"metadata": {},
|
| 387 |
"output_type": "execute_result"
|
| 388 |
}
|
|
@@ -394,14 +432,14 @@
|
|
| 394 |
},
|
| 395 |
{
|
| 396 |
"cell_type": "code",
|
| 397 |
-
"execution_count":
|
| 398 |
"metadata": {},
|
| 399 |
"outputs": [
|
| 400 |
{
|
| 401 |
"name": "stdout",
|
| 402 |
"output_type": "stream",
|
| 403 |
"text": [
|
| 404 |
-
"['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 
'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
|
| 405 |
]
|
| 406 |
}
|
| 407 |
],
|
|
@@ -420,6 +458,7 @@
|
|
| 420 |
" \"Manglore\": \"Mangalore\",\n",
|
| 421 |
" \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
|
| 422 |
" \"Tumakuru\": \"Tumidih\",\n",
|
|
|
|
| 423 |
" \"Tiruppur\": \"Tirupur\",\n",
|
| 424 |
" \"Yamuna Nagar\": \"Yamunanagar\",\n",
|
| 425 |
" \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
|
|
@@ -438,48 +477,49 @@
|
|
| 438 |
},
|
| 439 |
{
|
| 440 |
"cell_type": "code",
|
| 441 |
-
"execution_count":
|
| 442 |
"metadata": {},
|
| 443 |
"outputs": [
|
| 444 |
{
|
| 445 |
"data": {
|
| 446 |
"text/plain": [
|
| 447 |
"State\n",
|
| 448 |
-
"
|
| 449 |
-
"
|
| 450 |
-
"
|
| 451 |
-
"
|
| 452 |
-
"
|
| 453 |
-
"
|
| 454 |
-
"
|
| 455 |
-
"
|
| 456 |
-
"
|
| 457 |
-
"
|
| 458 |
-
"
|
| 459 |
-
"
|
| 460 |
-
"
|
| 461 |
-
"
|
| 462 |
-
"
|
| 463 |
-
"
|
| 464 |
-
"
|
| 465 |
-
"
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
-
"
|
| 469 |
-
"
|
| 470 |
-
"
|
| 471 |
-
"
|
| 472 |
-
"
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
-
"
|
| 476 |
-
"
|
| 477 |
-
"
|
| 478 |
-
"
|
|
|
|
| 479 |
"Name: count, dtype: int64"
|
| 480 |
]
|
| 481 |
},
|
| 482 |
-
"execution_count":
|
| 483 |
"metadata": {},
|
| 484 |
"output_type": "execute_result"
|
| 485 |
}
|
|
@@ -593,6 +633,7 @@
|
|
| 593 |
" 'Solapur': 'Maharashtra',\n",
|
| 594 |
" 'Sonipat': 'Haryana',\n",
|
| 595 |
" 'Sri Ganganagar': 'Rajasthan',\n",
|
|
|
|
| 596 |
" 'Srinagar': 'Jammu and Kashmir',\n",
|
| 597 |
" 'Suakati': 'Odisha',\n",
|
| 598 |
" 'Surat': 'Gujarat',\n",
|
|
@@ -631,7 +672,7 @@
|
|
| 631 |
},
|
| 632 |
{
|
| 633 |
"cell_type": "code",
|
| 634 |
-
"execution_count":
|
| 635 |
"metadata": {},
|
| 636 |
"outputs": [
|
| 637 |
{
|
|
@@ -694,12 +735,12 @@
|
|
| 694 |
"0 Agra 417 PM\\n2.5 Severe \n",
|
| 695 |
"1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
|
| 696 |
"\n",
|
| 697 |
-
"
|
| 698 |
-
"0
|
| 699 |
-
"1
|
| 700 |
]
|
| 701 |
},
|
| 702 |
-
"execution_count":
|
| 703 |
"metadata": {},
|
| 704 |
"output_type": "execute_result"
|
| 705 |
}
|
|
@@ -712,16 +753,16 @@
|
|
| 712 |
},
|
| 713 |
{
|
| 714 |
"cell_type": "code",
|
| 715 |
-
"execution_count":
|
| 716 |
"metadata": {},
|
| 717 |
"outputs": [
|
| 718 |
{
|
| 719 |
"data": {
|
| 720 |
"text/plain": [
|
| 721 |
-
"
|
| 722 |
]
|
| 723 |
},
|
| 724 |
-
"execution_count":
|
| 725 |
"metadata": {},
|
| 726 |
"output_type": "execute_result"
|
| 727 |
}
|
|
@@ -732,15 +773,15 @@
|
|
| 732 |
},
|
| 733 |
{
|
| 734 |
"cell_type": "code",
|
| 735 |
-
"execution_count":
|
| 736 |
"metadata": {},
|
| 737 |
"outputs": [
|
| 738 |
{
|
| 739 |
"name": "stdout",
|
| 740 |
"output_type": "stream",
|
| 741 |
"text": [
|
| 742 |
-
"
|
| 743 |
-
"
|
| 744 |
]
|
| 745 |
}
|
| 746 |
],
|
|
@@ -753,7 +794,7 @@
|
|
| 753 |
},
|
| 754 |
{
|
| 755 |
"cell_type": "code",
|
| 756 |
-
"execution_count":
|
| 757 |
"metadata": {},
|
| 758 |
"outputs": [],
|
| 759 |
"source": [
|
|
@@ -763,7 +804,7 @@
|
|
| 763 |
],
|
| 764 |
"metadata": {
|
| 765 |
"kernelspec": {
|
| 766 |
-
"display_name": "
|
| 767 |
"language": "python",
|
| 768 |
"name": "python3"
|
| 769 |
},
|
|
@@ -777,7 +818,7 @@
|
|
| 777 |
"name": "python",
|
| 778 |
"nbconvert_exporter": "python",
|
| 779 |
"pygments_lexer": "ipython3",
|
| 780 |
-
"version": "3.
|
| 781 |
}
|
| 782 |
},
|
| 783 |
"nbformat": 4,
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
+
"execution_count": 5,
|
| 24 |
"metadata": {},
|
| 25 |
"outputs": [],
|
| 26 |
"source": [
|
|
|
|
| 34 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
| 35 |
" if file_exists:\n",
|
| 36 |
" return file_path\n",
|
| 37 |
+
"\n",
|
| 38 |
+
" os.makedirs(\"AQI_data\", exist_ok=True)\n",
|
| 39 |
+
"\n",
|
| 40 |
" url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
|
| 41 |
" response = requests.get(url)\n",
|
| 42 |
" if response.status_code == 200:\n",
|
|
|
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"cell_type": "code",
|
| 53 |
+
"execution_count": 6,
|
| 54 |
"metadata": {},
|
| 55 |
"outputs": [
|
| 56 |
{
|
|
|
|
| 61 |
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
|
| 62 |
" '2016-01-09', '2016-01-10',\n",
|
| 63 |
" ...\n",
|
| 64 |
+
" '2025-02-17', '2025-02-18', '2025-02-19', '2025-02-20',\n",
|
| 65 |
+
" '2025-02-21', '2025-02-22', '2025-02-23', '2025-02-24',\n",
|
| 66 |
+
" '2025-02-25', '2025-02-26'],\n",
|
| 67 |
+
" dtype='datetime64[ns]', length=3345, freq='D')\n"
|
| 68 |
]
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"data": {
|
| 72 |
"text/plain": [
|
| 73 |
+
"(None, 3345)"
|
| 74 |
]
|
| 75 |
},
|
| 76 |
+
"execution_count": 6,
|
| 77 |
"metadata": {},
|
| 78 |
"output_type": "execute_result"
|
| 79 |
}
|
|
|
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"cell_type": "code",
|
| 89 |
+
"execution_count": 7,
|
| 90 |
"metadata": {},
|
| 91 |
"outputs": [
|
| 92 |
{
|
|
|
|
| 94 |
"output_type": "stream",
|
| 95 |
"text": [
|
| 96 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
|
| 97 |
+
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20170618.pdf with status code 404\n",
|
| 98 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
|
| 99 |
+
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20250101.pdf with status code 404\n"
|
| 100 |
]
|
| 101 |
}
|
| 102 |
],
|
|
|
|
| 107 |
},
|
| 108 |
{
|
| 109 |
"cell_type": "code",
|
| 110 |
+
"execution_count": 8,
|
| 111 |
"metadata": {},
|
| 112 |
"outputs": [
|
| 113 |
{
|
| 114 |
"name": "stdout",
|
| 115 |
"output_type": "stream",
|
| 116 |
"text": [
|
| 117 |
+
"3345\n"
|
| 118 |
]
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"data": {
|
| 122 |
"text/plain": [
|
| 123 |
+
"3341"
|
| 124 |
]
|
| 125 |
},
|
| 126 |
+
"execution_count": 8,
|
| 127 |
"metadata": {},
|
| 128 |
"output_type": "execute_result"
|
| 129 |
}
|
|
|
|
| 136 |
},
|
| 137 |
{
|
| 138 |
"cell_type": "code",
|
| 139 |
+
"execution_count": 9,
|
| 140 |
"metadata": {},
|
| 141 |
"outputs": [
|
| 142 |
{
|
| 143 |
"data": {
|
| 144 |
"application/vnd.jupyter.widget-view+json": {
|
| 145 |
+
"model_id": "438a2a0c07fb4367b18a4deff93364e9",
|
| 146 |
"version_major": 2,
|
| 147 |
"version_minor": 0
|
| 148 |
},
|
| 149 |
"text/plain": [
|
| 150 |
+
" 0%| | 0/3345 [00:00<?, ?it/s]"
|
| 151 |
]
|
| 152 |
},
|
| 153 |
"metadata": {},
|
| 154 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
}
|
| 156 |
],
|
| 157 |
"source": [
|
|
|
|
| 299 |
" raise ValueError(\"Table pattern not recognized\")\n",
|
| 300 |
"\n",
|
| 301 |
"def process_file(date):\n",
|
| 302 |
+
" folders = [\"AQI_data\", \"AQI_data_csv\"]\n",
|
| 303 |
+
"\n",
|
| 304 |
+
" for folder in folders:\n",
|
| 305 |
+
" if not os.path.exists(folder):\n",
|
| 306 |
+
" os.makedirs(folder)\n",
|
| 307 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
| 308 |
" if not file_exists:\n",
|
| 309 |
" print(f\"File {file_name} does not exist\")\n",
|
|
|
|
| 359 |
"_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
|
| 360 |
]
|
| 361 |
},
|
| 362 |
+
{
|
| 363 |
+
"cell_type": "markdown",
|
| 364 |
+
"metadata": {},
|
| 365 |
+
"source": [
|
| 366 |
+
"## Creating Merged DataFrame"
|
| 367 |
+
]
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"cell_type": "code",
|
| 371 |
+
"execution_count": 10,
|
| 372 |
+
"metadata": {},
|
| 373 |
+
"outputs": [
|
| 374 |
+
{
|
| 375 |
+
"name": "stdout",
|
| 376 |
+
"output_type": "stream",
|
| 377 |
+
"text": [
|
| 378 |
+
"Merged CSV saved as AQI_data_csv/merged.csv\n"
|
| 379 |
+
]
|
| 380 |
+
}
|
| 381 |
+
],
|
| 382 |
+
"source": [
|
| 383 |
+
"import os\n",
|
| 384 |
+
"import pandas as pd\n",
|
| 385 |
+
"\n",
|
| 386 |
def merge_csv_files(folder_path, output_file):
    """Concatenate every CSV file in ``folder_path`` into one CSV file.

    Args:
        folder_path: Directory whose ``*.csv`` files are merged.
        output_file: Path the merged CSV is written to. It may live inside
            ``folder_path``; in that case it is never used as an input.

    Returns:
        None. The merged data is written to ``output_file``; if there is
        nothing to merge a message is printed and no file is written.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist (from
            ``os.listdir``).
    """
    output_abs = os.path.normcase(os.path.abspath(output_file))

    csv_paths = []
    # sorted(): os.listdir returns names in arbitrary order, which would make
    # the row order of the merged file differ from run to run.
    for name in sorted(os.listdir(folder_path)):
        if not name.endswith('.csv'):
            continue
        path = os.path.join(folder_path, name)
        # Skip the output of a previous run; otherwise re-running the merge
        # would append the old merged rows to themselves and duplicate data.
        if os.path.normcase(os.path.abspath(path)) == output_abs:
            continue
        csv_paths.append(path)

    if not csv_paths:
        print("No CSV files found in the folder.")
        return

    merged_df = pd.concat(
        [pd.read_csv(path) for path in csv_paths],
        ignore_index=True,
    )

    merged_df.to_csv(output_file, index=False)
    print(f"Merged CSV saved as {output_file}")
|
| 398 |
+
"\n",
|
| 399 |
# Example usage: merge the CSV files in AQI_data_csv into a single file.
folder_path = "AQI_data_csv"
output_file = "AQI_data_csv/merged.csv"
merge_csv_files(folder_path=folder_path, output_file=output_file)
|
| 403 |
+
]
|
| 404 |
+
},
|
| 405 |
{
|
| 406 |
"cell_type": "markdown",
|
| 407 |
"metadata": {},
|
|
|
|
| 411 |
},
|
| 412 |
{
|
| 413 |
"cell_type": "code",
|
| 414 |
+
"execution_count": 11,
|
| 415 |
"metadata": {},
|
| 416 |
"outputs": [
|
| 417 |
{
|
| 418 |
"data": {
|
| 419 |
"text/plain": [
|
| 420 |
+
"397732"
|
| 421 |
]
|
| 422 |
},
|
| 423 |
+
"execution_count": 11,
|
| 424 |
"metadata": {},
|
| 425 |
"output_type": "execute_result"
|
| 426 |
}
|
|
|
|
| 432 |
},
|
| 433 |
{
|
| 434 |
"cell_type": "code",
|
| 435 |
+
"execution_count": 12,
|
| 436 |
"metadata": {},
|
| 437 |
"outputs": [
|
| 438 |
{
|
| 439 |
"name": "stdout",
|
| 440 |
"output_type": "stream",
|
| 441 |
"text": [
|
| 442 |
+
"['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 
'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Sri Vijaya Puram', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
|
| 443 |
]
|
| 444 |
}
|
| 445 |
],
|
|
|
|
| 458 |
" \"Manglore\": \"Mangalore\",\n",
|
| 459 |
" \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
|
| 460 |
" \"Tumakuru\": \"Tumidih\",\n",
|
| 461 |
+
" \"Tirumala\": \"Tirupati\",\n",
|
| 462 |
" \"Tiruppur\": \"Tirupur\",\n",
|
| 463 |
" \"Yamuna Nagar\": \"Yamunanagar\",\n",
|
| 464 |
" \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
|
|
|
|
| 477 |
},
|
| 478 |
{
|
| 479 |
"cell_type": "code",
|
| 480 |
+
"execution_count": 13,
|
| 481 |
"metadata": {},
|
| 482 |
"outputs": [
|
| 483 |
{
|
| 484 |
"data": {
|
| 485 |
"text/plain": [
|
| 486 |
"State\n",
|
| 487 |
+
"Andaman and Nicobar 6\n",
|
| 488 |
+
"Andhra Pradesh 11546\n",
|
| 489 |
+
"Arunachal Pradesh 614\n",
|
| 490 |
+
"Assam 5099\n",
|
| 491 |
+
"Bihar 28633\n",
|
| 492 |
+
"Chandigarh 1980\n",
|
| 493 |
+
"Chhattisgarh 5357\n",
|
| 494 |
+
"Delhi 3330\n",
|
| 495 |
+
"Gujarat 12195\n",
|
| 496 |
+
"Haryana 50177\n",
|
| 497 |
+
"Himachal Pradesh 1022\n",
|
| 498 |
+
"Jammu and Kashmir 822\n",
|
| 499 |
+
"Jharkhand 2076\n",
|
| 500 |
+
"Karnataka 35248\n",
|
| 501 |
+
"Kerala 11549\n",
|
| 502 |
+
"Madhya Pradesh 31326\n",
|
| 503 |
+
"Maharashtra 39193\n",
|
| 504 |
+
"Manipur 790\n",
|
| 505 |
+
"Meghalaya 1956\n",
|
| 506 |
+
"Mizoram 1535\n",
|
| 507 |
+
"Nagaland 1398\n",
|
| 508 |
+
"Odisha 12363\n",
|
| 509 |
+
"Puducherry 1433\n",
|
| 510 |
+
"Punjab 19676\n",
|
| 511 |
+
"Rajasthan 37729\n",
|
| 512 |
+
"Sikkim 812\n",
|
| 513 |
+
"Tamil Nadu 14080\n",
|
| 514 |
+
"Telangana 3322\n",
|
| 515 |
+
"Tripura 1442\n",
|
| 516 |
+
"Uttar Pradesh 41800\n",
|
| 517 |
+
"Uttarakhand 2156\n",
|
| 518 |
+
"West Bengal 15406\n",
|
| 519 |
"Name: count, dtype: int64"
|
| 520 |
]
|
| 521 |
},
|
| 522 |
+
"execution_count": 13,
|
| 523 |
"metadata": {},
|
| 524 |
"output_type": "execute_result"
|
| 525 |
}
|
|
|
|
| 633 |
" 'Solapur': 'Maharashtra',\n",
|
| 634 |
" 'Sonipat': 'Haryana',\n",
|
| 635 |
" 'Sri Ganganagar': 'Rajasthan',\n",
|
| 636 |
+
" 'Sri Vijaya Puram': 'Andaman and Nicobar',\n",
|
| 637 |
" 'Srinagar': 'Jammu and Kashmir',\n",
|
| 638 |
" 'Suakati': 'Odisha',\n",
|
| 639 |
" 'Surat': 'Gujarat',\n",
|
|
|
|
| 672 |
},
|
| 673 |
{
|
| 674 |
"cell_type": "code",
|
| 675 |
+
"execution_count": 14,
|
| 676 |
"metadata": {},
|
| 677 |
"outputs": [
|
| 678 |
{
|
|
|
|
| 735 |
"0 Agra 417 PM\\n2.5 Severe \n",
|
| 736 |
"1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
|
| 737 |
"\n",
|
| 738 |
+
" Based on number of monitoring stations Date State \n",
|
| 739 |
+
"0 1 2016-01-01 Uttar Pradesh \n",
|
| 740 |
+
"1 5 2016-01-01 Karnataka "
|
| 741 |
]
|
| 742 |
},
|
| 743 |
+
"execution_count": 14,
|
| 744 |
"metadata": {},
|
| 745 |
"output_type": "execute_result"
|
| 746 |
}
|
|
|
|
| 753 |
},
|
| 754 |
{
|
| 755 |
"cell_type": "code",
|
| 756 |
+
"execution_count": 15,
|
| 757 |
"metadata": {},
|
| 758 |
"outputs": [
|
| 759 |
{
|
| 760 |
"data": {
|
| 761 |
"text/plain": [
|
| 762 |
+
"396071"
|
| 763 |
]
|
| 764 |
},
|
| 765 |
+
"execution_count": 15,
|
| 766 |
"metadata": {},
|
| 767 |
"output_type": "execute_result"
|
| 768 |
}
|
|
|
|
| 773 |
},
|
| 774 |
{
|
| 775 |
"cell_type": "code",
|
| 776 |
+
"execution_count": 16,
|
| 777 |
"metadata": {},
|
| 778 |
"outputs": [
|
| 779 |
{
|
| 780 |
"name": "stdout",
|
| 781 |
"output_type": "stream",
|
| 782 |
"text": [
|
| 783 |
+
"396071\n",
|
| 784 |
+
"395213\n"
|
| 785 |
]
|
| 786 |
}
|
| 787 |
],
|
|
|
|
| 794 |
},
|
| 795 |
{
|
| 796 |
"cell_type": "code",
|
| 797 |
+
"execution_count": 17,
|
| 798 |
"metadata": {},
|
| 799 |
"outputs": [],
|
| 800 |
"source": [
|
|
|
|
| 804 |
],
|
| 805 |
"metadata": {
|
| 806 |
"kernelspec": {
|
| 807 |
+
"display_name": "Python 3",
|
| 808 |
"language": "python",
|
| 809 |
"name": "python3"
|
| 810 |
},
|
|
|
|
| 818 |
"name": "python",
|
| 819 |
"nbconvert_exporter": "python",
|
| 820 |
"pygments_lexer": "ipython3",
|
| 821 |
+
"version": "3.12.4"
|
| 822 |
}
|
| 823 |
},
|
| 824 |
"nbformat": 4,
|