File size: 45,450 Bytes
0d00d62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 |
{
"title": "DBSCAN Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of DBSCAN (Density-Based Spatial Clustering of Applications with Noise), covering fundamental concepts, parameters, advantages, limitations, and practical scenarios.",
"questions": [
{
"id": 1,
"questionText": "What is the main idea behind DBSCAN clustering?",
"options": [
"Clusters are dense regions separated by sparse regions",
"All points are assigned to a cluster",
"Clusters are linearly separable",
"Clusters are formed by equal-sized groups"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN identifies clusters based on density: areas with many points form clusters, and sparse regions separate them."
},
{
"id": 2,
"questionText": "DBSCAN requires which key parameters?",
"options": [
"Number of clusters (k) only",
"Learning rate and iterations",
"Distance metric only",
"Epsilon (eps) and Minimum points (minPts)"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN uses eps (neighborhood radius) and minPts (minimum points to form a dense region) to define clusters."
},
{
"id": 3,
"questionText": "In DBSCAN, what is a 'core point'?",
"options": [
"Point on the boundary of clusters",
"Point with no neighbors",
"Point with at least minPts neighbors within eps",
"Any point in the dataset"
],
"correctAnswerIndex": 2,
"explanation": "A core point has enough neighboring points within eps to be considered part of a dense cluster."
},
{
"id": 4,
"questionText": "In DBSCAN, what is a 'border point'?",
"options": [
"Point not in any cluster",
"Point reachable from a core point but with fewer than minPts neighbors",
"Point with more than minPts neighbors",
"Centroid of a cluster"
],
"correctAnswerIndex": 1,
"explanation": "Border points are density-reachable from core points but do not have enough neighbors themselves to be core points."
},
{
"id": 5,
"questionText": "In DBSCAN, what is a 'noise point'?",
"options": [
"Point with maximum density",
"Point on the cluster centroid",
"Point with exactly minPts neighbors",
"Point not reachable from any core point"
],
"correctAnswerIndex": 3,
"explanation": "Noise points are isolated points that do not belong to any cluster."
},
{
"id": 6,
"questionText": "Scenario: You have clusters of varying density. Challenge for DBSCAN?",
"options": [
"DBSCAN fails to run",
"Always finds all clusters perfectly",
"Clusters become linearly separable",
"May merge dense clusters and miss sparse ones"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN struggles with clusters of differing densities because eps and minPts are global parameters."
},
{
"id": 7,
"questionText": "Scenario: Choosing eps too large. Effect?",
"options": [
"Algorithm fails",
"Noise increases",
"More clusters detected",
"Clusters may merge; noise reduced"
],
"correctAnswerIndex": 3,
"explanation": "Large eps connects distant points, possibly merging distinct clusters."
},
{
"id": 8,
"questionText": "Scenario: Choosing eps too small. Effect?",
"options": [
"Clusters merge",
"Many points labeled as noise; clusters fragmented",
"No effect",
"EM applied instead"
],
"correctAnswerIndex": 1,
"explanation": "Small eps results in fewer neighbors; many points cannot form clusters and are marked as noise."
},
{
"id": 9,
"questionText": "Scenario: Setting minPts too high. Effect?",
"options": [
"Clusters merge",
"More points labeled as noise; small clusters ignored",
"Algorithm fails",
"Clusters increase"
],
"correctAnswerIndex": 1,
"explanation": "High minPts requires dense regions to form clusters, excluding smaller or sparse clusters."
},
{
"id": 10,
"questionText": "Scenario: Setting minPts too low. Effect?",
"options": [
"DBSCAN fails",
"Clusters disappear",
"Many small clusters; noise reduced",
"Clusters merge automatically"
],
"correctAnswerIndex": 2,
"explanation": "A low minPts lets even tiny groups of points qualify as dense regions, so sparse or noisy points form many small clusters instead of being labeled as noise."
},
{
"id": 11,
"questionText": "Scenario: A border point is connected to multiple core points of different clusters. How is it assigned?",
"options": [
"Becomes noise automatically",
"Forms a new cluster",
"Assigned to exactly one cluster, typically whichever reaches it first",
"Algorithm fails"
],
"correctAnswerIndex": 2,
"explanation": "A border point can belong to only one cluster; it is usually assigned to the cluster of the first core point that reaches it, so the result can depend on processing order."
},
{
"id": 12,
"questionText": "Scenario: You have 2D spatial data with noise. DBSCAN advantage?",
"options": [
"Detects clusters of arbitrary shape and identifies noise",
"Requires clusters to be circular",
"Sensitive to number of clusters parameter",
"Assigns all points to clusters"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN works well for arbitrary shapes and identifies noise points."
},
{
"id": 13,
"questionText": "Scenario: Using Euclidean distance vs Manhattan distance in DBSCAN. Effect?",
"options": [
"Distance metric affects cluster shapes and eps choice",
"DBSCAN fails",
"No effect; clusters same",
"Noise ignored"
],
"correctAnswerIndex": 0,
"explanation": "Different distance metrics affect neighborhood calculation, which can change clustering."
},
{
"id": 14,
"questionText": "Scenario: DBSCAN applied on high-dimensional data. Challenge?",
"options": [
"Distance measures become less meaningful (curse of dimensionality)",
"Algorithm runs faster",
"Clusters automatically reduce",
"Noise decreases"
],
"correctAnswerIndex": 0,
"explanation": "High dimensions can make points appear equidistant, complicating density estimation."
},
{
"id": 15,
"questionText": "Scenario: You have concentric clusters. DBSCAN challenge?",
"options": [
"May fail to separate inner and outer clusters depending on eps",
"Always separates perfectly",
"Clusters merge automatically",
"Noise increases"
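],
"correctAnswerIndex": 0,
"explanation": "Whether the rings separate depends on eps: if eps exceeds the gap between the inner and outer rings, DBSCAN links them into a single cluster, and differing ring densities can further cause merged or mislabeled clusters."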
},
{
"id": 16,
"questionText": "Scenario: Using DBSCAN for geospatial clustering. Advantage?",
"options": [
"Finds clusters of arbitrary shape like regions or neighborhoods",
"Clusters must be circular",
"All points assigned",
"Sensitive to number of clusters"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN can identify irregularly shaped spatial clusters without specifying cluster count."
},
{
"id": 17,
"questionText": "Scenario: You want clusters of varying density. DBSCAN limitation?",
"options": [
"Noise removed automatically",
"Single global eps may not detect all clusters",
"Algorithm adapts automatically",
"All clusters found"
],
"correctAnswerIndex": 1,
"explanation": "DBSCAN uses a fixed eps, which can miss sparse clusters or merge dense clusters."
},
{
"id": 18,
"questionText": "Scenario: You apply DBSCAN on streaming data. Challenge?",
"options": [
"Noise ignored",
"Automatically updates clusters",
"All points reassigned automatically",
"Standard DBSCAN is static; streaming adaptation required"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN is not incremental; modifications are needed for dynamic/streaming data."
},
{
"id": 19,
"questionText": "Scenario: Using DBSCAN for anomaly detection. Approach?",
"options": [
"Assign random labels",
"Label points not in any cluster as anomalies",
"Clusters merged manually",
"Use all clusters for prediction"
],
"correctAnswerIndex": 1,
"explanation": "Noise points are naturally flagged as outliers."
},
{
"id": 20,
"questionText": "Scenario: DBSCAN vs K-Means on arbitrary-shaped clusters. Advantage?",
"options": [
"DBSCAN fails for shapes",
"Both perform equally",
"K-Means better for arbitrary shapes",
"DBSCAN can capture non-spherical clusters; K-Means cannot"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN works with clusters of any shape without requiring centroids."
},
{
"id": 21,
"questionText": "Scenario: Two clusters are close together but separated by sparse points. DBSCAN outcome?",
"options": [
"Fails to converge",
"Marks everything as noise",
"Correctly separates clusters using density differences",
"Merges clusters automatically"
],
"correctAnswerIndex": 2,
"explanation": "Sparse points allow DBSCAN to distinguish dense clusters even if they are close."
},
{
"id": 22,
"questionText": "Scenario: Applying DBSCAN on 3D point cloud data. Advantage?",
"options": [
"Clusters must be spherical",
"All points assigned to clusters",
"Can find clusters of arbitrary 3D shape and ignore noise",
"Requires predefining cluster centers"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN handles multi-dimensional data and can identify irregular clusters and noise."
},
{
"id": 23,
"questionText": "Scenario: DBSCAN uses Manhattan distance on grid data. Effect?",
"options": [
"Algorithm fails",
"Noise increases automatically",
"Clusters align with grid; eps choice differs",
"No effect on clusters"
],
"correctAnswerIndex": 2,
"explanation": "Distance metric changes the neighborhood definition, affecting cluster formation."
},
{
"id": 24,
"questionText": "Scenario: You want small but dense clusters. How to set parameters?",
"options": [
"Small eps and appropriate minPts",
"Large eps",
"Ignore parameters",
"Large minPts"
],
"correctAnswerIndex": 0,
"explanation": "Smaller eps ensures that small dense regions form separate clusters."
},
{
"id": 25,
"questionText": "Scenario: You have noisy sensor data. DBSCAN benefit?",
"options": [
"Clusters all points",
"Fails with noise",
"Requires K-Means preprocessing",
"Automatically labels isolated points as noise"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN identifies low-density points as noise, avoiding misclassification."
},
{
"id": 26,
"questionText": "Scenario: Data with hierarchical cluster structure. Limitation of DBSCAN?",
"options": [
"Noise ignored",
"All clusters merged",
"Cannot detect hierarchy; only flat clusters",
"Automatically finds hierarchy"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN provides flat clustering; hierarchical relationships are not captured."
},
{
"id": 27,
"questionText": "Scenario: Using DBSCAN for image segmentation. Advantage?",
"options": [
"Requires predefined number of segments",
"All pixels assigned to clusters",
"Identifies irregular regions and isolates noise",
"Clusters must be circular"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN captures arbitrary-shaped regions and treats background/noisy pixels as noise."
},
{
"id": 28,
"questionText": "Scenario: You have clusters of different densities. How to adapt DBSCAN?",
"options": [
"Reduce dimensionality",
"Use varying eps with methods like HDBSCAN",
"Keep single global eps",
"Increase minPts"
],
"correctAnswerIndex": 1,
"explanation": "Standard DBSCAN struggles with varying densities; adaptive versions like HDBSCAN help."
},
{
"id": 29,
"questionText": "Scenario: DBSCAN fails to detect clusters in high-dimensional text embeddings. Solution?",
"options": [
"Increase minPts arbitrarily",
"Use full covariance",
"Reduce dimensions using PCA or t-SNE before clustering",
"Ignore scaling"
],
"correctAnswerIndex": 2,
"explanation": "Dimensionality reduction improves distance computation and density estimation."
},
{
"id": 30,
"questionText": "Scenario: Choosing minPts in DBSCAN. Rule of thumb?",
"options": [
"MinPts = 1 always",
"MinPts = dataset size",
"MinPts ignored",
"MinPts ≥ dimensionality + 1"
],
"correctAnswerIndex": 3,
"explanation": "MinPts should be slightly larger than the data dimensionality for meaningful clusters."
},
{
"id": 31,
"questionText": "Scenario: DBSCAN applied on GPS data of taxis in a city. Best use case?",
"options": [
"Assign random clusters",
"Identify high-density pickup/drop-off hotspots",
"Detect only circular areas",
"Cluster by taxi color"
],
"correctAnswerIndex": 1,
"explanation": "DBSCAN can detect dense regions where taxis frequently gather without assuming cluster shape."
},
{
"id": 32,
"questionText": "Scenario: You notice DBSCAN marks too many points as noise. Likely cause?",
"options": [
"Algorithm failed",
"All clusters are too dense",
"Distance metric wrong",
"eps too small or minPts too high"
],
"correctAnswerIndex": 3,
"explanation": "Small eps or high minPts can make points unable to form clusters, labeling them as noise."
},
{
"id": 33,
"questionText": "Scenario: DBSCAN applied to social network graph. Challenge?",
"options": [
"Clusters are always detected",
"DBSCAN works directly on graph",
"Noise ignored",
"Graph edges may not correspond to meaningful distances; need transformation"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN requires distance metrics; graphs need embedding or distance conversion."
},
{
"id": 34,
"questionText": "Scenario: Using DBSCAN for anomaly detection in network traffic. How?",
"options": [
"Label low-density patterns as anomalies",
"All high-traffic nodes flagged",
"Randomly assign anomalies",
"Clusters merged manually"
],
"correctAnswerIndex": 0,
"explanation": "Low-density points correspond to unusual patterns, suitable for anomaly detection."
},
{
"id": 35,
"questionText": "Scenario: High-dimensional DBSCAN performance issue. Solution?",
"options": [
"Use dimensionality reduction or HDBSCAN",
"Ignore distance metric",
"Use K-Means instead",
"Increase eps arbitrarily"
],
"correctAnswerIndex": 0,
"explanation": "Reducing dimensions or using hierarchical density clustering helps in high-dimensional spaces."
},
{
"id": 36,
"questionText": "Scenario: Clusters are elongated. DBSCAN vs K-Means?",
"options": [
"K-Means works better",
"All points assigned to noise",
"DBSCAN captures arbitrary shapes better",
"Both fail"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN does not assume spherical clusters, so elongated shapes are captured well."
},
{
"id": 37,
"questionText": "Scenario: DBSCAN fails on variable-density clusters. Solution?",
"options": [
"Reduce minPts to 1",
"Use HDBSCAN for adaptive density clustering",
"Increase eps globally",
"Ignore problem"
],
"correctAnswerIndex": 1,
"explanation": "HDBSCAN handles clusters with varying density better than standard DBSCAN."
},
{
"id": 38,
"questionText": "Scenario: You want reproducible DBSCAN results. Requirement?",
"options": [
"Ignore minPts",
"Deterministic neighbor search and consistent distance metric",
"Random initialization",
"Vary eps each run"
],
"correctAnswerIndex": 1,
"explanation": "Reproducibility requires deterministic calculations for neighborhoods and distances."
},
{
"id": 39,
"questionText": "Scenario: DBSCAN applied on time-series sensor readings. Approach?",
"options": [
"Use sliding windows to extract features before clustering",
"Clusters automatically detected",
"Apply DBSCAN on raw timestamps",
"Ignore feature extraction"
],
"correctAnswerIndex": 0,
"explanation": "Time-series features are extracted to represent temporal patterns for density-based clustering."
},
{
"id": 40,
"questionText": "Scenario: You need clusters and hierarchy. Limitation of DBSCAN?",
"options": [
"Noise ignored",
"DBSCAN provides only flat clustering",
"Automatically generates hierarchy",
"Clusters nested by default"
],
"correctAnswerIndex": 1,
"explanation": "DBSCAN produces flat clusters; hierarchical relationships require extensions like HDBSCAN."
},
{
"id": 41,
"questionText": "Scenario: DBSCAN applied on customer purchase patterns. Advantage?",
"options": [
"Requires predefined cluster number",
"Sensitive to initial seed",
"Detects dense buying behavior groups and isolates rare patterns",
"All points assigned"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN identifies dense purchasing patterns and separates anomalies naturally."
},
{
"id": 42,
"questionText": "Scenario: You want to tune DBSCAN eps parameter. Approach?",
"options": [
"MinPts adjustment only",
"Always choose maximum distance",
"Use k-distance graph to identify elbow point",
"Randomly guess eps"
],
"correctAnswerIndex": 2,
"explanation": "Plotting k-distance helps find a suitable eps where distances start increasing sharply."
},
{
"id": 43,
"questionText": "Scenario: DBSCAN with overlapping clusters. Effect?",
"options": [
"Points duplicated",
"Clusters fail completely",
"Overlap handled by density; border points assigned to one cluster",
"Noise ignored"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN assigns border points to a reachable cluster; soft assignment is not available."
},
{
"id": 44,
"questionText": "Scenario: Applying DBSCAN to text embeddings. Challenge?",
"options": [
"Noise ignored",
"All points assigned to clusters",
"DBSCAN always works",
"High-dimensional distances may be less meaningful"
],
"correctAnswerIndex": 3,
"explanation": "Distance measures in high dimensions can reduce effectiveness; dimensionality reduction helps."
},
{
"id": 45,
"questionText": "Scenario: Noise proportion is high. DBSCAN behavior?",
"options": [
"Many points labeled as noise; cluster detection limited",
"Algorithm fails",
"Clusters detected perfectly",
"All points assigned to clusters"
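],
"correctAnswerIndex": 0,
"explanation": "With a high proportion of noise, a large share of points falls outside any dense region and is labeled as noise, so fewer well-defined clusters can be detected."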
},
{
"id": 46,
"questionText": "Scenario: DBSCAN on streaming data. Limitation?",
"options": [
"Standard DBSCAN is static; needs incremental adaptation",
"All points reassigned automatically",
"Noise ignored",
"Automatically updates clusters"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN is not inherently incremental; streaming data requires modified algorithms."
},
{
"id": 47,
"questionText": "Scenario: DBSCAN vs K-Means for non-spherical clusters. Advantage?",
"options": [
"Both fail",
"K-Means better",
"Noise ignored",
"DBSCAN detects arbitrary shapes; K-Means cannot"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN does not rely on centroid or spherical assumption."
},
{
"id": 48,
"questionText": "Scenario: You apply DBSCAN on noisy sensor readings. Outcome?",
"options": [
"Labels isolated points as noise automatically",
"Clusters all points",
"Noise merged into clusters",
"Algorithm fails"
],
"correctAnswerIndex": 0,
"explanation": "Low-density or isolated points are correctly treated as noise."
},
{
"id": 49,
"questionText": "Scenario: Choosing distance metric affects DBSCAN. Why?",
"options": [
"All clusters merge",
"No effect",
"Neighborhood depends on distance; cluster shape affected",
"Noise ignored"
],
"correctAnswerIndex": 2,
"explanation": "Different metrics change neighbor counts, affecting core points and cluster formation."
},
{
"id": 50,
"questionText": "Scenario: DBSCAN on highly skewed 2D data. Challenge?",
"options": [
"Clusters detected automatically",
"Algorithm fails",
"Fixed eps may not capture sparse areas",
"Noise reduced"
],
"correctAnswerIndex": 2,
"explanation": "Single eps cannot adapt to varying densities; sparse regions may be misclassified."
},
{
"id": 51,
"questionText": "Scenario: DBSCAN applied to customer segmentation with varying buying density. Issue?",
"options": [
"Noise eliminated automatically",
"Clusters merged randomly",
"All clusters detected perfectly",
"Some smaller or sparser clusters may be missed"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN’s global eps struggles with clusters of different densities; adaptive methods recommended."
},
{
"id": 52,
"questionText": "Scenario: You want DBSCAN to detect small anomalies in a large dataset. How to adjust?",
"options": [
"Increase eps arbitrarily",
"Decrease minPts and eps appropriately",
"Ignore small clusters",
"Use K-Means instead"
],
"correctAnswerIndex": 1,
"explanation": "Smaller minPts and eps allow DBSCAN to detect small dense regions representing anomalies."
},
{
"id": 53,
"questionText": "Scenario: Using DBSCAN for clustering Wi-Fi signals in a building. Advantage?",
"options": [
"Identifies dense signal regions and ignores noise",
"All points assigned to clusters",
"Requires number of clusters",
"Clusters must be circular"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN can detect regions with strong signal density and label isolated weak signals as noise."
},
{
"id": 54,
"questionText": "Scenario: DBSCAN on image pixel intensities for segmentation. Outcome?",
"options": [
"Requires predefined cluster number",
"Clusters must be circular",
"All pixels assigned",
"Arbitrary-shaped regions segmented; noise isolated"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN can segment regions of any shape and label scattered pixels as noise."
},
{
"id": 55,
"questionText": "Scenario: Using DBSCAN on 3D point cloud of a city. Advantage?",
"options": [
"Detects clusters like buildings, trees, and separates sparse points",
"Noise merged into clusters",
"All points assigned",
"Clusters must be spherical"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN works in multi-dimensional data and identifies meaningful dense clusters."
},
{
"id": 56,
"questionText": "Scenario: DBSCAN fails with high-dimensional word embeddings. Solution?",
"options": [
"Apply dimensionality reduction before clustering",
"Use K-Means",
"Increase eps globally",
"Ignore problem"
],
"correctAnswerIndex": 0,
"explanation": "High-dimensional spaces make distance less meaningful; reduction helps clustering performance."
},
{
"id": 57,
"questionText": "Scenario: Border points connected to multiple core points. Assignment?",
"options": [
"Assigned to one cluster reachable first",
"Algorithm fails",
"Assigned to all clusters simultaneously",
"Become noise"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN assigns each border point to a single cluster, typically the cluster of the first core point that reaches it."
},
{
"id": 58,
"questionText": "Scenario: DBSCAN on streaming data. Limitation?",
"options": [
"Noise ignored",
"Automatically updates clusters",
"All points reassigned automatically",
"Standard DBSCAN cannot update incrementally; adaptation needed"
],
"correctAnswerIndex": 3,
"explanation": "Incremental or streaming adaptations of DBSCAN are required for dynamic datasets."
},
{
"id": 59,
"questionText": "Scenario: Clusters are elongated and dense. DBSCAN vs K-Means?",
"options": [
"Both fail",
"K-Means better",
"All points assigned to noise",
"DBSCAN captures shape; K-Means fails with elongated clusters"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN’s density-based approach handles arbitrary shapes like elongated clusters well."
},
{
"id": 60,
"questionText": "Scenario: Choosing minPts parameter. Rule of thumb?",
"options": [
"minPts = dataset size",
"minPts ≥ dimensionality + 1",
"minPts = 1 always",
"minPts ignored"
],
"correctAnswerIndex": 1,
"explanation": "Choosing minPts slightly larger than data dimensionality ensures meaningful cluster formation."
},
{
"id": 61,
"questionText": "Scenario: DBSCAN applied to weather station locations. Advantage?",
"options": [
"All stations assigned",
"Noise merged into clusters",
"Detects dense station clusters and separates isolated stations as noise",
"Clusters must be circular"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN can find groups of stations in dense regions while labeling isolated ones as noise."
},
{
"id": 62,
"questionText": "Scenario: DBSCAN applied to vehicle GPS tracks. Best outcome?",
"options": [
"All vehicles assigned to same cluster",
"Requires predefined cluster number",
"Clusters must be circular",
"Detect hotspots of vehicle activity and identify sparse routes"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN identifies dense routes or locations and marks sparse movements as noise."
},
{
"id": 63,
"questionText": "Scenario: DBSCAN applied to detect fraudulent transactions. Advantage?",
"options": [
"All transactions clustered",
"Isolates unusual low-density transactions as potential fraud",
"Clusters merged arbitrarily",
"Noise ignored"
],
"correctAnswerIndex": 1,
"explanation": "Low-density points are flagged naturally, useful for anomaly detection in finance."
},
{
"id": 64,
"questionText": "Scenario: eps too large. Effect on clusters?",
"options": [
"Clusters may merge; noise reduced",
"Noise increases",
"More clusters detected",
"Algorithm fails"
],
"correctAnswerIndex": 0,
"explanation": "Large eps connects distant points, merging separate clusters and reducing noise."
},
{
"id": 65,
"questionText": "Scenario: eps too small. Effect on clusters?",
"options": [
"Clusters merge",
"Noise decreases",
"Many points labeled as noise; clusters fragmented",
"Algorithm fails"
],
"correctAnswerIndex": 2,
"explanation": "Small eps prevents points from forming dense clusters; many become noise."
},
{
"id": 66,
"questionText": "Scenario: High-dimensional clustering. DBSCAN limitation?",
"options": [
"Clusters detected perfectly",
"All points assigned",
"Distances lose meaning; density estimation difficult",
"Noise ignored"
],
"correctAnswerIndex": 2,
"explanation": "High dimensions make points appear equidistant, complicating density-based clustering."
},
{
"id": 67,
"questionText": "Scenario: Data with multiple density clusters. Solution?",
"options": [
"Use HDBSCAN for adaptive density clustering",
"Increase eps globally",
"Reduce minPts to 1",
"Ignore problem"
],
"correctAnswerIndex": 0,
"explanation": "HDBSCAN adapts to varying densities, unlike standard DBSCAN."
},
{
"id": 68,
"questionText": "Scenario: Using DBSCAN on customer browsing patterns. Advantage?",
"options": [
"Noise ignored",
"Requires predefined cluster number",
"All points assigned",
"Detects dense behavioral patterns and isolates outliers"
],
"correctAnswerIndex": 3,
"explanation": "Dense browsing behaviors form clusters; rare patterns become noise."
},
{
"id": 69,
"questionText": "Scenario: Noise points in DBSCAN. Definition?",
"options": [
"Cluster centroids",
"All points in clusters",
"Points not reachable from any core point",
"Points with minPts neighbors"
],
"correctAnswerIndex": 2,
"explanation": "Noise points are isolated points not part of any cluster."
},
{
"id": 70,
"questionText": "Scenario: Choosing distance metric in DBSCAN. Effect?",
"options": [
"Affects neighborhood definition and cluster shape",
"Noise ignored",
"No effect",
"All points merged"
],
"correctAnswerIndex": 0,
"explanation": "The distance metric changes how neighbors are counted, affecting cluster formation."
},
{
"id": 71,
"questionText": "Scenario: DBSCAN applied to earthquake epicenters. Advantage?",
"options": [
"Clusters must be circular",
"Detects clusters of seismic activity and labels isolated events as noise",
"Noise merged into clusters",
"All events assigned"
],
"correctAnswerIndex": 1,
"explanation": "DBSCAN identifies dense seismic regions and separates rare events as noise."
},
{
"id": 72,
"questionText": "Scenario: Varying eps across dataset. How to achieve?",
"options": [
"Ignore variation",
"Random eps each run",
"Use adaptive DBSCAN variants like HDBSCAN",
"Standard DBSCAN suffices"
],
"correctAnswerIndex": 2,
"explanation": "Adaptive algorithms adjust density thresholds to handle varying densities."
},
{
"id": 73,
"questionText": "Scenario: Applying DBSCAN on medical imaging. Benefit?",
"options": [
"Clusters must be spherical",
"Requires fixed cluster number",
"Detects regions of interest and separates background noise",
"All pixels clustered"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN segments irregular shapes and isolates sparse/noisy regions."
},
{
"id": 74,
"questionText": "Scenario: Using DBSCAN for anomaly detection in IoT sensors. Approach?",
"options": [
"Ignore isolated readings",
"Label low-density readings as anomalies",
"Cluster all points",
"Random assignment"
],
"correctAnswerIndex": 1,
"explanation": "Isolated readings or sparse patterns naturally become noise, indicating anomalies."
},
{
"id": 75,
"questionText": "Scenario: DBSCAN on financial transactions. Noise points indicate?",
"options": [
"Noise merged",
"All transactions are legitimate",
"Potential fraudulent or unusual transactions",
"Clusters merged"
],
"correctAnswerIndex": 2,
"explanation": "Sparse points in dense transaction space are flagged as unusual or fraudulent."
},
{
"id": 76,
"questionText": "Scenario: DBSCAN applied to traffic accident locations. Advantage?",
"options": [
"Identifies accident hotspots and isolates rare events",
"All accidents assigned",
"Noise merged into clusters",
"Clusters must be circular"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN finds dense accident regions and treats isolated incidents as noise."
},
{
"id": 77,
"questionText": "Scenario: eps and minPts selection using k-distance plot. What is the elbow point?",
"options": [
"Minimum distance",
"Random point",
"Point where distance sharply increases, suitable for eps",
"Maximum distance"
],
"correctAnswerIndex": 2,
"explanation": "The elbow in the k-distance graph indicates the transition from dense to sparse regions, guiding eps selection."
},
{
"id": 78,
"questionText": "Scenario: Border points connected to multiple clusters. Assignment in DBSCAN?",
"options": [
"Assigned to all clusters",
"Become noise",
"Assigned to the first reachable cluster",
"Clusters merge automatically"
],
"correctAnswerIndex": 2,
"explanation": "A border point is assigned to exactly one cluster, typically the first one that reaches it, so the result can depend on the order in which points are processed."
},
{
"id": 79,
"questionText": "Scenario: DBSCAN on social media check-ins. Benefit?",
"options": [
"Clusters must be predefined",
"All users assigned",
"Noise ignored",
"Detects popular locations and flags isolated check-ins as noise"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN identifies dense activity areas and treats isolated check-ins as noise."
},
{
"id": 80,
"questionText": "Scenario: Standard DBSCAN fails on variable density data. Solution?",
"options": [
"Ignore the problem",
"Increase eps globally",
"Decrease minPts arbitrarily",
"Use HDBSCAN for hierarchical density-based clustering"
],
"correctAnswerIndex": 3,
"explanation": "HDBSCAN adapts to varying density, unlike standard DBSCAN."
},
{
"id": 81,
"questionText": "Scenario: DBSCAN on genomic data. Advantage?",
"options": [
"Clusters must be circular",
"All genes assigned",
"Identifies dense gene clusters and isolates rare genes",
"Noise merged"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN can identify dense gene expression patterns and separate sparse or rare genes as noise."
},
{
"id": 82,
"questionText": "Scenario: Choosing minPts too high. Effect?",
"options": [
"Algorithm fails",
"Clusters merge",
"Small clusters ignored; many points labeled noise",
"More clusters detected"
],
"correctAnswerIndex": 2,
"explanation": "High minPts requires dense regions; sparse or small clusters are lost."
},
{
"id": 83,
"questionText": "Scenario: Choosing minPts too low. Effect?",
"options": [
"Many small clusters formed; noise reduced",
"Clusters merge",
"Algorithm fails",
"All points noise"
],
"correctAnswerIndex": 0,
"explanation": "Low minPts lets even tiny groups of points qualify as clusters, so points that would otherwise be noise form many small, often spurious clusters."
},
{
"id": 84,
"questionText": "Scenario: DBSCAN applied on customer location data. Advantage?",
"options": [
"Requires predefined cluster count",
"All points assigned",
"Clusters must be circular",
"Identifies dense shopping areas and separates isolated customers as noise"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN captures dense shopping locations and labels scattered customers as noise."
},
{
"id": 85,
"questionText": "Scenario: High-dimensional text embeddings. DBSCAN limitation?",
"options": [
"Distances lose meaning; clusters may be unreliable",
"Algorithm faster",
"Noise ignored",
"Clusters always detected"
],
"correctAnswerIndex": 0,
"explanation": "In high dimensions, distances are less discriminative, affecting density and clustering."
},
{
"id": 86,
"questionText": "Scenario: Using DBSCAN on image feature vectors. Benefit?",
"options": [
"Requires predefined cluster count",
"All features assigned",
"Groups similar image features and isolates outliers",
"Clusters must be circular"
],
"correctAnswerIndex": 2,
"explanation": "DBSCAN detects dense feature groups and treats isolated features as noise."
},
{
"id": 87,
"questionText": "Scenario: eps too small. Effect?",
"options": [
"Clusters fragmented; many points labeled noise",
"Clusters merge",
"All points assigned",
"Algorithm fails"
],
"correctAnswerIndex": 0,
"explanation": "Small eps prevents formation of dense clusters; isolated points become noise."
},
{
"id": 88,
"questionText": "Scenario: eps too large. Effect?",
"options": [
"More clusters detected",
"Algorithm fails",
"Clusters merge; fewer noise points",
"Noise increases"
],
"correctAnswerIndex": 2,
"explanation": "Large eps connects distant points, merging separate clusters and reducing noise."
},
{
"id": 89,
"questionText": "Scenario: DBSCAN on irregularly shaped 2D clusters. Advantage?",
"options": [
"Clusters must be circular",
"Captures arbitrary shapes unlike K-Means",
"All points assigned",
"Noise ignored"
],
"correctAnswerIndex": 1,
"explanation": "DBSCAN does not assume cluster shape, so it captures elongated or irregular clusters."
},
{
"id": 90,
"questionText": "Scenario: Border point connected to multiple core points. Assignment?",
"options": [
"Assigned to first reachable cluster",
"Assigned to all clusters",
"Clusters merge",
"Becomes noise"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN assigns a border point to one cluster, typically the cluster of the first core point that reaches it."
},
{
"id": 91,
"questionText": "Scenario: DBSCAN on IoT sensor anomaly detection. Advantage?",
"options": [
"Sparse readings flagged as anomalies automatically",
"Noise ignored",
"All readings clustered",
"Clusters merged arbitrarily"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN labels low-density points as noise, which is useful for detecting anomalies."
},
{
"id": 92,
"questionText": "Scenario: DBSCAN with streaming data. Limitation?",
"options": [
"Needs adaptation; standard DBSCAN is static",
"Noise ignored",
"Automatically updates clusters",
"All points reassigned automatically"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN is not incremental; streaming or dynamic data requires modified algorithms."
},
{
"id": 93,
"questionText": "Scenario: Using DBSCAN on earthquake data. Benefit?",
"options": [
"Detects dense seismic zones; isolates rare events",
"All events clustered",
"Noise merged",
"Clusters must be circular"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN identifies dense clusters of earthquakes and labels isolated events as noise."
},
{
"id": 94,
"questionText": "Scenario: Noise in DBSCAN. Definition?",
"options": [
"Cluster centroids",
"Points not reachable from any core point",
"Points with minPts neighbors",
"All points assigned"
],
"correctAnswerIndex": 1,
"explanation": "Noise points are neither core points nor density-reachable from any core point, so they belong to no cluster."
},
{
"id": 95,
"questionText": "Scenario: Varying density clusters. Best DBSCAN variant?",
"options": [
"Standard DBSCAN",
"K-Means",
"HDBSCAN",
"Agglomerative clustering"
],
"correctAnswerIndex": 2,
"explanation": "HDBSCAN adapts to different densities and creates a hierarchy of clusters."
},
{
"id": 96,
"questionText": "Scenario: Choosing eps using k-distance plot. How?",
"options": [
"Select maximum distance",
"Select value at elbow point where distances sharply rise",
"Randomly select eps",
"Select minimum distance"
],
"correctAnswerIndex": 1,
"explanation": "The elbow point indicates the transition from dense to sparse points, guiding eps choice."
},
{
"id": 97,
"questionText": "Scenario: DBSCAN on customer behavior patterns. Benefit?",
"options": [
"Groups dense behavior patterns; isolates rare customers",
"Requires fixed number of clusters",
"Noise ignored",
"All points assigned"
],
"correctAnswerIndex": 0,
"explanation": "DBSCAN identifies dense behavioral clusters and labels rare behaviors as noise."
},
{
"id": 98,
"questionText": "Scenario: DBSCAN vs K-Means for non-spherical clusters. Advantage?",
"options": [
"K-Means better",
"Noise ignored",
"Both fail",
"DBSCAN captures arbitrary shapes"
],
"correctAnswerIndex": 3,
"explanation": "DBSCAN does not assume cluster shape and handles irregular or elongated clusters."
},
{
"id": 99,
"questionText": "Scenario: High-dimensional DBSCAN problem. Solution?",
"options": [
"Dimensionality reduction (PCA, t-SNE) or HDBSCAN",
"Increase minPts arbitrarily",
"Ignore scaling",
"Use raw distances"
],
"correctAnswerIndex": 0,
"explanation": "High-dimensional spaces make distances less meaningful; reduction or adaptive methods improve clustering."
},
{
"id": 100,
"questionText": "Scenario: Choosing minPts in DBSCAN. Rule of thumb?",
"options": [
"minPts = 1 always",
"minPts = dataset size",
"Ignore minPts",
"minPts ≥ dimensionality + 1"
],
"correctAnswerIndex": 3,
"explanation": "A common rule of thumb is minPts ≥ D + 1, where D is the number of dimensions; larger values such as 2 * D are often preferred for noisy data."
}
]
}
|