{ "title": "DBSCAN Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of DBSCAN (Density-Based Spatial Clustering of Applications with Noise), covering fundamental concepts, parameters, advantages, limitations, and practical scenarios.", "questions": [ { "id": 1, "questionText": "What is the main idea behind DBSCAN clustering?", "options": [ "Clusters are dense regions separated by sparse regions", "All points are assigned to a cluster", "Clusters are linearly separable", "Clusters are formed by equal-sized groups" ], "correctAnswerIndex": 0, "explanation": "DBSCAN identifies clusters based on density: areas with many points form clusters, and sparse regions separate them." }, { "id": 2, "questionText": "DBSCAN requires which key parameters?", "options": [ "Number of clusters (k) only", "Learning rate and iterations", "Distance metric only", "Epsilon (eps) and Minimum points (minPts)" ], "correctAnswerIndex": 3, "explanation": "DBSCAN uses eps (neighborhood radius) and minPts (minimum points to form a dense region) to define clusters." }, { "id": 3, "questionText": "In DBSCAN, what is a 'core point'?", "options": [ "Point on the boundary of clusters", "Point with no neighbors", "Point with at least minPts neighbors within eps", "Any point in the dataset" ], "correctAnswerIndex": 2, "explanation": "A core point has enough neighboring points within eps to be considered part of a dense cluster." }, { "id": 4, "questionText": "In DBSCAN, what is a 'border point'?", "options": [ "Point not in any cluster", "Point reachable from a core point but with fewer than minPts neighbors", "Point with more than minPts neighbors", "Centroid of a cluster" ], "correctAnswerIndex": 1, "explanation": "Border points are density-reachable from core points but do not have enough neighbors themselves to be core points." 
}, { "id": 5, "questionText": "In DBSCAN, what is a 'noise point'?", "options": [ "Point with maximum density", "Point on the cluster centroid", "Point with exactly minPts neighbors", "Point not reachable from any core point" ], "correctAnswerIndex": 3, "explanation": "Noise points are isolated points that do not belong to any cluster." }, { "id": 6, "questionText": "Scenario: You have clusters of varying density. Challenge for DBSCAN?", "options": [ "DBSCAN fails to run", "Always finds all clusters perfectly", "Clusters become linearly separable", "May merge dense clusters and miss sparse ones" ], "correctAnswerIndex": 3, "explanation": "DBSCAN struggles with clusters of differing densities because eps and minPts are global parameters." }, { "id": 7, "questionText": "Scenario: Choosing eps too large. Effect?", "options": [ "Algorithm fails", "Noise increases", "More clusters detected", "Clusters may merge; noise reduced" ], "correctAnswerIndex": 3, "explanation": "Large eps connects distant points, possibly merging distinct clusters." }, { "id": 8, "questionText": "Scenario: Choosing eps too small. Effect?", "options": [ "Clusters merge", "Many points labeled as noise; clusters fragmented", "No effect", "EM applied instead" ], "correctAnswerIndex": 1, "explanation": "Small eps results in fewer neighbors; many points cannot form clusters and are marked as noise." }, { "id": 9, "questionText": "Scenario: Setting minPts too high. Effect?", "options": [ "Clusters merge", "More points labeled as noise; small clusters ignored", "Algorithm fails", "Clusters increase" ], "correctAnswerIndex": 1, "explanation": "High minPts requires dense regions to form clusters, excluding smaller or sparse clusters." }, { "id": 10, "questionText": "Scenario: Setting minPts too low. 
Effect?", "options": [ "DBSCAN fails", "Clusters disappear", "Many small clusters; noise reduced", "Clusters merge automatically" ], "correctAnswerIndex": 2, "explanation": "Low minPts allows small groups to form clusters, potentially splitting natural clusters." }, { "id": 11, "questionText": "Scenario: A border point is connected to multiple core points of different clusters. How is it assigned?", "options": [ "Becomes noise automatically", "Forms a new cluster", "Assigned to any one cluster arbitrarily or first reachable", "Algorithm fails" ], "correctAnswerIndex": 2, "explanation": "A border point can belong to only one cluster; it is usually assigned to the cluster of the first core point that reaches it." }, { "id": 12, "questionText": "Scenario: You have 2D spatial data with noise. DBSCAN advantage?", "options": [ "Detects clusters of arbitrary shape and identifies noise", "Requires clusters to be circular", "Sensitive to number of clusters parameter", "Assigns all points to clusters" ], "correctAnswerIndex": 0, "explanation": "DBSCAN works well for arbitrary shapes and identifies noise points." }, { "id": 13, "questionText": "Scenario: Using Euclidean distance vs Manhattan distance in DBSCAN. Effect?", "options": [ "Distance metric affects cluster shapes and eps choice", "DBSCAN fails", "No effect; clusters same", "Noise ignored" ], "correctAnswerIndex": 0, "explanation": "Different distance metrics affect neighborhood calculation, which can change clustering." }, { "id": 14, "questionText": "Scenario: DBSCAN applied on high-dimensional data. Challenge?", "options": [ "Distance measures become less meaningful (curse of dimensionality)", "Algorithm runs faster", "Clusters automatically reduce", "Noise decreases" ], "correctAnswerIndex": 0, "explanation": "High dimensions can make points appear equidistant, complicating density estimation." }, { "id": 15, "questionText": "Scenario: You have concentric clusters. 
DBSCAN challenge?", "options": [ "May fail to separate inner and outer clusters depending on eps", "Always separates perfectly", "Clusters merge automatically", "Noise increases" ], "correctAnswerIndex": 0, "explanation": "Density difference between inner and outer rings may cause DBSCAN to merge or mislabel clusters." }, { "id": 16, "questionText": "Scenario: Using DBSCAN for geospatial clustering. Advantage?", "options": [ "Finds clusters of arbitrary shape like regions or neighborhoods", "Clusters must be circular", "All points assigned", "Sensitive to number of clusters" ], "correctAnswerIndex": 0, "explanation": "DBSCAN can identify irregularly shaped spatial clusters without specifying cluster count." }, { "id": 17, "questionText": "Scenario: You want clusters of varying density. DBSCAN limitation?", "options": [ "Noise removed automatically", "Single global eps may not detect all clusters", "Algorithm adapts automatically", "All clusters found" ], "correctAnswerIndex": 1, "explanation": "DBSCAN uses a fixed eps, which can miss sparse clusters or merge dense clusters." }, { "id": 18, "questionText": "Scenario: You apply DBSCAN on streaming data. Challenge?", "options": [ "Noise ignored", "Automatically updates clusters", "All points reassigned automatically", "Standard DBSCAN is static; streaming adaptation required" ], "correctAnswerIndex": 3, "explanation": "DBSCAN is not incremental; modifications are needed for dynamic/streaming data." }, { "id": 19, "questionText": "Scenario: Using DBSCAN for anomaly detection. Approach?", "options": [ "Assign random labels", "Label points not in any cluster as anomalies", "Clusters merged manually", "Use all clusters for prediction" ], "correctAnswerIndex": 1, "explanation": "Noise points are naturally flagged as outliers." }, { "id": 20, "questionText": "Scenario: DBSCAN vs K-Means on arbitrary-shaped clusters. 
Advantage?", "options": [ "DBSCAN fails for shapes", "Both perform equally", "K-Means better for arbitrary shapes", "DBSCAN can capture non-spherical clusters; K-Means cannot" ], "correctAnswerIndex": 3, "explanation": "DBSCAN works with clusters of any shape without requiring centroids." }, { "id": 21, "questionText": "Scenario: Two clusters are close together but separated by sparse points. DBSCAN outcome?", "options": [ "Fails to converge", "Marks everything as noise", "Correctly separates clusters using density differences", "Merges clusters automatically" ], "correctAnswerIndex": 2, "explanation": "Sparse points allow DBSCAN to distinguish dense clusters even if they are close." }, { "id": 22, "questionText": "Scenario: Applying DBSCAN on 3D point cloud data. Advantage?", "options": [ "Clusters must be spherical", "All points assigned to clusters", "Can find clusters of arbitrary 3D shape and ignore noise", "Requires predefining cluster centers" ], "correctAnswerIndex": 2, "explanation": "DBSCAN handles multi-dimensional data and can identify irregular clusters and noise." }, { "id": 23, "questionText": "Scenario: DBSCAN uses Manhattan distance on grid data. Effect?", "options": [ "Algorithm fails", "Noise increases automatically", "Clusters align with grid; eps choice differs", "No effect on clusters" ], "correctAnswerIndex": 2, "explanation": "Distance metric changes the neighborhood definition, affecting cluster formation." }, { "id": 24, "questionText": "Scenario: You want small but dense clusters. How to set parameters?", "options": [ "Small eps and appropriate minPts", "Large eps", "Ignore parameters", "Large minPts" ], "correctAnswerIndex": 0, "explanation": "Smaller eps ensures that small dense regions form separate clusters." }, { "id": 25, "questionText": "Scenario: You have noisy sensor data. 
DBSCAN benefit?", "options": [ "Clusters all points", "Fails with noise", "Requires K-Means preprocessing", "Automatically labels isolated points as noise" ], "correctAnswerIndex": 3, "explanation": "DBSCAN identifies low-density points as noise, avoiding misclassification." }, { "id": 26, "questionText": "Scenario: Data with hierarchical cluster structure. Limitation of DBSCAN?", "options": [ "Noise ignored", "All clusters merged", "Cannot detect hierarchy; only flat clusters", "Automatically finds hierarchy" ], "correctAnswerIndex": 2, "explanation": "DBSCAN provides flat clustering; hierarchical relationships are not captured." }, { "id": 27, "questionText": "Scenario: Using DBSCAN for image segmentation. Advantage?", "options": [ "Requires predefined number of segments", "All pixels assigned to clusters", "Identifies irregular regions and isolates noise", "Clusters must be circular" ], "correctAnswerIndex": 2, "explanation": "DBSCAN captures arbitrary-shaped regions and treats background/noisy pixels as noise." }, { "id": 28, "questionText": "Scenario: You have clusters of different densities. How to adapt DBSCAN?", "options": [ "Reduce dimensionality", "Use varying eps with methods like HDBSCAN", "Keep single global eps", "Increase minPts" ], "correctAnswerIndex": 1, "explanation": "Standard DBSCAN struggles with varying densities; adaptive versions like HDBSCAN help." }, { "id": 29, "questionText": "Scenario: DBSCAN fails to detect clusters in high-dimensional text embeddings. Solution?", "options": [ "Increase minPts arbitrarily", "Use full covariance", "Reduce dimensions using PCA or t-SNE before clustering", "Ignore scaling" ], "correctAnswerIndex": 2, "explanation": "Dimensionality reduction improves distance computation and density estimation." }, { "id": 30, "questionText": "Scenario: Choosing minPts in DBSCAN. 
Rule of thumb?", "options": [ "MinPts = 1 always", "MinPts = dataset size", "MinPts ignored", "MinPts ≥ dimensionality + 1" ], "correctAnswerIndex": 3, "explanation": "MinPts should be slightly larger than the data dimensionality for meaningful clusters." }, { "id": 31, "questionText": "Scenario: DBSCAN applied on GPS data of taxis in a city. Best use case?", "options": [ "Assign random clusters", "Identify high-density pickup/drop-off hotspots", "Detect only circular areas", "Cluster by taxi color" ], "correctAnswerIndex": 1, "explanation": "DBSCAN can detect dense regions where taxis frequently gather without assuming cluster shape." }, { "id": 32, "questionText": "Scenario: You notice DBSCAN marks too many points as noise. Likely cause?", "options": [ "Algorithm failed", "All clusters are too dense", "Distance metric wrong", "eps too small or minPts too high" ], "correctAnswerIndex": 3, "explanation": "Small eps or high minPts can make points unable to form clusters, labeling them as noise." }, { "id": 33, "questionText": "Scenario: DBSCAN applied to social network graph. Challenge?", "options": [ "Clusters are always detected", "DBSCAN works directly on graph", "Noise ignored", "Graph edges may not correspond to meaningful distances; need transformation" ], "correctAnswerIndex": 3, "explanation": "DBSCAN requires distance metrics; graphs need embedding or distance conversion." }, { "id": 34, "questionText": "Scenario: Using DBSCAN for anomaly detection in network traffic. How?", "options": [ "Label low-density patterns as anomalies", "All high-traffic nodes flagged", "Randomly assign anomalies", "Clusters merged manually" ], "correctAnswerIndex": 0, "explanation": "Low-density points correspond to unusual patterns, suitable for anomaly detection." }, { "id": 35, "questionText": "Scenario: High-dimensional DBSCAN performance issue. 
Solution?", "options": [ "Use dimensionality reduction or HDBSCAN", "Ignore distance metric", "Use K-Means instead", "Increase eps arbitrarily" ], "correctAnswerIndex": 0, "explanation": "Reducing dimensions or using hierarchical density clustering helps in high-dimensional spaces." }, { "id": 36, "questionText": "Scenario: Clusters are elongated. DBSCAN vs K-Means?", "options": [ "K-Means works better", "All points assigned to noise", "DBSCAN captures arbitrary shapes better", "Both fail" ], "correctAnswerIndex": 2, "explanation": "DBSCAN does not assume spherical clusters, so elongated shapes are captured well." }, { "id": 37, "questionText": "Scenario: DBSCAN fails on variable-density clusters. Solution?", "options": [ "Reduce minPts to 1", "Use HDBSCAN for adaptive density clustering", "Increase eps globally", "Ignore problem" ], "correctAnswerIndex": 1, "explanation": "HDBSCAN handles clusters with varying density better than standard DBSCAN." }, { "id": 38, "questionText": "Scenario: You want reproducible DBSCAN results. Requirement?", "options": [ "Ignore minPts", "Deterministic neighbor search and consistent distance metric", "Random initialization", "Vary eps each run" ], "correctAnswerIndex": 1, "explanation": "Reproducibility requires deterministic calculations for neighborhoods and distances." }, { "id": 39, "questionText": "Scenario: DBSCAN applied on time-series sensor readings. Approach?", "options": [ "Use sliding windows to extract features before clustering", "Clusters automatically detected", "Apply DBSCAN on raw timestamps", "Ignore feature extraction" ], "correctAnswerIndex": 0, "explanation": "Time-series features are extracted to represent temporal patterns for density-based clustering." }, { "id": 40, "questionText": "Scenario: You need clusters and hierarchy. 
Limitation of DBSCAN?", "options": [ "Noise ignored", "DBSCAN provides only flat clustering", "Automatically generates hierarchy", "Clusters nested by default" ], "correctAnswerIndex": 1, "explanation": "DBSCAN produces flat clusters; hierarchical relationships require extensions like HDBSCAN." }, { "id": 41, "questionText": "Scenario: DBSCAN applied on customer purchase patterns. Advantage?", "options": [ "Requires predefined cluster number", "Sensitive to initial seed", "Detects dense buying behavior groups and isolates rare patterns", "All points assigned" ], "correctAnswerIndex": 2, "explanation": "DBSCAN identifies dense purchasing patterns and separates anomalies naturally." }, { "id": 42, "questionText": "Scenario: You want to tune DBSCAN eps parameter. Approach?", "options": [ "MinPts adjustment only", "Always choose maximum distance", "Use k-distance graph to identify elbow point", "Randomly guess eps" ], "correctAnswerIndex": 2, "explanation": "Plotting k-distance helps find a suitable eps where distances start increasing sharply." }, { "id": 43, "questionText": "Scenario: DBSCAN with overlapping clusters. Effect?", "options": [ "Points duplicated", "Clusters fail completely", "Overlap handled by density; border points assigned to one cluster", "Noise ignored" ], "correctAnswerIndex": 2, "explanation": "DBSCAN assigns border points to a reachable cluster; soft assignment is not available." }, { "id": 44, "questionText": "Scenario: Applying DBSCAN to text embeddings. Challenge?", "options": [ "Noise ignored", "All points assigned to clusters", "DBSCAN always works", "High-dimensional distances may be less meaningful" ], "correctAnswerIndex": 3, "explanation": "Distance measures in high dimensions can reduce effectiveness; dimensionality reduction helps." }, { "id": 45, "questionText": "Scenario: Noise proportion is high. 
DBSCAN behavior?", "options": [ "Many points labeled as noise; cluster detection limited", "Algorithm fails", "Clusters detected perfectly", "All points assigned to clusters" ], "correctAnswerIndex": 0, "explanation": "High noise density can prevent formation of dense clusters." }, { "id": 46, "questionText": "Scenario: DBSCAN on streaming data. Limitation?", "options": [ "Standard DBSCAN is static; needs incremental adaptation", "All points reassigned automatically", "Noise ignored", "Automatically updates clusters" ], "correctAnswerIndex": 0, "explanation": "DBSCAN is not inherently incremental; streaming data requires modified algorithms." }, { "id": 47, "questionText": "Scenario: DBSCAN vs K-Means for non-spherical clusters. Advantage?", "options": [ "Both fail", "K-Means better", "Noise ignored", "DBSCAN detects arbitrary shapes; K-Means cannot" ], "correctAnswerIndex": 3, "explanation": "DBSCAN does not rely on centroid or spherical assumption." }, { "id": 48, "questionText": "Scenario: You apply DBSCAN on noisy sensor readings. Outcome?", "options": [ "Labels isolated points as noise automatically", "Clusters all points", "Noise merged into clusters", "Algorithm fails" ], "correctAnswerIndex": 0, "explanation": "Low-density or isolated points are correctly treated as noise." }, { "id": 49, "questionText": "Scenario: Choosing distance metric affects DBSCAN. Why?", "options": [ "All clusters merge", "No effect", "Neighborhood depends on distance; cluster shape affected", "Noise ignored" ], "correctAnswerIndex": 2, "explanation": "Different metrics change neighbor counts, affecting core points and cluster formation." }, { "id": 50, "questionText": "Scenario: DBSCAN on highly skewed 2D data. Challenge?", "options": [ "Clusters detected automatically", "Algorithm fails", "Fixed eps may not capture sparse areas", "Noise reduced" ], "correctAnswerIndex": 2, "explanation": "Single eps cannot adapt to varying densities; sparse regions may be misclassified." 
}, { "id": 51, "questionText": "Scenario: DBSCAN applied to customer segmentation with varying buying density. Issue?", "options": [ "Noise eliminated automatically", "Clusters merged randomly", "All clusters detected perfectly", "Some smaller or sparser clusters may be missed" ], "correctAnswerIndex": 3, "explanation": "DBSCAN’s global eps struggles with clusters of different densities; adaptive methods recommended." }, { "id": 52, "questionText": "Scenario: You want DBSCAN to detect small anomalies in a large dataset. How to adjust?", "options": [ "Increase eps arbitrarily", "Decrease minPts and eps appropriately", "Ignore small clusters", "Use K-Means instead" ], "correctAnswerIndex": 1, "explanation": "Smaller minPts and eps allow DBSCAN to detect small dense regions representing anomalies." }, { "id": 53, "questionText": "Scenario: Using DBSCAN for clustering Wi-Fi signals in a building. Advantage?", "options": [ "Identifies dense signal regions and ignores noise", "All points assigned to clusters", "Requires number of clusters", "Clusters must be circular" ], "correctAnswerIndex": 0, "explanation": "DBSCAN can detect regions with strong signal density and label isolated weak signals as noise." }, { "id": 54, "questionText": "Scenario: DBSCAN on image pixel intensities for segmentation. Outcome?", "options": [ "Requires predefined cluster number", "Clusters must be circular", "All pixels assigned", "Arbitrary-shaped regions segmented; noise isolated" ], "correctAnswerIndex": 3, "explanation": "DBSCAN can segment regions of any shape and label scattered pixels as noise." }, { "id": 55, "questionText": "Scenario: Using DBSCAN on 3D point cloud of a city. Advantage?", "options": [ "Detects clusters like buildings, trees, and separates sparse points", "Noise merged into clusters", "All points assigned", "Clusters must be spherical" ], "correctAnswerIndex": 0, "explanation": "DBSCAN works in multi-dimensional data and identifies meaningful dense clusters." 
}, { "id": 56, "questionText": "Scenario: DBSCAN fails with high-dimensional word embeddings. Solution?", "options": [ "Apply dimensionality reduction before clustering", "Use K-Means", "Increase eps globally", "Ignore problem" ], "correctAnswerIndex": 0, "explanation": "High-dimensional spaces make distance less meaningful; reduction helps clustering performance." }, { "id": 57, "questionText": "Scenario: Border points connected to multiple core points. Assignment?", "options": [ "Assigned to one cluster reachable first", "Algorithm fails", "Assigned to all clusters simultaneously", "Become noise" ], "correctAnswerIndex": 0, "explanation": "DBSCAN assigns each border point to a single cluster, typically that of the first core point to reach it." }, { "id": 58, "questionText": "Scenario: DBSCAN on streaming data. Limitation?", "options": [ "Noise ignored", "Automatically updates clusters", "All points reassigned automatically", "Standard DBSCAN cannot update incrementally; adaptation needed" ], "correctAnswerIndex": 3, "explanation": "Incremental or streaming adaptations of DBSCAN are required for dynamic datasets." }, { "id": 59, "questionText": "Scenario: Clusters are elongated and dense. DBSCAN vs K-Means?", "options": [ "Both fail", "K-Means better", "All points assigned to noise", "DBSCAN captures shape; K-Means fails with elongated clusters" ], "correctAnswerIndex": 3, "explanation": "DBSCAN’s density-based approach handles arbitrary shapes like elongated clusters well." }, { "id": 60, "questionText": "Scenario: Choosing minPts parameter. Rule of thumb?", "options": [ "minPts = dataset size", "minPts ≥ dimensionality + 1", "minPts = 1 always", "minPts ignored" ], "correctAnswerIndex": 1, "explanation": "Choosing minPts slightly larger than data dimensionality ensures meaningful cluster formation." }, { "id": 61, "questionText": "Scenario: DBSCAN applied to weather station locations. 
Advantage?", "options": [ "All stations assigned", "Noise merged into clusters", "Detects dense station clusters and separates isolated stations as noise", "Clusters must be circular" ], "correctAnswerIndex": 2, "explanation": "DBSCAN can find groups of stations in dense regions while labeling isolated ones as noise." }, { "id": 62, "questionText": "Scenario: DBSCAN applied to vehicle GPS tracks. Best outcome?", "options": [ "All vehicles assigned to same cluster", "Requires predefined cluster number", "Clusters must be circular", "Detect hotspots of vehicle activity and identify sparse routes" ], "correctAnswerIndex": 3, "explanation": "DBSCAN identifies dense routes or locations and marks sparse movements as noise." }, { "id": 63, "questionText": "Scenario: DBSCAN applied to detect fraudulent transactions. Advantage?", "options": [ "All transactions clustered", "Isolates unusual low-density transactions as potential fraud", "Clusters merged arbitrarily", "Noise ignored" ], "correctAnswerIndex": 1, "explanation": "Low-density points are flagged naturally, useful for anomaly detection in finance." }, { "id": 64, "questionText": "Scenario: eps too large. Effect on clusters?", "options": [ "Clusters may merge; noise reduced", "Noise increases", "More clusters detected", "Algorithm fails" ], "correctAnswerIndex": 0, "explanation": "Large eps connects distant points, merging separate clusters and reducing noise." }, { "id": 65, "questionText": "Scenario: eps too small. Effect on clusters?", "options": [ "Clusters merge", "Noise decreases", "Many points labeled as noise; clusters fragmented", "Algorithm fails" ], "correctAnswerIndex": 2, "explanation": "Small eps prevents points from forming dense clusters; many become noise." }, { "id": 66, "questionText": "Scenario: High-dimensional clustering. 
DBSCAN limitation?", "options": [ "Clusters detected perfectly", "All points assigned", "Distances lose meaning; density estimation difficult", "Noise ignored" ], "correctAnswerIndex": 2, "explanation": "High dimensions make points appear equidistant, complicating density-based clustering." }, { "id": 67, "questionText": "Scenario: Data with multiple density clusters. Solution?", "options": [ "Use HDBSCAN for adaptive density clustering", "Increase eps globally", "Reduce minPts to 1", "Ignore problem" ], "correctAnswerIndex": 0, "explanation": "HDBSCAN adapts to varying densities, unlike standard DBSCAN." }, { "id": 68, "questionText": "Scenario: Using DBSCAN on customer browsing patterns. Advantage?", "options": [ "Noise ignored", "Requires predefined cluster number", "All points assigned", "Detects dense behavioral patterns and isolates outliers" ], "correctAnswerIndex": 3, "explanation": "Dense browsing behaviors form clusters; rare patterns become noise." }, { "id": 69, "questionText": "Scenario: Noise points in DBSCAN. Definition?", "options": [ "Cluster centroids", "All points in clusters", "Points not reachable from any core point", "Points with minPts neighbors" ], "correctAnswerIndex": 2, "explanation": "Noise points are isolated points not part of any cluster." }, { "id": 70, "questionText": "Scenario: Choosing distance metric in DBSCAN. Effect?", "options": [ "Affects neighborhood definition and cluster shape", "Noise ignored", "No effect", "All points merged" ], "correctAnswerIndex": 0, "explanation": "The distance metric changes how neighbors are counted, affecting cluster formation." }, { "id": 71, "questionText": "Scenario: DBSCAN applied to earthquake epicenters. 
Advantage?", "options": [ "Clusters must be circular", "Detects clusters of seismic activity and isolates isolated events", "Noise merged into clusters", "All events assigned" ], "correctAnswerIndex": 1, "explanation": "DBSCAN identifies dense seismic regions and separates rare events as noise." }, { "id": 72, "questionText": "Scenario: Varying eps across dataset. How to achieve?", "options": [ "Ignore variation", "Random eps each run", "Use adaptive DBSCAN variants like HDBSCAN", "Standard DBSCAN suffices" ], "correctAnswerIndex": 2, "explanation": "Adaptive algorithms adjust density thresholds to handle varying densities." }, { "id": 73, "questionText": "Scenario: Applying DBSCAN on medical imaging. Benefit?", "options": [ "Clusters must be spherical", "Requires fixed cluster number", "Detects regions of interest and separates background noise", "All pixels clustered" ], "correctAnswerIndex": 2, "explanation": "DBSCAN segments irregular shapes and isolates sparse/noisy regions." }, { "id": 74, "questionText": "Scenario: Using DBSCAN for anomaly detection in IoT sensors. Approach?", "options": [ "Ignore isolated readings", "Label low-density readings as anomalies", "Cluster all points", "Random assignment" ], "correctAnswerIndex": 1, "explanation": "Isolated readings or sparse patterns naturally become noise, indicating anomalies." }, { "id": 75, "questionText": "Scenario: DBSCAN on financial transactions. Noise points indicate?", "options": [ "Noise merged", "All transactions are legitimate", "Potential fraudulent or unusual transactions", "Clusters merged" ], "correctAnswerIndex": 2, "explanation": "Sparse points in dense transaction space are flagged as unusual or fraudulent." }, { "id": 76, "questionText": "Scenario: DBSCAN applied to traffic accident locations. 
Advantage?", "options": [ "Identifies accident hotspots and isolates rare events", "All accidents assigned", "Noise merged into clusters", "Clusters must be circular" ], "correctAnswerIndex": 0, "explanation": "DBSCAN finds dense accident regions and treats isolated incidents as noise." }, { "id": 77, "questionText": "Scenario: eps and minPts selection using k-distance plot. What is the elbow point?", "options": [ "Minimum distance", "Random point", "Point where distance sharply increases, suitable for eps", "Maximum distance" ], "correctAnswerIndex": 2, "explanation": "The elbow in the k-distance graph indicates the transition from dense to sparse regions, guiding eps selection." }, { "id": 78, "questionText": "Scenario: Border points connected to multiple clusters. Assignment in DBSCAN?", "options": [ "Assigned to all clusters", "Become noise", "Assigned to the first reachable cluster", "Clusters merge automatically" ], "correctAnswerIndex": 2, "explanation": "Border points are assigned to one cluster, typically the first one that reaches them." }, { "id": 79, "questionText": "Scenario: DBSCAN on social media check-ins. Benefit?", "options": [ "Clusters must be predefined", "All users assigned", "Noise ignored", "Detects popular locations and identifies sparse users" ], "correctAnswerIndex": 3, "explanation": "DBSCAN identifies dense activity areas and treats isolated check-ins as noise." }, { "id": 80, "questionText": "Scenario: Standard DBSCAN fails on variable density data. Solution?", "options": [ "Ignore the problem", "Increase eps globally", "Decrease minPts arbitrarily", "Use HDBSCAN for hierarchical density-based clustering" ], "correctAnswerIndex": 3, "explanation": "HDBSCAN adapts to varying density, unlike standard DBSCAN." }, { "id": 81, "questionText": "Scenario: DBSCAN on genomic data. 
Advantage?", "options": [ "Clusters must be circular", "All genes assigned", "Identifies dense gene clusters and isolates rare genes", "Noise merged" ], "correctAnswerIndex": 2, "explanation": "DBSCAN can identify dense gene expression patterns and separate sparse or rare genes as noise." }, { "id": 82, "questionText": "Scenario: Choosing minPts too high. Effect?", "options": [ "Algorithm fails", "Clusters merge", "Small clusters ignored; many points labeled noise", "More clusters detected" ], "correctAnswerIndex": 2, "explanation": "High minPts requires dense regions; sparse or small clusters are lost." }, { "id": 83, "questionText": "Scenario: Choosing minPts too low. Effect?", "options": [ "Many small clusters formed; noise reduced", "Clusters merge", "Algorithm fails", "All points noise" ], "correctAnswerIndex": 0, "explanation": "Low minPts allows small groups to form clusters, potentially splitting natural clusters." }, { "id": 84, "questionText": "Scenario: DBSCAN applied on customer location data. Advantage?", "options": [ "Requires predefined cluster count", "All points assigned", "Clusters must be circular", "Identifies dense shopping areas and isolates isolated customers" ], "correctAnswerIndex": 3, "explanation": "DBSCAN captures dense shopping locations and labels scattered customers as noise." }, { "id": 85, "questionText": "Scenario: High-dimensional text embeddings. DBSCAN limitation?", "options": [ "Distances lose meaning; clusters may be unreliable", "Algorithm faster", "Noise ignored", "Clusters always detected" ], "correctAnswerIndex": 0, "explanation": "In high dimensions, distances are less discriminative, affecting density and clustering." }, { "id": 86, "questionText": "Scenario: Using DBSCAN on image feature vectors. 
Benefit?", "options": [ "Requires predefined cluster count", "All features assigned", "Groups similar image features and isolates outliers", "Clusters must be circular" ], "correctAnswerIndex": 2, "explanation": "DBSCAN detects dense feature groups and treats isolated features as noise." }, { "id": 87, "questionText": "Scenario: eps too small. Effect?", "options": [ "Clusters fragmented; many points labeled noise", "Clusters merge", "All points assigned", "Algorithm fails" ], "correctAnswerIndex": 0, "explanation": "Small eps prevents formation of dense clusters; isolated points become noise." }, { "id": 88, "questionText": "Scenario: eps too large. Effect?", "options": [ "More clusters detected", "Algorithm fails", "Clusters merge; fewer noise points", "Noise increases" ], "correctAnswerIndex": 2, "explanation": "Large eps connects distant points, merging separate clusters and reducing noise." }, { "id": 89, "questionText": "Scenario: DBSCAN on irregularly shaped 2D clusters. Advantage?", "options": [ "Clusters must be circular", "Captures arbitrary shapes unlike K-Means", "All points assigned", "Noise ignored" ], "correctAnswerIndex": 1, "explanation": "DBSCAN does not assume cluster shape, so it captures elongated or irregular clusters." }, { "id": 90, "questionText": "Scenario: Border point connected to multiple core points. Assignment?", "options": [ "Assigned to first reachable cluster", "Assigned to all clusters", "Clusters merge", "Becomes noise" ], "correctAnswerIndex": 0, "explanation": "DBSCAN assigns a border point to one cluster, typically that of the first core point that reaches it." }, { "id": 91, "questionText": "Scenario: DBSCAN on IoT sensor anomaly detection. Advantage?", "options": [ "Sparse readings flagged as anomalies automatically", "Noise ignored", "All readings clustered", "Clusters merged arbitrarily" ], "correctAnswerIndex": 0, "explanation": "DBSCAN labels low-density points as noise, which is useful for detecting anomalies." 
}, { "id": 92, "questionText": "Scenario: DBSCAN with streaming data. Limitation?", "options": [ "Needs adaptation; standard DBSCAN is static", "Noise ignored", "Automatically updates clusters", "All points reassigned automatically" ], "correctAnswerIndex": 0, "explanation": "DBSCAN is not incremental; streaming or dynamic data requires modified algorithms." }, { "id": 93, "questionText": "Scenario: Using DBSCAN on earthquake data. Benefit?", "options": [ "Detects dense seismic zones; isolates rare events", "All events clustered", "Noise merged", "Clusters must be circular" ], "correctAnswerIndex": 0, "explanation": "DBSCAN identifies dense clusters of earthquakes and labels isolated events as noise." }, { "id": 94, "questionText": "Scenario: How does DBSCAN define noise?", "options": [ "Cluster centroids", "Points not reachable from any core point", "Points with minPts neighbors", "All points assigned" ], "correctAnswerIndex": 1, "explanation": "Noise points are points that are not density-reachable from any core point, so they do not belong to any cluster." }, { "id": 95, "questionText": "Scenario: Varying density clusters. Best DBSCAN variant?", "options": [ "Standard DBSCAN", "K-Means", "HDBSCAN", "Agglomerative clustering" ], "correctAnswerIndex": 2, "explanation": "HDBSCAN adapts to different densities and creates a hierarchy of clusters." }, { "id": 96, "questionText": "Scenario: Choosing eps using k-distance plot. How?", "options": [ "Select maximum distance", "Select value at elbow point where distances sharply rise", "Randomly select eps", "Select minimum distance" ], "correctAnswerIndex": 1, "explanation": "The elbow point indicates the transition from dense to sparse points, guiding eps choice." },
{ "id": 97, "questionText": "Scenario: DBSCAN on customer behavior patterns. Benefit?", "options": [ "Groups dense behavior patterns; isolates rare customers", "Requires fixed number of clusters", "Noise ignored", "All points assigned" ], "correctAnswerIndex": 0, "explanation": "DBSCAN identifies dense behavioral clusters and labels rare behaviors as noise." }, { "id": 98, "questionText": "Scenario: DBSCAN vs K-Means for non-spherical clusters. Advantage?", "options": [ "K-Means better", "Noise ignored", "Both fail", "DBSCAN captures arbitrary shapes" ], "correctAnswerIndex": 3, "explanation": "DBSCAN does not assume cluster shape and handles irregular or elongated clusters." }, { "id": 99, "questionText": "Scenario: High-dimensional DBSCAN problem. Solution?", "options": [ "Dimensionality reduction (PCA, t-SNE) or HDBSCAN", "Increase minPts arbitrarily", "Ignore scaling", "Use raw distances" ], "correctAnswerIndex": 0, "explanation": "High-dimensional spaces make distances less meaningful; reduction or adaptive methods improve clustering." }, { "id": 100, "questionText": "Scenario: Choosing minPts in DBSCAN. Rule of thumb?", "options": [ "minPts = 1 always", "minPts = dataset size", "Ignore minPts", "minPts ≥ dimensionality + 1" ], "correctAnswerIndex": 3, "explanation": "A common heuristic is minPts ≥ D + 1, where D is the number of dimensions; larger values (such as 2 × D) are often used for noisy or large datasets." } ] }