| { | |
| "title": "LightGBM Mastery: Part 1 (Q1–25)", | |
| "description": "Questions 1–25 covering LightGBM fundamentals — boosting basics, leaf-wise growth, histogram optimization, and key parameters.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What type of algorithm is LightGBM based on?", | |
| "options": [ | |
| "Bagging", | |
| "Boosting", | |
| "Stacking", | |
| "Voting" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "LightGBM is a boosting algorithm, specifically a gradient boosting framework that builds models sequentially." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "Which of the following is a unique characteristic of LightGBM?", | |
| "options": [ | |
| "It grows trees level-wise", | |
| "It grows trees leaf-wise", | |
| "It uses deep neural networks", | |
| "It averages models" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "LightGBM grows trees leaf-wise (best-first) to reduce loss more efficiently compared to level-wise methods." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "What type of trees does LightGBM primarily use?", | |
| "options": [ | |
| "Shallow random trees", | |
| "Deep neural trees", | |
| "Decision trees", | |
| "Regression trees" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "LightGBM primarily uses regression trees for both classification and regression tasks." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "Which technique allows LightGBM to handle large datasets efficiently?", | |
| "options": [ | |
| "Feature hashing", | |
| "Histogram-based algorithm", | |
| "Random sampling", | |
| "PCA compression" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "LightGBM uses a histogram-based algorithm to reduce computation by discretizing continuous features into bins." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "Scenario: You have very large dataset with millions of rows. Why is LightGBM preferred over XGBoost?", | |
| "options": [ | |
| "Because it uses neural networks internally", | |
| "Because it uses histogram-based splits and leaf-wise growth for efficiency", | |
| "Because it reduces model interpretability", | |
| "Because it doesn’t require gradient computation" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "LightGBM is optimized for large datasets using histogram-based splits and leaf-wise tree growth." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "Which LightGBM parameter controls the number of leaves in a single tree?", | |
| "options": [ | |
| "num_leaves", | |
| "max_depth", | |
| "min_child_samples", | |
| "n_estimators" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The num_leaves parameter sets the maximum number of leaves in one tree, controlling model complexity." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "What happens if num_leaves is set too high?", | |
| "options": [ | |
| "Model becomes underfit", | |
| "Model becomes overfit", | |
| "Model trains faster", | |
| "Model ignores small features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Too many leaves can make the model overly complex and prone to overfitting." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "Which LightGBM parameter controls the learning rate?", | |
| "options": [ | |
| "shrinkage_rate", | |
| "alpha", | |
| "learning_rate", | |
| "lambda" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "The learning_rate parameter determines how much each new tree contributes to the model." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Scenario: Model accuracy stagnates early during training. Which parameter can you increase?", | |
| "options": [ | |
| "num_leaves", | |
| "learning_rate", | |
| "n_estimators", | |
| "feature_fraction" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Increasing n_estimators (number of boosting iterations) allows the model to learn longer." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "What is feature_fraction used for in LightGBM?", | |
| "options": [ | |
| "Regularization to reduce overfitting by randomly selecting a fraction of features per tree", | |
| "Adjusting leaf size", | |
| "Reducing data size by sampling rows", | |
| "Controlling the number of leaves" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "feature_fraction randomly selects a fraction of features to train each tree, helping regularize the model." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "Which LightGBM parameter limits tree depth?", | |
| "options": [ | |
| "max_depth", | |
| "num_leaves", | |
| "min_split_gain", | |
| "subsample" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "max_depth directly limits how deep trees can grow, preventing overfitting." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "What does min_child_samples control?", | |
| "options": [ | |
| "Minimum number of samples required in a leaf", | |
| "Minimum number of features used in a tree", | |
| "Minimum iterations before early stopping", | |
| "Minimum value for learning rate" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "min_child_samples ensures that leaves have enough data points, acting as a regularization technique." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "Scenario: LightGBM model is overfitting. Which change helps reduce it?", | |
| "options": [ | |
| "Decrease num_leaves", | |
| "Increase num_leaves", | |
| "Increase learning rate", | |
| "Remove regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing num_leaves decreases model complexity and helps combat overfitting." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "What does boosting_type='dart' mean in LightGBM?", | |
| "options": [ | |
| "It uses Dropouts meet Multiple Additive Regression Trees", | |
| "It disables boosting", | |
| "It performs bagging only", | |
| "It builds random forests" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The DART variant of LightGBM randomly drops trees during boosting to improve generalization." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "Which LightGBM boosting type uses dropouts for regularization?", | |
| "options": [ | |
| "gbdt", | |
| "dart", | |
| "goss", | |
| "rf" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "The DART boosting type introduces dropout in boosting to prevent overfitting." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "What does goss stand for in LightGBM?", | |
| "options": [ | |
| "Gradient-based One-Side Sampling", | |
| "Gradient Optimization Sampling System", | |
| "Global Outlier Sampling Strategy", | |
| "Generalized Optimization Split Search" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "GOSS is Gradient-based One-Side Sampling — a LightGBM optimization that speeds up training by sampling instances with large gradients." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Which LightGBM parameter helps in row subsampling?", | |
| "options": [ | |
| "bagging_fraction", | |
| "feature_fraction", | |
| "lambda_l1", | |
| "min_split_gain" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "bagging_fraction controls the fraction of data used per iteration, providing row-wise subsampling." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "Scenario: You want faster training but can tolerate a small loss in accuracy. Which parameter can you reduce?", | |
| "options": [ | |
| "bagging_fraction", | |
| "num_leaves", | |
| "max_depth", | |
| "feature_fraction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing bagging_fraction increases speed by using fewer data rows per iteration." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "What does lambda_l1 control in LightGBM?", | |
| "options": [ | |
| "L1 regularization term on weights", | |
| "L2 regularization term on weights", | |
| "Dropout rate", | |
| "Learning rate decay" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "lambda_l1 adds L1 regularization on leaf weights to encourage sparsity and reduce overfitting." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "Which LightGBM regularization term penalizes large leaf weights using L2 norm?", | |
| "options": [ | |
| "lambda_l1", | |
| "lambda_l2", | |
| "min_child_samples", | |
| "feature_fraction" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "lambda_l2 applies L2 regularization to prevent large leaf weights and stabilize training." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "Scenario: Model accuracy is fluctuating during boosting. Which parameter helps smooth this effect?", | |
| "options": [ | |
| "learning_rate", | |
| "num_leaves", | |
| "min_gain_to_split", | |
| "bagging_freq" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A smaller learning_rate helps stabilize model updates, reducing fluctuations." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "What is the role of min_gain_to_split?", | |
| "options": [ | |
| "Minimum loss reduction required for a split", | |
| "Minimum number of leaves required per tree", | |
| "Maximum number of features allowed", | |
| "Learning rate decay factor" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "min_gain_to_split prevents small, insignificant splits by requiring a minimum loss reduction." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "Scenario: Dataset contains categorical variables. How does LightGBM handle them efficiently?", | |
| "options": [ | |
| "Using one-hot encoding automatically", | |
| "By internally converting them using optimal split algorithms", | |
| "By ignoring categorical variables", | |
| "By treating them as numeric values directly" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "LightGBM natively supports categorical features by finding optimal split points without full one-hot encoding." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "Which parameter in LightGBM is used to handle categorical features?", | |
| "options": [ | |
| "categorical_feature", | |
| "cat_var", | |
| "cat_split", | |
| "categorical_index" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The categorical_feature parameter specifies which columns are treated as categorical during training." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "Scenario: LightGBM is using GPU for training. Which advantage does this offer?", | |
| "options": [ | |
| "Faster histogram construction and split finding", | |
| "Automatic feature engineering", | |
| "Better interpretability", | |
| "Improved regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "GPU acceleration speeds up histogram creation and split computations, significantly reducing training time." | |
| } | |
| ] | |
| } | |
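Part 1 revolves around LightGBM's core training parameters (num_leaves, max_depth, learning_rate, feature_fraction, bagging, min_child_samples, boosting_type). The sketch below shows where those parameters go in the native Python API; the synthetic dataset and every value are illustrative placeholders, not tuned recommendations.

```python
import lightgbm as lgb
import numpy as np

# Hypothetical binary-classification data, just to make the sketch runnable.
rng = np.random.default_rng(42)
X = rng.normal(size=(5000, 20))
y = (X[:, 0] + rng.normal(scale=0.5, size=5000) > 0).astype(int)

train_set = lgb.Dataset(X[:4000], label=y[:4000])
valid_set = lgb.Dataset(X[4000:], label=y[4000:], reference=train_set)

params = {
    "objective": "binary",
    "metric": "binary_logloss",
    "boosting_type": "gbdt",   # alternatives: "dart" (dropout), "goss" (one-side sampling)
    "num_leaves": 31,          # max leaves per tree; too high -> overfitting
    "max_depth": -1,           # -1 = no explicit depth cap; num_leaves bounds complexity
    "learning_rate": 0.05,     # contribution of each new tree
    "feature_fraction": 0.8,   # fraction of features sampled per tree
    "bagging_fraction": 0.8,   # fraction of rows sampled per iteration
    "bagging_freq": 1,         # re-sample rows every iteration
    "min_child_samples": 20,   # minimum samples per leaf (alias of min_data_in_leaf)
    "lambda_l1": 0.0,          # L1 penalty on leaf weights
    "lambda_l2": 0.0,          # L2 penalty on leaf weights
}
booster = lgb.train(params, train_set, num_boost_round=300, valid_sets=[valid_set])
```

Most of the same parameter names are also accepted as keyword arguments by the scikit-learn wrappers (lgb.LGBMClassifier, lgb.LGBMRegressor), so the answers above apply to either interface.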
| { | |
| "title": "LightGBM Mastery: Part 2 (Q26–50)", | |
| "description": "Questions 26–50 exploring LightGBM tuning — sampling, regularization, parameter interactions, parallelization, and practical training strategies.", | |
| "questions": [ | |
| { | |
| "id": 26, | |
| "questionText": "What does the bagging_freq parameter control in LightGBM?", | |
| "options": [ | |
| "How frequently bagging is performed during training", | |
| "Number of features to drop per tree", | |
| "Learning rate schedule frequency", | |
| "Regularization update interval" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "bagging_freq determines after how many boosting iterations LightGBM performs row subsampling." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "Which LightGBM optimization helps in handling large-scale datasets efficiently?", | |
| "options": [ | |
| "Histogram-based binning", | |
| "Deep tree expansion", | |
| "Dynamic pruning", | |
| "Recurrent boosting" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "LightGBM uses histogram-based binning to reduce computation and memory usage for large datasets." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "Scenario: Dataset contains extreme class imbalance. Which parameter helps mitigate it?", | |
| "options": [ | |
| "scale_pos_weight", | |
| "bagging_fraction", | |
| "num_leaves", | |
| "min_child_samples" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "scale_pos_weight adjusts the relative weight of positive samples to handle imbalanced datasets effectively." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "Which LightGBM setting should be increased to make the model less sensitive to noise?", | |
| "options": [ | |
| "min_child_samples", | |
| "num_leaves", | |
| "learning_rate", | |
| "max_depth" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Increasing min_child_samples ensures leaves contain more data points, making the model more robust to noise." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "What is the role of early_stopping_round in LightGBM training?", | |
| "options": [ | |
| "Stops training when validation loss does not improve after a certain number of rounds", | |
| "Reduces learning rate automatically", | |
| "Saves best iteration for retraining", | |
| "Increases number of leaves gradually" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "early_stopping_round halts training if performance on validation data stops improving." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "Scenario: You observe that LightGBM trains very fast but underfits. What adjustment helps?", | |
| "options": [ | |
| "Increase num_leaves or n_estimators", | |
| "Decrease learning rate", | |
| "Reduce max_depth", | |
| "Reduce feature_fraction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Increasing num_leaves or n_estimators allows the model to capture more complexity and reduce underfitting." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "What does LightGBM’s leaf-wise tree growth mean?", | |
| "options": [ | |
| "It splits the leaf with the highest loss reduction first", | |
| "It splits all leaves at the same level simultaneously", | |
| "It grows the tree symmetrically", | |
| "It uses fixed depth trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Leaf-wise growth selects and splits the leaf that gives the greatest loss reduction, leading to faster convergence." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "Which parameter combination most affects model complexity?", | |
| "options": [ | |
| "num_leaves and max_depth", | |
| "feature_fraction and bagging_fraction", | |
| "learning_rate and n_estimators", | |
| "lambda_l1 and lambda_l2" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "num_leaves and max_depth jointly control tree structure and hence the complexity of the model." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Scenario: LightGBM runs out of memory on a massive dataset. Which setting helps reduce memory usage?", | |
| "options": [ | |
| "Reduce max_bin", | |
| "Increase learning_rate", | |
| "Set boosting_type to dart", | |
| "Increase num_leaves" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing max_bin decreases the number of histogram bins, lowering memory requirements." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "What does the parameter max_bin represent in LightGBM?", | |
| "options": [ | |
| "Maximum number of bins to bucket continuous features", | |
| "Maximum number of leaves per tree", | |
| "Maximum depth of trees", | |
| "Maximum iterations for convergence" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "max_bin determines how many discrete bins each feature will be divided into during histogram building." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "Scenario: Model training takes too long. Which adjustment improves speed most effectively?", | |
| "options": [ | |
| "Reduce max_bin or use bagging_fraction < 1", | |
| "Increase num_leaves", | |
| "Reduce learning_rate only", | |
| "Increase regularization terms" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing max_bin or using smaller bagging_fraction reduces the dataset processed each iteration, speeding up training." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "What is the primary drawback of leaf-wise tree growth?", | |
| "options": [ | |
| "Higher risk of overfitting on small data", | |
| "Slower convergence", | |
| "Worse performance on large datasets", | |
| "Poor categorical handling" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Leaf-wise growth can overfit on small datasets because it may produce very deep trees." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "Which LightGBM parameter defines how many bins are created for each feature?", | |
| "options": [ | |
| "max_bin", | |
| "num_leaves", | |
| "feature_fraction", | |
| "max_depth" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "max_bin sets how finely continuous features are bucketed into discrete bins for histogram-based learning." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "What type of regularization do lambda_l1 and lambda_l2 correspond to?", | |
| "options": [ | |
| "Lasso and Ridge regularization", | |
| "Elastic Net regularization", | |
| "Dropout regularization", | |
| "Tree pruning regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "lambda_l1 and lambda_l2 implement Lasso (L1) and Ridge (L2) regularization respectively." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "Scenario: You observe overfitting with high validation error. Which parameters help reduce it?", | |
| "options": [ | |
| "Increase min_child_samples, decrease num_leaves", | |
| "Increase learning_rate, increase num_leaves", | |
| "Reduce lambda_l2", | |
| "Increase feature_fraction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Increasing min_child_samples and reducing num_leaves simplify the model, reducing overfitting." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "What is the role of monotone_constraints in LightGBM?", | |
| "options": [ | |
| "Ensure certain features have monotonic relationships with the target", | |
| "Enforce equal feature importance", | |
| "Reduce overfitting using L2 regularization", | |
| "Apply monotonic normalization to inputs" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "monotone_constraints force LightGBM to maintain a monotonic relationship for specific features." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "Scenario: You want reproducible results from LightGBM training. Which parameter helps?", | |
| "options": [ | |
| "random_state", | |
| "seed", | |
| "boosting_seed", | |
| "Any of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Setting seed or random_state ensures deterministic behavior in LightGBM training." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "Which LightGBM parameter defines the objective function?", | |
| "options": [ | |
| "objective", | |
| "metric", | |
| "boosting_type", | |
| "learning_rate" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The objective parameter defines the loss function that LightGBM optimizes, e.g., 'binary', 'regression'." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Scenario: You are using LightGBM for multi-class classification. What should the objective be set to?", | |
| "options": [ | |
| "multiclass", | |
| "multiclassova", | |
| "binary", | |
| "regression" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "For multi-class classification, objective='multiclass' should be used with num_class specified." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "What does feature_pre_filter in LightGBM control?", | |
| "options": [ | |
| "Whether features are pre-screened before training", | |
| "Feature normalization", | |
| "Automatic feature selection during training", | |
| "Dropout of low importance features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "feature_pre_filter determines if LightGBM filters out constant or low-variance features before training." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Scenario: You use categorical features in LightGBM. What advantage does native support provide?", | |
| "options": [ | |
| "Faster training and memory efficiency", | |
| "One-hot encoding automatically expands features", | |
| "Improved interpretability", | |
| "Model regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Native categorical handling avoids one-hot expansion, leading to faster and more efficient training." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "Which LightGBM boosting type is best for highly imbalanced datasets?", | |
| "options": [ | |
| "goss", | |
| "gbdt", | |
| "dart", | |
| "rf" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "GOSS (Gradient-based One-Side Sampling) is efficient and works well for imbalanced datasets by focusing on large-gradient samples." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "Scenario: LightGBM shows slightly worse accuracy than XGBoost. Which parameter tuning may help?", | |
| "options": [ | |
| "Reduce learning_rate and increase n_estimators", | |
| "Increase feature_fraction", | |
| "Reduce num_leaves", | |
| "Disable histogram optimization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Decreasing learning_rate while increasing n_estimators allows more refined learning and may improve accuracy." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "Which LightGBM metric should you use for binary classification?", | |
| "options": [ | |
| "binary_logloss", | |
| "l2", | |
| "mae", | |
| "multi_logloss" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "binary_logloss measures the log loss for binary classification tasks." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Scenario: You want to save training time without losing much performance. Which parameters can be combined?", | |
| "options": [ | |
| "Use smaller max_bin, bagging_fraction, and feature_fraction", | |
| "Increase num_leaves and n_estimators", | |
| "Reduce learning_rate only", | |
| "Disable regularization terms" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing max_bin, bagging_fraction, and feature_fraction reduces training cost while maintaining accuracy." | |
| } | |
| ] | |
| } | |
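Several Part 2 questions (early stopping, class imbalance, max_bin, categorical features, seeds) come together in a single training call. The sketch below is a minimal illustration, assuming a LightGBM version that provides the lgb.early_stopping callback (3.3 or later); the DataFrame, the roughly 5% positive rate, and the parameter values are all hypothetical.

```python
import lightgbm as lgb
import numpy as np
import pandas as pd

# Hypothetical imbalanced dataset (~5% positives) with one categorical column.
rng = np.random.default_rng(0)
n = 10_000
df = pd.DataFrame({
    "x1": rng.normal(size=n),
    "x2": rng.normal(size=n),
    "cat": pd.Categorical(rng.integers(0, 5, size=n)),
})
y = (rng.random(n) < 0.05).astype(int)

train = lgb.Dataset(df.iloc[:8000], label=y[:8000], categorical_feature=["cat"])
valid = lgb.Dataset(df.iloc[8000:], label=y[8000:], reference=train)

params = {
    "objective": "binary",
    "metric": "binary_logloss",
    "max_bin": 63,             # fewer histogram bins -> less memory, faster training
    "scale_pos_weight": 19.0,  # roughly n_negative / n_positive for ~5% positives
    "num_leaves": 31,
    "learning_rate": 0.05,
    "seed": 42,                # fixed seed for reproducible sampling
}
booster = lgb.train(
    params, train, num_boost_round=2000, valid_sets=[valid],
    callbacks=[lgb.early_stopping(stopping_rounds=50)],  # stop when validation stalls
)
print("best iteration:", booster.best_iteration)
```

Because the cat column is passed through categorical_feature (and is already a pandas categorical), LightGBM finds splits on it directly instead of requiring one-hot encoding.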
| { | |
| "title": "LightGBM Mastery: Part 3 (Q51–75)", | |
| "description": "Intermediate to advanced questions (51–75) exploring LightGBM regularization, overfitting control, advanced parameters, and real-world optimization scenarios.", | |
| "questions": [ | |
| { | |
| "id": 51, | |
| "questionText": "What is the function of lambda_l1 in LightGBM?", | |
| "options": [ | |
| "Applies L1 regularization to leaf weights", | |
| "Limits number of leaves per tree", | |
| "Determines the learning rate", | |
| "Applies L2 regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "lambda_l1 controls L1 regularization on leaf weights to reduce overfitting." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "What is the function of lambda_l2 in LightGBM?", | |
| "options": [ | |
| "Applies L2 regularization to leaf weights", | |
| "Controls learning rate", | |
| "Reduces feature fraction", | |
| "Prunes shallow trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "lambda_l2 applies L2 regularization on leaf weights to smooth large values and improve generalization." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "Scenario: You notice LightGBM overfits heavily. Which parameters could help?", | |
| "options": [ | |
| "Decrease num_leaves, increase min_data_in_leaf", | |
| "Increase num_leaves, lower min_data_in_leaf", | |
| "Increase learning rate", | |
| "Increase max_depth only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing num_leaves and increasing min_data_in_leaf makes trees simpler and prevents overfitting." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "What does 'min_split_gain' control in LightGBM?", | |
| "options": [ | |
| "Minimum gain required to perform a split", | |
| "Maximum gain per leaf", | |
| "Learning rate adjustment", | |
| "Subsample fraction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "min_split_gain sets a threshold for information gain; splits below this are ignored." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "Which LightGBM parameter controls row sampling per iteration?", | |
| "options": [ | |
| "bagging_fraction", | |
| "feature_fraction", | |
| "num_leaves", | |
| "max_bin" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "bagging_fraction randomly samples rows for each boosting round to reduce variance." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "What does 'feature_fraction' control in LightGBM?", | |
| "options": [ | |
| "Fraction of features used per tree", | |
| "Number of bins per feature", | |
| "Maximum depth of tree", | |
| "Feature scaling" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "feature_fraction specifies the proportion of features used to build each tree." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Scenario: Increasing feature_fraction improves accuracy but lowers stability. Why?", | |
| "options": [ | |
| "More features increase model variance", | |
| "Feature_fraction reduces bias", | |
| "Learning rate decreases automatically", | |
| "Tree depth is fixed" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Using more features per iteration reduces randomness and increases variance, which may reduce stability." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Which LightGBM parameter controls data sampling frequency?", | |
| "options": [ | |
| "bagging_freq", | |
| "num_iterations", | |
| "min_child_samples", | |
| "max_depth" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "bagging_freq controls how often (in iterations) row subsampling is performed." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Scenario: You set bagging_fraction=1.0 and feature_fraction=1.0. Effect?", | |
| "options": [ | |
| "No random sampling; all data and features used every iteration", | |
| "Strong regularization", | |
| "Improved generalization", | |
| "Subsampling increases variance" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Setting both to 1.0 disables random sampling, using all features and samples every iteration." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "What does 'max_bin' affect in LightGBM?", | |
| "options": [ | |
| "Precision of feature discretization", | |
| "Learning rate", | |
| "Tree depth", | |
| "Bagging rate" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "max_bin determines how many bins each continuous feature is bucketed into for histogram-based splitting." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "Higher max_bin values typically lead to what?", | |
| "options": [ | |
| "More precise splits but slower training", | |
| "Faster training with less precision", | |
| "More regularization", | |
| "Smaller trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Increasing max_bin gives more precise splits but increases memory and training time." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Scenario: Large dataset with limited RAM. What should you adjust?", | |
| "options": [ | |
| "Decrease max_bin and num_leaves", | |
| "Increase learning rate", | |
| "Disable histogram mode", | |
| "Increase max_depth" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing max_bin and num_leaves lowers memory footprint and speeds up training." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "What is the function of 'min_data_in_leaf'?", | |
| "options": [ | |
| "Minimum number of samples required to form a leaf", | |
| "Maximum depth limit", | |
| "Learning rate controller", | |
| "Number of leaves in total" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "min_data_in_leaf ensures a minimum number of samples per leaf to avoid overfitting." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "What happens if min_data_in_leaf is set too high?", | |
| "options": [ | |
| "Model underfits due to shallow trees", | |
| "Model overfits easily", | |
| "Learning rate decreases", | |
| "Training stops early" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Too high min_data_in_leaf makes leaves large and reduces model complexity, causing underfitting." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "What parameter limits the maximum tree depth in LightGBM?", | |
| "options": [ | |
| "max_depth", | |
| "num_leaves", | |
| "min_data_in_leaf", | |
| "feature_fraction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "max_depth caps how deep each tree can grow." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Scenario: You set max_depth=-1. What happens?", | |
| "options": [ | |
| "Tree depth is unlimited and controlled by num_leaves instead", | |
| "Training fails", | |
| "Trees become shallow automatically", | |
| "Regularization is disabled" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Setting max_depth=-1 removes explicit depth restriction; num_leaves indirectly limits complexity." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "What is the effect of increasing num_iterations in LightGBM?", | |
| "options": [ | |
| "Model trains longer and may overfit if learning_rate is not reduced", | |
| "Model converges faster", | |
| "Less accurate model", | |
| "Shallower trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "More boosting iterations improve fit but can overfit unless compensated by lower learning rate." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "Scenario: Reducing learning_rate but keeping num_iterations constant causes?", | |
| "options": [ | |
| "Underfitting, since model learns slower", | |
| "Overfitting", | |
| "Higher variance", | |
| "Deeper trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low learning rate with few iterations may lead to underfitting as the model learns too slowly." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "What is the main benefit of histogram-based decision trees in LightGBM?", | |
| "options": [ | |
| "Faster training and lower memory usage", | |
| "More precise split thresholds", | |
| "Supports only small datasets", | |
| "Improves interpretability" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Histogram-based methods speed up training by grouping continuous values into discrete bins." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Scenario: You increase max_bin significantly. What might happen?", | |
| "options": [ | |
| "Training slows down and may overfit", | |
| "Training speeds up", | |
| "Model ignores rare features", | |
| "Learning rate increases automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Higher max_bin allows finer splits but can increase overfitting and computation time." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "What parameter controls the number of boosting rounds?", | |
| "options": [ | |
| "num_iterations", | |
| "max_depth", | |
| "feature_fraction", | |
| "min_data_in_leaf" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "num_iterations defines the total number of boosting rounds (trees) to train." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "Scenario: Decreasing num_iterations while keeping learning_rate fixed will usually?", | |
| "options": [ | |
| "Reduce model capacity and may underfit", | |
| "Cause overfitting", | |
| "Speed up convergence with higher accuracy", | |
| "Have no effect" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Fewer iterations reduce model capacity, leading to underfitting if learning_rate is unchanged." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "What is the benefit of early_stopping_rounds in LightGBM?", | |
| "options": [ | |
| "Automatically halts training when validation loss stops improving", | |
| "Reduces learning rate dynamically", | |
| "Increases tree depth automatically", | |
| "Samples more features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "early_stopping_rounds prevents overfitting by stopping when performance stops improving on validation data." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Which parameter combination best prevents overfitting?", | |
| "options": [ | |
| "Lower num_leaves, lower learning_rate, higher min_data_in_leaf", | |
| "Higher num_leaves, higher learning_rate", | |
| "Increase max_depth only", | |
| "Set bagging_fraction=1" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Simpler trees, smaller learning rate, and more data per leaf enhance generalization." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "Scenario: Large data, strong overfitting, and high variance. What to do?", | |
| "options": [ | |
| "Lower num_leaves, use bagging and feature_fraction < 1", | |
| "Increase tree depth", | |
| "Raise learning rate", | |
| "Disable regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Using smaller trees and random sampling helps reduce overfitting and variance." | |
| } | |
| ] | |
| } | |
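Part 3 keeps returning to the same overfitting levers. As a summary, here is a hedged sketch of a more conservative parameter set that combines them; every value is an illustrative starting point for tuning rather than a recommendation, and the dictionary would be passed to lgb.train exactly like the earlier sketches.

```python
# Illustrative "regularized" configuration combining the Part 3 controls.
regularized_params = {
    "objective": "binary",
    "num_leaves": 15,            # fewer leaves -> simpler trees
    "max_depth": 6,              # explicit depth cap on leaf-wise growth
    "min_data_in_leaf": 100,     # larger leaves resist noise (too large -> underfitting)
    "min_gain_to_split": 0.01,   # skip splits with negligible loss reduction
    "lambda_l1": 1.0,            # L1 (lasso-style) penalty on leaf weights
    "lambda_l2": 1.0,            # L2 (ridge-style) penalty on leaf weights
    "feature_fraction": 0.7,     # column subsampling per tree
    "bagging_fraction": 0.7,     # row subsampling per iteration
    "bagging_freq": 1,
    "learning_rate": 0.02,       # smaller steps, compensated by more iterations
    "max_bin": 127,              # coarser bins: cheaper and mildly regularizing
}
# booster = lgb.train(regularized_params, train_set, num_boost_round=3000,
#                     valid_sets=[valid_set],
#                     callbacks=[lgb.early_stopping(stopping_rounds=100)])
```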
| { | |
| "title": "LightGBM Mastery: Part 4 (Q76–100)", | |
| "description": "Advanced and expert-level questions (76–100) exploring LightGBM’s GPU acceleration, categorical feature encoding, distributed learning, interpretability, and fine-tuning strategies.", | |
| "questions": [ | |
| { | |
| "id": 76, | |
| "questionText": "What is one key advantage of LightGBM over XGBoost?", | |
| "options": [ | |
| "Uses leaf-wise tree growth for faster convergence", | |
| "Uses level-wise tree growth for stability", | |
| "Cannot handle large datasets", | |
| "Lacks regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "LightGBM grows trees leaf-wise with depth constraints, achieving faster convergence and lower loss." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "What happens if num_leaves is much larger than 2^max_depth?", | |
| "options": [ | |
| "Overfitting increases due to overly complex trees", | |
| "Model underfits severely", | |
| "No effect on model performance", | |
| "Training halts automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Too many leaves compared to max_depth allow excessive branching, causing overfitting." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: You enable GPU support in LightGBM. What primary benefit is expected?", | |
| "options": [ | |
| "Faster histogram construction and split finding", | |
| "Improved accuracy", | |
| "Reduced model complexity", | |
| "Automatic regularization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "GPU acceleration speeds up histogram building and split calculations, improving training speed." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Which LightGBM parameter enables GPU training?", | |
| "options": [ | |
| "device_type='gpu'", | |
| "gpu_enable=True", | |
| "use_gpu=1", | |
| "boosting_type='gpu'" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Setting device_type='gpu' tells LightGBM to use GPU resources for training." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "What is the impact of 'boosting_type' parameter?", | |
| "options": [ | |
| "Selects the boosting algorithm (gbdt, dart, goss)", | |
| "Controls feature sampling", | |
| "Defines tree depth", | |
| "Applies learning rate decay" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "boosting_type specifies the boosting algorithm variant such as 'gbdt', 'dart', or 'goss'." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "What is DART in LightGBM?", | |
| "options": [ | |
| "Dropouts meet Multiple Additive Regression Trees", | |
| "Distributed Automatic Regression Tree", | |
| "Dynamic Adaptive Regularized Trees", | |
| "Data Adaptive Reduction Technique" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DART is 'Dropouts meet Multiple Additive Regression Trees', introducing dropout into boosting to reduce overfitting." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "Scenario: Using boosting_type='goss'. What does GOSS stand for?", | |
| "options": [ | |
| "Gradient-based One-Side Sampling", | |
| "Global Overfitting Sample Selector", | |
| "Generalized Optimization for Split Search", | |
| "Gradient Optimization Sampling Strategy" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "GOSS stands for Gradient-based One-Side Sampling, reducing data processed per iteration for speed." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "What does GOSS primarily do?", | |
| "options": [ | |
| "Keeps large-gradient samples and randomly drops small-gradient ones", | |
| "Drops large-gradient samples", | |
| "Uses all samples equally", | |
| "Increases number of trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "GOSS keeps high-gradient samples for training, reducing computation while preserving accuracy." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Scenario: Dataset has many categorical variables. What should you do?", | |
| "options": [ | |
| "Use LightGBM's built-in categorical feature support", | |
| "One-hot encode all features manually", | |
| "Convert to text data", | |
| "Ignore categorical columns" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "LightGBM natively supports categorical features through optimal split encoding without full one-hot expansion." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "How does LightGBM handle categorical features internally?", | |
| "options": [ | |
| "Sorts categories by average target and finds best split", | |
| "Performs label encoding only", | |
| "Uses frequency encoding", | |
| "Applies hash bucketing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "LightGBM sorts categories by their target mean to efficiently find the best split." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "Scenario: Training time is long on large data with many features. What can help?", | |
| "options": [ | |
| "Reduce feature_fraction and bagging_fraction", | |
| "Increase num_leaves", | |
| "Disable histogram mode", | |
| "Increase max_bin drastically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing feature_fraction and bagging_fraction speeds up training by using subsets of features and samples." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "What is the role of 'max_cat_threshold'?", | |
| "options": [ | |
| "Controls maximum thresholds for categorical splits", | |
| "Limits maximum tree depth", | |
| "Sets number of categories allowed", | |
| "Defines learning rate schedule" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "max_cat_threshold limits how many thresholds LightGBM evaluates for categorical splits." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Scenario: Distributed LightGBM training is producing inconsistent results. Likely reason?", | |
| "options": [ | |
| "Non-deterministic data shuffling or parameter differences across nodes", | |
| "Too high learning rate", | |
| "Disabled GPU support", | |
| "Overfitting due to small num_leaves" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Different random seeds or node configurations in distributed mode can cause inconsistency." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "What helps ensure reproducible LightGBM results?", | |
| "options": [ | |
| "Set deterministic=True and fix random_seed", | |
| "Increase bagging_fraction", | |
| "Enable GPU mode", | |
| "Reduce learning rate" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Setting deterministic=True and fixing random_seed ensures consistent results across runs." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Which LightGBM feature allows parallel learning across machines?", | |
| "options": [ | |
| "Distributed training mode", | |
| "Bagging", | |
| "GPU histograms", | |
| "Early stopping" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Distributed mode enables training across multiple machines using data parallelism." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "What is the key difference between DART and standard GBDT?", | |
| "options": [ | |
| "DART randomly drops trees during training to prevent overfitting", | |
| "DART doubles learning rate dynamically", | |
| "DART uses fewer features per tree", | |
| "DART cannot perform regression tasks" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DART introduces dropout on trees, improving regularization and generalization." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: Validation accuracy fluctuates heavily between iterations. Likely cause?", | |
| "options": [ | |
| "Learning rate too high or bagging too aggressive", | |
| "Too many trees", | |
| "Too few bins", | |
| "High lambda_l2" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "High learning rate or aggressive subsampling can cause instability in validation metrics." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "What does 'linear_tree' parameter enable?", | |
| "options": [ | |
| "Adds linear models to each leaf for hybrid boosting", | |
| "Switches boosting type", | |
| "Performs polynomial regression", | |
| "Forces shallow trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "linear_tree enables a linear model within each leaf, combining tree and linear learning." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: Using linear_tree improved performance slightly but increased training time. Why?", | |
| "options": [ | |
| "Linear models per leaf require additional optimization", | |
| "Learning rate reduced automatically", | |
| "Tree structure became shallower", | |
| "Fewer bins created per feature" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Each leaf fits a small linear model, increasing training computation but often improving accuracy." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Which LightGBM setting improves memory efficiency on large data?", | |
| "options": [ | |
| "Use histogram pool sharing and smaller max_bin", | |
| "Increase max_depth", | |
| "Enable linear_tree", | |
| "Disable bagging" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing max_bin and using histogram sharing significantly lower memory usage." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Scenario: Feature importance shows unexpected zeros for numeric features. Why?", | |
| "options": [ | |
| "Feature was rarely used due to high correlation or low information gain", | |
| "Model error", | |
| "Bug in LightGBM", | |
| "Feature_fraction=1.0" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Highly correlated or uninformative features may never be chosen for splits, yielding zero importance." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "What is the purpose of 'monotone_constraints'?", | |
| "options": [ | |
| "Forces model predictions to follow specified monotonic relationships with features", | |
| "Restricts tree depth", | |
| "Balances data classes", | |
| "Disables early stopping" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "monotone_constraints ensure predictions move consistently up or down with certain features." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: You set monotone_constraints incorrectly. Possible issue?", | |
| "options": [ | |
| "Model accuracy drops or fails to converge", | |
| "Training halts immediately", | |
| "All features are ignored", | |
| "Learning rate resets" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Wrong monotonic constraints can make optimization infeasible, harming accuracy or convergence." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "What metric would you monitor for binary classification?", | |
| "options": [ | |
| "binary_logloss or AUC", | |
| "mean_squared_error", | |
| "poisson", | |
| "quantile" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "For binary tasks, LightGBM supports metrics like binary_logloss and AUC for evaluation." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: After tuning, training accuracy improves but test accuracy drops. What happened?", | |
| "options": [ | |
| "Overfitting", | |
| "Underfitting", | |
| "Learning rate too small", | |
| "Too many missing values" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Higher training accuracy with lower test performance indicates overfitting." | |
| } | |
| ] | |
| } | |
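The Part 4 questions touch options that change how training itself behaves (boosting_type, device_type, monotone_constraints, deterministic, linear_tree). The sketch below exercises the ones that run on any standard CPU build; GPU training and linear_tree are left as comments because they depend on how LightGBM was compiled and on feature compatibility. The data and the constraint signs are invented for illustration.

```python
import lightgbm as lgb
import numpy as np

# Hypothetical regression data: the target rises with feature 0 and falls with feature 1.
rng = np.random.default_rng(7)
X = rng.normal(size=(2000, 3))
y = X[:, 0] - 0.5 * X[:, 1] + rng.normal(scale=0.1, size=2000)
train = lgb.Dataset(X, label=y)

params = {
    "objective": "regression",
    "boosting_type": "gbdt",             # "dart" adds tree dropout, "goss" samples by gradient
    "device_type": "cpu",                # "gpu" if your build was compiled with GPU support
    "monotone_constraints": [1, -1, 0],  # +1 increasing, -1 decreasing, 0 unconstrained
    "deterministic": True,               # with a fixed seed, repeat runs match
    "force_row_wise": True,              # recommended together with deterministic
    "seed": 42,
    # "linear_tree": True,               # hybrid leaves with linear models (CPU only, slower)
    "num_leaves": 31,
    "learning_rate": 0.05,
}
booster = lgb.train(params, train, num_boost_round=200)
print(dict(zip(["f0", "f1", "f2"], booster.feature_importance().tolist())))
```

feature_importance() here reports split counts by default; a feature that is never chosen for any split shows zero importance, which is the situation described in question 96.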