Spaces:

sklearn-docs
/

single-estimator-vs-bagging

Sleeping

App Files Files Community

marik0 committed on Apr 12, 2023

Commit

baaa327

1 Parent(s): 6fa0397

Created the demo

Browse files

Files changed (2) hide show

app.py +148 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+from sklearn.ensemble import BaggingRegressor
+from sklearn.tree import DecisionTreeRegressor
+import gradio as gr
+matplotlib.use('agg')
+# Generate data
+def f(x):
+    x = x.ravel()
+    return np.exp(-(x**2)) + 1.5 * np.exp(-((x - 2) ** 2))
+def generate(n_samples, noise, n_repeat=1):
+    X = np.random.rand(n_samples) * 10 - 5
+    X = np.sort(X)
+    if n_repeat == 1:
+        y = f(X) + np.random.normal(0.0, noise, n_samples)
+    else:
+        y = np.zeros((n_samples, n_repeat))
+        for i in range(n_repeat):
+            y[:, i] = f(X) + np.random.normal(0.0, noise, n_samples)
+    X = X.reshape((n_samples, 1))
+    return X, y
+def train_model(n_train, noise):
+    # Settings
+    n_repeat = 50  # Number of iterations for computing expectations
+    # n_train = 50  # Size of the training set
+    n_test = 1000  # Size of the test set
+    # noise = noise  # Standard deviation of the noise
+    np.random.seed(0)
+    # Change this for exploring the bias-variance decomposition of other
+    # estimators. This should work well for estimators with high variance (e.g.,
+    # decision trees or KNN), but poorly for estimators with low variance (e.g.,
+    # linear models).
+    estimators = [
+        ("Tree", DecisionTreeRegressor()),
+        ("Bagging(Tree)", BaggingRegressor(DecisionTreeRegressor())),
+    ]
+    n_estimators = len(estimators)
+    X_train = []
+    y_train = []
+    for i in range(n_repeat):
+        X, y = generate(n_samples=n_train, noise=noise)
+        X_train.append(X)
+        y_train.append(y)
+    X_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)
+    fig = plt.figure(figsize=(10, 8))
+    out_str = ""
+    # Loop over estimators to compare
+    for n, (name, estimator) in enumerate(estimators):
+        # Compute predictions
+        y_predict = np.zeros((n_test, n_repeat))
+        for i in range(n_repeat):
+            estimator.fit(X_train[i], y_train[i])
+            y_predict[:, i] = estimator.predict(X_test)
+        # Bias^2 + Variance + Noise decomposition of the mean squared error
+        y_error = np.zeros(n_test)
+        for i in range(n_repeat):
+            for j in range(n_repeat):
+                y_error += (y_test[:, j] - y_predict[:, i]) ** 2
+        y_error /= n_repeat * n_repeat
+        y_noise = np.var(y_test, axis=1)
+        y_bias = (f(X_test) - np.mean(y_predict, axis=1)) ** 2
+        y_var = np.var(y_predict, axis=1)
+        out_str += f"{name}: {np.mean(y_error):.4f} (error) = {np.mean(y_bias):.4f} (bias^2) + {np.mean(y_var):.4f} (var) + {np.mean(y_noise):.4f} (noise)\n"
+        # Plot figures
+        plt.subplot(2, n_estimators, n + 1)
+        plt.plot(X_test, f(X_test), "b", label="$f(x)$")
+        plt.plot(X_train[0], y_train[0], ".b", label="LS ~ $y = f(x)+noise$")
+        for i in range(n_repeat):
+            if i == 0:
+                plt.plot(X_test, y_predict[:, i], "r", label=r"$\^y(x)$")
+            else:
+                plt.plot(X_test, y_predict[:, i], "r", alpha=0.05)
+        plt.plot(X_test, np.mean(y_predict, axis=1), "c", label=r"$\mathbb{E}_{LS} \^y(x)$")
+        plt.xlim([-5, 5])
+        plt.title(name)
+        if n == n_estimators - 1:
+            plt.legend(loc=(1.1, 0.5))
+        plt.subplot(2, n_estimators, n_estimators + n + 1)
+        plt.plot(X_test, y_error, "r", label="$error(x)$")
+        plt.plot(X_test, y_bias, "b", label="$bias^2(x)$"),
+        plt.plot(X_test, y_var, "g", label="$variance(x)$"),
+        plt.plot(X_test, y_noise, "c", label="$noise(x)$")
+        plt.xlim([-5, 5])
+        plt.ylim([0, noise])
+        if n == n_estimators - 1:
+            plt.legend(loc=(1.1, 0.5))
+    plt.subplots_adjust(right=0.75)
+    return fig, out_str
+title = "Single estimator versus bagging: bias-variance decomposition ⚖️"
+description = "This example illustrates and compares the bias-variance decomposition of the expected mean squared error of a single estimator against a bagging ensemble. "
+with gr.Blocks() as demo:
+    gr.Markdown(f"## {title}")
+    gr.Markdown(description)
+    num_samples = gr.Slider(minimum=50, maximum=200, step=50, value=50, label="Number of samples")
+    noise = gr.Slider(minimum=0.05, maximum=0.2, step=0.05, value=0.1, label="Noise")
+    with gr.Row():
+        with gr.Row():
+            with gr.Column(scale=2):
+                plot = gr.Plot()
+            with gr.Column(scale=1):
+                results = gr.Textbox(label="Results")
+    num_samples.change(fn=train_model, inputs=[num_samples, noise], outputs=[plot, results])
+    noise.change(fn=train_model, inputs=[num_samples, noise], outputs=[plot, results])
+demo.launch(enable_queue=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+numpy
+matplotlib
+scikit-learn