Spaces: Running on CPU Upgrade
Add sliders for dataset
app.py CHANGED
@@ -9,11 +9,11 @@ import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 
-def create_dataset():
+def create_dataset(num_samples, num_informative):
     X, y = make_classification(
-        n_samples=
+        n_samples=num_samples,
         n_features=10,
-        n_informative=
+        n_informative=num_informative,
         n_redundant=0,
         n_repeated=0,
         n_classes=2,
@@ -24,17 +24,6 @@ def create_dataset():
     X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
     return X_train, X_test, y_train, y_test
 
-def train_model():
-
-    X_train, X_test, y_train, y_test = create_dataset()
-
-    feature_names = [f"feature {i}" for i in range(X_train.shape[1])]
-    forest = RandomForestClassifier(random_state=0)
-    forest.fit(X_train, y_train)
-
-    return forest, feature_names, X_test, y_test
-
-
 def plot_mean_decrease(clf, feature_names):
     importances = clf.feature_importances_
     std = np.std([tree.feature_importances_ for tree in clf.estimators_], axis=0)
@@ -63,11 +52,25 @@ def plot_feature_perm(clf, feature_names, X_test, y_test):
 
     return fig
 
+def train_model(num_samples, num_info):
 
+    X_train, X_test, y_train, y_test = create_dataset(num_samples, num_info)
+
+    feature_names = [f"feature {i}" for i in range(X_train.shape[1])]
+    forest = RandomForestClassifier(random_state=0)
+    forest.fit(X_train, y_train)
+
+    fig = plot_mean_decrease(forest, feature_names)
+    fig2 = plot_feature_perm(forest, feature_names, X_test, y_test)
+    return fig, fig2
+
+
 
 title = "Feature importances with a forest of trees 🌳"
 description = """This example shows the use of a forest of trees to evaluate the importance of features on an artificial classification task.
 The blue bars are the feature importances of the forest, along with their inter-trees variability represented by the error bars.
+
+The model is trained with simulated data.
 """
 
 with gr.Blocks() as demo:
@@ -75,18 +78,16 @@ with gr.Blocks() as demo:
     gr.Markdown(description)
 
     # with gr.Column():
-    clf, feature_names, X_test, y_test = train_model()
 
+    num_samples = gr.Slider(minimum=1000, maximum=5000, step=500, value=1000, label="Number of samples")
+    num_info = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of informative features")
+
     with gr.Row():
-        plot = gr.Plot(
-        plot2 = gr.Plot(
+        plot = gr.Plot()
+        plot2 = gr.Plot()
 
-
-
-    # mse = gr.Textbox(label="Mean squared error (MSE)")
-    # r2 = gr.Textbox(label="R2 score")
-
-    # input_data.change(fn=train_model, inputs=[input_data], outputs=[plot, coef, mse, r2], queue=False)
+    num_samples.change(fn=train_model, inputs=[num_samples, num_info], outputs=[plot, plot2])
+    num_info.change(fn=train_model, inputs=[num_samples, num_info], outputs=[plot, plot2])
 
 
 demo.launch(enable_queue=True)
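
For context, a minimal sketch of how the new slider wiring behaves end to end. As committed, both gr.Plot() components start empty and only fill in after one of the sliders moves; a common pattern is to register the same handler with demo.load so the plots also render on first page load. This sketch is not part of the commit: the train_model below is a stand-in for the one defined in app.py (it just returns empty figures so the snippet runs), and it assumes a Gradio version that provides Blocks.load with fn/inputs/outputs.

import gradio as gr
import matplotlib.pyplot as plt

def train_model(num_samples, num_info):
    # Stand-in for the train_model defined in app.py above, which returns the
    # mean-decrease-in-impurity and permutation-importance figures; empty
    # placeholder figures are returned here so the sketch is self-contained.
    return plt.figure(), plt.figure()

with gr.Blocks() as demo:
    num_samples = gr.Slider(minimum=1000, maximum=5000, step=500, value=1000, label="Number of samples")
    num_info = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of informative features")

    with gr.Row():
        plot = gr.Plot()
        plot2 = gr.Plot()

    # Retrain and redraw whenever either slider changes (same wiring as the commit).
    num_samples.change(fn=train_model, inputs=[num_samples, num_info], outputs=[plot, plot2])
    num_info.change(fn=train_model, inputs=[num_samples, num_info], outputs=[plot, plot2])

    # Also populate both plots once when the page first loads.
    demo.load(fn=train_model, inputs=[num_samples, num_info], outputs=[plot, plot2])

demo.launch()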