Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from matplotlib.colors import ListedColormap | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.datasets import make_moons, make_circles, make_classification | |
| from sklearn.neural_network import MLPClassifier | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.svm import SVC | |
| from sklearn.gaussian_process import GaussianProcessClassifier | |
| from sklearn.gaussian_process.kernels import RBF | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier | |
| from sklearn.naive_bayes import GaussianNB | |
| from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis | |
| from sklearn.inspection import DecisionBoundaryDisplay | |
| from sklearn.datasets import make_blobs, make_circles, make_moons | |
| import gradio as gr | |
| import math | |
| from functools import partial | |
| ### DATASETS | |
def normalize(X):
    """Fit a new ``StandardScaler`` on ``X`` and return the transformed data."""
    scaler = StandardScaler()
    return scaler.fit_transform(X)
def linearly_separable():
    """Build the "linearly separable" toy dataset.

    Generates a two-feature classification problem with
    ``make_classification`` (``random_state=1``) and then adds, in place,
    twice a uniform sample drawn from ``np.random.RandomState(2)`` to every
    coordinate.

    Returns:
        tuple: ``(X, y)`` -- the perturbed feature array and its labels.
    """
    features, labels = make_classification(
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=1,
        n_clusters_per_class=1,
    )
    noise_rng = np.random.RandomState(2)
    features += 2 * noise_rng.uniform(size=features.shape)
    return features, labels
# Dataset name -> (X, y) pair.  The keys are the strings that train_models
# receives as ``selected_data``; every dataset is generated once, at import
# time, with a fixed random_state.
DATA_MAPPING = {
    "Moons": make_moons(noise=0.3, random_state=0),
    "Circles": make_circles(noise=0.2, factor=0.5, random_state=1),
    "Linearly Separable Random Dataset": linearly_separable(),
}
| #### MODELS | |
def get_groundtruth_model(X, labels):
    """Return a placeholder "model" that only carries the true labels.

    Args:
        X: Not used by this function.
        labels: The true class labels to expose on the returned object.

    Returns:
        An object whose ``labels_`` attribute is ``labels``.
    """

    class Dummy:
        # Dummy model to show the true label distribution.
        def __init__(self, y):
            # Store the constructor argument itself instead of silently
            # reading ``labels`` from the enclosing scope.
            self.labels_ = y

    return Dummy(labels)
# The same three generator calls (and seeds) used for DATA_MAPPING, kept as a
# plain list of (X, y) pairs.
DATASETS = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable(),
]
# Display name -> classifier.  "Ground Truth" maps to a factory function, not
# an estimator; train_models special-cases that key.  Every other value is a
# single estimator instance created at import time and re-fitted by each
# train_models call that selects it.
NAME_CLF_MAPPING = {
    "Ground Truth": get_groundtruth_model,
    "Nearest Neighbors": KNeighborsClassifier(3),
    "Linear SVM": SVC(kernel="linear", C=0.025),
    "RBF SVM": SVC(gamma=2, C=1),
    "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)),
    "Decision Tree": DecisionTreeClassifier(max_depth=5),
    "Random Forest": RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1
    ),
    "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
}
| #### PLOT | |
# Figure size passed to plt.subplots for every plot built by train_models.
FIGSIZE = (7, 7)
# NOTE(review): `figure` and `i` are not referenced anywhere else in the
# visible code; they look like leftovers -- confirm before removing.
figure = plt.figure(figsize=(25, 10))
i = 1
def train_models(selected_data, clf_name):
    """Build the figure shown for one classifier on the selected dataset.

    The dataset is standardized and split 60/40 into train and test parts
    with a fixed seed.  For ``"Ground Truth"`` the figure is a scatter plot
    of the train and test points coloured by their true label; for any other
    entry of ``NAME_CLF_MAPPING`` the estimator is fitted on the training
    part and its decision surface is drawn.

    Args:
        selected_data: Key into ``DATA_MAPPING``.
        clf_name: Key into ``NAME_CLF_MAPPING``; also used as the title of
            decision-boundary plots.

    Returns:
        The matplotlib figure that was drawn.

    Raises:
        KeyError: If either name is not a key of its mapping.
    """
    clf = NAME_CLF_MAPPING[clf_name]
    X, y = DATA_MAPPING[selected_data]
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

    fig, ax = plt.subplots(figsize=FIGSIZE)

    if clf_name != "Ground Truth":
        clf.fit(X_train, y_train)
        ax.set_title(clf_name, fontsize=10)
        # from_estimator already draws onto ``ax``.  Calling .plot() on the
        # returned display without an ``ax`` would redraw the surface on a
        # brand-new, never-used figure, so it is deliberately not called.
        DecisionBoundaryDisplay.from_estimator(
            clf, X, cmap=plt.cm.RdBu, alpha=0.8, ax=ax, eps=0.5
        )
    else:
        cm_bright = ListedColormap(["#FF0000", "#0000FF"])
        ax.set_title("Input data")
        # Plot the training points
        ax.scatter(
            X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k"
        )
        # Plot the testing points
        ax.scatter(
            X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
        )
        # Axis limits must come from the same standardized data that is
        # being scattered, not from an unrelated, unscaled dataset.
        ax.set_xlim(X[:, 0].min() - 0.5, X[:, 0].max() + 0.5)
        ax.set_ylim(X[:, 1].min() - 0.5, X[:, 1].max() + 0.5)
        ax.set_xticks(())
        ax.set_yticks(())

    # Release the figure from pyplot's global registry so figures do not pile
    # up across calls; the Figure object itself is still returned to the
    # caller for rendering.
    plt.close(fig)
    return fig
# Introductory text rendered with gr.Markdown under the page title.
description = (
    "Learn how different statistical classifiers perform in different datasets."
)
def iter_grid(n_rows, n_cols):
    """Step through the cells of an ``n_rows`` x ``n_cols`` Gradio grid.

    This is a generator.  Each ``yield`` happens inside a fresh
    ``gr.Column`` nested in the current ``gr.Row``, so whatever the consumer
    does between two steps runs while both context managers are open.  The
    yielded value is always ``None``.
    """
    for _row in range(n_rows):
        with gr.Row():
            for _col in range(n_cols):
                with gr.Column():
                    yield
title = "Compare Classifiers!"

# Build the page: a dataset selector followed by one plot per classifier.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    input_models = list(NAME_CLF_MAPPING)
    # Offer exactly the datasets train_models can look up, so the selector
    # and DATA_MAPPING cannot drift apart.
    input_data = gr.Radio(
        choices=list(DATA_MAPPING),
        value="Moons",
    )

    # Five plots per row; use as many rows as the classifier list needs so
    # that no classifier is silently left out.
    n_cols = 5
    n_rows = math.ceil(len(input_models) / n_cols)

    # iter_grid comes first in zip() so that, when the grid and the model
    # list have the same length, the generator runs to completion and closes
    # its Row/Column contexts itself.
    for _, input_model in zip(iter_grid(n_rows, n_cols), input_models):
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, clf_name=input_model)
        input_data.change(fn=fn, inputs=[input_data], outputs=plot)
        # Also draw once when the page loads; otherwise the plots stay empty
        # until the user switches away from the default dataset.
        demo.load(fn=fn, inputs=[input_data], outputs=plot)

demo.launch(debug=True)