Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.model_selection import GridSearchCV | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.svm import SVR | |
| from sklearn.tree import DecisionTreeRegressor | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.svm import SVC | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.ensemble import RandomForestClassifier | |
def read(file, dep, ord):
    """Load an uploaded CSV, encode its categorical columns and rank models.

    Rows containing missing values are dropped. Columns listed in ``ord``
    (and a categorical target) are label-encoded; every other ``bool`` /
    ``object`` column is one-hot encoded. A ``bool`` / ``object`` target is
    handled by ``classification``; any other target by ``regression``.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string.
        dep: Name of the dependent (target) column as written in the CSV.
        ord: Comma-separated names of ordinal columns to label-encode.
            An empty string means "none". A list of names is also accepted.

    Returns:
        A 7-tuple ``(original_head, transformed_head, result, text, cat,
        ord, nom)``: the first five rows of the raw and the transformed
        frame, the model ranking table, the task name (``"classification"``
        or ``"regression"``), and the lists of categorical, label-encoded
        and one-hot-encoded column names.

    Raises:
        ValueError: If no file was supplied, if ``dep`` or an ordinal column
            is not present in the file, or if no complete rows remain.
    """
    if file is None:
        raise ValueError("No file was uploaded.")
    # Accept both file-like upload objects (path in ``.name``) and bare paths.
    path = getattr(file, "name", file)
    df = pd.read_csv(path)

    if dep not in df.columns:
        raise ValueError(
            f"Dependent variable {dep!r} is not a column of the uploaded file."
        )
    dep_type = str(df.dtypes[dep])
    is_categorical_target = dep_type in ("bool", "object")

    cat = [col for col in df.columns if str(df.dtypes[col]) in ("bool", "object")]

    # Work on an independent copy so the assignments below never touch ``df``.
    new_df = df.dropna(axis=0).copy()
    if new_df.empty:
        raise ValueError("No complete rows remain after dropping missing values.")

    # The textbox delivers one comma-separated string; turn it into names.
    if isinstance(ord, str):
        ord = [name.strip() for name in ord.split(",") if name.strip()]
    else:
        ord = list(ord or [])
    # A categorical target must always be label-encoded; otherwise it would
    # be one-hot encoded below and disappear under its original name.
    if is_categorical_target and dep not in ord:
        ord.append(dep)
    ord = list(dict.fromkeys(ord))  # drop duplicates, keep the given order

    unknown = [name for name in ord if name not in new_df.columns]
    if unknown:
        raise ValueError(f"Ordinal variables not found in the file: {unknown}")

    if ord:
        # A fresh encoder per column keeps each column's mapping independent.
        new_df[ord] = new_df[ord].apply(
            lambda col: LabelEncoder().fit_transform(col)
        )

    # Remaining categorical columns are nominal; keep the file's column order.
    nom = [col for col in cat if col not in ord]
    if nom:
        ohe_df = pd.get_dummies(new_df[nom], drop_first=True)
        new_df = pd.concat([new_df.drop(columns=nom), ohe_df], axis=1)

    if is_categorical_target:
        text = "classification"
        result = classification(new_df, dep)
    else:
        text = "regression"
        result = regression(new_df, dep)
    return df.head(5), new_df.head(5), result, text, cat, ord, nom
def classification(df, dep):
    """Rank a fixed set of classifiers on ``df`` by cross-validated accuracy.

    Args:
        df: Fully numeric feature frame that still contains the target column.
        dep: Name of the target column in ``df``.

    Returns:
        A DataFrame with columns ``params`` (as strings), ``rank_test_score``
        and ``mean_test_score`` (accuracy scaled to a percentage), sorted by
        rank with the best candidate first.
    """
    X = df.drop(dep, axis=1)
    y = df[dep]
    # Only the training part of the split is searched; the rest is not used.
    X_train, _, y_train, _ = train_test_split(X, y)

    # 'passthrough' is the documented placeholder for a step that the
    # parameter grid replaces with a real estimator.
    pipe = Pipeline(
        steps=[('scale', StandardScaler()), ('classification', 'passthrough')]
    )
    parameters = [
        {'classification': [LogisticRegression()]},
        {'classification': [RandomForestClassifier()]},
        {'classification': [DecisionTreeClassifier()]},
        {'classification': [SVC()]},
        {'classification': [KNeighborsClassifier(n_neighbors=5)]},
    ]
    search = GridSearchCV(
        pipe, param_grid=parameters, cv=5, n_jobs=-1, scoring='accuracy'
    )
    search.fit(X_train, y_train)

    # Copy the column subset so the assignment below acts on its own frame.
    result = pd.DataFrame(search.cv_results_)[
        ['params', 'rank_test_score', 'mean_test_score']
    ].copy()
    result['mean_test_score'] = result['mean_test_score'] * 100
    result = result.astype({'params': str})
    return result.sort_values('rank_test_score')
def regression(df, dep):
    """Rank a fixed set of regressors on ``df`` by cross-validated MAPE.

    Args:
        df: Fully numeric feature frame that still contains the target column.
        dep: Name of the target column in ``df``.

    Returns:
        A DataFrame with columns ``params`` (as strings), ``rank_test_score``
        and ``mean_test_score``, sorted by rank with the best candidate
        first. The score is ``(neg_MAPE + 1) * 100``, i.e. ``100 * (1 - MAPE)``,
        so higher is better and it can be negative when MAPE exceeds 1.
    """
    X = df.drop(dep, axis=1)
    y = df[dep]
    # Only the training part of the split is searched; the rest is not used.
    X_train, _, y_train, _ = train_test_split(X, y)

    # 'passthrough' is the documented placeholder for a step that the
    # parameter grid replaces with a real estimator.
    pipe = Pipeline(
        steps=[('scale', StandardScaler()), ('regression', 'passthrough')]
    )
    parameters = [
        {'regression': [LinearRegression()]},
        {'regression': [RandomForestRegressor()]},
        {'regression': [DecisionTreeRegressor()]},
        {'regression': [SVR()]},
    ]
    search = GridSearchCV(
        pipe,
        param_grid=parameters,
        cv=5,
        n_jobs=-1,
        scoring='neg_mean_absolute_percentage_error',
    )
    search.fit(X_train, y_train)

    # Copy the column subset so the assignment below acts on its own frame.
    result = pd.DataFrame(search.cv_results_)[
        ['params', 'rank_test_score', 'mean_test_score']
    ].copy()
    result['mean_test_score'] = (result['mean_test_score'] + 1) * 100
    result = result.astype({'params': str})
    return result.sort_values('rank_test_score')
# UI definition: one upload row, a submit button, four text summaries and
# three tables, all wired to ``read``.
with gr.Blocks() as demo:
    gr.Markdown("Model Recommendation App **Upload** file to see the output.")
    with gr.Column():
        with gr.Row():
            file = gr.File(label="Upload File(Comma Separated)")
            dep = gr.Textbox(label="Dependent Variable(Variable as in the file)")
            ord = gr.Textbox(label="Ordinal Variables(Separate with a comma)")
        submit = gr.Button("Submit")
        text = gr.Text(label="Suitable Algorithm")
        other1 = gr.Text(label="Categorical Variables")
        other2 = gr.Text(label="LabelEncoded Variables")
        other3 = gr.Text(label="OneHotEncoded Variables")
        with gr.Row():
            # NOTE(review): the legacy ``overflow_row_behaviour`` argument is
            # omitted; it appears to be rejected by newer Gradio releases —
            # confirm against the Gradio version this app is pinned to.
            org = gr.DataFrame(label="Original Data")
            converted = gr.DataFrame(label="Transformed Data")
        result = gr.DataFrame(label="Result")
    # The output order must match the 7-tuple returned by ``read``.
    submit.click(
        fn=read,
        inputs=[file, dep, ord],
        outputs=[org, converted, result, text, other1, other2, other3],
    )
demo.launch()