# AutomaticDatavisualization / classification.py
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import streamlit as st
import os
import plotly.express as px
def preprocess(dataset, x_iloc_list, y_iloc, testSize):
    """Split *dataset* into standardized train/test feature and target arrays.

    Parameters
    ----------
    dataset : pandas.DataFrame to slice features/target from.
    x_iloc_list : positional column indices used as features.
    y_iloc : positional column index of the target.
    testSize : fraction of rows held out for the test split.

    Returns
    -------
    (X_train, X_test, y_train, y_test) with features scaled to zero mean /
    unit variance (scaler fitted on the training split only).
    """
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    features = dataset.iloc[:, x_iloc_list].values
    target = dataset.iloc[:, y_iloc].values

    # random_state fixed so repeated runs produce the same split.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=testSize, random_state=0
    )

    # Fit the scaler on training data only, then apply to both splits.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test
class classification:
    """Train and evaluate scikit-learn classifiers, reporting via Streamlit.

    Each model method (LR, KNN, SVM, NB, DT, RF) fits a classifier on the
    stored training split, persists it with joblib, and renders a
    classification report, confusion matrix, accuracy, and a per-class
    metrics heatmap in the Streamlit app.
    """

    def __init__(self, X_train, X_test, y_train, y_test):
        # Pre-split (and pre-scaled — see preprocess()) feature/target arrays.
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

    def accuracy(self, confusion_matrix):
        """Return overall accuracy from a 2-D confusion-matrix array.

        Accuracy is the sum of the diagonal (correct predictions) divided
        by the total number of samples.
        """
        # Diagonal = correctly classified counts; avoids the previous loop
        # that shadowed the builtin `sum`.
        return np.trace(confusion_matrix) / np.sum(confusion_matrix)

    def classification_report_plot(self, clf_report):
        """Render the per-class rows of a classification_report dict as a heatmap.

        The last DataFrame row (the 'support' row after transposition of the
        report dict) is dropped, matching the original layout.
        """
        fig = px.imshow(pd.DataFrame(clf_report).iloc[:-1, :].T)
        st.plotly_chart(fig)

    def _report(self, title, y_pred):
        # Shared Streamlit reporting for a fitted model's predictions
        # (previously copy-pasted into every model method).
        rule = "-" * (len(title) + 8)
        st.write("\n")
        st.write(rule)
        st.write("### " + title + " ###")
        st.write(rule)
        st.write('Classification Report: ')
        clf = classification_report(self.y_test, y_pred, output_dict=True)
        st.table(pd.DataFrame(clf))
        st.write('Confusion Matrix: ')
        cm = confusion_matrix(self.y_test, y_pred)
        st.table(pd.DataFrame(cm))
        # BUG FIX: this value is accuracy, but was labeled 'Precision'.
        st.write('Accuracy: ', self.accuracy(cm) * 100, '%')
        self.classification_report_plot(clf)

    def LR(self):
        """Fit, persist, and report a logistic-regression classifier."""
        from sklearn.linear_model import LogisticRegression
        model = LogisticRegression()
        model.fit(self.X_train, self.y_train)
        joblib.dump(model, "model/lr.sav")
        # BUG FIX: header previously said "Random Forest Classifier".
        self._report("Logistic Regression Classifier",
                     model.predict(self.X_test))

    def KNN(self):
        """Fit, persist, and report a k-nearest-neighbors classifier."""
        from sklearn.neighbors import KNeighborsClassifier
        model = KNeighborsClassifier()
        model.fit(self.X_train, self.y_train)
        joblib.dump(model, "model/knn.sav")
        self._report("K-Neighbors Classifier", model.predict(self.X_test))

    # kernel type could be 'linear' or 'rbf' (Gaussian)
    def SVM(self, kernel_type):
        """Fit, persist, and report a support-vector classifier.

        `kernel_type` is passed straight to SVC (e.g. 'linear' or 'rbf').
        """
        from sklearn.svm import SVC
        model = SVC(kernel=kernel_type)
        model.fit(self.X_train, self.y_train)
        joblib.dump(model, "model/svm.sav")
        self._report("Support Vector Classifier (" + kernel_type + ")",
                     model.predict(self.X_test))

    def NB(self):
        """Fit, persist, and report a Gaussian naive-Bayes classifier."""
        from sklearn.naive_bayes import GaussianNB
        model = GaussianNB()
        model.fit(self.X_train, self.y_train)
        joblib.dump(model, "model/nb.sav")
        self._report("Naive Bayes Classifier", model.predict(self.X_test))

    def DT(self):
        """Fit, persist, and report a decision-tree classifier."""
        from sklearn.tree import DecisionTreeClassifier
        model = DecisionTreeClassifier()
        model.fit(self.X_train, self.y_train)
        joblib.dump(model, "model/tree.sav")
        self._report("Decision Tree Classifier", model.predict(self.X_test))

    def RF(self):
        """Fit, persist, and report a random-forest classifier."""
        from sklearn.ensemble import RandomForestClassifier
        model = RandomForestClassifier(n_estimators=10, criterion='entropy')
        model.fit(self.X_train, self.y_train)
        # Note: kept the original (inconsistent) .pkl path for compatibility.
        joblib.dump(model, "model/rf-model.pkl")
        self._report("Random Forest Classifier", model.predict(self.X_test))
# primary App interfacing function for classification
def st_classification():
    """Streamlit page: pick dataset columns and a model, then train/evaluate.

    Reads the previously uploaded dataset from temp_data/test.csv. Among the
    selected columns, the last one is treated as the target and all others
    as features (see the iloc indices below). If no usable selection has
    been made yet, a sidebar warning is shown instead of a traceback.
    """
    df = pd.read_csv("temp_data/test.csv")

    # One sidebar checkbox per column; checked columns form the working frame.
    st.sidebar.write("Select Column Names from the Dataset")
    col_names = [name for name in df.columns if st.sidebar.checkbox(name)]

    # Selecting an (always valid) subset of existing columns cannot raise,
    # so the previous bare `except: pass` around this was removed.
    df = df[col_names]
    st.write(df)

    try:
        # Last selected column is the target; everything before it is a feature.
        x_iloc_list = list(range(0, len(df.columns) - 1))
        y_iloc = len(df.columns) - 1
        test_size = st.sidebar.slider(
            "Enter Test Data Size (default 0.2)", 0.0, 0.4, 0.2, 0.1)
        X_train, X_test, y_train, y_test = preprocess(
            df, x_iloc_list, y_iloc, test_size)

        model = st.sidebar.selectbox(
            'Choose Model', ["LR", "KNN", "SVM", "NB", "DT", "RF"])
        classifier = classification(X_train, X_test, y_train, y_test)

        # Single dispatch replaces six copy-pasted try/except branches.
        try:
            if model == "SVM":
                # BUG FIX: label previously said 'Select Feature Selection
                # Method' although this chooses the SVM kernel.
                kernel_choice = st.sidebar.selectbox(
                    'Select SVM Kernel', ["linear", "rbf"])
                classifier.SVM(kernel_choice)
            else:
                # Model names map 1:1 to classification method names.
                getattr(classifier, model)()
        except Exception as e:
            # Surface training/reporting errors in the app instead of crashing.
            st.write(e)
    except Exception:
        # Typically: empty/insufficient column selection makes preprocess fail.
        st.warning('Consider selecting the columns in the left bar for classification', icon="⚠️")