Back to School II: Analyzing and predicting students’ grades

We just finished preparing our dataset for analysis. In this tutorial, we will look at how different classification algorithms perform on that pre-processed data:

import pandas as pd

# Load the pre-processed dataset from the working directory.
df = pd.read_csv("output.csv")

# Uncomment to inspect the first rows of the loaded frame:
# print("Printing original dataframe")
# print(df.head())

# Predictor columns, selected by name (order is preserved in X).
X = df[
    [
        "school", "sex", "age", "address", "famsize", "Pstatus",
        "Medu", "Fedu", "Mjob", "Fjob", "reason", "guardian",
        "traveltime", "studytime", "failures",
        "schoolsup", "famsup", "paid", "activities", "nursery",
        "higher", "internet", "romantic",
        "famrel", "freetime", "goout", "Dalc", "Walc",
        "health", "absences",
    ]
]

# Target column. Selecting with a one-element list keeps y as a
# single-column DataFrame rather than a Series.
y = df[["grade"]]

#print("Dtypes of X")
#print(X.dtypes)
#print("Describe of y")
#print(y.describe())

from sklearn.model_selection import train_test_split
# Partition features and target into training and evaluation subsets.
# No random_state is given, so the partition is not fixed between runs
# and the reported accuracies will vary.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,  # hold out 10% of the rows for evaluation
)

from sklearn.metrics import accuracy_score

from sklearn.tree import DecisionTreeClassifier
# Decision tree with scikit-learn's default hyper-parameters.
clf = DecisionTreeClassifier()
# y_train is a single-column DataFrame; flatten it to a 1-D label array,
# matching how every other classifier in this script is fitted.
clf.fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
# print(...) with a single parenthesised argument produces the same output
# on Python 2 and Python 3, whereas the bare print statement is a
# SyntaxError on Python 3.
print("Accuracy (DT): " + str(accuracy_score(y_test, y_pred)))

from sklearn.ensemble import RandomForestClassifier
# Random forest with scikit-learn's default hyper-parameters.
clf = RandomForestClassifier()
# Flatten the single-column target DataFrame to a 1-D label array.
clf.fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
# print(...) with a single parenthesised argument produces the same output
# on Python 2 and Python 3, whereas the bare print statement is a
# SyntaxError on Python 3.
print("Accuracy (RF): " + str(accuracy_score(y_test, y_pred)))

from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with scikit-learn's default settings.
clf = GaussianNB()
# Flatten the single-column target DataFrame to a 1-D label array.
clf.fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
# print(...) with a single parenthesised argument produces the same output
# on Python 2 and Python 3, whereas the bare print statement is a
# SyntaxError on Python 3.
print("Accuracy (NB): " + str(accuracy_score(y_test, y_pred)))

from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with scikit-learn's default settings.
clf = KNeighborsClassifier()
# Flatten the single-column target DataFrame to a 1-D label array.
clf.fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
# print(...) with a single parenthesised argument produces the same output
# on Python 2 and Python 3, whereas the bare print statement is a
# SyntaxError on Python 3.
print("Accuracy (KNN): " + str(accuracy_score(y_test, y_pred)))

from sklearn.svm import SVC
# Support vector classifier with scikit-learn's default settings.
clf = SVC()
# Flatten the single-column target DataFrame to a 1-D label array.
clf.fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
# print(...) with a single parenthesised argument produces the same output
# on Python 2 and Python 3, whereas the bare print statement is a
# SyntaxError on Python 3.
print("Accuracy (SVM): " + str(accuracy_score(y_test, y_pred)))