Back to School I: Reading and pre-processing the student dataset

Read student-mat.csv using pandas and convert the string values to numerical values.

import pandas as pd

# Load the raw student records.
df = pd.read_csv("student-mat.csv")

# Count how often each G1 mark occurs, ordered by the mark itself,
# and draw the distribution as a bar chart.
marks_counts = df["G1"].value_counts().sort_index()
marks_counts.plot(kind="bar")

def marks_to_grade(idx):
	"""Bucket a numeric mark into a coarse grade label.

	Marks below 7 map to "LOW", marks above 15 map to "HIGH", and every
	other mark maps to "AVG".
	"""
	if idx < 7:
		return "LOW"
	return "HIGH" if idx > 15 else "AVG"

# Derive the categorical "grade" column from the G1 marks.
df["grade"] = df["G1"].apply(marks_to_grade)

def yesno_to_num(idx):
	"""Encode a "no"/"yes" answer as 0/1.

	Any value other than those two strings yields None.
	"""
	if idx == "no":
		return 0
	return 1 if idx == "yes" else None

# Replace the yes/no answers in "schoolsup" with their 0/1 codes.
df["schoolsup"] = df["schoolsup"].apply(yesno_to_num)

# Write the processed table to disk, then show the matplotlib figure(s).
df.to_csv("output.csv")
import matplotlib.pyplot as plt
plt.show()

Program to convert the input file to the required format:

import pandas as pd

# Load the raw student records.
df = pd.read_csv("student-mat.csv")

# Count how often each G1 mark occurs, ordered by the mark itself,
# and draw the distribution as a bar chart.
marks_counts = df["G1"].value_counts().sort_index()
marks_counts.plot(kind="bar")

def marks_to_grade(idx):
	"""Translate a numeric mark into one of "LOW", "AVG" or "HIGH".

	"LOW" is used for marks below 7, "HIGH" for marks above 15, and
	"AVG" for every other mark.
	"""
	grade = "AVG"
	if idx < 7:
		grade = "LOW"
	elif idx > 15:
		grade = "HIGH"
	return grade

# Derive the categorical "grade" column from the G1 marks.
df["grade"] = df["G1"].apply(marks_to_grade)

def yesno_to_num(idx):
	"""Map "no" to 0 and "yes" to 1.

	Values matching neither string produce None.
	"""
	for code, label in enumerate(("no", "yes")):
		if idx == label:
			return code
	return None

# Convert every yes/no column to its 0/1 encoding, one column at a time.
for col in (
	"schoolsup",
	"famsup",
	"paid",
	"activities",
	"nursery",
	"higher",
	"internet",
	"romantic",
):
	df[col] = df[col].apply(yesno_to_num)

def sex_to_num(idx):
	"""Encode the sex code: "M" becomes 0 and "F" becomes 1.

	Any other value yields None.
	"""
	for code, label in enumerate(("M", "F")):
		if idx == label:
			return code
	return None

# Swap the "M"/"F" codes in the "sex" column for their numeric codes.
df["sex"] = df["sex"].apply(sex_to_num)

def school_to_num(idx):
	"""Encode the school code: "GP" becomes 0 and "MS" becomes 1.

	Any other value yields None.
	"""
	if idx == "GP":
		return 0
	return 1 if idx == "MS" else None

# Swap the "GP"/"MS" codes in the "school" column for their numeric codes.
df["school"] = df["school"].apply(school_to_num)

def address_to_num(idx):
	"""Encode the address code: "U" becomes 0 and "R" becomes 1.

	Any other value yields None.
	"""
	if idx == "U":
		return 0
	if idx == "R":
		return 1
	return None

# Swap the "U"/"R" codes in the "address" column for their numeric codes.
df["address"] = df["address"].apply(address_to_num)

def famsize_to_num(idx):
	"""Encode the family-size code: "GT3" becomes 0 and "LE3" becomes 1.

	Any other value yields None.
	"""
	for code, label in enumerate(("GT3", "LE3")):
		if idx == label:
			return code
	return None

# Swap the "GT3"/"LE3" codes in the "famsize" column for their numeric codes.
df["famsize"] = df["famsize"].apply(famsize_to_num)

def guardian_to_num(idx):
	"""Encode the guardian label as an integer.

	"mother" becomes 0, "father" becomes 1 and "other" becomes 2; any
	other value yields None.
	"""
	for code, label in enumerate(("mother", "father", "other")):
		if idx == label:
			return code
	return None

# Swap the guardian labels in the "guardian" column for their numeric codes.
df["guardian"] = df["guardian"].apply(guardian_to_num)

def Pstatus_to_num(idx):
	"""Encode the Pstatus code: "A" becomes 0 and "T" becomes 1.

	Any other value yields None.
	"""
	if idx == "A":
		return 0
	return 1 if idx == "T" else None

# Swap the "A"/"T" codes in the "Pstatus" column for their numeric codes.
df["Pstatus"] = df["Pstatus"].apply(Pstatus_to_num)

def job_to_num(idx):
	"""Encode a job label as an integer.

	"at_home" becomes 0, "health" 1, "services" 2, "teacher" 3 and
	"other" 4; any other value yields None.
	"""
	job_labels = ("at_home", "health", "services", "teacher", "other")
	for code, label in enumerate(job_labels):
		if idx == label:
			return code
	return None

# Both parental job columns share the same label-to-code mapping.
for col in ("Fjob", "Mjob"):
	df[col] = df[col].apply(job_to_num)

def reason_to_num(idx):
	"""Encode a reason label as an integer.

	"course" becomes 0, "home" 1, "reputation" 2 and "other" 3; any
	other value yields None.
	"""
	reason_labels = ("course", "home", "reputation", "other")
	for code, label in enumerate(reason_labels):
		if idx == label:
			return code
	return None

# Swap the reason labels in the "reason" column for their numeric codes.
df["reason"] = df["reason"].apply(reason_to_num)

# Remove the three mark columns from the table in place.
df.drop(labels=["G1", "G2", "G3"], axis="columns", inplace=True)

# Write the processed table to disk, then show the matplotlib figure(s).
df.to_csv("output.csv")
import matplotlib.pyplot as plt
plt.show()