Implement K-nearest neighbor classification using Python and Scikit-learn

Implementation in plain Python (standard library only)

import math
def getKey(item):
    """Return the sort key of a ``[distance, target]`` pair: its distance."""
    return item[0]


# Training samples: each row holds four numeric features of one sample.
dataset = [[5.1,3.5,1.4,0.2],
           [4.6,3.6,1.0,0.2],
           [5.9,3.0,4.2,1.5],
           [5.4,3.0,4.5,1.5],
           [7.7,2.8,6.7,2.0],
           [7.9,3.8,6.4,2.0]]

# Class label of the row at the same position in ``dataset``.
targets = [0,0,1,1,2,2]

#query = [4.4,2.9,1.4,0.2] #Class 0
query = [6.1,2.9,4.7,1.4] #Class 1
#query = [7.2,3.2,6.0,1.8] #Class 2

# Number of nearest neighbours that take part in the vote.  Kept separate
# from the vote table below so it can be changed without touching the
# number of classes.
k = 3

# Euclidean distance from the query to every training row, stored together
# with that row's class label as ``[distance, target]``.
query_distance = []
for datarow, target in zip(dataset, targets):
    squared = sum((value - q) ** 2 for value, q in zip(datarow, query))
    query_distance.append([math.sqrt(squared), target])

print(query_distance)

# Closest training rows first.
sorted_distance = sorted(query_distance, key=getKey)

# One vote counter per class label; labels are used directly as indices,
# so the table needs ``max(targets) + 1`` slots regardless of ``k``.
knn = [0] * (max(targets) + 1)
for _, target in sorted_distance[:k]:
    knn[target] += 1

# The index holding the largest count is the predicted class.
print(knn)

Viewing the decision boundaries with Scikit-learn

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

# Number of neighbours used by the classifier (also shown in the plot title).
n_neighbors = 15

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
# avoid this ugly slicing by using a two-dim dataset
y = iris.target

h = .02  # step size in the mesh

# Create color maps: light shades for the predicted regions, bold shades
# for the training points.
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# The mesh [x_min, x_max] x [y_min, y_max] depends only on the data, not on
# the weighting scheme, so it is built once before the loop.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

for weights in ['uniform', 'distance']:
    # we create an instance of Neighbours Classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

# Show both figures (one per weighting scheme) once they are all drawn.
plt.show()

A classification example

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv('iris.csv')
# Quick-look helpers, uncomment as needed:
#print(df.describe())
#print(df.dtypes)
#print(df.info())

# Feature matrix: the four measurement columns.  get_dummies is a no-op
# for columns that are already numeric.
X = pd.get_dummies(df.loc[:, ['sepal_length', 'sepal_width','petal_length','petal_width']])
# One indicator column per distinct value of 'species'.
y = pd.get_dummies(df.loc[:, 'species'])

#sns.lmplot(x="sepal_length",y="sepal_width",data=df,hue="species",fit_reg=False)
#sns.pairplot(df, hue="species")
#plt.show()

# Hold out 25% of the rows for evaluation.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25)
#print(type(X_train))

from sklearn import neighbors
knn = neighbors.KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train, y_train)

y_predict_ndarray = knn.predict(X_test)
#print(type(y_predict_ndarray))
# Label the prediction columns with the same names as the encoded target so
# the two frames always line up, whatever species labels the CSV contains.
y_predict_df = pd.DataFrame(y_predict_ndarray, columns=y.columns)
#print("Predicted: ")
#print(y_predict_df.head())
#print("Actual: ")
#print(y_test.head())

# A test row counts as an error when any of its indicator values differs
# from the prediction.  Rows are compared by position and by value, because
# y_test keeps the original row labels while y_predict_df is renumbered.
count_total = len(y_predict_df)
mismatched_rows = (y_predict_df.values != y_test.values).any(axis=1)
count_error = int(mismatched_rows.sum())
# Single-argument print(...) is valid on both Python 2 and Python 3.
print("Total : "+str(count_total)+" Error : "+str(count_error)+" Accuracy: "+str(1-(float(count_error)/float(count_total))))

# Optional: list every misclassified row.
#predict = y_predict_df.values
#target = y_test.values
#for i in range(len(predict)):
#    if not np.array_equal(predict[i], target[i]):
#        print(str(i)+": Predict: "+str(predict[i])+": Target: "+str(target[i]))

# Optional: let scikit-learn compute the accuracy.
#from sklearn.metrics import accuracy_score
#print("Accuracy: "+str(accuracy_score(y_test,y_predict_df)))

# Optional: confusion matrix heat map (argmax turns each indicator row back
# into a class index).
#from sklearn.metrics import confusion_matrix
#c_mat = confusion_matrix(y_test.values.argmax(axis=1), y_predict_df.values.argmax(axis=1))
#sns.heatmap(c_mat,annot=True,fmt="d")
#plt.show()

KNN Scikit-learn Confusion matrix