老师您好,我手写KNN算法识别MNIST数据集,但结果不尽如人意仅有30%左右的准确率,请问是什么原因导致的呢?以下是源代码
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from math import sqrt
from collections import Counter
def KNNClassifier(X_train,y_train,k,x):
distances = [sqrt(np.sum((x_train - x)**2)) for x_train in X_train]
nearest = np.argsort(distances)
topK_y = [y_train[i] for i in nearest[:k]]
votes = Counter(topK_y)
return votes.most_common(1)[0][0]
def PredictMatrix(X_train,y_train,k,X_predict):
"""给定待预测数据集X_predict,返回表示X_predict的结果向量"""
y_predict = [KNNClassifier(X_train,y_train,k,x) for x in X_predict]
return np.array(y_predict)
def Accuracy(predict_matrix,label_matrix):
return sum(predict_matrix==label_matrix)/label_matrix.shape[0]
#获取MNIST数据集
mnist = fetch_mldata("MNIST original")
#随机获取MNIST中的5000个样本作为样本点
sample = np.array(np.random.randint(low=0,high=70000, size=5000))
data = mnist.data[sample]
target = mnist.target[sample]
#训练测试集划分
X_train, X_test, y_train, y_test = np.array(train_test_split(data, target, train_size=0.9))
prediction_matrix = PredictMatrix(X_train,y_train,5,X_test)
score = Accuracy(prediction_matrix,y_test)
print('knn score: %f' % score)