import numpy as np

# Log prior log P(y) for each class, using Laplace smoothing with lambda = 1:
#     P(y = c) = (count(y_train == c) + 1) / (len(y_train) + classes)
# The log is taken so that the later product of probabilities becomes a sum.
classes = 10
label_counts = np.array([np.sum(y_train == label) for label in range(classes)])
P_y = ((label_counts + 1) / (len(y_train) + classes)).reshape(classes, 1)
P_y = np.log(P_y)
print(P_y)
1 2 3 4 5 6 7 8
# Conditional probability Px_y, step 1: count feature values.
# After this loop, Px_y[c, j, v] is the number of training samples with label c
# whose feature j has the value v (v is 0 or 1, matching the last axis of size 2).
# NOTE(review): using the feature value as an index assumes X_train holds
# integer 0/1 values (e.g. binarized pixels) -- TODO confirm against the loader.
n_features = X_train.shape[1]
Px_y = np.zeros((classes, n_features, 2))
for i in range(X_train.shape[0]):
    k = y_train[i]
    x = X_train[i]
    for feature in range(n_features):
        # Index with the value of the feature currently being visited, so each
        # feature is tallied under its own value.
        Px_y[k, feature, x[feature]] += 1
1 2 3 4 5 6 7 8 9 10
# Conditional probability Px_y, step 2: turn the raw counts into smoothed
# probabilities. For every class and feature, both value slots (0 and 1) become
#     (count + 1) / (number of samples in the class + 2)
# i.e. Laplace smoothing with one pseudo-count per possible feature value.
for index in range(classes):
    # The class size does not depend on the feature, so compute it once per
    # class rather than rescanning y_train for every feature.
    class_total = np.sum(y_train == index)
    # Px_y[index] covers every feature and both value slots of this class.
    Px_y[index] = (Px_y[index] + 1) / (class_total + 2)

# Take the log so the product of probabilities can be evaluated as a sum.
Px_y = np.log(Px_y)

# Print the table of the first class as a quick look at the result.
print(Px_y[0])
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# The final predictions are stored in `predict`.
# For each test sample, `res` holds one score per class (the log prior plus the
# sum of the log conditional probabilities of the observed feature values);
# the argmax of `res` is the predicted label.
num_test = y_test.shape[0]
num_features = X_train.shape[1]
predict = np.zeros((num_test, 1))
for row in range(num_test):
    res = np.zeros((classes, 1))
    for label in range(classes):
        # Start from the log prior, then add one log-likelihood term per feature.
        res[label] = P_y[label]
        for col in range(num_features):
            res[label] += Px_y[label, col, X_test[row, col]]
    predict[row] = np.argmax(res)
predict = predict.astype(int)

# Show the first few predictions next to the true labels for comparison.
print(predict[:5])
print(y_test[:5])