1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
|
import matplotlib import matplotlib.pyplot as plt import numpy as np import pandas as pd import csv
from numpy import linalg
def multiplyList(myList):
    """Return True if any element of ``myList`` equals zero, otherwise False.

    Despite the name, nothing is multiplied: this only detects zeros.
    An empty sequence yields False.
    """
    return any(x == 0 for x in myList)


def loadDataSet(fileName):
    """Load a tab-separated numeric file into feature rows and labels.

    Every field is parsed with ``float``. The last column of each row is the
    label; the remaining columns are the features. In the first feature
    column the value 0 is rewritten to 2 and -1 to 3.

    Args:
        fileName: Path of the tab-delimited text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of feature lists and a list
        of label floats, in file order.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    # NOTE(review): presumably this keeps a categorical first column from
    # tripping the zero check in multiplyList -- confirm with the data owner.
    first_col_remap = {0: 2, -1: 3}

    dataMat = []
    labelMat = []
    # newline='' lets the csv module do its own newline handling.
    with open(fileName, 'r', newline='') as f:
        for row in csv.reader(f, delimiter='\t'):
            if not row:
                # Blank lines come back as empty lists; indexing them would
                # raise IndexError, so skip them.
                continue
            values = [float(x) for x in row]
            features = values[:-1]
            if features and features[0] in first_col_remap:
                features[0] = first_col_remap[features[0]]
            dataMat.append(features)
            labelMat.append(values[-1])
    return dataMat, labelMat


def get_w(X, Y):
    """Solve ordinary least squares through the normal equations.

    Computes ``w = (X^T X)^-1 X^T Y``.

    Args:
        X: 2-D NumPy array with one sample per row.
        Y: NumPy array of targets, one per row of ``X``.

    Returns:
        The coefficient array ``w``.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` is singular.
    """
    gram_inv = np.linalg.inv(np.dot(X.T, X))
    return np.dot(np.dot(gram_inv, X.T), Y)


def get_w_lwlr(X, Y, x_test, k=0.02):
    """Solve locally weighted linear regression around one query point.

    Each training row ``X[j]`` receives the weight
    ``exp(-||x_test - X[j]||^2 / (2 * k^2))`` and the weighted normal
    equations ``w = (X^T W X)^-1 X^T W Y`` are solved.

    Args:
        X: 2-D NumPy array with one sample per row.
        Y: NumPy array of targets, one per row of ``X``.
        x_test: Query point (sequence or array) with one value per column
            of ``X``.
        k: Kernel bandwidth; smaller values concentrate the weight on the
            rows closest to ``x_test``. Defaults to 0.02.

    Returns:
        The coefficient array ``w`` fitted for ``x_test``.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T W X`` is singular, which can
            happen when the weights of all but a few rows underflow to 0.
    """
    diffs = np.asarray(x_test) - X
    sq_dist = (diffs ** 2).sum(axis=1)
    weights = np.exp(sq_dist / (-2 * k ** 2))

    # Scaling columns by the weight vector is the same as multiplying by
    # diag(weights), without allocating an m x m matrix on every call.
    X_w = np.linalg.inv(np.dot(X.T * weights, X))
    return np.dot(np.dot(X_w, X.T) * weights, Y)
def score(X, Y, testx, testy, n_eval=1000,
          skip_indices=(165, 276, 277, 355, 762, 999, 510)):
    """Print per-row predictions and summary metrics for both regressors.

    For each evaluated row this fits a locally weighted regression around
    that row (``get_w_lwlr``) and applies the single ordinary least-squares
    fit (``get_w``), printing the row, its true value and both predictions.
    Afterwards it prints R^2 for both models and the mean absolute / mean
    squared error of the locally weighted predictions.

    A row is evaluated when its index is below ``n_eval``, is not listed in
    ``skip_indices`` and the row contains no zero (``multiplyList``).

    Args:
        X: 2-D NumPy array of training features.
        Y: NumPy array of training targets.
        testx: Sequence of feature rows to evaluate.
        testy: Sequence of true values aligned with ``testx``.
        n_eval: Evaluate only the first ``n_eval`` rows (default 1000).
        skip_indices: Row indices to leave out of the evaluation.
            NOTE(review): the default list is carried over unchanged; the
            reason these particular rows are excluded is not visible here.

    Returns:
        None. All results are written to stdout.

    Raises:
        ValueError: If no row is left to evaluate.
    """
    excluded = set(skip_indices)
    limit = min(n_eval, len(testx))
    eval_rows = [
        i for i in range(limit)
        if i not in excluded and not multiplyList(testx[i])
    ]
    if not eval_rows:
        raise ValueError("score: no rows left to evaluate")

    # Use the number of rows actually evaluated for every average, so rows
    # dropped by the zero check do not skew the mean or the error metrics.
    count = len(eval_rows)
    mean_y = sum(testy[i] for i in eval_rows) / count

    # The global least-squares fit does not depend on the row; compute once.
    w_ols = get_w(X, Y)

    sq_err_lwlr = 0   # sum of squared errors, locally weighted model
    sq_err_ols = 0    # sum of squared errors, ordinary least squares
    sq_dev_mean = 0   # total sum of squares around the mean
    abs_err_lwlr = 0  # sum of absolute errors, locally weighted model

    for i in eval_rows:
        x_i = np.array(testx[i])
        y_true = testy[i]

        w_lwlr = get_w_lwlr(X, Y, testx[i])
        y2 = np.dot(x_i, w_lwlr.T)
        print(testx[i], i)
        print("真实值:", y_true)
        y1 = np.dot(x_i, w_ols.T)

        sq_err_lwlr += (y2 - y_true) ** 2
        sq_err_ols += (y1 - y_true) ** 2
        sq_dev_mean += (mean_y - y_true) ** 2
        abs_err_lwlr += abs(y2 - y_true)

        print("局部加权预测值:", y2)
        print("普通线性回归预测值:", y1)

    print("局部加权线性回归R^2分析:", 1 - sq_err_lwlr / sq_dev_mean)
    print("普通线性回归R^2分析:", 1 - sq_err_ols / sq_dev_mean)
    # MAE is the mean absolute error and MSE the mean squared error; the
    # two values were previously printed under each other's label.
    print("mae平均绝对误差:", abs_err_lwlr / count)
    print("mse均方误差:", sq_err_lwlr / count)
def main():
    """Load 'abalone.txt', print its feature rows and run the evaluation.

    The feature rows and labels are passed to ``score`` twice: as NumPy
    arrays for fitting and as the original lists for evaluation.
    """
    features, labels = loadDataSet('abalone.txt')
    print(features)

    train_x = np.array(features)
    train_y = np.array(labels).T
    score(train_x, train_y, features, labels)
# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__': main()
|