# Locally weighted linear regression (局部加权线性回归)
# -*- coding: utf-8 -*-
# @Time : 2022/4/29 17:01
# @Author :
# @File : bao_1.py
# @Software : PyCharm
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import csv

from numpy import linalg


def multiplyList(myList):
    """Report whether *myList* contains at least one element equal to zero.

    Despite the name, nothing is multiplied: the elements are scanned in
    order and the scan stops at the first one that compares equal to 0.

    Args:
        myList: Iterable of values comparable with ``0``.

    Returns:
        True if some element equals zero, False otherwise (including for an
        empty iterable).
    """
    return any(item == 0 for item in myList)
def loadDataSet(fileName):
    """Load a tab-separated numeric file into feature rows and labels.

    Every non-empty line is parsed as a list of floats. The last value of a
    line is its label (the original comments describe it as the age value)
    and the values before it form the feature vector. The first feature of
    each row is then recoded: ``0`` becomes ``2`` and ``-1`` becomes ``3``.

    Blank lines are skipped instead of crashing on the missing label.

    Args:
        fileName: Path of the tab-delimited text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of per-sample feature lists
        and a list with the matching labels.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        OSError: If the file cannot be opened.
    """
    dataMat = []
    labelMat = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(fileName, 'r', newline='') as f:
        for row in csv.reader(f, delimiter='\t'):
            if not row:
                # csv.reader yields [] for a blank line; there is nothing to
                # parse and no label to take from it.
                continue
            values = [float(field) for field in row]
            features = values[:-1]
            if features:
                # NOTE(review): presumably this keeps the first column free
                # of zeros, since score() skips rows containing a zero;
                # confirm against the data set's encoding.
                if features[0] == 0:
                    features[0] = 2
                elif features[0] == -1:
                    features[0] = 3
            dataMat.append(features)
            labelMat.append(values[-1])
    return dataMat, labelMat
def get_w(X, Y):
    """Fit ordinary linear regression with the normal equation.

    Computes ``w = (X^T X)^-1 X^T Y`` by explicitly inverting ``X^T X``.

    Args:
        X: 2-D array of samples (one row per sample).
        Y: Array of target values, one per row of ``X``.

    Returns:
        The least-squares parameter vector ``w``.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` is singular.
    """
    # Inverse of the Gram matrix X^T X.
    gram_inverse = np.linalg.inv(X.T.dot(X))
    # (X^T X)^-1 X^T, applied to the targets last.
    projector = gram_inverse.dot(X.T)
    return projector.dot(Y)
def get_w_lwlr(X, Y, x_test, k=0.02):
    """Fit locally weighted linear regression around one query point.

    Each training sample ``X[j]`` gets the Gaussian-kernel weight
    ``exp(-||x_test - X[j]||^2 / (2 * k^2))`` and the weighted normal
    equation ``(X^T W X) w = X^T W Y`` is solved for ``w``.

    The weights are kept as a vector and applied by broadcasting, so no
    dense m-by-m diagonal matrix is allocated for m training samples.

    Args:
        X: 2-D array of training samples (one row per sample).
        Y: Array of training targets, one per row of ``X``.
        x_test: Query point (sequence or 1-D array) with one value per
            column of ``X``.
        k: Kernel bandwidth; smaller values make the weights decay faster
            with distance. Defaults to 0.02, the value that used to be
            hard-coded.

    Returns:
        The parameter vector ``w`` fitted for ``x_test``.

    Raises:
        ValueError: If ``k`` is zero.
        numpy.linalg.LinAlgError: If the weighted matrix ``X^T W X`` is
            singular (e.g. every weight but one underflows to zero).
    """
    if k == 0:
        raise ValueError("k must be non-zero")
    X = np.asarray(X, dtype=float)
    query = np.asarray(x_test, dtype=float)

    # Squared Euclidean distance from the query point to every sample.
    offsets = X - query
    sq_dist = (offsets ** 2).sum(axis=1)
    # Weights decay exponentially with that distance.
    weights = np.exp(sq_dist / (-2.0 * k ** 2))

    # X^T W without materialising W: scale column j of X^T by weights[j].
    xt_w = X.T * weights
    # Solve the weighted normal equation instead of forming the inverse.
    return np.linalg.solve(xt_w.dot(X), xt_w.dot(Y))

def score(X, Y, testx, testy):
    """Compare locally weighted and ordinary linear regression on test data.

    Up to the first 1000 test samples are considered. A sample is left out
    when its index is in the hard-coded exclusion set or when
    ``multiplyList`` reports a zero among its features. For every remaining
    sample the true value and both predictions are printed; afterwards the
    R^2 of both models and the MAE / MSE of the locally weighted model are
    printed.

    All averages use the number of samples actually evaluated, and the MAE
    line reports the mean absolute error while the MSE line reports the mean
    squared error.

    Args:
        X: 2-D array of training samples.
        Y: Array of training targets.
        testx: Sequence of test feature vectors.
        testy: Sequence of true values matching ``testx``.

    Returns:
        None. Results are written to standard output.

    Raises:
        ValueError: If no test sample is left to evaluate.
    """
    # NOTE(review): data-set specific indices that are left out of the
    # evaluation; the reason is not visible in this file.
    excluded = {165, 276, 277, 355, 510, 762, 999}
    limit = min(1000, len(testx))
    eval_idx = [
        i for i in range(limit)
        if i not in excluded and not multiplyList(testx[i])
    ]
    if not eval_idx:
        raise ValueError("score: no samples left to evaluate")
    n_eval = len(eval_idx)

    # Mean of the true values over exactly the evaluated samples; this is
    # the baseline predictor for R^2.
    mean_y = sum(testy[i] for i in eval_idx) / n_eval

    # The ordinary least-squares fit does not depend on the query point, so
    # it is computed once instead of once per sample.
    w_ols = get_w(X, Y)

    sse_lwlr = 0.0  # sum of squared errors, locally weighted model
    sse_ols = 0.0   # sum of squared errors, ordinary model
    sst = 0.0       # total sum of squares around mean_y
    sae_lwlr = 0.0  # sum of absolute errors, locally weighted model

    for i in eval_idx:
        x_i = np.array(testx[i])
        y_true = testy[i]

        w_lwlr = get_w_lwlr(X, Y, testx[i])
        y_lwlr = np.dot(x_i, w_lwlr.T)

        print(testx[i], i)
        print("真实值:", y_true)

        y_ols = np.dot(x_i, w_ols.T)

        sse_lwlr += (y_lwlr - y_true) ** 2
        sse_ols += (y_ols - y_true) ** 2
        sst += (mean_y - y_true) ** 2
        sae_lwlr += abs(y_lwlr - y_true)

        print("局部加权预测值:", y_lwlr)
        print("普通线性回归预测值:", y_ols)

    print("局部加权线性回归R^2分析:", 1 - sse_lwlr / sst)
    print("普通线性回归R^2分析:", 1 - sse_ols / sst)
    print("mae平均绝对误差:", sae_lwlr / n_eval)
    print("mse均方误差:", sse_lwlr / n_eval)
def main():
    """Load ``abalone.txt``, print the features and score both models.

    The same samples serve as training data (as arrays) and as test data
    (as the plain lists returned by the loader).
    """
    features, labels = loadDataSet('abalone.txt')
    print(features)
    train_x = np.array(features)
    train_y = np.array(labels).T
    score(train_x, train_y, features, labels)


if __name__ == '__main__':
    main()