0%

岭回归

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# -*- coding: utf-8 -*-
# @Time : 2022/6/5 15:11
# @Author :
# @File : ling.py
# @Software : PyCharm

import pandas as pd
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression


# 1. Data preprocessing
# One-hot encode Sex so the model can take it in as dummy variables.
warnings.filterwarnings('ignore')
abalone = pd.read_csv("鲍鱼.csv")
abalone.head()  # preview only; the result is discarded when run as a script

sex_onehot = pd.get_dummies(abalone['Sex'], prefix='Sex')
for dummy_column in sex_onehot.columns:
    abalone[dummy_column] = sex_onehot[dummy_column]

# Constant column of ones, and the regression target: age = Rings + 1.5.
abalone = abalone.assign(ones=1, age=lambda frame: frame['Rings'] + 1.5)
y = abalone['age']

# Predictor columns; the "with ones" variant carries the constant column last.
features_without_ones = [
    "Length",
    "Diameter",
    "Height",
    "Whole_weight",
    "Shucked_weight",
    "Viscera_weight",
    "Shell_weight",
    'Sex_F',
    'Sex_M',
    'Sex_I',
]
features_with_ones = features_without_ones + ["ones"]
x = abalone[features_with_ones]

# 2. Model fitting
# Hold out 20% of the rows for testing; random_state pins the split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=111
)
ridge = Ridge(alpha=1.0)
ridge.fit(x_train[features_without_ones], y_train)

# Fitted coefficients followed by the intercept, gathered into one flat list.
w_ridge = [*ridge.coef_, ridge.intercept_]
w1 = []

# 3. Hyper-parameter selection
# Fit one ridge model per regularisation strength and record its coefficients,
# giving one row per alpha for the ridge-trace plot.
alphas = np.logspace(-10, 10, 20)  # 20 log-spaced values from 1e-10 to 1e10
train_features = x_train[features_without_ones]  # loop-invariant, selected once
coef_rows = []
for alpha in alphas:
    ridge_clf = Ridge(alpha=alpha)
    ridge_clf.fit(train_features, y_train)
    df = pd.DataFrame([ridge_clf.coef_], columns=train_features.columns)
    df['alpha'] = alpha
    coef_rows.append(df)
# DataFrame.append was removed in pandas 2.0 (and copied the whole table on
# every call); collect the rows and concatenate them once instead.
coef = pd.concat(coef_rows, ignore_index=True)
# Preview only: the rounded copy is intentionally not assigned back, because
# rounding to 2 decimals would turn the smallest alphas into 0.
coef.round(decimals=2)

# 4. Ridge-trace visualisation
# Plot every coefficient against alpha on a logarithmic x axis.
plt.rcParams['font.sans-serif'] = ['Microsoft Yahei']  # font for the Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable
plt.rcParams['figure.dpi'] = 100
plt.figure(figsize=(9, 6))
# Iterate over the named predictor list rather than slicing columns by
# position, so the plot does not depend on 'ones' being the last column.
for feature in features_without_ones:
    plt.plot('alpha', feature, data=coef)
ax = plt.gca()
ax.set_xscale('log')
plt.legend(loc='upper right')
plt.xlabel(r'$\alpha$', fontsize=15)
plt.ylabel('系数', fontsize=15)
plt.show()

# 5. Model prediction
# Same split parameters as before (test_size=0.2, random_state=111).
split_parts = train_test_split(x, y, test_size=0.2, random_state=111)
x_train, x_test, y_train, y_test = split_parts
training_inputs = x_train[features_without_ones]
ridge.fit(training_inputs, y_train)
# Predictions cover every row of x, not only the held-out rows.
predict_value = ridge.predict(x[features_without_ones])

# 6. Linear regression, fitted for comparison with the ridge model
lr = LinearRegression()
lr.fit(training_inputs, y_train)
# Every column of x_test except the last one is passed to the model.
y_test_pred_lr = lr.predict(x_test.iloc[:, :-1])

# 7. Evaluation
# 7.1) Compare error metrics of both models on the held-out test rows.
# Predictions are computed once and reused for every metric; the predictors
# are selected by name so this does not rely on 'ones' being the last column.
y_test_pred_lr = lr.predict(x_test[features_without_ones])
y_test_pred_ridge = ridge.predict(x_test[features_without_ones])

# Output order: mae for linear then ridge, followed by mse for linear then ridge.
for metric_name, metric in (("mae", mean_absolute_error), ("mse", mean_squared_error)):
    print(f"线性回归{metric_name}: ", round(metric(y_test, y_test_pred_lr), 4))  # linear regression
    print(f"岭回归{metric_name}: ", round(metric(y_test, y_test_pred_ridge), 4))  # ridge regression







# 7.2) Line chart of the raw ages against the predicted ages
plt.figure(figsize=(10, 6), dpi=80)
plt.title(' raw & Predict折线图')
plt.xlabel('鲍鱼序号')
plt.ylabel('年龄')

# Both series are drawn against the row position 0..len(y)-1.
x1 = range(len(y))
y1, y2 = y, predict_value
for series, colour, tag in ((y1, 'orange', 'Raw'), (y2, 'blue', 'Predict')):
    plt.plot(x1, series, color=colour, label=tag)

# Add a grid; alpha sets its transparency.
plt.grid(alpha=0.5)
# Add the legend.
plt.legend(loc='upper right')
plt.show()