# 岭回归 (ridge regression) | Strive-NUAA

# -*- coding: utf-8 -*-
# @Time : 2022/6/5 15:11
# @Author :
# @File : ling.py
# @Software : PyCharm

import pandas as pd
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression


# 1. Data preprocessing
# Silence all warnings for the rest of the run.
warnings.filterwarnings('ignore')

# Load the abalone dataset from the current working directory.
abalone = pd.read_csv("鲍鱼.csv")
abalone.head()  # result is discarded; leftover from interactive inspection

# One-hot encode Sex so it can enter the models as dummy variables.
sex_onehot = pd.get_dummies(abalone['Sex'], prefix='Sex')
abalone[sex_onehot.columns] = sex_onehot

# Constant column, plus the regression target: age = Rings + 1.5.
abalone['ones'] = 1
abalone['age'] = abalone['Rings'] + 1.5
y = abalone['age']

# Predictor columns. The Sex_* names assume the Sex column holds exactly the
# values F, M and I -- verify against the CSV.
features_without_ones = [
    "Length",
    "Diameter",
    "Height",
    "Whole_weight",
    "Shucked_weight",
    "Viscera_weight",
    "Shell_weight",
    'Sex_F',
    'Sex_M',
    'Sex_I',
]
# Same predictors with the constant column appended as the last entry.
features_with_ones = features_without_ones + ["ones"]
x = abalone[features_with_ones]

# 2. Model fitting
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=111
)

# Fit ridge on the predictor columns only (the 'ones' column is left out).
ridge = Ridge(alpha=1.0)
ridge.fit(x_train[features_without_ones], y_train)

# Fitted weights: the coefficients followed by the intercept.
w_ridge = [*ridge.coef_, ridge.intercept_]
w1 = []

# 3. Parameter selection
# Fit one ridge model per alpha on a log-spaced grid (1e-10 .. 1e10, 20 points)
# and record the coefficients so the ridge trace can be plotted afterwards.
alphas = np.logspace(-10, 10, 20)
x_train_features = x_train[features_without_ones]

# Collect the coefficient vectors first and build the frame once.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# copied the whole frame on every iteration.
coef_rows = []
for alpha in alphas:
    ridge_clf = Ridge(alpha=alpha)
    ridge_clf.fit(x_train_features, y_train)
    coef_rows.append(ridge_clf.coef_)

# One row per alpha: a column per predictor plus the alpha that produced it.
coef = pd.DataFrame(coef_rows, columns=x_train_features.columns)
coef['alpha'] = alphas

# 4. Ridge trace visualisation

# Plot settings: a font for the Chinese labels, ASCII minus signs on the
# ticks, and the figure resolution.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft Yahei'],
    'axes.unicode_minus': False,
    'figure.dpi': 100,
})
plt.figure(figsize=(9, 6))

# One curve per predictor: coefficient value against alpha.
for feature in features_without_ones:
    plt.plot('alpha', feature, data=coef)

# Alpha spans 20 orders of magnitude, so use a logarithmic x axis.
ax = plt.gca()
ax.set_xscale('log')

plt.legend(loc='upper right')
plt.xlabel(r'$\alpha$', fontsize=15)
plt.ylabel('系数', fontsize=15)
plt.show()

# 5. Model prediction
# `ridge` is already fitted in step 2 on this exact split (same x, y,
# test_size and random_state=111). Re-splitting and re-fitting here would
# reproduce the same model, so the fitted estimator is reused directly.
# Predictions cover every row (train and test) for the full-series plot.
predict_value = ridge.predict(x[features_without_ones])

# 6. Linear regression, used as a baseline to compare against ridge
lr = LinearRegression()
lr.fit(x_train[features_without_ones], y_train)
# Select the test columns by name, exactly as for training, instead of a
# positional slice that only works while 'ones' is the last column.
y_test_pred_lr = lr.predict(x_test[features_without_ones])

# 7. Evaluation
#   7.1) Metric comparison on the held-out test set
# Predict once per model and reuse the result for every metric. Columns are
# selected by name so both models are scored on the same predictors they
# were trained on.
y_test_pred_lr = lr.predict(x_test[features_without_ones])
y_test_pred_ridge = ridge.predict(x_test[features_without_ones])

# Mean absolute error
print("线性回归mae: ", round(mean_absolute_error(y_test, y_test_pred_lr), 4))  # linear regression
print("岭回归mae: ", round(mean_absolute_error(y_test, y_test_pred_ridge), 4))  # ridge regression

# Mean squared error
print("线性回归mse: ", round(mean_squared_error(y_test, y_test_pred_lr), 4))  # linear regression
print("岭回归mse: ", round(mean_squared_error(y_test, y_test_pred_ridge), 4))  # ridge regression







#  7.2) Line chart of raw vs. predicted age
# x axis is the row position; y1 is the observed age, y2 the ridge prediction.
x1 = range(len(y))
y1 = y
y2 = predict_value

plt.figure(figsize=(10, 6), dpi=80)

# Draw both series over the same x positions.
for series, colour, series_label in (
    (y1, 'orange', 'Raw'),
    (y2, 'blue', 'Predict'),
):
    plt.plot(x1, series, color=colour, label=series_label)

plt.title(' raw & Predict折线图')
plt.xlabel('鲍鱼序号')
plt.ylabel('年龄')
# Add a grid; alpha is its transparency.
plt.grid(alpha=0.5)
# Add the legend.
plt.legend(loc='upper right')
plt.show()