import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
warnings.filterwarnings('ignore')

# Load the abalone dataset and one-hot encode the categorical Sex column
abalone = pd.read_csv("鲍鱼.csv")
abalone.head()
sex_onehot = pd.get_dummies(abalone['Sex'], prefix='Sex')
abalone[sex_onehot.columns] = sex_onehot
abalone['ones'] = 1

# The age of an abalone is conventionally taken as Rings + 1.5
abalone['age'] = abalone['Rings'] + 1.5
y = abalone['age']

features_with_ones = ["Length", "Diameter", "Height", "Whole_weight", "Shucked_weight",
                      "Viscera_weight", "Shell_weight", "Sex_F", "Sex_M", "Sex_I", "ones"]
features_without_ones = ["Length", "Diameter", "Height", "Whole_weight", "Shucked_weight",
                         "Viscera_weight", "Shell_weight", "Sex_F", "Sex_M", "Sex_I"]
x = abalone[features_with_ones]
# Hold out 20% of the samples and fit a Ridge regression on the remaining features
ridge = Ridge(alpha=1.0)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=111)
ridge.fit(x_train[features_without_ones], y_train)
# Collect the fitted coefficients and the intercept into a single weight vector
w_ridge = []
w_ridge.extend(ridge.coef_)
w_ridge.append(ridge.intercept_)
w1 = []
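# A quick way to read the fitted model: pair each coefficient with its feature name.
# This is a minimal sketch added for illustration (the Series/print layout is an
# assumption, not part of the original script).
coef_table = pd.Series(ridge.coef_, index=features_without_ones)
print(coef_table.round(4))
print("intercept:", round(ridge.intercept_, 4))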
# Trace how the ridge coefficients change as the regularization strength alpha varies
alphas = np.logspace(-10, 10, 20)
coef = pd.DataFrame()
for alpha in alphas:
    ridge_clf = Ridge(alpha=alpha)
    ridge_clf.fit(x_train[features_without_ones], y_train)
    df = pd.DataFrame([ridge_clf.coef_], columns=x_train[features_without_ones].columns)
    df['alpha'] = alpha
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call
    coef = pd.concat([coef, df], ignore_index=True)
coef.round(decimals=2)
# Plot the coefficient path of every feature against alpha on a log-scaled x axis
plt.rcParams['font.sans-serif'] = ['Microsoft Yahei']   # font with CJK glyph support
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.dpi'] = 100
plt.figure(figsize=(9, 6))
for feature in x_train.columns[:-1]:   # skip the trailing 'ones' column
    plt.plot('alpha', feature, data=coef)
ax = plt.gca()
ax.set_xscale('log')
plt.legend(loc='upper right')
plt.xlabel(r'$\alpha$', fontsize=15)
plt.ylabel('Coefficient', fontsize=15)
plt.show()
# Re-split with the same seed, refit the ridge model, and predict over the full dataset
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=111)
ridge.fit(x_train[features_without_ones], y_train)
predict_value = ridge.predict(x[features_without_ones])
# Fit a plain linear regression for comparison; 'ones' is the last column,
# so iloc[:, :-1] selects the same features as features_without_ones
lr = LinearRegression()
lr.fit(x_train[features_without_ones], y_train)
y_test_pred_lr = lr.predict(x_test.iloc[:, :-1])
# Mean absolute error on the held-out test set
y_test_pred_lr = lr.predict(x_test.iloc[:, :-1])
print("Linear regression MAE: ", round(mean_absolute_error(y_test, y_test_pred_lr), 4))
y_test_pred_ridge = ridge.predict(x_test[features_without_ones])
print("Ridge regression MAE: ", round(mean_absolute_error(y_test, y_test_pred_ridge), 4))
# Mean squared error on the held-out test set
y_test_pred_lr = lr.predict(x_test.iloc[:, :-1])
print("Linear regression MSE: ", round(mean_squared_error(y_test, y_test_pred_lr), 4))
y_test_pred_ridge = ridge.predict(x_test[features_without_ones])
print("Ridge regression MSE: ", round(mean_squared_error(y_test, y_test_pred_ridge), 4))
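# r2_score is imported above but never used; a minimal sketch of the matching
# R^2 comparison (added here as a suggestion, not part of the original script):
print("Linear regression R2: ", round(r2_score(y_test, y_test_pred_lr), 4))
print("Ridge regression R2: ", round(r2_score(y_test, y_test_pred_ridge), 4))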
x1 = range(len(y))
# Line plot of the raw ages vs. the ridge predictions over the whole dataset
y1 = y
y2 = predict_value
plt.figure(figsize=(10, 6), dpi=80)
plt.title('Raw vs. predicted age')
plt.xlabel('Abalone index')
plt.ylabel('Age')
plt.plot(x1, y1, color='orange', label='Raw')
plt.plot(x1, y2, color='blue', label='Predict')
plt.grid(alpha=0.5)
plt.legend(loc='upper right')
plt.show()