4.线性回归
完整示例代码如下:
API
from sklearn.linear_model import LinearRegression

# Training data: two exam scores per student (features) and the
# corresponding final score (target).
features = [
    [80, 86], [82, 80], [85, 78], [90, 90],
    [86, 82], [82, 90], [78, 80], [92, 84],
]
targets = [84.2, 80.6, 80.1, 90, 83.2, 87.6, 79.4, 93.4]

# Fit an ordinary least-squares linear regression model.
model = LinearRegression()
model.fit(features, targets)

# Report the learned coefficients and a prediction for a new sample.
print("线性回归的系数:\n", model.coef_)
print("预测结果:\n", model.predict([[100, 80]]))
损失和优化
损失函数(预测点到实际点的距离的平方和)
优化算法
正规方程
正规方程推导
推导方式一
推导方式二
正规方程求解例子
梯度下降
由于存在多个极小值的情况,梯度下降不能保证取到最小值
梯度下降例子
单变量函数梯度下降
多变量函数梯度下降
梯度下降详细介绍
相关概念
推导过程
梯度算法种类
全梯度下降算法(FG)
随机梯度下降算法(SG)
小批量梯度下降算法(mini-batch)
随机平均梯度下降算法(SAG)
线性回归API
设 Y=kX+b,可以理解为,k:回归系数,b:偏置
总结对比
算法选择依据
波士顿房价预测案例
数据介绍
回归性能分析
代码实现
正规方程实现
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the Boston housing dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2; on newer
# versions substitute another dataset (e.g. fetch_california_housing).
data = load_boston()

# Hold out 20% of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2)

# Standardize features; the scaler is fitted on the training split only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Ordinary least-squares (normal-equation) linear regression.
model = LinearRegression()
model.fit(x_train, y_train)
print("这个模型的偏置是:\n", model.intercept_)
print("这个模型的系数是:\n", model.coef_)

# Evaluate with mean squared error on the held-out set.
y_pre = model.predict(x_test)
print("预测值是:\n", y_pre)
ret = mean_squared_error(y_test, y_pre)
print("均方误差:\n", ret)
梯度下降实现
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error

# Fetch the Boston housing data.
# NOTE(review): load_boston was removed in scikit-learn 1.2.
dataset = load_boston()
features, labels = dataset.data, dataset.target

# 80/20 train/test split.
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)

# Feature engineering: zero-mean / unit-variance scaling,
# fitted on the training data only and applied to both splits.
std = StandardScaler()
x_train = std.fit_transform(x_train)
x_test = std.transform(x_test)

# Linear regression fitted by stochastic gradient descent.
estimator = SGDRegressor(max_iter=1000)
estimator.fit(x_train, y_train)
print("这个模型的偏置是:\n", estimator.intercept_)
print("这个模型的系数是:\n", estimator.coef_)

# Evaluation: predictions and mean squared error on the test split.
y_pre = estimator.predict(x_test)
print("预测值是:\n", y_pre)
ret = mean_squared_error(y_test, y_pre)
print("均方误差:\n", ret)
欠拟合和过拟合
原因及解决方法
正则化
正则化类别
Ridge Regression 岭回归
Lasso Regression Lasso 回归
Elastic Net 弹性网络
如何选择
Early Stopping(了解)
使用岭回归改进线性回归
API
Ridge(岭回归)代码实现
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Load data (NOTE(review): load_boston was removed in scikit-learn 1.2).
housing = load_boston()

# Hold out 20% of the data for testing.
x_train, x_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.2)

# Standardize: fit on the training split, transform both splits.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Ridge regression (L2-regularized) with regularization strength alpha=1.0.
reg = Ridge(alpha=1.0)
reg.fit(x_train, y_train)
print("这个模型的偏置是:\n", reg.intercept_)
print("这个模型的系数是:\n", reg.coef_)

# Evaluate on the held-out data with mean squared error.
y_pre = reg.predict(x_test)
print("预测值是:\n", y_pre)
ret = mean_squared_error(y_test, y_pre)
print("均方误差:\n", ret)
RidgeCV 代码实现
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error

# Load data (NOTE(review): load_boston was removed in scikit-learn 1.2).
housing = load_boston()

# 80/20 train/test split.
x_train, x_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.2)

# Standardization fitted on the training portion only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Ridge regression with built-in cross-validation over candidate alphas.
reg = RidgeCV(alphas=[0.1, 1.0, 10.0])
reg.fit(x_train, y_train)
print("这个模型的偏置是:\n", reg.intercept_)
print("这个模型的系数是:\n", reg.coef_)

# Evaluate with mean squared error on the test split.
y_pre = reg.predict(x_test)
print("预测值是:\n", y_pre)
ret = mean_squared_error(y_test, y_pre)
print("均方误差:\n", ret)
sklearn模型保存和加载
API
案例
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
# FIX: `from sklearn.externals import joblib` was removed in scikit-learn
# 0.23; the standalone joblib package is the supported replacement.
import joblib

# Load the dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2.
boston = load_boston()
# Train/test split; random_state=22 makes the split reproducible so the
# persisted model can later be evaluated against the same test set.
x_train,x_test,y_train,y_test = train_test_split(boston.data,boston.target,test_size=0.2,random_state=22)
## Feature engineering: standardization, fitted on the training split only.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)
# Fit ridge regression with cross-validated selection of alpha.
estimator = RidgeCV(alphas=[0.1, 1.0, 10.0])
estimator.fit(x_train,y_train)
# Persist the fitted model to disk for later reuse.
joblib.dump(estimator,"./test.pkl")
print("这个模型的偏置是:\n",estimator.intercept_)
print("这个模型的系数是:\n",estimator.coef_)
# Model evaluation: mean squared error on the held-out split.
y_pre = estimator.predict(x_test)
print("预测值是:\n",y_pre)
ret = mean_squared_error(y_test,y_pre)
print("均方误差:\n",ret)
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
# FIX: `from sklearn.externals import joblib` was removed in scikit-learn
# 0.23; the standalone joblib package is the supported replacement.
import joblib

# FIX: the original snippet evaluated on x_test/y_test without ever
# defining them. Rebuild the same test split used at training time:
# the same random_state=22 reproduces the split, and the scaler is
# refitted on the training portion exactly as in the saving script.
# NOTE(review): load_boston was removed in scikit-learn 1.2.
boston = load_boston()
x_train,x_test,y_train,y_test = train_test_split(boston.data,boston.target,test_size=0.2,random_state=22)
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# Load the previously persisted model from disk.
estimator = joblib.load("./test.pkl")
print("这个模型的偏置是:\n",estimator.intercept_)
print("这个模型的系数是:\n",estimator.coef_)
# Model evaluation: mean squared error on the held-out split.
y_pre = estimator.predict(x_test)
print("预测值是:\n",y_pre)
ret = mean_squared_error(y_test,y_pre)
print("均方误差:\n",ret)
更多推荐
已为社区贡献7条内容
所有评论(0)