数据挖掘——梯度下降算法解决糖尿病问题
一、问题描述实现线性回归的梯度下降算法,解决糖尿病预测问题,输出mse和的值二、实验目的熟练的掌握线性回归的梯度下降算法应用三、实验内容导入数据from sklearn.datasets import load_diabetesX, y = load_diabetes(return_X_y = True) #获取数据2、对数据进行训练,标准化处理y = y.reshape((442, 1))...
·
一、问题描述
实现线性回归的梯度下降算法,解决糖尿病预测问题,输出mse和R2的值
二、实验目的
熟练地掌握线性回归梯度下降算法的应用
三、实验内容
1、导入数据
# Load the diabetes regression dataset through scikit-learn's dataset loader.
from sklearn.datasets import load_diabetes
X, y = load_diabetes(return_X_y = True) # feature matrix X and target vector y
2、对数据进行标准化处理,并训练模型
# Turn the 1-D target vector into a column vector.
# NOTE(review): the row count 442 is hard-coded, so this only works for this dataset size.
y = y.reshape((442, 1))
# Hold out 40% of the samples for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)
# NOTE(review): each call fits its own scaler, so the test split is scaled with
# its own statistics rather than the training statistics -- confirm this is intended.
X_train = process_features(X_train) # standardize the training features
X_test = process_features(X_test) # standardize the test features
# Train by gradient descent: eta is the step size, epsilon the gradient-norm stopping threshold.
model = LinearRegression()
model.fit(X_train, y_train, eta=0.001, epsilon=0.0001)
3、预测数据
# Predict targets for the held-out test features with the fitted model.
y_pred = model.predict(X_test)
4、求均方误差和R2
# Mean squared error of the predictions on the test split.
mse = mean_squared_error(y_test, y_pred)
# Coefficient of determination (R^2) on the same split.
score = r2_score(y_test, y_pred)
# Print both metrics on one line.
print("mse={} andr2={}".format(mse,score))
5、图形化显示
#图像化显示函数
def printLine(x_name, y_name, title, X, Y):
    """Plot two indexed series on one figure and display it.

    Each series is drawn twice: once as small markers and once as a connecting
    line. ``X`` is drawn in blue and labelled ``line1``; ``Y`` is drawn in red
    and labelled ``line2``.

    Args:
        x_name: Label for the x axis.
        y_name: Label for the y axis.
        title: Figure title.
        X: 2-D array; column 0 holds x coordinates, column 1 holds y values.
        Y: 2-D array with the same column layout as ``X``.

    Returns:
        Always ``0``.
    """
    plt.figure(1)
    plt.plot(X[:, 0], X[:, 1], 'bo', ms=3)          # blue markers, first series
    plt.plot(X[:, 0], X[:, 1], 'b', label='line1')  # blue line, first series
    plt.plot(Y[:, 0], Y[:, 1], 'ro', ms=3)          # red markers, second series
    plt.plot(Y[:, 0], Y[:, 1], 'r', label='line2')  # red line, second series
    # `font` is the module-level FontProperties used to render the CJK labels.
    plt.xlabel(x_name, fontproperties=font)
    plt.ylabel(y_name, fontproperties=font)
    plt.title(title, fontproperties=font)
    # Without this call the labels given above are never shown.
    plt.legend()
    plt.show()
    return 0
四、实验结果及分析
1. 糖尿病数据
2. 运行结果
五、完整代码
机器学习GitHub:https://github.com/wanglei18/machine_learning
import numpy as np
class LinearRegression:
    """Least-squares linear regression trained with batch gradient descent.

    The model is ``y ~ X.dot(w)`` with no separate intercept term; pass a
    constant column in ``X`` if an intercept is wanted.
    """

    def fit(self, X, y, eta, epsilon, max_iter=None):
        """Fit the weight vector by gradient descent on the mean squared error.

        Args:
            X: 2-D array of shape ``(m, n)``.
            y: Target column vector of shape ``(m, 1)``.
            eta: Step size (learning rate).
            epsilon: Training stops once the 2-norm of the gradient falls
                below this value.
            max_iter: Optional cap on the number of update steps. ``None``
                (the default) iterates until the gradient criterion is met.

        Raises:
            ValueError: If the gradient becomes NaN or infinite, which means
                the iteration diverged (typically ``eta`` is too large).
        """
        m, n = X.shape
        w = np.zeros((n, 1))
        steps = 0
        while max_iter is None or steps < max_iter:
            residual = X.dot(w) - y
            gradient = 2 * X.T.dot(residual) / m
            # A non-finite gradient can never satisfy the stopping test below,
            # so fail loudly instead of looping forever or dying inside norm().
            if not np.all(np.isfinite(gradient)):
                raise ValueError(
                    "gradient descent diverged (non-finite gradient); "
                    "try a smaller eta"
                )
            w = w - eta * gradient
            if np.linalg.norm(gradient, 2) < epsilon:
                break
            steps += 1
        self.w = w

    def predict(self, X):
        """Return the predictions ``X.dot(w)`` using the fitted weights."""
        return X.dot(self.w)
import numpy as np
import matplotlib.pyplot as plt
from machine_learning.homework.libs.grade import LinearRegression
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from matplotlib.font_manager import FontProperties #步骤一
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=15) #步骤二
#将一维数组变成二维数组
def addLine(X):
    """Prepend a 1-based row-index column to ``X``.

    The index column (1.0, 2.0, ..., number of rows) serves as the x
    coordinate when the values are plotted.

    Args:
        X: Array whose first dimension is the number of samples.

    Returns:
        A 2-D array whose first column is the row index and whose remaining
        columns are the values of ``X``.
    """
    length = X.shape[0]
    # Build the index column in one step; float dtype matches the np.ones-based
    # column the loop version produced.
    index = np.arange(1, length + 1, dtype=float).reshape(length, 1)
    return np.c_[index, X]
#图像化显示函数
def printLine(x_name, y_name, title, X, Y):
    """Plot two indexed series on one figure and display it.

    Each series is drawn twice: once as small markers and once as a connecting
    line. ``X`` is drawn in blue and labelled ``line1``; ``Y`` is drawn in red
    and labelled ``line2``.

    Args:
        x_name: Label for the x axis.
        y_name: Label for the y axis.
        title: Figure title.
        X: 2-D array; column 0 holds x coordinates, column 1 holds y values.
        Y: 2-D array with the same column layout as ``X``.

    Returns:
        Always ``0``.
    """
    plt.figure(1)
    plt.plot(X[:, 0], X[:, 1], 'bo', ms=3)          # blue markers, first series
    plt.plot(X[:, 0], X[:, 1], 'b', label='line1')  # blue line, first series
    plt.plot(Y[:, 0], Y[:, 1], 'ro', ms=3)          # red markers, second series
    plt.plot(Y[:, 0], Y[:, 1], 'r', label='line2')  # red line, second series
    # `font` is the module-level FontProperties used to render the CJK labels.
    plt.xlabel(x_name, fontproperties=font)
    plt.ylabel(y_name, fontproperties=font)
    plt.title(title, fontproperties=font)
    # Without this call the labels given above are never shown.
    plt.legend()
    plt.show()
    return 0
#求均方误差
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    The squared differences are averaged along axis 0, so column-vector
    inputs of shape ``(n, 1)`` yield an array of shape ``(1,)``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Raises:
        ValueError: If the two inputs differ in shape. Without this check a
            ``(n, 1)`` / ``(n,)`` pair would broadcast to ``(n, n)`` and
            silently give a wrong answer.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                y_true.shape, y_pred.shape
            )
        )
    return np.average((y_true - y_pred) ** 2, axis=0)
#求R2决定系数
def r2_score(y_true, y_pred):
    """Return the coefficient of determination R^2.

    Computed as ``1 - SS_res / SS_tot`` with both sums taken along axis 0, so
    column-vector inputs of shape ``(n, 1)`` yield an array of shape ``(1,)``.
    If ``y_true`` is constant along axis 0, ``SS_tot`` is zero and the result
    is not finite.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Raises:
        ValueError: If the two inputs differ in shape. Without this check a
            ``(n, 1)`` / ``(n,)`` pair would broadcast to ``(n, n)`` and
            silently give a wrong answer.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                y_true.shape, y_pred.shape
            )
        )
    residual_ss = ((y_true - y_pred) ** 2).sum(axis=0)
    total_ss = ((y_true - np.average(y_true, axis=0)) ** 2).sum(axis=0)
    return 1 - residual_ss / total_ss
#函数预期
def process_features(X, scaler=None):
    """Standardize the columns of ``X`` and prepend a column of ones.

    Args:
        X: 2-D feature matrix with one row per sample.
        scaler: Optional already-fitted object exposing ``transform(X)``.
            When given, it is used as-is, so a second data set can be scaled
            with statistics learned elsewhere. When ``None`` (the default), a
            new ``StandardScaler`` is fitted on ``X`` itself.

    Returns:
        A 2-D array with one more column than ``X``: a leading column of ones
        followed by the scaled features.
    """
    if scaler is None:
        X = StandardScaler().fit_transform(X)
    else:
        X = scaler.transform(X)
    m = X.shape[0]
    return np.c_[np.ones((m, 1)), X]
# Load the diabetes data set as a feature matrix and a target vector.
X, y = load_diabetes(return_X_y=True)
print(X.shape, X)
print(y.shape, y)
# Make the targets a column vector without hard-coding the sample count.
y = y.reshape((-1, 1))
# Hold out 40% of the samples for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)
# Fit the scaler on the training split only and reuse it for the test split,
# so both splits are standardized with the same mean and standard deviation.
scaler = StandardScaler().fit(X_train)
# The leading column of ones gives the model its intercept term.
X_train = np.c_[np.ones((X_train.shape[0], 1)), scaler.transform(X_train)]
X_test = np.c_[np.ones((X_test.shape[0], 1)), scaler.transform(X_test)]
print(X_train.shape, y_train.shape)
# Train the model by gradient descent.
model = LinearRegression()
model.fit(X_train, y_train, eta=0.001, epsilon=0.0001)
# Predict on the held-out split.
y_pred = model.predict(X_test)
print(y_pred.shape, y_test.shape, model.w.shape)
mse = mean_squared_error(y_test, y_pred)  # mean squared error
score = r2_score(y_test, y_pred)          # R^2
print("mse={} andr2={}".format(mse,score))
# Add a sample-index column to each series, then plot predictions against targets.
y_test = addLine(y_test)
y_pred = addLine(y_pred)
printLine('范围', '期望值', '梯度下降算法解决糖尿病问题', y_pred, y_test)
更多推荐
已为社区贡献4条内容
所有评论(0)