一、问题描述

        实现线性回归的梯度下降算法,解决糖尿病预测问题,输出均方误差(MSE)和决定系数(R²)的值

二、实验目的

        熟练掌握线性回归梯度下降算法的应用

三、实验内容

  1. 导入数据
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y = True)     # Fetch the data as a (features, target) pair

2、对数据进行标准化处理,并训练模型

# Turn the 1-D target into a column vector; -1 lets NumPy infer the row
# count instead of hard-coding the dataset size.
y = y.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)

X_train = process_features(X_train)          # Standardize the training features

X_test = process_features(X_test)            # Standardize the test features
model = LinearRegression()

# Train with gradient descent: eta is the step size, epsilon the
# gradient-norm threshold at which iteration stops.
model.fit(X_train, y_train, eta=0.001, epsilon=0.0001)
3、预测数据
y_pred = model.predict(X_test)  # Predict targets for the test features
4、求均方误差和R2
mse = mean_squared_error(y_test, y_pred)  # Mean squared error between true and predicted targets
score = r2_score(y_test, y_pred)  # R^2 coefficient of determination
print("mse={}  andr2={}".format(mse,score))

5、图形化显示

# Plotting helper
def printLine(x_name, y_name, title, X, Y):
    """Plot two series on figure 1 and show the window.

    Args:
        x_name: Text for the x-axis label.
        y_name: Text for the y-axis label.
        title: Text for the figure title.
        X: Array indexed as ``X[:, 0]`` (x-coordinates) and ``X[:, 1]``
            (y-coordinates); drawn in blue and labelled ``line1``.
        Y: Array indexed the same way; drawn in red and labelled ``line2``.

    Returns:
        Always 0.

    The axis labels and title are rendered with the module-level ``font``.
    """
    plt.figure(1)
    for series, color, label in ((X, 'b', 'line1'), (Y, 'r', 'line2')):
        xs, ys = series[:, 0], series[:, 1]
        plt.plot(xs, ys, color + 'o', ms=3)             # point markers
        plt.plot(xs, ys, color, ms=3, label=label)      # connecting solid line
    # Without legend() the label= values above are never displayed.
    plt.legend()
    plt.xlabel(x_name, fontproperties=font)
    plt.ylabel(y_name, fontproperties=font)
    plt.title(title, fontproperties=font)
    plt.show()
    return 0

四、实验结果及分析

1. 糖尿病数据

2. 运行结果

五、完整代码

机器学习GitHub:https://github.com/wanglei18/machine_learning

import numpy as np

class LinearRegression:
    """Least-squares linear regression fitted by batch gradient descent.

    The learned weights are stored in ``self.w`` after ``fit``. The class
    adds no intercept term itself; the model is exactly ``X.dot(w)``.
    """

    def fit(self, X, y, eta, epsilon, max_iter=None):
        """Fit the weights by gradient descent on the mean squared error.

        Args:
            X: 2-D array of shape ``(m, n)``.
            y: Targets. A 1-D array of length ``m`` is treated as a column
                vector; 2-D input is used as given.
            eta: Step size applied to the gradient on every update.
            epsilon: Iteration stops once the 2-norm of the gradient used
                for the latest update is below this value.
            max_iter: Optional cap on the number of updates. ``None``
                (the default) iterates until the ``epsilon`` criterion is
                met.
        """
        y = np.asarray(y)
        if y.ndim == 1:
            # A 1-D y would broadcast (m, 1) - (m,) into an (m, m) matrix
            # and silently produce wrong weights.
            y = y.reshape(-1, 1)

        m, n = X.shape
        w = np.zeros((n, 1))
        updates = 0
        while max_iter is None or updates < max_iter:
            residual = X.dot(w) - y
            gradient = 2 * X.T.dot(residual) / m  # gradient of the MSE w.r.t. w
            w = w - eta * gradient
            updates += 1
            # The check uses the gradient computed before this update.
            if np.linalg.norm(gradient, 2) < epsilon:
                break
        self.w = w

    def predict(self, X):
        """Return ``X.dot(self.w)`` for the weights learned by ``fit``."""
        return X.dot(self.w)

import numpy as np
import matplotlib.pyplot as plt
from machine_learning.homework.libs.grade import LinearRegression
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from matplotlib.font_manager import FontProperties      #步骤一

# Step 2: font used by printLine for the axis labels and title; it is loaded from a Windows font path.
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=15)

# Prepend a 1-based index column so the values can be plotted against it
def addLine(X):
    """Return ``X`` with a leading column holding 1, 2, ..., ``X.shape[0]``.

    Args:
        X: Array whose first dimension is the number of rows.

    Returns:
        A new array with the float index column placed before the columns
        of ``X``.
    """
    length = X.shape[0]
    # Vectorised index column (float, like the original np.ones-based one).
    index_column = np.arange(1, length + 1, dtype=float).reshape(-1, 1)
    return np.c_[index_column, X]

# Plotting helper
def printLine(x_name, y_name, title, X, Y):
    """Plot two series on figure 1 and show the window.

    Args:
        x_name: Text for the x-axis label.
        y_name: Text for the y-axis label.
        title: Text for the figure title.
        X: Array indexed as ``X[:, 0]`` (x-coordinates) and ``X[:, 1]``
            (y-coordinates); drawn in blue and labelled ``line1``.
        Y: Array indexed the same way; drawn in red and labelled ``line2``.

    Returns:
        Always 0.

    The axis labels and title are rendered with the module-level ``font``.
    """
    plt.figure(1)
    for series, color, label in ((X, 'b', 'line1'), (Y, 'r', 'line2')):
        xs, ys = series[:, 0], series[:, 1]
        plt.plot(xs, ys, color + 'o', ms=3)             # point markers
        plt.plot(xs, ys, color, ms=3, label=label)      # connecting solid line
    # Without legend() the label= values above are never displayed.
    plt.legend()
    plt.xlabel(x_name, fontproperties=font)   # Step 3: apply the custom font
    plt.ylabel(y_name, fontproperties=font)
    plt.title(title, fontproperties=font)
    plt.show()
    return 0

# Mean squared error
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences, taken along axis 0."""
    squared_diff = np.square(y_true - y_pred)
    return np.mean(squared_diff, axis=0)

# R^2 coefficient of determination
def r2_score(y_true, y_pred):
    """Return 1 minus the ratio of residual to total sum of squares.

    Both sums are taken along axis 0; the total sum of squares is measured
    around the axis-0 mean of ``y_true``.
    """
    residual_ss = np.sum(np.square(y_true - y_pred), axis=0)
    centered = y_true - np.mean(y_true, axis=0)
    total_ss = np.sum(np.square(centered), axis=0)
    return 1 - residual_ss / total_ss

# Feature preprocessing
def process_features(X):
    """Standardize ``X`` and prepend a column of ones.

    A new ``StandardScaler`` is created and fitted on every call, so each
    array passed in is scaled with its own statistics.

    Returns:
        The standardized array with a leading all-ones column.
    """
    standardized = StandardScaler().fit_transform(X)
    row_count = standardized.shape[0]
    bias_column = np.ones((row_count, 1))
    return np.hstack((bias_column, standardized))

X, y = load_diabetes(return_X_y = True)     # Fetch the data
print(X.shape, X)
print(y.shape, y)
# Turn the 1-D target into a column vector; -1 lets NumPy infer the row
# count instead of hard-coding the dataset size.
y = y.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)    # Train/test split
# NOTE(review): process_features fits a fresh scaler on each call, so the
# test set is standardized with its own statistics, not the training set's.
X_train = process_features(X_train)                                                         # Standardize the training features
X_test = process_features(X_test)                                                           # Standardize the test features

print(X_train.shape,y_train.shape)

model = LinearRegression()
model.fit(X_train, y_train, eta=0.001, epsilon=0.0001)      # Train by gradient descent
y_pred = model.predict(X_test)                              # Predict on the test set
print(y_pred.shape,y_test.shape,model.w.shape)

mse = mean_squared_error(y_test, y_pred)                    # Mean squared error
score = r2_score(y_test, y_pred)                            # R^2 value
print("mse={}  andr2={}".format(mse,score))

y_test = addLine(y_test)                # Add an index column for plotting
y_pred = addLine(y_pred)

printLine('范围', '期望值', '梯度下降算法解决糖尿病问题', y_pred, y_test)

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐