最简单的线性公式y=wx+b
代码

#!/usr/bin/env python
#coding=utf-8
import random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
def get_w(datas):
    """Return the least-squares slope ``w`` of the line ``y = w*x + b``.

    Uses the closed-form solution
    ``w = sum(y_i * (x_i - mean_x)) / sum((x_i - mean_x) ** 2)``.

    Args:
        datas: Sized, re-iterable collection of samples where ``sample[0]``
            is x and ``sample[1]`` is y.

    Returns:
        The slope of the fitted line.

    Raises:
        ValueError: If ``datas`` is empty or every x value is identical,
            in which case the slope is undefined.
    """
    count = len(datas)
    if count == 0:
        raise ValueError("get_w() requires at least one sample")

    mean_x = sum(sample[0] for sample in datas) / count
    numerator = sum(sample[1] * (sample[0] - mean_x) for sample in datas)
    # Centered sum of squares: algebraically equal to
    # sum(x^2) - sum(x)^2 / n, but it does not subtract two large,
    # nearly equal numbers, so it keeps precision when x is far from 0.
    denominator = sum((sample[0] - mean_x) ** 2 for sample in datas)
    if denominator == 0:
        raise ValueError("get_w() requires at least two distinct x values")
    return numerator / denominator
def get_b(w, datas):
    """Return the least-squares intercept ``b`` of ``y = w*x + b``.

    For a fixed slope ``w`` the optimal intercept is the mean residual,
    ``mean(y_i - w * x_i)``.

    Args:
        w: Slope of the line.
        datas: Sized collection of samples where ``sample[0]`` is x and
            ``sample[1]`` is y.

    Returns:
        The intercept of the fitted line.

    Raises:
        ValueError: If ``datas`` is empty.
    """
    count = len(datas)
    if count == 0:
        raise ValueError("get_b() requires at least one sample")
    return sum(sample[1] - w * sample[0] for sample in datas) / count
if __name__ == '__main__':
    # Number of synthetic samples; shared by the data, the fitted line and
    # the x-axis range so the three can never disagree.
    n_samples = 30

    # The seed itself is drawn at random, so each run yields a new data set.
    np.random.seed(random.randint(1, 10000))
    # Random walk: cumulative sum of standard-normal steps.
    y1 = np.random.standard_normal(n_samples).cumsum()
    xs = range(n_samples)
    data = list(zip(xs, y1))
    w = get_w(data)
    b = get_b(w, data)
    y2 = [w * x + b for x in xs]

    plt.grid(True)  # show grid lines
    # Cover the whole sample range so that no fitted point is clipped.
    plt.xlim(-1, n_samples)
    plt.plot(xs, y1, 'ro', lw=1.5, label="data")
    plt.plot(xs, y2, 'b', lw=1.5, label="learner")
    plt.legend(loc=0)

    plt.show()

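通过最小二乘法,使各数据点到直线在竖直方向上的误差(残差)的平方和最小,从而求出这条直线,即 $w = \dfrac{\sum_i y_i (x_i - \bar{x})}{\sum_i x_i^2 - \frac{1}{m}\left(\sum_i x_i\right)^2}$,$b = \dfrac{1}{m}\sum_i (y_i - w x_i)$。下图中的 data 是数据集,learner 是由该数据集拟合得到的简单线性回归模型。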
效果
在这里插入图片描述在这里插入图片描述
在这里插入图片描述

应用:预测房价

这里有一组数据,数据中只有两个属性:面积和房价。我们以面积作为 x、以房价作为 y 进行预测。
代码

#!/usr/bin/env python
#coding=utf-8
import random
import requests
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
def get_data():
    """Download the house-price data set and parse it into integer pairs.

    Each non-blank line of the remote file is read as comma-separated
    fields; the first two fields are converted with ``int``.

    Returns:
        A list of ``(first_field, second_field)`` integer tuples, one per
        non-blank line, in file order.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
        ValueError: If one of the first two fields is not an integer.
        IndexError: If a non-blank line has fewer than two fields.
    """
    url = "https://raw.githubusercontent.com/carefree0910/MachineLearning/8ee143051d5dbbb21f6b9cd3127cd3600bfcf9e7/_Data/prices.txt"
    # Without a timeout requests may block indefinitely on a stalled server.
    r = requests.get(url, timeout=10)
    # Fail on 4xx/5xx here instead of trying to parse an error page as data.
    r.raise_for_status()

    datas = []
    for line in r.text.splitlines():
        # Skip blank lines explicitly rather than assuming that exactly one
        # empty entry trails the payload.
        if not line.strip():
            continue
        fields = line.split(",")
        datas.append((int(fields[0]), int(fields[1])))
    return datas
def get_train_test_data(data, test_size=15):
    """Randomly split ``data`` into a training list and a test list.

    The split is made by position: ``test_size`` distinct indices are drawn
    at random for the test set and every remaining sample goes to the
    training set. Both lists keep the relative order of ``data`` and
    duplicate samples are preserved.

    Args:
        data: Sequence of samples (must support ``len``).
        test_size: Number of samples to hold out for testing.

    Returns:
        A ``(train_data, test_data)`` tuple of lists.

    Raises:
        ValueError: If ``test_size`` is negative or larger than
            ``len(data)``.
    """
    total = len(data)
    if not 0 <= test_size <= total:
        raise ValueError(
            "test_size must be between 0 and len(data) (%d), got %r"
            % (total, test_size)
        )

    # Sampling indices without replacement always terminates, unlike
    # redrawing until enough distinct values have been collected.
    test_indices = set(random.sample(range(total), test_size))
    train_data = [s for i, s in enumerate(data) if i not in test_indices]
    test_data = [s for i, s in enumerate(data) if i in test_indices]
    return train_data, test_data
def get_w(datas):
    """Return the least-squares slope ``w`` of the line ``y = w*x + b``.

    Uses the closed-form solution
    ``w = sum(y_i * (x_i - mean_x)) / sum((x_i - mean_x) ** 2)``.

    Args:
        datas: Sized, re-iterable collection of samples where ``sample[0]``
            is x and ``sample[1]`` is y.

    Returns:
        The slope of the fitted line.

    Raises:
        ValueError: If ``datas`` is empty or every x value is identical,
            in which case the slope is undefined.
    """
    count = len(datas)
    if count == 0:
        raise ValueError("get_w() requires at least one sample")

    mean_x = sum(sample[0] for sample in datas) / count
    numerator = sum(sample[1] * (sample[0] - mean_x) for sample in datas)
    # Centered sum of squares: algebraically equal to
    # sum(x^2) - sum(x)^2 / n, but it does not subtract two large,
    # nearly equal numbers, so it keeps precision when x is far from 0.
    denominator = sum((sample[0] - mean_x) ** 2 for sample in datas)
    if denominator == 0:
        raise ValueError("get_w() requires at least two distinct x values")
    return numerator / denominator
def get_b(w, datas):
    """Return the least-squares intercept ``b`` of ``y = w*x + b``.

    For a fixed slope ``w`` the optimal intercept is the mean residual,
    ``mean(y_i - w * x_i)``.

    Args:
        w: Slope of the line.
        datas: Sized collection of samples where ``sample[0]`` is x and
            ``sample[1]`` is y.

    Returns:
        The intercept of the fitted line.

    Raises:
        ValueError: If ``datas`` is empty.
    """
    count = len(datas)
    if count == 0:
        raise ValueError("get_b() requires at least one sample")
    return sum(sample[1] - w * sample[0] for sample in datas) / count
if __name__ == '__main__':
    # Divide both columns by 100 so the plotted numbers stay small.
    # Every record returned by get_data() is used; slicing off the last
    # element here would discard a valid sample.
    data = [(dat[0] / 100, dat[1] / 100) for dat in get_data()]
    train_data, test_data = get_train_test_data(data)
    w = get_w(train_data)
    b = get_b(w, train_data)

    x1 = [dat[0] for dat in train_data]
    y1 = [dat[1] for dat in train_data]
    x3 = [dat[0] for dat in test_data]
    y3 = [dat[1] for dat in test_data]

    # Two end points are enough for a straight line. Span from 0 to the
    # largest observed x so the fitted line reaches every plotted sample.
    line_x = [0, max(x1 + x3)]
    line_y = [w * x + b for x in line_x]

    plt.grid(True)  # show grid lines
    plt.xlim(-1, 100)
    plt.plot(x1, y1, 'ro', lw=1.5, label="train_data")
    # Pass x explicitly so the line uses the same x-axis as the scatter data.
    plt.plot(line_x, line_y, 'b', lw=1.5, label="learner")
    plt.plot(x3, y3, 'bo', lw=1.5, label="test_data")
    plt.legend(loc=0)
    plt.show()

效果展示
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐