机器学习之线性回归
最简单的线性公式 y = wx + b:下面给出用最小二乘法求 w 和 b 的 Python 代码(get_w、get_b),并将其应用于房价预测。
·
最简单的线性公式y=wx+b
代码
#!/usr/bin/env python
#coding=utf-8
import random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
def get_w(datas):
    """Return the least-squares slope ``w`` of the line ``y = w*x + b``.

    Uses the closed form::

        w = sum(y_i * (x_i - mean_x)) / (sum(x_i ** 2) - sum(x_i) ** 2 / n)

    Args:
        datas: Sized collection of samples that can be iterated more than
            once; ``sample[0]`` is x and ``sample[1]`` is y.

    Returns:
        The slope of the fitted line.

    Raises:
        ZeroDivisionError: With plain Python numbers, if ``datas`` is empty
            or the denominator evaluates to zero (e.g. a single sample, or
            all x values equal).
    """
    n = len(datas)
    total_x = sum(sample[0] for sample in datas)
    mean_x = total_x / n
    numerator = sum(sample[1] * (sample[0] - mean_x) for sample in datas)
    # sum(x)^2 / n reuses total_x instead of summing x a second time.
    denominator = (
        sum(sample[0] * sample[0] for sample in datas) - total_x * total_x / n
    )
    return numerator / denominator
def get_b(w, datas):
    """Return the intercept ``b`` of ``y = w*x + b`` for a given slope ``w``.

    ``b`` is the mean of ``y_i - w * x_i`` over all samples.

    Args:
        w: Slope of the line.
        datas: Sized collection of samples; ``sample[0]`` is x and
            ``sample[1]`` is y.

    Returns:
        The intercept of the line.

    Raises:
        ZeroDivisionError: If ``datas`` is empty.
    """
    return sum(sample[1] - w * sample[0] for sample in datas) / len(datas)
if __name__ == '__main__':
    # Demo: fit a straight line to a random walk and plot both.
    n_points = 30  # single source of truth for sample count and axis range

    # Seed NumPy from the stdlib RNG so each run draws a different walk.
    np.random.seed(random.randint(1, 10000))
    # Cumulative sum of standard-normal steps; computed once and reused.
    y1 = np.random.standard_normal(n_points).cumsum()
    data = list(zip(range(n_points), y1))

    w = get_w(data)
    b = get_b(w, data)
    y2 = [w * x + b for x in range(n_points)]

    plt.grid(True)  # show grid lines
    # Cover every sample; the previous fixed limit of 20 hid x = 21..29.
    plt.xlim(-1, n_points)
    plt.plot(y1, 'ro', lw=1.5, label="data")
    plt.plot(y2, 'b', lw=1.5, label="learner")
    plt.legend(loc=0)
    plt.show()
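通过最小二乘法求这条直线:使各数据点与直线在 y 方向上的误差(残差)平方和最小。由此可得代码中 get_w、get_b 所用的闭式解:
$$w = \frac{\sum_{i=1}^{n} y_i (x_i - \bar{x})}{\sum_{i=1}^{n} x_i^2 - \frac{1}{n}\left(\sum_{i=1}^{n} x_i\right)^2}, \qquad b = \frac{1}{n}\sum_{i=1}^{n} (y_i - w x_i)$$
下图中的 data 就是数据集,learner 就是通过数据集拟合得到的一个简单的线性回归模型。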
效果
应用:预测房价
这里有一组数据,其中只有两个属性:面积和房价。我们以面积作为 x、房价作为 y 进行预测。
代码
#!/usr/bin/env python
#coding=utf-8
import random
import requests
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
def get_data():
    """Download the price data set and parse it into integer pairs.

    Each non-blank line of the remote file is parsed as two comma-separated
    integers (the article uses them as area and price).

    Returns:
        A list of ``(int, int)`` tuples, one per non-blank line.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        ValueError: If one of the first two fields of a line is not an
            integer.
        IndexError: If a line contains no comma.
    """
    url = "https://raw.githubusercontent.com/carefree0910/MachineLearning/8ee143051d5dbbb21f6b9cd3127cd3600bfcf9e7/_Data/prices.txt"
    # Without a timeout the request can block forever on a stalled connection.
    r = requests.get(url, timeout=10)
    # Fail here on an HTTP error instead of trying to parse the error page.
    r.raise_for_status()

    datas = []
    # splitlines() plus the blank-line skip keeps the last record whether or
    # not the file ends with a newline.
    for line in r.text.splitlines():
        if not line.strip():
            continue
        fields = line.split(",")
        datas.append((int(fields[0]), int(fields[1])))
    return datas
def get_train_test_data(data, test_size=15):
    """Randomly split ``data`` into a training set and a test set.

    Duplicate samples are collapsed first, so both returned lists contain
    distinct samples and share no element.

    Args:
        data: Iterable of hashable samples (e.g. ``(x, y)`` tuples).
        test_size: Number of distinct samples to hold out for testing.

    Returns:
        A ``(train_data, test_data)`` tuple of lists.

    Raises:
        ValueError: If ``data`` has fewer than ``test_size`` distinct
            samples, or ``test_size`` is negative.
    """
    # De-duplicate while keeping first-seen order.
    unique = list(dict.fromkeys(data))
    # random.sample draws without replacement and raises ValueError when the
    # population is too small, instead of looping forever.
    test_data = random.sample(unique, test_size)
    held_out = set(test_data)
    train_data = [sample for sample in unique if sample not in held_out]
    return train_data, test_data
def get_w(datas):
    """Return the least-squares slope ``w`` of the line ``y = w*x + b``.

    Uses the closed form::

        w = sum(y_i * (x_i - mean_x)) / (sum(x_i ** 2) - sum(x_i) ** 2 / n)

    Args:
        datas: Sized collection of samples that can be iterated more than
            once; ``sample[0]`` is x and ``sample[1]`` is y.

    Returns:
        The slope of the fitted line.

    Raises:
        ZeroDivisionError: With plain Python numbers, if ``datas`` is empty
            or the denominator evaluates to zero (e.g. a single sample, or
            all x values equal).
    """
    n = len(datas)
    total_x = sum(sample[0] for sample in datas)
    mean_x = total_x / n
    numerator = sum(sample[1] * (sample[0] - mean_x) for sample in datas)
    # sum(x)^2 / n reuses total_x instead of summing x a second time.
    denominator = (
        sum(sample[0] * sample[0] for sample in datas) - total_x * total_x / n
    )
    return numerator / denominator
def get_b(w, datas):
    """Return the intercept ``b`` of ``y = w*x + b`` for a given slope ``w``.

    ``b`` is the mean of ``y_i - w * x_i`` over all samples.

    Args:
        w: Slope of the line.
        datas: Sized collection of samples; ``sample[0]`` is x and
            ``sample[1]`` is y.

    Returns:
        The intercept of the line.

    Raises:
        ZeroDivisionError: If ``datas`` is empty.
    """
    return sum(sample[1] - w * sample[0] for sample in datas) / len(datas)
if __name__ == '__main__':
    # Demo: fit price against area on a random train split and plot the
    # training points, the held-out test points, and the fitted line.

    # Divide both columns by 100 to keep the plotted numbers small. Every
    # record returned by get_data() is used; slicing off the last one would
    # discard a valid sample.
    data = [(dat[0] / 100, dat[1] / 100) for dat in get_data()]
    train_data, test_data = get_train_test_data(data)

    w = get_w(train_data)
    b = get_b(w, train_data)

    x1 = [dat[0] for dat in train_data]
    y1 = [dat[1] for dat in train_data]
    x3 = [dat[0] for dat in test_data]
    y3 = [dat[1] for dat in test_data]

    # A straight line needs only its two end points; span it from x = 0 to
    # the largest x in the data instead of a fixed 0..39 range.
    line_x = [0, max(dat[0] for dat in data)]
    y2 = [w * x + b for x in line_x]

    plt.grid(True)  # show grid lines
    plt.xlim(-1, 100)
    plt.plot(x1, y1, 'ro', lw=1.5, label="train_data")
    plt.plot(line_x, y2, 'b', lw=1.5, label="learner")
    plt.plot(x3, y3, 'bo', lw=1.5, label="test_data")
    plt.legend(loc=0)
    plt.show()
效果展示
更多推荐
所有评论(0)