Comparison with Classical Linear Regression
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import numpy as np
from sklearn.linear_model import LinearRegression
Xdata = np.array([4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0])
Ydata = np.array([33, 42, 45, 51, 53, 61, 62])
plt.figure(figsize=(8,5))
plt.scatter(Xdata,Ydata, s = 80)
plt.xlabel('x', fontsize= 16)
plt.tick_params(labelsize=16)
These points appear to lie on a straight line, so linear regression should work well.
Build phase
Let's build the neural network. It has a single neuron with a linear activation function.
# Define the model: a single neuron with a linear activation function
def model(x, w, b):
    return tf.multiply(x, w) + b

# Mean squared error loss
def loss_fun(x, y, w, b):
    err = model(x, w, b) - y
    squared_err = tf.square(err)
    return tf.reduce_mean(squared_err)

# Gradients of the loss with respect to w and b, via automatic differentiation
def grad(x, y, w, b):
    with tf.GradientTape() as tape:
        loss_ = loss_fun(x, y, w, b)
    return tape.gradient(loss_, [w, b])
We need a couple of additional pieces:
- the loss function J
- an update rule that minimizes the loss (see the sketch below)
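To make the second item concrete, here is a minimal sketch (not part of the original notebook) of a single hand-written gradient-descent update in eager TensorFlow 2. It reuses the loss_fun and grad functions defined above and the raw Xdata/Ydata arrays; the w_demo, b_demo and lr_demo names are only for this illustration and are not used later.
# A minimal sketch: one gradient-descent step, reusing loss_fun and grad from above
xs = Xdata.astype(np.float64)
ys = Ydata.astype(np.float64)
w_demo = tf.Variable(np.random.randn())      # slope, float64 to match the data
b_demo = tf.Variable(0.0, dtype=tf.float64)  # intercept
lr_demo = 0.01                               # learning rate chosen only for this demo
dw, db = grad(xs, ys, w_demo, b_demo)        # dJ/dw, dJ/db via GradientTape
w_demo.assign_sub(lr_demo * dw)              # w <- w - lr * dJ/dw
b_demo.assign_sub(lr_demo * db)              # b <- b - lr * dJ/db
print('J after one step:', float(loss_fun(xs, ys, w_demo, b_demo)))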
Reshaping the dataset
We want X and Y to be tensors with a single row. Let's check:
print(Xdata.shape)
print(Ydata.shape)
This is not what we want, so we reshape them:
x = Xdata.reshape(1,-1)
y = Ydata.reshape(1,-1)
print(x.shape)
print(y.shape)
Much better... To test different learning rates we can wrap the training in a function:
def run_linear_model(learning_rate, train_epochs, x, y, debug=False):
    # Initialize the slope and the intercept
    w = tf.Variable(np.random.randn(), tf.float32)
    b = tf.Variable(0.0, tf.float32)

    # Train for train_epochs epochs with plain gradient descent
    loss = []
    count = 0
    display_count = 10  # print the loss every 10 training steps
    for epoch in range(train_epochs):
        for xs, ys in zip(x, y):
            # Compute and record the current loss
            loss_ = loss_fun(xs, ys, w, b)
            loss.append(loss_)
            # Gradients of the loss with respect to [w, b]
            delta_w, delta_b = grad(xs, ys, w, b)
            change_w = delta_w * learning_rate
            change_b = delta_b * learning_rate
            w.assign_sub(change_w)
            b.assign_sub(change_b)
            # One more training step done
            count = count + 1
            if debug and count % display_count == 0:
                print('train epoch : ', '%02d' % (epoch + 1), 'step:%03d' % (count), 'loss= ', '{:.9f}'.format(float(loss_)))
    return loss
ch = run_linear_model(0.1, 1000, x, y, True)
Interesting... Let's try a smaller learning rate:
ch1 = run_linear_model(1e-3, 1000, x, y, True)
ch2 = run_linear_model(1e-3, 5000, x, y, True)
Check J... it keeps decreasing.
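As a quick check (an addition, using the loss histories returned above), we can print the last recorded loss of each run:
print('final loss, lr=1e-3, 1000 epochs:', float(ch1[-1]))
print('final loss, lr=1e-3, 5000 epochs:', float(ch2[-1]))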
Plotting the loss function
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch1, ls='solid', color = 'black')
ax.plot(ch2, ls='solid', color = 'red')
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,1000)
plt.tick_params(labelsize=16)
You can't see any difference, so let's zoom in:
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch2, ls='solid', color = 'red')
ax.plot(ch1, ls='solid', color = 'black')
ax.set_ylim(3,3.6)
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,5000)
plt.tick_params(labelsize=16)
Note that the smaller the learning rate, the slower the convergence... Let's try a faster one...
ch3 = run_linear_model(1e-2, 5000, x, y, True)
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch3, ls='solid', lw = 3, color = 'blue', label = r"$\gamma = 10^{-2}$ up to 5000 epochs")
ax.plot(ch2, ls='solid', lw = 3, color = 'red', label = r"$\gamma = 10^{-3}$ up to 5000 epochs")
ax.plot(ch1, ls='--', lw = 5, color = 'black',label = r"$\gamma = 10^{-3}$, up to 1000 epochs")
ax.set_ylim(2.6,3.6)
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,5000)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
Now it is nearly flat...
Trying to find the best parameters
ch5 = run_linear_model(0.03, 5000, x, y, True)
# Initialize the slope and intercept of the linear function
w = tf.Variable(np.random.randn(), tf.float32)
b = tf.Variable(0.0, tf.float32)
# Number of epochs and learning rate
train_epochs = 5000
learning_rate = 0.03
loss = []
count = 0
display_count = 10  # print the loss every 10 training steps
# Train with plain gradient descent
for epoch in range(train_epochs):
    for xs, ys in zip(x, y):
        # Compute and record the current loss
        loss_ = loss_fun(xs, ys, w, b)
        loss.append(loss_)
        # Gradients of the loss with respect to [w, b]
        delta_w, delta_b = grad(xs, ys, w, b)
        change_w = delta_w * learning_rate
        change_b = delta_b * learning_rate
        w.assign_sub(change_w)
        b.assign_sub(change_b)
        # One more training step done
        count = count + 1
        if count % display_count == 0:
            print('train epoch : ', '%02d' % (epoch + 1), 'step:%03d' % (count), 'loss= ', '{:.9f}'.format(float(loss_)))
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch5, ls='solid', lw = 3, color = 'green', label = r"$\gamma = 0.03$ up to 5000 epochs")
ax.plot(ch3, ls='solid', lw = 3, color = 'blue', label = r"$\gamma = 10^{-2}$ up to 5000 epochs")
ax.plot(ch2, ls='solid', lw = 3, color = 'red', label = r"$\gamma = 10^{-3}$ up to 5000 epochs")
ax.plot(ch1, ls='--', lw = 5, color = 'black',label = r"$\gamma = 10^{-3}$, up to 1000 epochs")
ax.set_ylim(2.6,3.6)
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,5000)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
It seems the loss values of the green or blue curves are about as low as we can get... so those settings are good candidates...
pred_y = model(x,w.numpy(),b.numpy())
mse_y = tf.reduce_mean(tf.square(pred_y - y))
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.scatter(y, pred_y, lw = 5)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw = 5)
ax.set_xlabel('Measured Target Value', fontsize = 16)
ax.set_ylabel('Predicted Target Value', fontsize = 16)
plt.tick_params(labelsize=16)
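The mse_y computed above is never displayed; printing it (a small addition) shows the residual error of the fit:
print('MSE of the fit:', float(mse_y))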
How do we find the weights?
Usually, with linear regression, we are interested in the parameters themselves. With a neural network we normally only care about the predictions, since there are far too many parameters, but it is instructive to see how the parameters can be recovered. Our linear equation is
y = w x + b
with the weight stored in the tf.Variable w and the bias stored in the tf.Variable b. To get them we simply read their values with .numpy().
The parameters are
# Show the trained parameters
print('w: ',w.numpy())
print('b: ',b.numpy())
So we can plot the data together with the best-fit line:
x_ = np.arange(4, 7, 0.05).reshape(1,-1)
yfit_ = model(x_ ,w.numpy(),b.numpy())
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x_[0], yfit_[0], label = "Linear Regression")
ax.scatter (x,y, color = 'red', s = 80, label = "True Data")
ax.set_xlabel('x', fontsize = 16)
ax.set_ylabel('y', fontsize = 16)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
Comparison with classical linear regression
Let's compare with the result of a classical linear regression done with sklearn:
xt = x.reshape(7,-1)
yt = y.reshape(7,-1)
reg = LinearRegression().fit(xt,yt)
reg.score(xt,yt)
reg.coef_
reg.intercept_
xt_ = x_[0].reshape(60,-1)
yfitsk_ = reg.predict(xt_.reshape(60,-1))
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x_[0], yfit_[0], label = "Linear Regression")
ax.plot(x_[0], yfitsk_, label = "sklearn Linear Regression")
ax.scatter (x,y, color = 'red', s = 80, label = "True Data")
ax.set_xlabel('x', fontsize = 16)
ax.set_ylabel('y', fontsize = 16)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
ch4 = run_linear_model(1e-2, 15000, x, y, True)
# Initialize the slope and intercept of the linear function
w = tf.Variable(np.random.randn(), tf.float32)
b = tf.Variable(0.0, tf.float32)
# Number of epochs and learning rate
train_epochs = 15000
learning_rate = 1e-2
loss = []
count = 0
display_count = 10  # print the loss every 10 training steps
# Train with plain gradient descent
for epoch in range(train_epochs):
    for xs, ys in zip(x, y):
        # Compute and record the current loss
        loss_ = loss_fun(xs, ys, w, b)
        loss.append(loss_)
        # Gradients of the loss with respect to [w, b]
        delta_w, delta_b = grad(xs, ys, w, b)
        change_w = delta_w * learning_rate
        change_b = delta_b * learning_rate
        w.assign_sub(change_w)
        b.assign_sub(change_b)
        # One more training step done
        count = count + 1
        if count % display_count == 0:
            print('train epoch : ', '%02d' % (epoch + 1), 'step:%03d' % (count), 'loss= ', '{:.9f}'.format(float(loss_)))
print('w: ',w.numpy())
print('b: ',b.numpy())
The classical linear regression gives
9.5, -2.67857143
so the two approaches agree very closely!
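As a sanity check (not in the original notebook), the closed-form least-squares solution can be computed directly with numpy from the Xdata/Ydata arrays defined at the top, and should reproduce these numbers:
# Closed-form least-squares fit; np.polyfit returns [slope, intercept]
slope, intercept = np.polyfit(Xdata, Ydata, deg=1)
print('slope:    ', slope)       # expected ~ 9.5
print('intercept:', intercept)   # expected ~ -2.67857143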