Multi-class Classification
We have already seen binary classification: the network's output layer has a single neuron whose output is the predicted probability of the positive class, P(y = 1 | x); if this probability is greater than 0.5 we predict the positive class, otherwise the negative class. What do we do for multi-class problems?
For a multi-class problem, let N be the number of classes. The output layer must then have N neurons (L[output] = N), and each neuron's output is the probability that the input belongs to the corresponding one of the N classes, i.e. P(y = 1 | x), ..., P(y = N | x).
Softmax Regression
Softmax regression (also called multinomial logistic regression) is the natural extension of logistic regression to multi-class problems, and it is one of the most fundamental and most important multi-class methods in deep learning. Unlike binary logistic regression, softmax regression handles many classes at once and converts the outputs into a probability distribution, with every probability satisfying 0 ≤ P ≤ 1.
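Concretely, if the output layer produces raw scores (logits) z_1, ..., z_N for the N classes, the softmax function turns them into class probabilities:

$$P(y = j \mid x) = \frac{e^{z_j}}{\sum_{k=1}^{N} e^{z_k}}, \qquad j = 1, \dots, N$$

Each probability lies between 0 and 1, and the N probabilities sum to 1, so the output is a valid probability distribution over the classes.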
Cross-Entropy Loss
For softmax regression, the cost function is a generalization of the logistic regression cost; both are commonly called the cross-entropy loss.
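Writing m for the number of training examples, y_j^(i) = 1 if example i belongs to class j (and 0 otherwise), and a_j^(i) for the softmax probability predicted for class j, the standard form is:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\sum_{j=1}^{N} y_j^{(i)} \log a_j^{(i)}$$

Because the labels are one-hot, only the log-probability assigned to the true class contributes to each example's loss.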
In fact, if logistic regression is viewed as a multi-class problem with only two classes, its cost function can be written in the same unified form.
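With N = 2 and a single output a = P(y = 1 | x), the cross-entropy above reduces to the familiar logistic regression cost:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\left[ y^{(i)} \log a^{(i)} + \left(1 - y^{(i)}\right)\log\left(1 - a^{(i)}\right)\right]$$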
Handwritten Digit Recognition Example
This example does not implement forward propagation, backpropagation, and the other details by hand; everything is done by calling the TensorFlow API, which keeps the code compact and much more readable. The TensorFlow version used here may be out of date, so copying the code verbatim is not guaranteed to run.
The samples come from a dataset downloaded from the web and are loaded with mnist = tf.keras.datasets.mnist.load_data().
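For orientation, load_data() returns two (features, labels) tuples of NumPy arrays rather than a dataset object with a next_batch method, which is why the later versions of the code slice out mini-batches by hand. A minimal check (the shapes shown are the standard MNIST split):

import tensorflow.compat.v1 as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(x_test.shape, y_test.shape)    # (10000, 28, 28) (10000,)
# Pixel values are uint8 in [0, 255]; labels are the digits 0-9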
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

mnist = tf.keras.datasets.mnist.load_data()

def fc_nn():
    # Load the data
    mnist = tf.keras.datasets.mnist.load_data()
    with tf.variable_scope("resource"):
        # Features X
        x = tf.placeholder(tf.float32, [None, 784], name="X_data")
        # Labels Y (fix: changed to float32 to match the one-hot encoding)
        y = tf.placeholder(tf.float32, [None, 10], name="label")
    with tf.variable_scope("hidden"):
        # [None, 784] * [784, 64] + [64] = [None, 64]
        # Fix: moved the mean and stddev arguments inside random_normal
        weight_hidden = tf.Variable(tf.random_normal([784, 64], mean=0.0, stddev=1.0), name="weight_hidden")
        bias_hidden = tf.Variable(tf.random_normal([64], mean=0.0, stddev=1.0), name="bias_hidden")
        A1 = tf.matmul(x, weight_hidden) + bias_hidden
    with tf.variable_scope("fc"):
        # [None, 64] * [64, 10] + [10] = [None, 10]
        weight_fc = tf.Variable(tf.random_normal([64, 10], mean=0.0, stddev=1.0), name="weight_fc")
        bias_fc = tf.Variable(tf.random_normal([10], mean=0.0, stddev=1.0), name="bias_fc")
        y_pred = tf.matmul(A1, weight_fc) + bias_fc
    with tf.variable_scope("compute_loss"):
        # Compute the loss
        all_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred)
        loss = tf.reduce_mean(all_loss)
    with tf.variable_scope("optimize"):
        train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    with tf.Session() as sess:
        # Initialize all variables (required)
        sess.run(tf.global_variables_initializer())
        # Training loop
        for i in range(2000):
            # Fix: fetch a batch of data
            # (note: load_data() returns plain arrays with no next_batch method,
            # so this call fails; the revised version below slices the arrays instead)
            batch_x, batch_y = mnist.train.next_batch(100)
            # Fix: pass the data through feed_dict
            loss_run, _ = sess.run([loss, train_op], feed_dict={x: batch_x, y: batch_y})
            print("Step %d, loss: %f" % (i, loss_run))

if __name__ == '__main__':
    fc_nn()
Below is the code after being revised (with AI assistance) to match the current TensorFlow version; try it with caution.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def fc_nn():
    # Load the data
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # Preprocess: flatten to 784 features, scale to [0, 1], one-hot encode the labels
    x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    with tf.variable_scope("resource"):
        # Features X
        x = tf.placeholder(tf.float32, [None, 784], name="X_data")
        # Labels Y
        y = tf.placeholder(tf.float32, [None, 10], name="label")
    with tf.variable_scope("hidden"):
        # [None, 784] * [784, 64] + [64] = [None, 64]
        weight_hidden = tf.Variable(tf.random_normal([784, 64], mean=0.0, stddev=1.0), name="weight_hidden")
        bias_hidden = tf.Variable(tf.random_normal([64], mean=0.0, stddev=1.0), name="bias_hidden")
        A1 = tf.matmul(x, weight_hidden) + bias_hidden
    with tf.variable_scope("fc"):
        # [None, 64] * [64, 10] + [10] = [None, 10]
        weight_fc = tf.Variable(tf.random_normal([64, 10], mean=0.0, stddev=1.0), name="weight_fc")
        bias_fc = tf.Variable(tf.random_normal([10], mean=0.0, stddev=1.0), name="bias_fc")
        y_pred = tf.matmul(A1, weight_fc) + bias_fc
    with tf.variable_scope("compute_loss"):
        # Compute the loss; softmax_cross_entropy_with_logits applies softmax internally,
        # so y_pred is passed in as raw logits
        all_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred)
        loss = tf.reduce_mean(all_loss)
    with tf.variable_scope("optimize"):
        train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    with tf.Session() as sess:
        # Initialize all variables (required)
        sess.run(tf.global_variables_initializer())
        # Training loop
        for i in range(2000):
            # Slice out a batch of 100 examples
            start = (i * 100) % (len(x_train) - 100)
            end = start + 100
            batch_x = x_train[start:end]
            batch_y = y_train[start:end]
            loss_run, _ = sess.run([loss, train_op], feed_dict={x: batch_x, y: batch_y})
            print("Step %d, loss: %f" % (i, loss_run))

if __name__ == '__main__':
    fc_nn()
Improved code (summary saving + TensorBoard visualization). Compared with the version above, it also adds a ReLU activation to the hidden layer, tracks accuracy, and writes scalar and histogram summaries for TensorBoard.
import tensorflow.compat.v1 as tf
import os
tf.disable_v2_behavior()

def fc_nn():
    # Load the data
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # Preprocess: flatten to 784 features, scale to [0, 1], one-hot encode the labels
    x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    # Create the log directory for TensorBoard
    log_dir = "./logs/fc_nn"
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    with tf.variable_scope("resource"):
        # Features X
        x = tf.placeholder(tf.float32, [None, 784], name="X_data")
        # Labels Y
        y = tf.placeholder(tf.float32, [None, 10], name="label")
    with tf.variable_scope("hidden"):
        # [None, 784] * [784, 64] + [64] = [None, 64]
        weight_hidden = tf.Variable(tf.random_normal([784, 64], mean=0.0, stddev=1.0), name="weight_hidden")
        bias_hidden = tf.Variable(tf.random_normal([64], mean=0.0, stddev=1.0), name="bias_hidden")
        A1 = tf.matmul(x, weight_hidden) + bias_hidden
        # Add a ReLU activation and record a histogram of the activations
        A1 = tf.nn.relu(A1)
        tf.summary.histogram("hidden_activations", A1)
    with tf.variable_scope("fc"):
        # [None, 64] * [64, 10] + [10] = [None, 10]
        weight_fc = tf.Variable(tf.random_normal([64, 10], mean=0.0, stddev=1.0), name="weight_fc")
        bias_fc = tf.Variable(tf.random_normal([10], mean=0.0, stddev=1.0), name="bias_fc")
        y_pred = tf.matmul(A1, weight_fc) + bias_fc
        # Record the weights and biases
        tf.summary.histogram("fc_weights", weight_fc)
        tf.summary.histogram("fc_biases", bias_fc)
    with tf.variable_scope("compute_loss"):
        # Compute the loss
        all_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred)
        loss = tf.reduce_mean(all_loss)
        # Record the loss
        tf.summary.scalar("loss", loss)
    with tf.variable_scope("accuracy"):
        # Compute the accuracy
        correct_pred = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("accuracy", accuracy)
    with tf.variable_scope("optimize"):
        train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    # Merge all summaries
    merged = tf.summary.merge_all()
    with tf.Session() as sess:
        # Create a FileWriter that also records the graph
        train_writer = tf.summary.FileWriter(log_dir, sess.graph)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        # Training loop
        for i in range(2000):
            # Slice out a batch of 100 examples
            start = (i * 100) % (len(x_train) - 100)
            end = start + 100
            batch_x = x_train[start:end]
            batch_y = y_train[start:end]
            # Run a training step and collect the merged summary
            _, loss_run, summary = sess.run([train_op, loss, merged],
                                            feed_dict={x: batch_x, y: batch_y})
            # Write the summary for this step
            train_writer.add_summary(summary, i)
            # Print progress every 100 steps
            if i % 100 == 0:
                acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
                print(f"Step {i}, loss: {loss_run:.4f}, accuracy: {acc:.4f}")
        # Close the FileWriter
        train_writer.close()

if __name__ == '__main__':
    fc_nn()
Running the code once creates a log directory containing the recorded data. Then, in a terminal, run
tensorboard --logdir=./logs
and you can see plots of how the network fits over the course of training. This code only records accuracy and loss; you can of course record additional variables and inspect their curves. Remember to clean out old logs between runs.
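Note that the accuracy printed above is measured only on the current training batch. Since load_data() also returns x_test and y_test, a held-out evaluation can be added after the training loop; here is a minimal sketch, assuming the same preprocessing as the training data (the names x_test_flat, y_test_onehot, and test_acc are just for illustration):

# Inside the `with tf.Session() as sess:` block, after the training loop:
x_test_flat = x_test.reshape(-1, 784).astype('float32') / 255.0
y_test_onehot = tf.keras.utils.to_categorical(y_test, 10)
test_acc = sess.run(accuracy, feed_dict={x: x_test_flat, y: y_test_onehot})
print(f"Test accuracy: {test_acc:.4f}")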