一、环境准备
- 语言环境:Python3.8.0
- 开发工具(IDE):PyCharm 2021
- 深度学习环境:TensorFlow-gpu 2.5
二、代码编写
1. 准备VGG类
# -*-coding:utf-8-*-
from tensorflow.keras import layers, Model, Sequential
# Serialized Keras initializer spec intended for convolution kernels:
# VarianceScaling with scale 2.0 over fan_out, truncated-normal distribution.
CONV_KERNEL_INITIALIZER = dict(
    class_name='VarianceScaling',
    config=dict(
        scale=2.0,
        mode='fan_out',
        distribution='truncated_normal',
    ),
)

# Serialized Keras initializer spec intended for dense kernels:
# VarianceScaling with scale 1/3 over fan_out, uniform distribution.
DENSE_KERNEL_INITIALIZER = dict(
    class_name='VarianceScaling',
    config=dict(
        scale=1.0 / 3.0,
        mode='fan_out',
        distribution='uniform',
    ),
)
# Classification head (prediction layers)
def VGG(feature, im_height=224, im_width=224, num_classes=1000):
    """Attach the classification head to a feature extractor and build the model.

    Args:
        feature: Callable applied to the input image tensor (for example the
            ``Sequential`` returned by ``make_feature``).
        im_height: Height of the input images.
        im_width: Width of the input images.
        num_classes: Number of units in the final dense layer.

    Returns:
        A ``Model`` whose input is a float32 ``(im_height, im_width, 3)`` image
        and whose output has passed through ``Softmax`` (i.e. probabilities,
        not logits).
    """
    # TensorFlow tensors use NHWC (channels-last) ordering.
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")

    # Run the feature extractor, then flatten its output.
    x = feature(input_image)
    x = layers.Flatten()(x)

    # Two identical hidden stages: drop 50% of the activations to reduce
    # overfitting, then a 2048-unit fully connected layer with ReLU.
    for _ in range(2):
        x = layers.Dropout(rate=0.5)(x)
        x = layers.Dense(
            2048,
            activation='relu',
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
        )(x)

    # Final projection to one unit per class, followed by softmax so the
    # model emits a class probability distribution.
    class_scores = layers.Dense(
        num_classes,
        kernel_initializer=DENSE_KERNEL_INITIALIZER,
    )(x)
    output = layers.Softmax()(class_scores)

    return Model(inputs=input_image, outputs=output)
# Feature-extraction builder
def make_feature(cfg):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Iterable of layer specs. The string ``"M"`` adds a 2x2 max-pooling
            layer with stride 2; any other value is passed as the filter count
            of a 3x3, SAME-padded, ReLU-activated convolution.

    Returns:
        A ``Sequential`` named ``"feature"`` holding the layers in ``cfg`` order.
    """

    def _layer_for(spec):
        # One Keras layer per configuration entry.
        if spec == "M":
            return layers.MaxPool2D(pool_size=2, strides=2)
        return layers.Conv2D(
            spec,
            kernel_size=3,
            padding="SAME",
            activation="relu",
            kernel_initializer=CONV_KERNEL_INITIALIZER,
        )

    return Sequential([_layer_for(spec) for spec in cfg], name="feature")
# Layer layouts of the supported VGG variants.
# Integers are convolution filter counts; "M" stands for a max-pooling layer.
# Each row below is one stage ending in a pooling layer.
cfgs = {
    "vgg11": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "vgg13": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "vgg16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "vgg19": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}
# Build a VGG model; the vgg16 layout is used by default.
def vgg(model_name="vgg16", im_height=224, im_width=224, num_classes=1000):
    """Create the VGG variant called ``model_name``.

    Args:
        model_name: Key into ``cfgs`` selecting the layer layout.
        im_height: Image height.
        im_width: Image width.
        num_classes: Number of image classes.

    Returns:
        The model produced by ``VGG`` on top of the matching feature extractor.

    Raises:
        AssertionError: If ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "not support model {}".format(model_name)
    feature_extractor = make_feature(cfgs[model_name])
    return VGG(
        feature_extractor,
        im_height=im_height,
        im_width=im_width,
        num_classes=num_classes,
    )
2. 导入相关库
import os
# Order CUDA devices by PCI bus ID and expose only device "0" to this process.
# NOTE(review): presumably these must be set before TensorFlow is imported to
# take effect, which would explain their position here - confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers,models
import pathlib
# The vgg() model factory is imported from the VGG_Model module.
from VGG_Model import vgg
3.导入数据
# Fix the NumPy and TensorFlow seeds so results are as reproducible as possible.
np.random.seed(520)
tf.random.set_seed(520)

# Root directory of the dataset (replace the placeholder with a real path).
data_dir = pathlib.Path(r"你的数据集路径")

# GPU setup: enable memory growth on every GPU TensorFlow can see.
# An empty device list simply makes the loop a no-op.
gpus = tf.config.experimental.list_physical_devices("GPU")
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the failure and stop the script.
    print(err)
    exit(-1)

# Inspect the data: count the PNG files found one directory level below data_dir.
imageCount = sum(1 for _ in data_dir.glob('*/*.png'))
print("图片的总数为:", imageCount)
4.数据预处理
# 1. Load the data
batch_size = 32
img_height = 224
img_width = 224

# Arguments shared by the training and validation loads.
_split_kwargs = dict(
    validation_split=0.2,
    seed=520,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

# Class names as reported by the training dataset.
class_names = train_ds.class_names
print(class_names)
# 2. Visualise the data: show the first 8 images of one training batch in a
# 2x4 grid, each titled with its class name.
plt.figure(figsize=(10, 5), dpi=100)
for images, labels in train_ds.take(1):
    for cell in range(1, 9):
        sample = cell - 1
        ax = plt.subplot(2, 4, cell)
        pixels = images[sample].numpy().astype("uint8")
        plt.imshow(pixels)
        plt.title(class_names[labels[sample]])
        plt.axis("off")

# Display the second image of that batch on its own.
# NOTE(review): this relies on `images` left over from the loop above and
# presumably draws into the most recently created subplot - confirm intent.
plt.imshow(images[1].numpy().astype("uint8"))
# 3. Check the data again: print the image and label shapes of the first batch.
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)
# 4. Configure the datasets
#   cache():    keep the dataset in memory to speed up later passes
#   shuffle():  shuffle the data
#   prefetch(): prefetch data to speed up the run
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
# 5. Normalisation: rescale pixel values by 1/255.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)
normalization_train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
# Rebind train_ds to the rescaled dataset. Previously only
# `normalization_train_ds` held it, so code that trains on `train_ds` saw raw
# 0-255 pixels while validation used rescaled pixels. The old name is kept as
# an alias for backward compatibility.
train_ds = normalization_train_ds
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))

# Pull one validation batch and keep its first image for inspection.
image_batch, labels_batch = next(iter(val_ds))
first_image = image_batch[0]

# 6. Inspect the normalised data: print the min and max pixel value.
print(np.min(first_image), np.max(first_image))
Found 621 files belonging to 7 classes.
Using 497 files for training.
Found 621 files belonging to 7 classes.
Using 124 files for validation.
['lufei', 'luobin', 'namei', 'qiaoba', 'shanzhi', 'suolong', 'wusuopu']
(32, 224, 224, 3)
(32,)
- image_batch 是形状为 (32, 224, 224, 3) 的张量:这是一批 32 张、每张尺寸为 224x224x3 的图片(最后一维是彩色通道 RGB)。
- labels_batch 是形状为 (32,) 的张量,这些标签分别对应这 32 张图片。
5. 实例化VGG-16模型
# Size the classifier from the dataset instead of hard-coding it. The previous
# value (5) did not match the number of class folders found in the data, which
# leaves some labels outside the output range. The input size reuses the
# img_height / img_width used when loading the images.
model = vgg("vgg16", img_height, img_width, len(class_names))
model.summary()
模型参数:
6.设置优化器
# Adam optimizer with a learning rate of 1e-4.
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
# The model's last layer is Softmax, so it outputs probabilities rather than
# logits. from_logits must therefore be False; True would apply softmax a
# second time inside the loss.
model.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
7.训练模型
由于我是用CPU训练的,速度会很慢,所以这里只迭代了10次,识别准确率也不是很高
# Number of passes over the training data.
epochs = 10

# Train on train_ds and evaluate on val_ds after every epoch; the returned
# History object keeps the per-epoch metrics.
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)
Epoch 1/10
16/16 [] - 195s 12s/step - loss: 2.0244 - accuracy: 0.1408 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 2/10
16/16 [] - 195s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 3/10
16/16 [] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 4/10
16/16 [] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 5/10
16/16 [] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 6/10
16/16 [] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 7/10
16/16 [] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 8/10
16/16 [] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 9/10
16/16 [] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 10/10
16/16 [] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
8.模型评估
# Per-epoch metrics recorded during training.
metrics_log = history.history
acc, val_acc = metrics_log['accuracy'], metrics_log['val_accuracy']
loss, val_loss = metrics_log['loss'], metrics_log['val_loss']
epochs_range = range(epochs)

# One row, two panels: accuracy on the left, loss on the right.
# Each entry: (training curve, its label, validation curve, its label,
#              legend location, panel title).
panels = (
    (acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(12, 4))
for column, panel in enumerate(panels, start=1):
    train_curve, train_label, val_curve, val_label, legend_loc, title = panel
    plt.subplot(1, 2, column)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()
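可以看到,训练集和验证集的准确率均没有超过20%(模型结构本身应该没有问题,在其他数据集上取得过95%以上的识别准确率)。一个可能的原因是迭代次数还远远不够;但从第2轮开始 loss 和准确率就完全不再变化,说明模型基本没有在学习,因此还应检查损失函数的 from_logits 设置是否与模型的 Softmax 输出一致、训练集与验证集是否做了相同的归一化,以及 num_classes 是否与数据集的类别数一致。本文海贼王数据和部分思路来自(k同学啊)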