【机器视觉】VGG-16实现海贼王人物识别

发布于:2022-10-17 ⋅ 阅读:(513) ⋅ 点赞:(0)


一、环境准备

  • 语言环境:Python3.8.0
  • 开发工具(IDE):PyCharm 2021
  • 深度学习环境:TensorFlow-gpu 2.5

二、代码编写

1. 准备VGG模型(将下面的代码保存为 VGG_Model.py,供后文 from VGG_Model import vgg 导入)

# -*-coding:utf-8-*-
from tensorflow.keras import layers, Model, Sequential

# Serialized VarianceScaling initializer spec; it is passed as
# `kernel_initializer` to the Conv2D layers built in this file.
CONV_KERNEL_INITIALIZER = dict(
    class_name='VarianceScaling',
    config=dict(
        scale=2.0,
        mode='fan_out',
        distribution='truncated_normal',
    ),
)

# Serialized VarianceScaling initializer spec; it is passed as
# `kernel_initializer` to the Dense layers of the classification head.
DENSE_KERNEL_INITIALIZER = dict(
    class_name='VarianceScaling',
    config=dict(
        scale=1. / 3.,
        mode='fan_out',
        distribution='uniform',
    ),
)

# Classification head / prediction layers
def VGG(feature, im_height=224, im_width=224, num_classes=1000):
    """Build the full model: feature extractor followed by the classifier head.

    Args:
        feature: Callable layer/model applied to the input image tensor
            (in this file, the Sequential returned by ``make_feature``).
        im_height: Height of the input images.
        im_width: Width of the input images.
        num_classes: Number of units in the final Dense layer.

    Returns:
        A Keras ``Model`` whose input is a float32 tensor of shape
        ``(im_height, im_width, 3)`` and whose output is the Softmax
        activation of the last Dense layer, i.e. class probabilities.
    """
    # TensorFlow tensors use the NHWC channel ordering.
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")

    # Run the feature extractor, then flatten its output for the Dense layers.
    features = feature(input_image)
    hidden = layers.Flatten()(features)

    # Two identical stages: drop 50% of the activations (to reduce
    # overfitting), then a 2048-unit fully connected layer with ReLU.
    for _ in range(2):
        hidden = layers.Dropout(rate=0.5)(hidden)
        hidden = layers.Dense(2048, activation='relu',
                              kernel_initializer=DENSE_KERNEL_INITIALIZER)(hidden)

    # Final projection to one score per class, turned into a probability
    # distribution by Softmax.
    scores = layers.Dense(num_classes,
                          kernel_initializer=DENSE_KERNEL_INITIALIZER)(hidden)
    probabilities = layers.Softmax()(scores)

    return Model(inputs=input_image, outputs=probabilities)

# Feature-extractor builder
def make_feature(cfg):
    """Turn a VGG layer configuration into a Sequential feature extractor.

    Args:
        cfg: Iterable of layer specs. The string ``"M"`` produces a 2x2
            max-pooling layer with stride 2; any other value is used as the
            filter count of a 3x3, SAME-padded, ReLU-activated Conv2D layer.

    Returns:
        A ``Sequential`` model named ``"feature"`` containing the layers in
        the order given by ``cfg``.
    """
    def build_layer(spec):
        # "M" marks a pooling layer; everything else is a convolution width.
        if spec == "M":
            return layers.MaxPool2D(pool_size=2, strides=2)
        return layers.Conv2D(spec, kernel_size=3, padding="SAME", activation="relu",
                             kernel_initializer=CONV_KERNEL_INITIALIZER)

    return Sequential([build_layer(spec) for spec in cfg], name="feature")

# Layer configurations of the supported VGG variants.
# Every variant has five stages with the output widths below; a stage is
# `depth` convolutions of that width followed by one max-pooling layer ('M').
_STAGE_WIDTHS = (64, 128, 256, 512, 512)

# Number of convolutions in each of the five stages, per variant.
_STAGE_DEPTHS = {
    'vgg11': (1, 1, 2, 2, 2),
    'vgg13': (2, 2, 2, 2, 2),
    'vgg16': (2, 2, 3, 3, 3),
    'vgg19': (2, 2, 4, 4, 4),
}

cfgs = {
    variant: [
        entry
        for width, depth in zip(_STAGE_WIDTHS, depths)
        for entry in [width] * depth + ['M']
    ]
    for variant, depths in _STAGE_DEPTHS.items()
}

# Model factory; uses the vgg16 layout by default.
def vgg(model_name="vgg16", im_height=224, im_width=224, num_classes=1000):
    """Create a VGG model for the named configuration.

    Args:
        model_name: Key into ``cfgs`` selecting the layer layout.
        im_height: Height of the input images.
        im_width: Width of the input images.
        num_classes: Number of image classes to predict.

    Returns:
        The model produced by ``VGG`` for the selected feature extractor.

    Raises:
        AssertionError: If ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "not support model {}".format(model_name)
    feature_extractor = make_feature(cfgs[model_name])
    return VGG(
        feature_extractor,
        im_height=im_height,
        im_width=im_width,
        num_classes=num_classes,
    )

2. 导入相关库

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers,models
import pathlib
from VGG_Model import vgg

3.导入数据

# Seed NumPy and TensorFlow so that results are as reproducible as possible.
np.random.seed(520)
tf.random.set_seed(520)

# Dataset location (replace the placeholder with the real path).
data_dir = r"你的数据集路径"
data_dir = pathlib.Path(data_dir)

# GPU setup: enable memory growth on every GPU TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)
        # `exit()` is a helper installed by the `site` module for interactive
        # sessions and is not guaranteed to exist (e.g. under `python -S`);
        # raising SystemExit terminates with the same status without it.
        raise SystemExit(-1)

# Inspect the data: count the PNG files one directory level below data_dir.
imageCount = len(list(data_dir.glob('*/*.png')))
print("图片的总数为:",imageCount)

在这里插入图片描述
在这里插入图片描述

4.数据预处理

# 1. Load the data
batch_size = 32
img_height = 224
img_width = 224

# The training and validation loaders differ only in `subset`; every other
# argument (including the seed and the 80/20 split) is shared.
_split_kwargs = dict(
    validation_split=0.2,
    seed=520,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

# Class names as reported by the training dataset.
class_names = train_ds.class_names
print(class_names)

# 2. Visualize the data: show up to 8 images of the first training batch in
# a 2x4 grid, each titled with its class name.
plt.figure(figsize=(10,5),dpi=100)

for images,labels in train_ds.take(1):
    # Never index past the end of a batch that holds fewer than 8 images.
    for i in range(min(8, images.shape[0])):
        ax = plt.subplot(2,4,i+1)
        ax.imshow(images[i].numpy().astype("uint8"))
        ax.set_title(class_names[int(labels[i])])
        ax.axis("off")
    # The original drew images[1] once more after the loop, which painted it
    # over the last subplot under the wrong title; that call is removed.

# Display the figure when run as a plain script.
plt.show()

# 3. Check the data again: print the shapes of the first image batch and of
# its label batch.
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

# 4. Configure the datasets
#   cache():    keep the dataset in memory to speed up later epochs
#   shuffle():  randomize the order of the elements
#   prefetch(): prepare upcoming elements while the current one is consumed
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache -> shuffle -> prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: cache -> prefetch (no shuffling).
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# 5. Normalization: rescale pixel values by 1/255.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)
normalization_train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
# Rebind train_ds to the rescaled dataset. Previously only val_ds was
# replaced, so code that trains on `train_ds` received raw [0, 255] pixels
# while validating on [0, 1] pixels. `normalization_train_ds` is kept as an
# alias for backward compatibility.
train_ds = normalization_train_ds
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(val_ds))
first_image = image_batch[0]

# 6. Inspect the normalized data: print the min and max pixel value of the
# first validation image.
print(np.min(first_image), np.max(first_image))

Found 621 files belonging to 7 classes.
Using 497 files for training.
Found 621 files belonging to 7 classes.
Using 124 files for validation.
['lufei', 'luobin', 'namei', 'qiaoba', 'shanzhi', 'suolong', 'wusuopu']
在这里插入图片描述

(32, 224, 224, 3)
(32,)

  • Image_batch是形状为(32,224,224,3)的张量。这是一批形状为224x224x3的32张图片(最后一维指的是彩色通道RGB)。
  • Label_batch是形状(32,)的张量,这些标签对应32张图片

5. 实例化VGG-16模型

在这里插入图片描述

在这里插入图片描述

# Size the output layer from the dataset instead of hard-coding it: the
# original passed 5 classes while the data loader reports 7, which leaves
# labels 5 and 6 without an output unit. The input size reuses the values
# the images were loaded with.
model = vgg("vgg16", img_height, img_width, len(class_names))
model.summary()

模型参数:
在这里插入图片描述

6.设置优化器

# Adam optimizer with a small learning rate.
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)

# The model already ends in a Softmax layer, so its outputs are
# probabilities, not logits. from_logits must therefore be False; with True
# the loss would apply softmax a second time.
model.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

7.训练模型

由于我是用CPU训练的,速度会很慢,所以这里只迭代了10次,识别准确率也不是很高

# Number of full passes over the training dataset.
epochs = 10

# Fit on the training dataset and evaluate on the validation dataset; the
# returned History object holds the recorded metrics.
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

Epoch 1/10
16/16 [==============================] - 195s 12s/step - loss: 2.0244 - accuracy: 0.1408 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 2/10
16/16 [==============================] - 195s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 3/10
16/16 [==============================] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 4/10
16/16 [==============================] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 5/10
16/16 [==============================] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 6/10
16/16 [==============================] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 7/10
16/16 [==============================] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 8/10
16/16 [==============================] - 196s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 9/10
16/16 [==============================] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935
Epoch 10/10
16/16 [==============================] - 194s 12s/step - loss: 2.0326 - accuracy: 0.1328 - val_loss: 1.9719 - val_accuracy: 0.1935

8.模型评估

# Per-epoch metrics recorded during training.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# X axis: one point per epoch.
epochs_range = range(epochs)

# Two side-by-side panels: accuracy on the left, loss on the right.
# Each entry: (training curve, validation curve, training label,
#              validation label, legend location, panel title).
_panels = [
    (acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, val_loss, 'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
]

plt.figure(figsize=(12, 4))
for _position, (_train_curve, _val_curve, _train_label, _val_label,
                _legend_loc, _title) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(epochs_range, _train_curve, label=_train_label)
    plt.plot(epochs_range, _val_curve, label=_val_label)
    plt.legend(loc=_legend_loc)
    plt.title(_title)
plt.show()

在这里插入图片描述
可以看到,训练集和验证集的准确率均没有超过20%(模型应该是没有问题的,在其他训练集上取得过95%以上的识别准确率),应该是迭代次数还远远不够。本文海贼王数据和部分思路来自(k同学啊)。


网站公告

今日签到

点亮在社区的每一天
去签到