卷积成神之路多亏二维码

发布于:2024-03-23 ⋅ 阅读:(71) ⋅ 点赞:(0)

卷积成神之路多亏二维码


(此处原有一张配图,图片说明未填写)

项目地址

https://aistudio.baidu.com/projectdetail/7614096?contributionType=1

训练代码

import numpy as np
import qrcode
import json
from multiprocessing import Pool, Manager, freeze_support
import pandas as pd
import cv2

from glob import glob

from tqdm import tqdm


def generate_qrcode(data, image_name):
    """Render ``data`` as a QR code and write the picture to ``image_name``.

    The symbol is drawn with green modules on a white background, using
    ``box_size=8``, ``border=2`` and the ``ERROR_CORRECT_L`` correction level.

    Args:
        data: Payload handed to ``QRCode.add_data``.
        image_name: Destination path handed to the image's ``save`` method.
    """
    settings = {
        "version": 1,
        "error_correction": qrcode.constants.ERROR_CORRECT_L,
        "box_size": 8,
        "border": 2,
    }
    code = qrcode.QRCode(**settings)
    code.add_data(data)
    # version=1 is only the starting size; fit=True lets the library grow the
    # symbol when the payload does not fit (per the qrcode package docs).
    code.make(fit=True)
    code.make_image(fill_color="green", back_color="white").save(image_name)


def gen_image(j, one, total_list):
    """Create one QR image per growing prefix of ``one`` and record the samples.

    For every position ``i`` except the last, the prefix ``one[:i + 1]`` is
    rendered to ``/home/aistudio/text_image/{j}_{i}.png`` and the pair
    ``[next element, image path]`` is appended to ``total_list``.

    Args:
        j: Index of the text; used in the file name and the progress print.
        one: Sequence of characters (a string in the caller shown in this file).
        total_list: List-like collector that receives ``[label, path]`` pairs.
    """
    prefix = ""
    # Pair every element with its successor: the successor is the label.
    for i, (char, label) in enumerate(zip(one, one[1:])):
        print(j, i)
        prefix += char
        image_path = "/home/aistudio/text_image/{}_{}.png".format(j, i)
        generate_qrcode(prefix, image_path)
        total_list.append([label, image_path])


def gen_text_to_image():
    """Build the QR-code training set from ``唐诗.json`` and pickle its index.

    Reads the JSON file, takes field ``4`` of every record as the poem text
    (newlines removed), keeps only texts of exactly 32 characters, and fans the
    rendering out to 7 worker processes through ``gen_image``. The collected
    ``[label, image path]`` pairs are written to ``image_data_set.pkl``.

    Worker failures are reported on stdout instead of being silently dropped;
    the pickle is still written with whatever samples succeeded.
    """
    with open('唐诗.json', 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    poems = [record[4].replace('\n', '') for record in dataset]
    # The former "32 <= len <= 72" pre-filter was dead code: it was always
    # followed by this stricter equality test.
    poems = [poem for poem in poems if len(poem) == 32]

    def report_failure(exc):
        # Runs in the parent process; without it apply_async swallows errors.
        print("gen_image failed: {!r}".format(exc))

    # Context managers make sure the manager process and the pool are released.
    with Manager() as manager, Pool(processes=7) as pool:
        total_list = manager.list()
        for j, one in enumerate(poems):
            pool.apply_async(
                gen_image,
                args=(j, one, total_list),
                error_callback=report_failure,
            )
        pool.close()
        pool.join()
        # Copy out of the proxy before the manager shuts down.
        samples = list(total_list)

    pd.to_pickle(samples, "image_data_set.pkl")


import paddle


class VlmBlock(paddle.nn.Layer):
    """Three parallel 3x3 convolutions merged into one activation map.

    Two branches produce ``output_dim`` channels each and are concatenated on
    axis 1; a third branch produces ``2 * output_dim`` channels and is added to
    that concatenation before the ReLU. The block therefore emits
    ``2 * output_dim`` channels. With ``down_flag`` set, a 2x2 max-pool with
    stride 2 follows.

    Args:
        input_dim: Number of input channels.
        output_dim: Channels per concatenated branch (output has twice as many).
        down_flag: Whether to apply the max-pool after the activation.
    """

    def __init__(self, input_dim, output_dim, down_flag):
        super().__init__()
        self.down_flag = down_flag
        # Shared settings: 3x3 kernel, padding 1, no bias.
        conv_options = {"padding": 1, "bias_attr": False}
        self.one_layer = paddle.nn.Conv2D(input_dim, output_dim, 3, **conv_options)
        self.two_layer = paddle.nn.Conv2D(input_dim, output_dim, 3, **conv_options)
        self.three_layer = paddle.nn.Conv2D(input_dim, 2 * output_dim, 3, **conv_options)
        if down_flag:
            # Only registered when requested, so it only appears in such blocks.
            self.down_layer = paddle.nn.MaxPool2D(2, 2)
        self.relu = paddle.nn.ReLU()

    def forward(self, x):
        """Apply the three branches, merge them and optionally down-sample."""
        stacked = paddle.concat([self.one_layer(x), self.two_layer(x)], axis=1)
        activated = self.relu(stacked + self.three_layer(x))
        if not self.down_flag:
            return activated
        return self.down_layer(activated)


class VLM(paddle.nn.Layer):
    """Six stacked ``VlmBlock`` stages followed by two linear heads.

    Channel widths go 1 -> 6 -> 12 -> 24 -> 24 -> 24 -> 24, and four of the
    stages down-sample. Both heads read the same flattened feature vector of
    size 1536, which matches 24 channels on an 8x8 map, i.e. the 1x128x128
    inputs fed by the scripts in this file.

    Args:
        class_num: Output size of each linear head.
    """

    def __init__(self, class_num):
        super().__init__()
        self.one_layer = VlmBlock(1, 3, True)
        self.two_layer = VlmBlock(6, 6, False)
        self.three_layer = VlmBlock(12, 12, True)
        self.four_layer = VlmBlock(24, 12, False)
        self.five_layer = VlmBlock(24, 12, True)
        self.six_layer = VlmBlock(24, 12, True)

        # Two independent heads over the same 1536 flattened features.
        self.out_layer = paddle.nn.Linear(1536, class_num)
        self.out_layer1 = paddle.nn.Linear(1536, class_num)

    def forward(self, x):
        """Return the pair ``(out_layer(features), out_layer1(features))``."""
        stages = (
            self.one_layer,
            self.two_layer,
            self.three_layer,
            self.four_layer,
            self.five_layer,
            self.six_layer,
        )
        for stage in stages:
            x = stage(x)
        features = x.reshape([x.shape[0], -1])
        return self.out_layer(features), self.out_layer1(features)


class VlmLoss(paddle.nn.Layer):
    """Cross-entropy on first-head outputs gated by the second head.

    The first argument is multiplied element-wise by ``1 - softmax(xx)`` before
    being passed to ``paddle.nn.functional.cross_entropy`` with its defaults.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, y, xx):
        """Compute the gated loss.

        Args:
            x: Outputs that get gated (the training script passes the model's
                first head).
            y: Target class ids.
            xx: Outputs turned into the gate (the training script passes the
                model's second head).
        """
        gate = 1 - paddle.nn.functional.softmax(xx, axis=-1)
        return paddle.nn.functional.cross_entropy(x * gate, y)


def reshape_data(pattern="E:/text_image/*", size=(128, 128)):
    """Resize every image matched by ``pattern`` in place, as grayscale.

    Each file is read with OpenCV in grayscale mode, resized to ``size`` and
    written back over the original path. Files OpenCV cannot decode are left
    untouched and listed at the end instead of crashing the whole run.

    Args:
        pattern: Glob pattern selecting the files to process.
        size: Target ``(width, height)`` passed to ``cv2.resize``.
    """
    skipped = []
    for image_path in tqdm(glob(pattern)):
        image = cv2.imread(image_path, 0)
        if image is None:
            # imread signals failure by returning None rather than raising.
            skipped.append(image_path)
            continue
        cv2.imwrite(image_path, cv2.resize(image, size))
    if skipped:
        print("reshape_data skipped {} unreadable file(s), e.g. {}".format(
            len(skipped), skipped[:5]))


if __name__ == '__main__':
    # Training entry point: load the pickled [label, image path] index, cache
    # all images in memory, then train VLM for `epoch` shuffled passes.
    import os  # only needed here, for the checkpoint existence check

    freeze_support()
    # One-off dataset preparation steps; enable them only to (re)build the data.
    # gen_text_to_image()
    # reshape_data()

    # NOTE: pickle executes code on load; only read index files you trust.
    path = pd.read_pickle("/home/aistudio/data/data263456/image_data_set.pkl")
    voc = sorted(set([i[0] for i in path]))
    # Dict lookup replaces a linear voc.index() scan for every sample.
    voc_to_id = {token: idx for idx, token in enumerate(voc)}

    path_index = []       # [label id, sample id] pairs, reshuffled per epoch
    path_index_data = []  # grayscale image for each sample id
    for sample_id, sample in enumerate(path):
        # The index stores E:/ paths; map them to the directory used here.
        image_file = sample[1].replace("E:/text_image/", "/home/aistudio/text_image/")
        image = cv2.imread(image_file, 0)
        if image is None:
            # Fail early with the offending path instead of a tensor error later.
            raise OSError("cannot read training image: {}".format(image_file))
        path_index.append([voc_to_id[sample[0]], sample_id])
        path_index_data.append(image)

    epoch = 10
    batch_size = 800 * 5
    data_set = []
    for _ in range(epoch):
        np.random.shuffle(path_index)
        # Snapshot this epoch's order; the next shuffle reorders path_index.
        data_set += [list(pair) for pair in path_index]

    vlm = VLM(class_num=len(voc))
    checkpoint = "/home/aistudio/vlm.pdparams"
    if os.path.exists(checkpoint):
        # Resume when a checkpoint exists; the first run starts from scratch.
        vlm.load_dict(paddle.load(checkpoint))
    optimizer = paddle.optimizer.Adam(parameters=vlm.parameters(), learning_rate=0.0002)
    loss_func = VlmLoss()

    bar = tqdm(range(0, len(data_set), batch_size))
    for start in bar:
        batch_data = data_set[start:start + batch_size]
        batch_label = [pair[0] for pair in batch_data]
        batch_images = np.stack([path_index_data[pair[1]] for pair in batch_data])
        # Pixels are scaled by 1/256; inference must use the same factor.
        batch_input = paddle.to_tensor(batch_images).astype('float32') / 256
        batch_label = paddle.to_tensor(np.array(batch_label).astype(int)).astype('int64')
        vlm_out = vlm(batch_input.reshape([-1, 1, 128, 128]))
        loss = loss_func(vlm_out[0], batch_label, vlm_out[1])
        bar.set_description("loss: {:.4f}".format(loss.item()))
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

    # NOTE: saved relative to the working directory, while the resume path
    # above is absolute; they coincide only when run from /home/aistudio.
    paddle.save(vlm.state_dict(), 'vlm.pdparams')

验证代码

import numpy as np
import qrcode
import json
from multiprocessing import Pool, Manager, freeze_support
import pandas as pd
import cv2

from glob import glob

from tqdm import tqdm


def generate_qrcode(data, image_name):
    """Render ``data`` as a QR code and save it to ``image_name``.

    Delegates the rendering to ``generate_qrcode_x`` so the saved and the
    in-memory variants always share one QR configuration.

    Args:
        data: Payload to encode.
        image_name: Destination path handed to the image's ``save`` method.
    """
    generate_qrcode_x(data).save(image_name)

def generate_qrcode_x(data):
    """Render ``data`` as a QR code and return the image without saving it.

    Uses the same settings as ``generate_qrcode``: ``ERROR_CORRECT_L``,
    ``box_size=8``, ``border=2``, green modules on a white background.

    Args:
        data: Payload handed to ``QRCode.add_data``.

    Returns:
        The image object produced by ``QRCode.make_image``.
    """
    builder = qrcode.QRCode(
        border=2,
        box_size=8,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        version=1,
    )
    builder.add_data(data)
    # fit=True lets the library pick a larger version than 1 when needed
    # (per the qrcode package docs).
    builder.make(fit=True)
    # Hand the rendered image back to the caller; nothing is written to disk.
    return builder.make_image(back_color="white", fill_color="green")
def gen_image(j, one, total_list):
    """Render every proper prefix of ``one`` to a QR image and log the samples.

    Step ``i`` encodes the first ``i + 1`` elements of ``one`` into
    ``/home/aistudio/text_image/{j}_{i}.png`` and appends
    ``[one[i + 1], image path]`` to ``total_list``; the last element of ``one``
    only ever serves as a label.

    Args:
        j: Index of the text, used for the file name and the progress print.
        one: Sequence of characters to unroll.
        total_list: Collector receiving the ``[label, path]`` pairs.
    """
    text_so_far = ""
    for i, char in enumerate(one[:-1]):
        print(j, i)
        text_so_far += char
        target = "/home/aistudio/text_image/{}_{}.png".format(j, i)
        generate_qrcode(text_so_far, target)
        # The element following the prefix is what the model should predict.
        total_list.append([one[i + 1], target])


def gen_text_to_image():
    """Generate the QR-code dataset from ``唐诗.json`` and pickle the sample index.

    Field ``4`` of each JSON record is taken as the text (newlines stripped);
    only texts that are exactly 32 characters long are kept. Rendering is
    distributed over 7 worker processes via ``gen_image`` and the resulting
    ``[label, image path]`` pairs are stored in ``image_data_set.pkl``.

    Exceptions raised inside workers are printed rather than discarded; the
    pickle is written from the samples that did succeed.
    """
    with open('唐诗.json', 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    # A preceding "32 <= len <= 72" filter was redundant with this equality
    # test and has been folded into it.
    texts = [
        text
        for text in (record[4].replace('\n', '') for record in dataset)
        if len(text) == 32
    ]

    def on_worker_error(exc):
        # Invoked in the parent process when a task raises.
        print("gen_image failed: {!r}".format(exc))

    # Both the manager process and the pool are cleaned up on exit.
    with Manager() as manager, Pool(processes=7) as pool:
        total_list = manager.list()
        for j, one in enumerate(texts):
            pool.apply_async(
                gen_image,
                args=(j, one, total_list),
                error_callback=on_worker_error,
            )
        pool.close()
        pool.join()
        # The proxy becomes unusable once the manager exits, so copy now.
        collected = list(total_list)

    pd.to_pickle(collected, "image_data_set.pkl")


import paddle


class VlmBlock(paddle.nn.Layer):
    """Convolutional block: two concatenated 3x3 branches plus a wide 3x3 branch.

    ``one_layer`` and ``two_layer`` each map ``input_dim`` to ``output_dim``
    channels; their outputs are concatenated on axis 1 and summed with
    ``three_layer``, which maps to ``2 * output_dim`` channels. A ReLU follows,
    then an optional 2x2 / stride-2 max-pool.

    Args:
        input_dim: Number of input channels.
        output_dim: Channels of each narrow branch; the block outputs twice that.
        down_flag: When true, the max-pool is applied after the ReLU.
    """

    def __init__(self, input_dim, output_dim, down_flag):
        super().__init__()
        self.down_flag = down_flag

        def conv(out_channels):
            # 3x3 kernel with padding 1 and no bias, shared by all branches.
            return paddle.nn.Conv2D(input_dim, out_channels, 3, padding=1, bias_attr=False)

        self.one_layer = conv(output_dim)
        self.two_layer = conv(output_dim)
        self.three_layer = conv(2 * output_dim)
        if down_flag:
            self.down_layer = paddle.nn.MaxPool2D(2, 2)
        self.relu = paddle.nn.ReLU()

    def forward(self, x):
        """Run the block on ``x`` and return the (optionally pooled) result."""
        narrow_a = self.one_layer(x)
        narrow_b = self.two_layer(x)
        wide = self.three_layer(x)
        merged = self.relu(paddle.concat([narrow_a, narrow_b], axis=1) + wide)
        return self.down_layer(merged) if self.down_flag else merged


class VLM(paddle.nn.Layer):
    """Backbone of six ``VlmBlock`` stages with two parallel linear heads.

    Stages one, three, five and six down-sample. The heads expect 1536
    flattened features, which corresponds to the 24-channel 8x8 map produced
    from the 1x128x128 inputs used in this file.

    Args:
        class_num: Number of outputs of each head.
    """

    def __init__(self, class_num):
        super().__init__()
        self.one_layer = VlmBlock(1, 3, True)
        self.two_layer = VlmBlock(6, 6, False)
        self.three_layer = VlmBlock(12, 12, True)
        self.four_layer = VlmBlock(24, 12, False)
        self.five_layer = VlmBlock(24, 12, True)
        self.six_layer = VlmBlock(24, 12, True)

        # Two heads sharing the same flattened input.
        self.out_layer = paddle.nn.Linear(1536, class_num)
        self.out_layer1 = paddle.nn.Linear(1536, class_num)

    def forward(self, x):
        """Return ``(out_layer(flat), out_layer1(flat))`` for the input batch."""
        hidden = self.six_layer(
            self.five_layer(
                self.four_layer(
                    self.three_layer(
                        self.two_layer(
                            self.one_layer(x))))))
        # Flatten everything but the batch axis once and feed both heads.
        flat = hidden.reshape([hidden.shape[0], -1])
        primary = self.out_layer(flat)
        secondary = self.out_layer1(flat)
        return primary, secondary


class VlmLoss(paddle.nn.Layer):
    """Loss layer: cross-entropy of ``x`` scaled by ``1 - softmax(xx)``."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y, xx):
        """Return the cross-entropy of the gated outputs against ``y``.

        Args:
            x: Outputs to be scaled before the cross-entropy.
            y: Target class ids.
            xx: Outputs whose softmax (over the last axis) forms the gate.
        """
        suppress = paddle.nn.functional.softmax(xx, axis=-1)
        gated = x * (1 - suppress)
        loss = paddle.nn.functional.cross_entropy(gated, y)
        return loss


def reshape_data(pattern="E:/text_image/*", size=(128, 128)):
    """Overwrite each image matched by ``pattern`` with a resized grayscale copy.

    Files are loaded with OpenCV in grayscale mode, resized to ``size`` and
    saved back to the same path. Anything OpenCV cannot decode is skipped and
    reported after the loop rather than aborting with an error from
    ``cv2.resize``.

    Args:
        pattern: Glob pattern of the files to convert.
        size: ``(width, height)`` target passed to ``cv2.resize``.
    """
    unreadable = []
    for file_name in tqdm(glob(pattern)):
        pixels = cv2.imread(file_name, 0)
        if pixels is None:
            # cv2.imread returns None on failure instead of raising.
            unreadable.append(file_name)
            continue
        cv2.imwrite(file_name, cv2.resize(pixels, size))
    if unreadable:
        print("reshape_data skipped {} unreadable file(s), e.g. {}".format(
            len(unreadable), unreadable[:5]))


if __name__ == '__main__':
    # Inference entry point: rebuild the vocabulary from the pickled index,
    # load the trained weights and extend a seed character one step at a time.
    freeze_support()
    # One-off dataset preparation steps; not needed for inference.
    # gen_text_to_image()
    # reshape_data()

    # NOTE: pickle executes code on load; only read index files you trust.
    path = pd.read_pickle("/home/aistudio/data/data263456/image_data_set.pkl")
    # Must be built exactly like in training so class ids map to the same tokens.
    voc = sorted(set([i[0] for i in path]))

    vlm = VLM(class_num=len(voc))
    vlm.load_dict(paddle.load("/home/aistudio/vlm.pdparams"))
    vlm.eval()
    word = "我"
    # No gradients are needed for generation; skip building the autograd graph.
    with paddle.no_grad():
        # The training texts are 32 characters long: seed + 31 predictions.
        for _ in range(31):
            rgb_image = np.array(generate_qrcode_x(word))
            # Resize, then convert straight from RGB to gray; this equals the
            # former RGB->BGR->resize->BGR->GRAY chain.
            gray_image = cv2.cvtColor(cv2.resize(rgb_image, (128, 128)), cv2.COLOR_RGB2GRAY)
            # Scale by 1/256 to match the normalisation used by the training script.
            scaled = gray_image / 256
            model_input = paddle.to_tensor(scaled).astype('float32').reshape([1, 1, 128, 128])
            vlm_out = vlm(model_input)
            # Same gating as VlmLoss: first head scaled by 1 - softmax(second head).
            scores = vlm_out[0] * (1 - paddle.nn.functional.softmax(vlm_out[1], axis=-1))
            # Convert to a plain int before indexing the Python list.
            next_id = paddle.argmax(scores, -1).item()
            word += voc[next_id]
            print(word)


本文含有隐藏内容,请 开通VIP 后查看