vgg16模型复用--dogs-vs-cats

89 阅读 0 评论 59 点赞

我是靠谱客的博主开心裙子，最近开发中收集的这篇文章主要介绍vgg16模型复用--dogs-vs-cats，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

数据准备

数据集由训练数据和测试数据组成：

训练数据包含猫和狗各12500张图片
测试数据包含12500张猫和狗的图片
下载地址：https://www.kaggle.com/c/dogs-vs-cats

说明：下载该数据需要连接外网
在这里插入图片描述

下载下来的数据需要在train文件夹下分成cat和dog两个子文件夹，将猫和狗的图片分别放入对应的子文件夹中

VGG结构

在这里插入图片描述

vgg模型的复用–dogs-vs-cats

数据的读入–utils.py文件

import tensorflow as tf
import numpy as np
import os
from vgg_preprocess import preprocess_for_train


def get_file(file_dir):
    """Collect image paths and binary labels from a class-per-folder tree.

    Every file found below a sub-directory of ``file_dir`` is treated as an
    image. Its label comes from the name of the directory that directly
    contains it: ``0`` when that directory is named ``cat``, ``1`` otherwise.
    Files lying directly in ``file_dir`` are skipped because they belong to no
    class folder.

    Args:
        file_dir: Root directory holding one sub-directory per class.

    Returns:
        A tuple ``(image_list, label_list)``: a list of file paths and a list
        of integer labels of the same length, both shuffled with the same
        random permutation. Two empty lists are returned when no image is
        found.
    """
    root_dir = os.path.normpath(file_dir)
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        current = os.path.normpath(root)
        if current == root_dir:
            # Files at the top level have no class folder to derive a label from.
            continue
        # Label each image from its own parent folder so that paths and labels
        # can never drift out of alignment, whatever order os.walk uses.
        label = 0 if os.path.basename(current) == 'cat' else 1
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels together with a single permutation.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list

# Default VGG16 input resolution.
img_width = 224
img_height = 224


def get_batch(image_list,label_list,img_width,img_height,batch_size,capacity):
    """Build a queue-based TF1 input pipeline yielding image/label batches.

    Args:
        image_list: Sequence of JPEG file paths.
        label_list: Sequence of integer labels, aligned with ``image_list``.
        img_width: Target image width passed to the preprocessing step.
        img_height: Target image height passed to the preprocessing step.
        batch_size: Number of examples per emitted batch.
        capacity: Maximum number of elements held in the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors produced by
        ``tf.train.batch``.
    """
    # Convert the Python lists to tensors of the dtypes the queue expects.
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    # Slice the two tensors in lockstep into a queue of (path, label) pairs.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # Decode the raw bytes into a 3-channel image tensor.
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Use the caller-supplied size instead of a hard-coded 224x224.
    # NOTE(review): assumes preprocess_for_train takes (image, height, width)
    # in that order - confirm against vgg_preprocess.
    image = preprocess_for_train(image, img_height, img_width)
    # Group single examples into batches using background reader threads.
    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity,
    )

    return image_batch, label_batch
def onehot(labels, n_class=None):
    """Encode integer class labels as a one-hot matrix.

    Args:
        labels: 1-D sequence or array of non-negative integer class ids.
        n_class: Number of columns in the result. When ``None`` it is inferred
            as ``max(labels) + 1``. Pass it explicitly when the output width
            must be fixed, since a batch that happens to miss the highest
            class would otherwise yield a narrower matrix.

    Returns:
        A float array of shape ``(len(labels), n_class)`` with a single ``1``
        per row. Empty input yields an array with zero rows.
    """
    labels = np.asarray(labels, dtype=int)
    n_sample = len(labels)
    if n_class is None:
        # max() of an empty sequence is undefined, so fall back to zero columns.
        n_class = int(labels.max()) + 1 if n_sample else 0
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels

定义VGG16模型类–VGG16_model.py文件

import tensorflow as tf
from tensorflow import float32 
import numpy as np
'''
修改VGG模型：全连接层的神经元个数 trainable参数变动
（1）预训练的VGG是在ImageNet数据集上训练的，用于对1000个类别进行判定。若希望将已训练模型用于其他分类任务，
需要修改最后的全连接层
（2）在进行Finetuning对模型重新训练时，对于部分不需要训练的层可以通过设置trainable=False来确保其在训练过程中不会被修改权值
'''
class vgg16:
    """VGG16 graph adapted for two-class fine-tuning (TF1 graph mode).

    The 13 convolution layers and the first two fully connected layers are
    created as non-trainable variables; only the final 2-unit layer is
    trainable. Every created weight/bias variable is appended to
    ``self.parameters`` in creation order so that ``load_weights`` can fill
    them by position.

    Attributes set by the constructor include ``fc8`` (raw scores of the last
    layer, without activation) and ``probs`` (softmax of ``fc8``).
    """

    def __init__(self, imgs):
        """Build the whole network on top of the image tensor ``imgs``."""
        # Ordered list of all variables, shared with load_weights().
        self.parameters = []
        self.imgs = imgs
        self.convlayers()
        self.fc_layers()
        # Class probabilities.
        self.probs = tf.nn.softmax(self.fc8)

    def saver(self):
        """Return a new ``tf.train.Saver`` for the current graph."""
        return tf.train.Saver()

    def maxpool(self, name, input_data):
        """Apply 2x2 max pooling with stride 2 and SAME padding."""
        out = tf.nn.max_pool(input_data, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME", name = name)
        return out

    def conv(self, name, input_data, out_channel, trainable=False):
        """Create a 3x3, stride-1 convolution followed by bias and ReLU.

        Args:
            name: Variable scope and op name of the layer.
            input_data: Input tensor; its last dimension is the channel count.
            out_channel: Number of output channels.
            trainable: Whether the kernel and bias are trainable.

        Returns:
            The activated output tensor.
        """
        in_channel = input_data.get_shape()[-1]
        with tf.variable_scope(name):
            # Honour the caller's flag instead of a hard-coded False.
            kernel = tf.get_variable("weights", [3, 3, in_channel, out_channel], dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable("biases", [out_channel], dtype=tf.float32, trainable=trainable)
            conv_res = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding="SAME")
            res = tf.nn.bias_add(conv_res, biases)
            out = tf.nn.relu(res, name = name)
        # Register the layer's variables for positional weight loading.
        self.parameters += [kernel, biases]
        return out

    def fc(self, name, input_data, out_channel, trainable=True, relu=True):
        """Create a fully connected layer.

        Args:
            name: Variable scope of the layer.
            input_data: Input tensor of rank 4 (flattened here) or rank 2.
            out_channel: Number of output units.
            trainable: Whether the weights and bias are trainable.
            relu: Apply ReLU to the output. Disable it for the final layer so
                that it yields unclipped scores suitable for softmax.

        Returns:
            The layer output, shaped ``[batch, out_channel]``.
        """
        shape = input_data.get_shape().as_list()
        if len(shape) == 4:
            # Number of input units after flattening height, width, channels.
            size = shape[-1] * shape[-2] * shape[-3]
        else:
            size = shape[1]
        # Flatten the input to [batch, size].
        input_data_flat = tf.reshape(input_data, [-1, size])
        with tf.variable_scope(name):
            weights = tf.get_variable(name="weights", shape=[size, out_channel], dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable(name="biases", shape=[out_channel], dtype=tf.float32, trainable=trainable)
            res = tf.matmul(input_data_flat, weights)
            out = tf.nn.bias_add(res, biases)
            if relu:
                out = tf.nn.relu(out)

        self.parameters += [weights, biases]
        return out

    def convlayers(self):
        """Build the five convolution stages, each ending in a max pool."""
        # conv1
        self.conv1_1 = self.conv('conv1_1',self.imgs,64,trainable=False)
        self.conv1_2 = self.conv('conv1_2',self.conv1_1,64,trainable=False)
        self.pool1 = self.maxpool('pool1',self.conv1_2)
        # conv2
        self.conv2_1 = self.conv('conv2_1',self.pool1,128,trainable=False)
        self.conv2_2 = self.conv('conv2_2',self.conv2_1,128,trainable=False)
        self.pool2 = self.maxpool('pool2',self.conv2_2)
        # conv3
        self.conv3_1 = self.conv('conv3_1',self.pool2,256,trainable=False)
        self.conv3_2 = self.conv('conv3_2',self.conv3_1,256,trainable=False)
        self.conv3_3 = self.conv('conv3_3',self.conv3_2,256,trainable=False)
        self.pool3 = self.maxpool('pool3',self.conv3_3)
        # conv4
        self.conv4_1 = self.conv('conv4_1',self.pool3,512,trainable=False)
        self.conv4_2 = self.conv('conv4_2',self.conv4_1,512,trainable=False)
        self.conv4_3 = self.conv('conv4_3',self.conv4_2,512,trainable=False)
        self.pool4 = self.maxpool('pool4',self.conv4_3)
        # conv5
        self.conv5_1 = self.conv('conv5_1',self.pool4,512,trainable=False)
        self.conv5_2 = self.conv('conv5_2',self.conv5_1,512,trainable=False)
        self.conv5_3 = self.conv('conv5_3',self.conv5_2,512,trainable=False)
        self.pool5 = self.maxpool('pool5',self.conv5_3)

    def fc_layers(self):
        """Build the three fully connected layers; only the last is trainable."""
        self.fc6 = self.fc('fc1',self.pool5,4096,trainable=False)
        self.fc7 = self.fc('fc2',self.fc6,4096,trainable=False)
        # The last layer emits raw scores: a ReLU here would clip them at zero
        # before softmax and block gradients for negative scores.
        self.fc8 = self.fc('fc3',self.fc7,2,trainable=True,relu=False)

    def load_weights(self, weight_file, sess):
        """Assign pretrained weights from an ``.npz`` archive by position.

        The archive keys are sorted and the i-th array is assigned to
        ``self.parameters[i]``. Positions 30 and 31 are skipped.

        Args:
            weight_file: Path to the ``.npz`` weight archive.
            sess: Session used to run the assign ops.
        """
        # NOTE(review): positions 30/31 are presumably the archive's fc8
        # weight and bias (1000-class layer) - verify against the npz keys.
        skipped = (30, 31)
        # The context manager closes the archive's file handle when done.
        with np.load(weight_file) as weights:
            for i, k in enumerate(sorted(weights.keys())):
                if i not in skipped:
                    sess.run(self.parameters[i].assign(weights[k]))
        print('---------weights loaded---------')

模型的重新训练与保存

import os
import tensorflow as tf
from time import time
# 上面实现的两个文件
import VGG16_model as model
import utils

startTime = time()
batch_size = 32
# Maximum number of elements held in the input queue
capacity = 256
# Per-channel RGB means subtracted during VGG preprocessing
means = [123.68, 116.779, 103.939]
# Number of training steps, and how often an intermediate checkpoint is written.
# With n_steps < save_every only the final checkpoint is saved.
n_steps = 100
save_every = 500

# Collect the image path list and the label list
xs, ys = utils.get_file("./dog-vs-cat/train/")
# Build the batched input pipeline from those lists
image_batch, label_batch = utils.get_batch(xs, ys, 224, 224, batch_size, capacity)
# Placeholders: images, and integer class ids (0 or 1) rather than one-hot rows,
# so the label shape cannot depend on which classes appear in a batch
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y = tf.placeholder(tf.int32, [None])

vgg = model.vgg16(x)
# The cross-entropy op applies softmax itself, so it must receive the last
# layer's scores (fc8), not the already-normalised probabilities (probs)
fc8_finetuining = vgg.fc8
# Loss function
loss_function = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc8_finetuining, labels=y))
# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss_function)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Inject the pretrained VGG weights from the npz archive to reuse them
vgg.load_weights("./vgg16/vgg16_weights.npz", sess)
saver = tf.train.Saver()

# Make sure the checkpoint directory exists before anything is saved
if not os.path.isdir("model/"):
    os.makedirs("model/")

# Start the queue-runner threads, managed by a Coordinator
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)

try:
    epoch_start_time = time()

    for i in range(n_steps):
        images, labels = sess.run([image_batch, label_batch])

        # One run performs the update and returns the loss of this batch,
        # avoiding a second full forward pass
        _, loss = sess.run([optimizer, loss_function], feed_dict={x: images, y: labels})
        print("Now the loss is %f" % loss)

        epoch_end_time = time()
        print("Current epoch takes: ", (epoch_end_time - epoch_start_time))
        epoch_start_time = epoch_end_time

        if (i+1) % save_every == 0:
            saver.save(sess, os.path.join("model/", "epoch {:06d}.ckpt".format(i)))
        print("----------Epoch %d is finished----------" % i)

    saver.save(sess, "model/")
    print("Optimization Finished")

    duration = time() - startTime
    # Two decimal places
    print("Train Finished takes:", "{:.2f}".format(duration))
finally:
    # Ask the reader threads to stop and release the session even on failure
    coord.request_stop()
    coord.join(threads)
    sess.close()

参考视频：深度学习应用开发TensorFlow实践