Using Multiple Outputs or Multiple Losses with Keras Models: Two Examples

Overview

Some models compute a loss over several outputs. A typical example is the HED edge-detection network, which was introduced in detail in an earlier post. HED produces several side outputs plus a fused output; Example 1 below shows how it is written.
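Before the full examples, here is a minimal sketch of the general pattern (the toy layer sizes and names are illustrative assumptions, not from the original post): name each output layer, pass the outputs as a list to Model, then give compile() a dict mapping each output name to its loss, optionally with per-output loss weights.

from keras.layers import Input, Dense
from keras.models import Model

inp = Input(shape=(16,), name='input')
h = Dense(32, activation='relu')(inp)
out_a = Dense(1, activation='sigmoid', name='out_a')(h)   # e.g. a binary head
out_b = Dense(10, activation='softmax', name='out_b')(h)  # e.g. a 10-class head

model = Model(inputs=[inp], outputs=[out_a, out_b])
model.compile(optimizer='adam',
              loss={'out_a': 'binary_crossentropy',
                    'out_b': 'categorical_crossentropy'},
              loss_weights={'out_a': 1.0, 'out_b': 0.5})
# training then takes one label array per named output:
# model.fit(x, {'out_a': y_a, 'out_b': y_b}, batch_size=32, epochs=10)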

Example 1


# Keras 2.x / TensorFlow 1.x era imports needed to run this snippet.
# side_branch, cross_entropy_balanced and ofuse_pixel_error are helper
# functions from the HED implementation; sketches are given after the model.
from keras.layers import Input, Conv2D, MaxPooling2D, Concatenate, Activation
from keras.models import Model
from keras.optimizers import Adam


def hed():
    # Input
    img_input = Input(shape=(480, 480, 3), name='input')

    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    b1 = side_branch(x, 1) # 480 480 1
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block1_pool')(x) # 240 240 64

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    b2 = side_branch(x, 2) # 480 480 1
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block2_pool')(x) # 120 120 128

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    b3 = side_branch(x, 4) # 480 480 1
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block3_pool')(x) # 60 60 256

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    b4 = side_branch(x, 8) # 480 480 1
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='block4_pool')(x) # 30 30 512

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) # 30 30 512
    b5 = side_branch(x, 16) # 480 480 1

    # fuse
    fuse = Concatenate(axis=-1)([b1, b2, b3, b4, b5])
    fuse = Conv2D(1, (1,1), padding='same', use_bias=False, activation=None)(fuse) # 480 480 1

    # outputs
    o1    = Activation('sigmoid', name='o1')(b1)
    o2    = Activation('sigmoid', name='o2')(b2)
    o3    = Activation('sigmoid', name='o3')(b3)
    o4    = Activation('sigmoid', name='o4')(b4)
    o5    = Activation('sigmoid', name='o5')(b5)
    ofuse = Activation('sigmoid', name='ofuse')(fuse)


    # model
    model = Model(inputs=[img_input], outputs=[o1, o2, o3, o4, o5, ofuse])
    filepath = './models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
    # load_weights_from_hdf5_group_by_name(model, filepath)
    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0001)
    model.compile(optimizer=adam,
                  loss={'o1': cross_entropy_balanced,
                        'o2': cross_entropy_balanced,
                        'o3': cross_entropy_balanced,
                        'o4': cross_entropy_balanced,
                        'o5': cross_entropy_balanced,
                        'ofuse': cross_entropy_balanced},
                  metrics={'ofuse': ofuse_pixel_error})

    return model
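The function above relies on three helpers that the post does not show: side_branch, cross_entropy_balanced and ofuse_pixel_error. The sketches below follow common Keras HED implementations (TensorFlow 1.x API); treat the exact kernel sizes and details as assumptions rather than the original author's code.

import tensorflow as tf
from keras import backend as K
from keras.layers import Conv2D, Conv2DTranspose


def side_branch(x, factor):
    # Collapse the feature map to one channel, then upsample by `factor`
    # with a transposed convolution so every branch comes out 480x480x1.
    x = Conv2D(1, (1, 1), activation=None, padding='same')(x)
    x = Conv2DTranspose(1, (2 * factor, 2 * factor), strides=(factor, factor),
                        padding='same', use_bias=False, activation=None)(x)
    return x


def cross_entropy_balanced(y_true, y_pred):
    # Class-balanced sigmoid cross-entropy from the HED paper: edge pixels
    # are rare, so positives and negatives are reweighted by frequency.
    y_pred = tf.clip_by_value(y_pred, K.epsilon(), 1 - K.epsilon())
    y_pred = tf.log(y_pred / (1 - y_pred))  # probabilities back to logits
    y_true = tf.cast(y_true, tf.float32)
    count_neg = tf.reduce_sum(1. - y_true)
    count_pos = tf.reduce_sum(y_true)
    beta = count_neg / (count_neg + count_pos)
    pos_weight = beta / (1 - beta)
    cost = tf.nn.weighted_cross_entropy_with_logits(targets=y_true,
                                                    logits=y_pred,
                                                    pos_weight=pos_weight)
    return tf.reduce_mean(cost * (1 - beta))


def ofuse_pixel_error(y_true, y_pred):
    # Fraction of pixels whose thresholded prediction disagrees with the label.
    pred = tf.cast(tf.greater(y_pred, 0.5), tf.int32)
    error = tf.cast(tf.not_equal(pred, tf.cast(y_true, tf.int32)), tf.float32)
    return tf.reduce_mean(error)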

Example 2

A similar approach is also described in this blog post: https://blog.csdn.net/u012938704/article/details/79904173; its code is reproduced below.

Partial code:

# create the base pre-trained model
input_tensor = Input(shape=(299, 299, 3))  # note: unused below
base_model = Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
plot_model(base_model, to_file='xception_model.png')
# drop the classification head so the model ends at the pooled features
# (a fragile Keras 2 hack; the full code below instead rebuilds the model
# from the 'avg_pool' layer, which is cleaner)
base_model.layers.pop()
base_model.outputs = [base_model.layers[-1].output]
base_model.layers[-1].outbound_nodes = []
base_model.output_layers = [base_model.layers[-1]]

# the truncated Xception serves as a shared feature extractor for both images
feature = base_model
img1 = Input(shape=(299, 299, 3), name='img_1')
img2 = Input(shape=(299, 299, 3), name='img_2')

feature1 = feature(img1)
feature2 = feature(img2)

# Three loss functions
category_predict1 = Dense(100, activation='softmax', name='ctg_out_1')(
    Dropout(0.5)(feature1)
)
category_predict2 = Dense(100, activation='softmax', name='ctg_out_2')(
    Dropout(0.5)(feature2)
)
dis = Lambda(eucl_dist, name='square')([feature1, feature2])
judge = Dense(2, activation='softmax', name='bin_out')(dis)
model = Model(inputs=[img1, img2], outputs=[category_predict1, category_predict2, judge])
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
              loss={
                  'ctg_out_1': 'categorical_crossentropy',
                  'ctg_out_2': 'categorical_crossentropy',
                  'bin_out': 'categorical_crossentropy'},
              loss_weights={
                  'ctg_out_1': 1.,
                  'ctg_out_2': 1.,
                  'bin_out': 0.5
              },
              metrics=['accuracy'])
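The excerpt leaves eucl_dist undefined. Given the Lambda layer's name ('square') and the Dense(2, softmax) head on top of it, a plausible sketch is the element-wise squared difference of the two feature vectors; this definition is an assumption, not the original author's.

from keras import backend as K


def eucl_dist(inputs):
    # Element-wise squared difference between the two feature vectors;
    # the 'bin_out' head classifies same/different pairs from it.
    x, y = inputs
    return K.square(x - y)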

Full code: https://github.com/ahangchen/keras-dogs/blob/master/single/single_model.py

import os
import numpy as np

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from keras import Input
from keras.applications import Xception, InceptionV3
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Dense, Dropout, concatenate, maximum
from keras.models import Model, load_model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    width_shift_range=0.4,
    height_shift_range=0.4,
    rotation_range=90,
    zoom_range=0.7,
    horizontal_flip=True,
    vertical_flip=True)

test_datagen = ImageDataGenerator(rescale=1. / 255)

batch_size = 48
train_generator = train_datagen.flow_from_directory(
    '/hdd/cwh/dog_keras_train',
    # '/home/cwh/coding/data/cwh/test1',
    target_size=(299, 299),
    # batch_size=1,
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    '/hdd/cwh/dog_keras_valid',
    # '/home/cwh/coding/data/cwh/test1',
    target_size=(299, 299),
    # batch_size=1,
    batch_size=batch_size,
    class_mode='categorical')


def triple_generator(generator):
    # replicate the labels once per model output (the model has four outputs)
    while True:
        x, y = generator.next()
        yield x, [y, y, y, y]


early_stopping = EarlyStopping(monitor='val_loss', patience=3)
auto_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=0, mode='auto', epsilon=0.0001,
                            cooldown=0, min_lr=0)

if os.path.exists('dog_single_xception.h5'):
    model = load_model('dog_single_xception.h5')
else:
    # create the base pre-trained model
    input_tensor = Input(shape=(299, 299, 3))
    base_model1 = Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
    base_model1 = Model(inputs=[base_model1.input], outputs=[base_model1.get_layer('avg_pool').output], name='xception')

    base_model2 = InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None)
    base_model2 = Model(inputs=[base_model2.input], outputs=[base_model2.get_layer('avg_pool').output],
                        name='inceptionv3')

    img1 = Input(shape=(299, 299, 3), name='img_1')

    feature1 = base_model1(img1)
    feature2 = base_model2(img1)

    # let's add a fully-connected layer
    category_predict1 = Dense(100, activation='softmax', name='ctg_out_1')(
        Dropout(0.5)(
            feature1
        )
    )

    category_predict2 = Dense(100, activation='softmax', name='ctg_out_2')(
        Dropout(0.5)(
            feature2
        )
    )

    category_predict = Dense(100, activation='softmax', name='ctg_out')(
        concatenate([feature1, feature2])
    )
    max_category_predict = maximum([category_predict1, category_predict2])

    model = Model(inputs=[img1], outputs=[category_predict1, category_predict2, category_predict, max_category_predict])

    # model.save('dog_xception.h5')
    plot_model(model, to_file='single_model.png')
    # first: train only the top layers (which were randomly initialized)
    # i.e. freeze all convolutional InceptionV3 layers
    for layer in base_model1.layers:
        layer.trainable = False

    for layer in base_model2.layers:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='nadam',
                  loss={
                      'ctg_out_1': 'categorical_crossentropy',
                      'ctg_out_2': 'categorical_crossentropy',
                      'ctg_out': 'categorical_crossentropy',
                      'maximum_1': 'categorical_crossentropy'  # auto-generated name of the maximum() layer
                  },
                  metrics=['accuracy'])
    # model = make_parallel(model, 3)
    # train the model on the new data for a few epochs

    model.fit_generator(triple_generator(train_generator),
                        steps_per_epoch=16500 // batch_size + 1,  # step counts must be integers
                        epochs=30,
                        validation_data=triple_generator(validation_generator),
                        validation_steps=1800 // batch_size + 1,
                        callbacks=[early_stopping, auto_lr])
    model.save('dog_single_xception.h5')
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from Xception and InceptionV3. We will freeze the
# bottom N layers of each and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(model.layers):
    print(i, layer.name)

# we choose to fine-tune the top blocks of each base model: freeze the first
# 105 Xception layers and the first 262 InceptionV3 layers, unfreeze the rest:
cur_base_model = model.layers[1]
for layer in cur_base_model.layers[:105]:
    layer.trainable = False
for layer in cur_base_model.layers[105:]:
    layer.trainable = True

cur_base_model = model.layers[2]
for layer in cur_base_model.layers[:262]:
    layer.trainable = False
for layer in cur_base_model.layers[262:]:
    layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate

model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
              loss={
                      'ctg_out_1': 'categorical_crossentropy',
                      'ctg_out_2': 'categorical_crossentropy',
                      'ctg_out': 'categorical_crossentropy',
                      'maximum_1': 'categorical_crossentropy'
                  },
              metrics=['accuracy'])
batch_size = batch_size * 3 // 4  # integer batch size for flow_from_directory
# note: this stage reuses test_datagen (without augmentation) for the training data
train_generator = test_datagen.flow_from_directory(
    '/hdd/cwh/dog_keras_train',
    # '/home/cwh/coding/data/cwh/test1',
    target_size=(299, 299),
    # batch_size=1,
    batch_size=batch_size,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    '/hdd/cwh/dog_keras_valid',
    # '/home/cwh/coding/data/cwh/test1',
    target_size=(299, 299),
    # batch_size=1,
    batch_size=batch_size,
    class_mode='categorical')

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
save_model = ModelCheckpoint('xception-tuned{epoch:02d}-{val_ctg_out_acc:.2f}.h5')
model.fit_generator(triple_generator(train_generator),
                    steps_per_epoch=16500 // batch_size + 1,  # otherwise the generator would loop indefinitely
                    epochs=30,
                    validation_data=triple_generator(validation_generator),
                    validation_steps=1800 // batch_size + 1,
                    callbacks=[early_stopping, auto_lr, save_model])
model.save('dog_single_xception_tuned.h5')
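A note on the 'maximum_1' loss key used above: it relies on Keras's auto-generated name for the maximum() merge layer, so it breaks if another Maximum layer is created first. A more robust variant (a suggestion, not part of the original code) names the layer explicitly and uses that name in the loss dict:

from keras.layers import maximum

# name the merge layer so compile() can reference a stable key instead of
# the auto-generated 'maximum_1'
max_category_predict = maximum([category_predict1, category_predict2],
                               name='max_out')
# ...then in compile(): loss={..., 'max_out': 'categorical_crossentropy'}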
