Diffstat (limited to 'models.py')
-rw-r--r-- | models.py | 387 |
1 file changed, 387 insertions, 0 deletions
diff --git a/models.py b/models.py
new file mode 100644
index 0000000..61cc26c
--- /dev/null
+++ b/models.py
@@ -0,0 +1,387 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""ResNet50 model for Keras.
+
+Adapted from tf.keras.applications.resnet50.ResNet50().
+
+Related papers/blogs:
+- https://arxiv.org/abs/1512.03385
+- https://arxiv.org/pdf/1603.05027v2.pdf
+- http://torch.ch/blog/2016/02/04/resnets.html
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import warnings
+
+import tensorflow as tf
+from tensorflow.python.keras import layers
+from tensorflow.python.keras import models
+from tensorflow.python.keras import regularizers
+from tensorflow.python.keras import utils
+
+import tensorflow.keras
+from tensorflow.keras import backend as K
+from tensorflow.keras.models import Sequential, Model
+from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda, Reshape
+from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Conv2DTranspose, GlobalAveragePooling2D
+from tensorflow.keras.layers import Input, concatenate
+
+
+L2_WEIGHT_DECAY = 1e-4
+BATCH_NORM_DECAY = 0.9
+BATCH_NORM_EPSILON = 1e-5
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):
+    """The identity block is the block that has no conv layer at shortcut.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: default 3, the kernel size of
+            middle conv layer at main path
+        filters: list of integers, the filters of 3 conv layer at main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+        trainable: boolean, whether the block's weights are trainable
+
+    # Returns
+        Output tensor for the block.
+ """ + filters1, filters2, filters3 = filters + if K.image_data_format() == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = layers.Conv2D(filters1, (1, 1), + trainable=trainable, + kernel_initializer='he_normal', + kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + name=conv_name_base + '2a')(input_tensor) + x = layers.BatchNormalization(axis=bn_axis, + trainable=trainable, + momentum=BATCH_NORM_DECAY, + epsilon=BATCH_NORM_EPSILON, + name=bn_name_base + '2a')(x) + x = layers.Activation('relu')(x) + + x = layers.Conv2D(filters2, kernel_size, + trainable=trainable, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + name=conv_name_base + '2b')(x) + x = layers.BatchNormalization(axis=bn_axis, + trainable=trainable, + momentum=BATCH_NORM_DECAY, + epsilon=BATCH_NORM_EPSILON, + name=bn_name_base + '2b')(x) + x = layers.Activation('relu')(x) + + x = layers.Conv2D(filters3, (1, 1), + trainable=trainable, + kernel_initializer='he_normal', + kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + name=conv_name_base + '2c')(x) + x = layers.BatchNormalization(axis=bn_axis, + trainable=trainable, + momentum=BATCH_NORM_DECAY, + epsilon=BATCH_NORM_EPSILON, + name=bn_name_base + '2c')(x) + + x = layers.add([x, input_tensor]) + x = layers.Activation('relu')(x) + return x + + +def conv_block(input_tensor, + kernel_size, + filters, + stage, + block, + strides=(2, 2), + trainable=True): + """A block that has a conv layer at shortcut. + + # Arguments + input_tensor: input tensor + kernel_size: default 3, the kernel size of + middle conv layer at main path + filters: list of integers, the filters of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + strides: Strides for the second conv layer in the block. + + # Returns + Output tensor for the block. 
+
+    Note that from stage 3,
+    the second conv layer at main path has strides=(2, 2),
+    and the shortcut should have strides=(2, 2) as well.
+    """
+    filters1, filters2, filters3 = filters
+    if K.image_data_format() == 'channels_last':
+        bn_axis = 3
+    else:
+        bn_axis = 1
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+    x = layers.Conv2D(filters1, (1, 1), kernel_initializer='he_normal',
+                      trainable=trainable,
+                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      name=conv_name_base + '2a')(input_tensor)
+    x = layers.BatchNormalization(axis=bn_axis,
+                                  trainable=trainable,
+                                  momentum=BATCH_NORM_DECAY,
+                                  epsilon=BATCH_NORM_EPSILON,
+                                  name=bn_name_base + '2a')(x)
+    x = layers.Activation('relu')(x)
+
+    x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
+                      trainable=trainable,
+                      kernel_initializer='he_normal',
+                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      name=conv_name_base + '2b')(x)
+    x = layers.BatchNormalization(axis=bn_axis,
+                                  trainable=trainable,
+                                  momentum=BATCH_NORM_DECAY,
+                                  epsilon=BATCH_NORM_EPSILON,
+                                  name=bn_name_base + '2b')(x)
+    x = layers.Activation('relu')(x)
+
+    x = layers.Conv2D(filters3, (1, 1),
+                      trainable=trainable,
+                      kernel_initializer='he_normal',
+                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      name=conv_name_base + '2c')(x)
+    x = layers.BatchNormalization(axis=bn_axis,
+                                  trainable=trainable,
+                                  momentum=BATCH_NORM_DECAY,
+                                  epsilon=BATCH_NORM_EPSILON,
+                                  name=bn_name_base + '2c')(x)
+
+    shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
+                             trainable=trainable,
+                             kernel_initializer='he_normal',
+                             kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                             bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                             name=conv_name_base + '1')(input_tensor)
+    shortcut = layers.BatchNormalization(axis=bn_axis,
+                                         trainable=trainable,
+                                         momentum=BATCH_NORM_DECAY,
+                                         epsilon=BATCH_NORM_EPSILON,
+                                         name=bn_name_base + '1')(shortcut)
+
+    x = layers.add([x, shortcut])
+    x = layers.Activation('relu')(x)
+    return x
+
+
+def ResNet50(width, height, num_classes):
+    """Instantiates the ResNet50 architecture.
+
+    Args:
+        width: `int` input image width in pixels.
+        height: `int` input image height in pixels.
+        num_classes: `int` number of classes for image classification.
+
+    Returns:
+        A Keras model instance.
+ """ + # Determine proper input shape + if K.image_data_format() == 'channels_first': + input_shape = (3, height, width) + bn_axis = 1 + else: + input_shape = (height, width, 3) + bn_axis = 3 + + img_input = layers.Input(shape=input_shape) + x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) + x = layers.Conv2D(64, (7, 7), + strides=(2, 2), + padding='valid', + kernel_initializer='he_normal', + kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + name='conv1')(x) + x = layers.BatchNormalization(axis=bn_axis, + momentum=BATCH_NORM_DECAY, + epsilon=BATCH_NORM_EPSILON, + name='bn_conv1')(x) + x = layers.Activation('relu')(x) + x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x) + x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + + x = layers.GlobalAveragePooling2D(name='avg_pool')(x) + + # When loading weights by name the last layer won't actually be loaded because + # the name depends on the number of classes + + x = layers.Dense( + num_classes, activation='softmax', + kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), + name='fc'+str(num_classes))(x) + + # Create model. 
+    return models.Model(img_input, x, name='resnet50')
+
+
+def get_logo_model(width, height, num_classes, output_layer=True, base_trainable=False):
+    """Builds the small convolutional logo branch as a Sequential model."""
+    if K.image_data_format() == 'channels_first':
+        input_shape = (3, height, width)
+        bn_axis = 1
+    else:
+        input_shape = (height, width, 3)
+        bn_axis = 3
+
+    init_weights = tf.keras.initializers.he_normal()
+
+    logo_model = Sequential()
+    logo_model.add(Conv2D(32, 3, padding='same', trainable=base_trainable, input_shape=input_shape, activation='elu', name='logo_conv1'))
+
+    logo_model.add(Conv2D(32, 3, padding='same', trainable=base_trainable, use_bias=False, activation='elu', name='logo_conv2'))
+
+    logo_model.add(Conv2D(64, 3, padding='same', trainable=base_trainable, strides=2, use_bias=False, activation='elu', name='logo_conv3'))
+
+    logo_model.add(Conv2D(64, 3, padding='same', trainable=base_trainable, use_bias=False, activation='elu', name='logo_conv4'))
+
+    logo_model.add(Conv2D(128, 3, padding='same', trainable=base_trainable, strides=2, use_bias=False, activation='elu', name='logo_conv5'))
+
+    logo_model.add(Conv2D(128, 3, padding='same', trainable=True, use_bias=False, activation='elu', name='logo_conv6'))
+    logo_model.add(Dropout(0.3))
+
+    logo_model.add(Conv2D(128, 8, padding='same', trainable=True, use_bias=False, activation='elu', name='logo_conv7'))
+
+    if output_layer:
+        # These two layers are only used in training
+        logo_model.add(GlobalAveragePooling2D(name='logo_avg_pool'))
+        logo_model.add(Dense(
+            num_classes, activation='softmax', name='logo_fc' + str(num_classes)))
+
+    return logo_model
+
+
+def get_logores_model(width, height, num_classes, resnet_trainable=True, logo_trainable=False, logo_end_trainable=True):
+    """ResNet50 trunk combined with the logo branch; pooled features are concatenated before the classifier."""
+    # Determine proper input shape
+    if K.image_data_format() == 'channels_first':
+        input_shape = (3, height, width)
+        bn_axis = 1
+    else:
+        input_shape = (height, width, 3)
+        bn_axis = 3
+
+    img_input = layers.Input(shape=input_shape)
+    #logo_model = get_logo_model(width, height, num_classes, output_layer = False)
+
+
+    ## Freeze the weights of the logo model
+    #for layer in logo_model.layers:
+    #    layer.trainable = False
+
+    #logo_x = logo_model(img_input)
+
+    logo_x = Conv2D(32, 3, padding='same', input_shape=input_shape, activation='elu', name='logo_conv1', trainable=logo_trainable)(img_input)
+    logo_x = Conv2D(32, 3, padding='same', use_bias=False, activation='elu', name='logo_conv2', trainable=logo_trainable)(logo_x)
+    logo_x = Conv2D(64, 3, padding='same', strides=2, use_bias=False, activation='elu', name='logo_conv3', trainable=logo_trainable)(logo_x)
+    logo_x = Conv2D(64, 3, padding='same', use_bias=False, activation='elu', name='logo_conv4', trainable=logo_trainable)(logo_x)
+    logo_x = Conv2D(128, 3, padding='same', strides=2, use_bias=False, activation='elu', name='logo_conv5', trainable=logo_trainable)(logo_x)
+    logo_x = Conv2D(128, 3, padding='same', use_bias=False, activation='elu', name='logo_conv6', trainable=logo_end_trainable)(logo_x)
+    logo_x = Dropout(0.3, trainable=logo_trainable)(logo_x)
+    logo_x = Conv2D(128, 8, padding='same', use_bias=False, activation='elu', name='logo_conv7', trainable=logo_end_trainable)(logo_x)
+
+    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
+    x = layers.Conv2D(64, (7, 7),
+                      strides=(2, 2),
+                      padding='valid',
+                      kernel_initializer='he_normal',
+                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                      trainable=resnet_trainable,
+                      name='conv1')(x)
+    x = layers.BatchNormalization(axis=bn_axis,
+                                  momentum=BATCH_NORM_DECAY,
+                                  epsilon=BATCH_NORM_EPSILON,
+                                  trainable=resnet_trainable,
+                                  name='bn_conv1')(x)
+    x = layers.Activation('relu')(x)
+    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
+    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable=resnet_trainable)
+    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable=resnet_trainable)
+    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable=resnet_trainable)
+
+    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable=resnet_trainable)
+    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable=resnet_trainable)
+    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable=resnet_trainable)
+    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable=resnet_trainable)
+
+    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable=resnet_trainable)
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable=resnet_trainable)
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable=resnet_trainable)
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable=resnet_trainable)
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable=resnet_trainable)
+    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable=resnet_trainable)
+
+    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', trainable=resnet_trainable)
+    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', trainable=resnet_trainable)
+    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', trainable=resnet_trainable)
+
+    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
+    logo_x = layers.GlobalAveragePooling2D(name='logo_avg_pool')(logo_x)
+
+    x = layers.concatenate([x, logo_x])
+
+    # When loading weights by name the last layer won't actually be loaded
+    # because the name depends on the number of classes
+    x = layers.Dense(
+        num_classes, activation='softmax',
+        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+        bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+        name='fc' + str(num_classes))(x)
+
+    return models.Model(img_input, x, name='resnet50+logo')
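For orientation, here is a minimal usage sketch (not part of the commit) showing how the three constructors in models.py fit together. The 224x224 input size and 10-class output are illustrative assumptions, not values from this repository.

# Minimal usage sketch, assuming models.py is importable and that a
# 224x224 RGB input with 10 classes is wanted (both are assumptions).
import tensorflow as tf
from models import ResNet50, get_logo_model, get_logores_model

resnet = ResNet50(width=224, height=224, num_classes=10)
logo = get_logo_model(width=224, height=224, num_classes=10)
combined = get_logores_model(width=224, height=224, num_classes=10)

combined.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
combined.summary()  # prints the concatenated ResNet50 + logo-branch topology

The trainable flags threaded through conv_block and identity_block let the caller freeze either branch; for example, get_logores_model(..., resnet_trainable=False) trains only the logo branch and the final classifier.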
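The in-code comment about loading weights by name can be made concrete. A hedged sketch follows, using the standard Keras by_name loading; the checkpoint path below is a placeholder, not a file shipped with this commit.

# Sketch of by-name weight loading; 'resnet50_imagenet.h5' is a
# placeholder path and not part of this repository.
model = ResNet50(width=224, height=224, num_classes=10)
# by_name=True matches layers by name ('conv1', 'bn_conv1', 'res2a_branch2a', ...).
# The final Dense layer is named 'fc10' here, so weights saved from a model
# with a different class count (e.g. 'fc1000') are skipped rather than loaded.
model.load_weights('resnet50_imagenet.h5', by_name=True)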