# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ResNet50 model for Keras. Adapted from tf.keras.applications.resnet50.ResNet50(). Related papers/blogs: - https://arxiv.org/abs/1512.03385 - https://arxiv.org/pdf/1603.05027v2.pdf - http://torch.ch/blog/2016/02/04/resnets.html """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import warnings import tensorflow as tf from tensorflow.python.keras import layers from tensorflow.python.keras import models from tensorflow.python.keras import regularizers from tensorflow.python.keras import utils import tensorflow.keras from tensorflow.keras import backend as K from tensorflow.keras.models import Sequential, Model from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda, Reshape from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Conv2DTranspose, GlobalAveragePooling2D from tensorflow.keras.layers import Input, concatenate L2_WEIGHT_DECAY = 1e-4 BATCH_NORM_DECAY = 0.9 BATCH_NORM_EPSILON = 1e-5 def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True): """The identity block is the block that has no conv layer at shortcut. # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names # Returns Output tensor for the block. """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = layers.Conv2D(filters1, (1, 1), trainable=trainable, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2a')(input_tensor) x = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2a')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters2, kernel_size, trainable=trainable, padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2b')(x) x = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2b')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters3, (1, 1), trainable=trainable, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2c')(x) x = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) x = layers.Activation('relu')(x) return x def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), trainable=True): """A block that has a conv layer at shortcut. # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names strides: Strides for the second conv layer in the block. # Returns Output tensor for the block. Note that from stage 3, the second conv layer at main path is with strides=(2, 2) And the shortcut should have strides=(2, 2) as well """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = layers.Conv2D(filters1, (1, 1), kernel_initializer='he_normal', trainable=trainable, kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2a')(input_tensor) x = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2a')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same', trainable=trainable, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2b')(x) x = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2b')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters3, (1, 1), trainable=trainable, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2c')(x) x = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2c')(x) shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '1')(input_tensor) shortcut = layers.BatchNormalization(axis=bn_axis, trainable=trainable, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '1')(shortcut) x = layers.add([x, shortcut]) x = layers.Activation('relu')(x) return x def ResNet50(width, height, num_classes): """Instantiates the ResNet50 architecture. Args: num_classes: `int` number of classes for image classification. Returns: A Keras model instance. """ # Determine proper input shape if K.image_data_format() == 'channels_first': input_shape = (3, height, width) bn_axis = 1 else: input_shape = (height, width, 3) bn_axis = 3 img_input = layers.Input(shape=input_shape) x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) x = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name='conv1')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name='bn_conv1')(x) x = layers.Activation('relu')(x) x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') x = layers.GlobalAveragePooling2D(name='avg_pool')(x) # When loading weights by name the last layer won't actually be loaded because # the name depends on the number of classes x = layers.Dense( num_classes, activation='softmax', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name='fc'+str(num_classes))(x) # Create model. return models.Model(img_input, x, name='resnet50') def get_logo_model(width, height, num_classes, output_layer = True, base_trainable = False): if K.image_data_format() == 'channels_first': input_shape = (3, height, width) bn_axis = 1 else: input_shape = (height, width, 3) bn_axis = 3 init_weights = tf.keras.initializers.he_normal() logo_model = Sequential() logo_model.add(Conv2D(32, 3, padding='same', trainable = base_trainable, input_shape=input_shape, activation='elu', name='logo_conv1')) logo_model.add(Conv2D(32, 3, padding='same', trainable = base_trainable, use_bias = False, activation='elu', name='logo_conv2')) logo_model.add(Conv2D(64, 3, padding='same', trainable = base_trainable, strides=2, use_bias = False, activation='elu', name='logo_conv3')) logo_model.add(Conv2D(64, 3, padding='same', trainable = base_trainable, use_bias = False, activation='elu', name='logo_conv4')) logo_model.add(Conv2D(128, 3, padding='same', trainable = base_trainable, strides=2, use_bias = False, activation='elu', name='logo_conv5')) logo_model.add(Conv2D(128, 3, padding='same', trainable = True, use_bias = False, activation='elu', name='logo_conv6')) logo_model.add(Dropout(0.3)) logo_model.add(Conv2D(128, 8, padding='same', trainable = True, use_bias = False, activation='elu', name='logo_conv7')) if output_layer: # These two layers are only used in training logo_model.add(GlobalAveragePooling2D(name='logo_avg_pool')) logo_model.add(Dense( num_classes, activation='softmax', name='logo_fc'+str(num_classes))) return logo_model def get_logores_model(width, height, num_classes, resnet_trainable = True, logo_trainable = False, logo_end_trainable=True): # Determine proper input shape if K.image_data_format() == 'channels_first': input_shape = (3, height, width) bn_axis = 1 else: input_shape = (height, width, 3) bn_axis = 3 img_input = layers.Input(shape=input_shape) #logo_model = get_logo_model(width, height, num_classes, output_layer = False) ## Freeze the weights of the logo model #for layer in logo_model.layers: # layer.trainable = False #logo_x = logo_model(img_input) logo_x = Conv2D(32, 3, padding='same', input_shape=input_shape, activation='elu', name='logo_conv1', trainable=logo_trainable)(img_input) logo_x = Conv2D(32, 3, padding='same', use_bias = False, activation='elu', name='logo_conv2', trainable=logo_trainable)(logo_x) logo_x = Conv2D(64, 3, padding='same', strides=2, use_bias = False, activation='elu', name='logo_conv3', trainable=logo_trainable)(logo_x) logo_x = Conv2D(64, 3, padding='same', use_bias = False, activation='elu', name='logo_conv4', trainable=logo_trainable)(logo_x) logo_x = Conv2D(128, 3, padding='same', strides=2, use_bias = False, activation='elu', name='logo_conv5', trainable=logo_trainable)(logo_x) logo_x = Conv2D(128, 3, padding='same', use_bias = False, activation='elu', name='logo_conv6', trainable=logo_end_trainable)(logo_x) logo_x = Dropout(0.3, trainable=logo_trainable)(logo_x) logo_x = Conv2D(128, 8, padding='same', use_bias = False, activation='elu', name='logo_conv7', trainable=logo_end_trainable)(logo_x) x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) x = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), trainable=resnet_trainable, name='conv1')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, trainable=resnet_trainable, name='bn_conv1')(x) x = layers.Activation('relu')(x) x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable=resnet_trainable) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable=resnet_trainable) x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable=resnet_trainable) x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable=resnet_trainable) x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable=resnet_trainable) x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable=resnet_trainable) x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable=resnet_trainable) x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable=resnet_trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable=resnet_trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable=resnet_trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable=resnet_trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable=resnet_trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable=resnet_trainable) x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', trainable=resnet_trainable) x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', trainable=resnet_trainable) x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', trainable=resnet_trainable) x = layers.GlobalAveragePooling2D(name='avg_pool')(x) logo_x = layers.GlobalAveragePooling2D(name='logo_avg_pool')(logo_x) x = layers.concatenate([x, logo_x]) # When loading weights by name the last layer won't actually be loaded because # the name depends on the number of classes x = layers.Dense( num_classes, activation='softmax', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name='fc'+str(num_classes))(x) return models.Model(img_input, x, name='resnet50+logo')