Diffstat (limited to 'models.py')
-rw-r--r--  models.py  387
1 file changed, 387 insertions(+), 0 deletions(-)
diff --git a/models.py b/models.py
new file mode 100644
index 0000000..61cc26c
--- /dev/null
+++ b/models.py
@@ -0,0 +1,387 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""ResNet50 model for Keras.
+
+Adapted from tf.keras.applications.resnet50.ResNet50().
+
+Related papers/blogs:
+- https://arxiv.org/abs/1512.03385
+- https://arxiv.org/pdf/1603.05027v2.pdf
+- http://torch.ch/blog/2016/02/04/resnets.html
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from tensorflow.keras import backend as K
+from tensorflow.keras import layers
+from tensorflow.keras import models
+from tensorflow.keras import regularizers
+from tensorflow.keras.layers import Conv2D, Dense, Dropout, GlobalAveragePooling2D
+from tensorflow.keras.models import Sequential
+
+
+L2_WEIGHT_DECAY = 1e-4
+BATCH_NORM_DECAY = 0.9
+BATCH_NORM_EPSILON = 1e-5
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):
+ """The identity block is the block that has no conv layer at shortcut.
+
+ # Arguments
+ input_tensor: input tensor
+ kernel_size: default 3, the kernel size of
+ middle conv layer at main path
+ filters: list of integers, the filters of 3 conv layer at main path
+ stage: integer, current stage label, used for generating layer names
+ block: 'a','b'..., current block label, used for generating layer names
+
+ # Returns
+ Output tensor for the block.
+ """
+ filters1, filters2, filters3 = filters
+ if K.image_data_format() == 'channels_last':
+ bn_axis = 3
+ else:
+ bn_axis = 1
+ conv_name_base = 'res' + str(stage) + block + '_branch'
+ bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+ x = layers.Conv2D(filters1, (1, 1),
+ trainable=trainable,
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '2a')(input_tensor)
+ x = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '2a')(x)
+ x = layers.Activation('relu')(x)
+
+ x = layers.Conv2D(filters2, kernel_size,
+ trainable=trainable,
+ padding='same',
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '2b')(x)
+ x = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '2b')(x)
+ x = layers.Activation('relu')(x)
+
+ x = layers.Conv2D(filters3, (1, 1),
+ trainable=trainable,
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '2c')(x)
+ x = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '2c')(x)
+
+ x = layers.add([x, input_tensor])
+ x = layers.Activation('relu')(x)
+ return x
+
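+# Illustrative sketch (not part of the original model code): the identity
+# shortcut adds `input_tensor` to the block output, so the input's channel
+# count must equal `filters3`. The shapes below are assumptions chosen only
+# for demonstration.
+def _demo_identity_block():
+  inputs = layers.Input(shape=(56, 56, 256))
+  # Channels in (256) match filters3 (256), so the residual add is valid.
+  outputs = identity_block(inputs, 3, [64, 64, 256], stage=2, block='x')
+  return models.Model(inputs, outputs)
+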
+
+def conv_block(input_tensor,
+ kernel_size,
+ filters,
+ stage,
+ block,
+ strides=(2, 2),
+ trainable=True):
+ """A block that has a conv layer at shortcut.
+
+ # Arguments
+ input_tensor: input tensor
+ kernel_size: default 3, the kernel size of
+ middle conv layer at main path
+ filters: list of integers, the filters of 3 conv layer at main path
+ stage: integer, current stage label, used for generating layer names
+ block: 'a','b'..., current block label, used for generating layer names
+ strides: Strides for the second conv layer in the block.
+
+ # Returns
+ Output tensor for the block.
+
+ Note that from stage 3,
+ the second conv layer at main path is with strides=(2, 2)
+ And the shortcut should have strides=(2, 2) as well
+ """
+ filters1, filters2, filters3 = filters
+ if K.image_data_format() == 'channels_last':
+ bn_axis = 3
+ else:
+ bn_axis = 1
+ conv_name_base = 'res' + str(stage) + block + '_branch'
+ bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+ x = layers.Conv2D(filters1, (1, 1), kernel_initializer='he_normal',
+ trainable=trainable,
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '2a')(input_tensor)
+ x = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '2a')(x)
+ x = layers.Activation('relu')(x)
+
+ x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
+ trainable=trainable,
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '2b')(x)
+ x = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '2b')(x)
+ x = layers.Activation('relu')(x)
+
+ x = layers.Conv2D(filters3, (1, 1),
+ trainable=trainable,
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '2c')(x)
+ x = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '2c')(x)
+
+ shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
+ trainable=trainable,
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name=conv_name_base + '1')(input_tensor)
+ shortcut = layers.BatchNormalization(axis=bn_axis,
+ trainable=trainable,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name=bn_name_base + '1')(shortcut)
+
+ x = layers.add([x, shortcut])
+ x = layers.Activation('relu')(x)
+ return x
+
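+# Illustrative sketch (not part of the original model code): unlike the
+# identity block, conv_block projects the shortcut through a 1x1 conv, so it
+# can change the channel count and, via `strides`, the spatial size. The
+# shapes below are assumptions chosen only for demonstration.
+def _demo_conv_block():
+  inputs = layers.Input(shape=(56, 56, 256))
+  # 56x56x256 -> 28x28x512: the default strides=(2, 2) halve height and
+  # width, and filters3=512 sets the output channel count.
+  outputs = conv_block(inputs, 3, [128, 128, 512], stage=3, block='x')
+  return models.Model(inputs, outputs)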
+
+def ResNet50(width, height, num_classes):
+ """Instantiates the ResNet50 architecture.
+
+ Args:
+ num_classes: `int` number of classes for image classification.
+
+ Returns:
+ A Keras model instance.
+ """
+ # Determine proper input shape
+ if K.image_data_format() == 'channels_first':
+ input_shape = (3, height, width)
+ bn_axis = 1
+ else:
+ input_shape = (height, width, 3)
+ bn_axis = 3
+
+ img_input = layers.Input(shape=input_shape)
+ x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
+ x = layers.Conv2D(64, (7, 7),
+ strides=(2, 2),
+ padding='valid',
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name='conv1')(x)
+ x = layers.BatchNormalization(axis=bn_axis,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ name='bn_conv1')(x)
+ x = layers.Activation('relu')(x)
+ x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
+ x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+ x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+ x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
+ x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
+
+ x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
+
+ x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
+
+ x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
+ x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
+ x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+
+ x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
+
+  # When loading weights by name, the final layer is skipped because its
+  # name depends on the number of classes.
+
+ x = layers.Dense(
+ num_classes, activation='softmax',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name='fc'+str(num_classes))(x)
+
+ # Create model.
+ return models.Model(img_input, x, name='resnet50')
+
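+# Usage sketch (assumed workflow, not part of the original file): build and
+# compile the classifier; the input size, class count, and optimizer are
+# placeholder choices.
+def _demo_resnet50():
+  model = ResNet50(width=224, height=224, num_classes=10)
+  model.compile(optimizer='adam',
+                loss='categorical_crossentropy',
+                metrics=['accuracy'])
+  # Weights saved from a model with a different class count can still be
+  # loaded by name; the final 'fc<num_classes>' layer is skipped because its
+  # name will not match. The weight path here is hypothetical.
+  # model.load_weights('resnet50_weights.h5', by_name=True)
+  return model
+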
+def get_logo_model(width, height, num_classes, output_layer=True,
+                   base_trainable=False):
+  """Builds the small convolutional logo-recognition network.
+
+  # Arguments
+    width: integer, input image width in pixels
+    height: integer, input image height in pixels
+    num_classes: integer, number of logo classes
+    output_layer: boolean, whether to append the pooling and softmax head
+    base_trainable: boolean, whether the early conv layers are trainable
+
+  # Returns
+    A Keras `Sequential` model instance.
+  """
+  if K.image_data_format() == 'channels_first':
+    input_shape = (3, height, width)
+  else:
+    input_shape = (height, width, 3)
+
+  init_weights = tf.keras.initializers.he_normal()
+
+  logo_model = Sequential()
+  logo_model.add(Conv2D(32, 3, padding='same', trainable=base_trainable,
+                        kernel_initializer=init_weights,
+                        input_shape=input_shape, activation='elu',
+                        name='logo_conv1'))
+  logo_model.add(Conv2D(32, 3, padding='same', trainable=base_trainable,
+                        kernel_initializer=init_weights, use_bias=False,
+                        activation='elu', name='logo_conv2'))
+  logo_model.add(Conv2D(64, 3, padding='same', trainable=base_trainable,
+                        kernel_initializer=init_weights, strides=2,
+                        use_bias=False, activation='elu', name='logo_conv3'))
+  logo_model.add(Conv2D(64, 3, padding='same', trainable=base_trainable,
+                        kernel_initializer=init_weights, use_bias=False,
+                        activation='elu', name='logo_conv4'))
+  logo_model.add(Conv2D(128, 3, padding='same', trainable=base_trainable,
+                        kernel_initializer=init_weights, strides=2,
+                        use_bias=False, activation='elu', name='logo_conv5'))
+  logo_model.add(Conv2D(128, 3, padding='same', trainable=True,
+                        kernel_initializer=init_weights, use_bias=False,
+                        activation='elu', name='logo_conv6'))
+  logo_model.add(Dropout(0.3))
+  logo_model.add(Conv2D(128, 8, padding='same', trainable=True,
+                        kernel_initializer=init_weights, use_bias=False,
+                        activation='elu', name='logo_conv7'))
+
+  if output_layer:
+    # These two layers are only used during training.
+    logo_model.add(GlobalAveragePooling2D(name='logo_avg_pool'))
+    logo_model.add(Dense(num_classes, activation='softmax',
+                         name='logo_fc' + str(num_classes)))
+
+  return logo_model
+
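+# Usage sketch (assumed workflow, not part of the original file): train the
+# standalone logo classifier, then rebuild it without the training head so
+# its conv weights can be reused. All paths are hypothetical.
+def _demo_logo_model():
+  model = get_logo_model(224, 224, num_classes=10, output_layer=True,
+                         base_trainable=True)
+  model.compile(optimizer='adam', loss='categorical_crossentropy')
+  # ... fit on logo data, then save: model.save_weights('logo_weights.h5')
+  headless = get_logo_model(224, 224, num_classes=10, output_layer=False)
+  # Shared layer names ('logo_conv1'..'logo_conv7') make by-name loading work:
+  # headless.load_weights('logo_weights.h5', by_name=True)
+  return headless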
+
+def get_logores_model(width, height, num_classes, resnet_trainable=True,
+                      logo_trainable=False, logo_end_trainable=True):
+  """Builds the combined ResNet50 + logo model.
+
+  Two branches process the same input: the ResNet50 trunk and the logo conv
+  stack; their pooled features are concatenated before the softmax.
+
+  # Arguments
+    width: integer, input image width in pixels
+    height: integer, input image height in pixels
+    num_classes: integer, number of classes
+    resnet_trainable: boolean, whether the ResNet50 branch is trainable
+    logo_trainable: boolean, whether the early logo conv layers are trainable
+    logo_end_trainable: boolean, whether the last two logo conv layers are
+      trainable
+
+  # Returns
+    A Keras model instance.
+  """
+ # Determine proper input shape
+ if K.image_data_format() == 'channels_first':
+ input_shape = (3, height, width)
+ bn_axis = 1
+ else:
+ input_shape = (height, width, 3)
+ bn_axis = 3
+
+ img_input = layers.Input(shape=input_shape)
+
+  # The logo branch below mirrors get_logo_model() with the same layer names
+  # and no training head; the layers are created inline so that trainability
+  # can be set per layer group rather than by freezing a whole sub-model.
+  logo_x = Conv2D(32, 3, padding='same', activation='elu',
+                  name='logo_conv1', trainable=logo_trainable)(img_input)
+  logo_x = Conv2D(32, 3, padding='same', use_bias=False, activation='elu',
+                  name='logo_conv2', trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(64, 3, padding='same', strides=2, use_bias=False,
+                  activation='elu', name='logo_conv3',
+                  trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(64, 3, padding='same', use_bias=False, activation='elu',
+                  name='logo_conv4', trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(128, 3, padding='same', strides=2, use_bias=False,
+                  activation='elu', name='logo_conv5',
+                  trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(128, 3, padding='same', use_bias=False, activation='elu',
+                  name='logo_conv6', trainable=logo_end_trainable)(logo_x)
+  # Dropout has no weights, so no `trainable` flag is needed here.
+  logo_x = Dropout(0.3)(logo_x)
+  logo_x = Conv2D(128, 8, padding='same', use_bias=False, activation='elu',
+                  name='logo_conv7', trainable=logo_end_trainable)(logo_x)
+
+ x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
+ x = layers.Conv2D(64, (7, 7),
+ strides=(2, 2),
+ padding='valid',
+ kernel_initializer='he_normal',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ trainable=resnet_trainable,
+ name='conv1')(x)
+ x = layers.BatchNormalization(axis=bn_axis,
+ momentum=BATCH_NORM_DECAY,
+ epsilon=BATCH_NORM_EPSILON,
+ trainable=resnet_trainable,
+ name='bn_conv1')(x)
+ x = layers.Activation('relu')(x)
+ x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
+ x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+ x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable=resnet_trainable)
+ x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable=resnet_trainable)
+ x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable=resnet_trainable)
+
+ x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable=resnet_trainable)
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable=resnet_trainable)
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable=resnet_trainable)
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable=resnet_trainable)
+
+ x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable=resnet_trainable)
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable=resnet_trainable)
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable=resnet_trainable)
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable=resnet_trainable)
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable=resnet_trainable)
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable=resnet_trainable)
+
+ x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', trainable=resnet_trainable)
+ x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', trainable=resnet_trainable)
+ x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', trainable=resnet_trainable)
+
+ x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
+ logo_x = layers.GlobalAveragePooling2D(name='logo_avg_pool')(logo_x)
+
+ x = layers.concatenate([x, logo_x])
+
+  # When loading weights by name, the final layer is skipped because its
+  # name depends on the number of classes.
+
+ x = layers.Dense(
+ num_classes, activation='softmax',
+ kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+ name='fc'+str(num_classes))(x)
+
+ return models.Model(img_input, x, name='resnet50+logo')
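+
+
+# Usage sketch (assumed workflow, not part of the original file): build the
+# combined model and warm-start the logo branch from pretrained weights; the
+# weight path is hypothetical.
+def _demo_logores_model():
+  model = get_logores_model(224, 224, num_classes=10,
+                            resnet_trainable=True, logo_trainable=False)
+  # The logo branch reuses the 'logo_conv*' layer names from get_logo_model,
+  # so pretrained logo weights can be loaded by name:
+  # model.load_weights('logo_weights.h5', by_name=True)
+  model.compile(optimizer='adam', loss='categorical_crossentropy')
+  return model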