# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet50 model for Keras.

Adapted from tf.keras.applications.resnet50.ResNet50().

Related papers/blogs:
- https://arxiv.org/abs/1512.03385
- https://arxiv.org/pdf/1603.05027v2.pdf
- http://torch.ch/blog/2016/02/04/resnets.html

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import warnings

import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import models
from tensorflow.python.keras import regularizers
from tensorflow.python.keras import utils

import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda, Reshape
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Conv2DTranspose, GlobalAveragePooling2D
from tensorflow.keras.layers import Input, concatenate


L2_WEIGHT_DECAY = 1e-4
BATCH_NORM_DECAY = 0.9
BATCH_NORM_EPSILON = 1e-5


def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):
  """The identity block is the block that has no conv layer at shortcut.

  # Arguments
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of
          middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names

  # Returns
      Output tensor for the block.
  """
  filters1, filters2, filters3 = filters
  if K.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = layers.Conv2D(filters1, (1, 1),
                    trainable=trainable,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
                                trainable=trainable,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters2, kernel_size,
                    trainable=trainable,
                    padding='same',
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                trainable=trainable,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters3, (1, 1),
                    trainable=trainable,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                trainable=trainable,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)

  x = layers.add([x, input_tensor])
  x = layers.Activation('relu')(x)
  return x


def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2),
               trainable=True):
  """A block that has a conv layer at shortcut.

  # Arguments
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of
          middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names
      strides: Strides for the second conv layer in the block.

  # Returns
      Output tensor for the block.

  Note that from stage 3,
  the second conv layer at main path is with strides=(2, 2)
  And the shortcut should have strides=(2, 2) as well
  """
  filters1, filters2, filters3 = filters
  if K.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = layers.Conv2D(filters1, (1, 1), kernel_initializer='he_normal',
                    trainable=trainable,
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
                                trainable=trainable,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
                    trainable=trainable,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                trainable=trainable,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters3, (1, 1),
                    trainable=trainable,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                trainable=trainable,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)

  shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
                           trainable=trainable,
                           kernel_initializer='he_normal',
                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                           bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                           name=conv_name_base + '1')(input_tensor)
  shortcut = layers.BatchNormalization(axis=bn_axis,
                                       trainable=trainable,
                                       momentum=BATCH_NORM_DECAY,
                                       epsilon=BATCH_NORM_EPSILON,
                                       name=bn_name_base + '1')(shortcut)

  x = layers.add([x, shortcut])
  x = layers.Activation('relu')(x)
  return x


def ResNet50(width, height, num_classes):
  """Instantiates the ResNet50 architecture.

  Args:
    num_classes: `int` number of classes for image classification.

  Returns:
      A Keras model instance.
  """
  # Determine proper input shape
  if K.image_data_format() == 'channels_first':
    input_shape = (3, height, width)
    bn_axis = 1
  else:
    input_shape = (height, width, 3)
    bn_axis = 3

  img_input = layers.Input(shape=input_shape)
  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
  x = layers.Conv2D(64, (7, 7),
                    strides=(2, 2),
                    padding='valid',
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name='conv1')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name='bn_conv1')(x)
  x = layers.Activation('relu')(x)
  x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
  x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

  x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

  # When loading weights by name the last layer won't actually be loaded because
  # the name depends on the number of classes

  x = layers.Dense(
      num_classes, activation='softmax',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name='fc'+str(num_classes))(x)

  # Create model.
  return models.Model(img_input, x, name='resnet50')

def get_logo_model(width, height, num_classes, output_layer = True, base_trainable = False):
  if K.image_data_format() == 'channels_first':
    input_shape = (3, height, width)
    bn_axis = 1
  else:
    input_shape = (height, width, 3)
    bn_axis = 3

  init_weights = tf.keras.initializers.he_normal()

  logo_model = Sequential()
  logo_model.add(Conv2D(32, 3, padding='same', trainable = base_trainable,  input_shape=input_shape, activation='elu', name='logo_conv1'))

  logo_model.add(Conv2D(32, 3, padding='same', trainable = base_trainable,  use_bias = False, activation='elu', name='logo_conv2'))

  logo_model.add(Conv2D(64, 3, padding='same', trainable = base_trainable,  strides=2, use_bias = False, activation='elu', name='logo_conv3'))

  logo_model.add(Conv2D(64, 3, padding='same', trainable = base_trainable,  use_bias = False, activation='elu', name='logo_conv4'))

  logo_model.add(Conv2D(128, 3, padding='same', trainable = base_trainable,  strides=2,  use_bias = False, activation='elu', name='logo_conv5'))

  logo_model.add(Conv2D(128, 3, padding='same', trainable = True,  use_bias = False, activation='elu', name='logo_conv6'))
  logo_model.add(Dropout(0.3))

  logo_model.add(Conv2D(128, 8, padding='same', trainable = True,  use_bias = False, activation='elu', name='logo_conv7'))

  if output_layer:
      # These two layers are only used in training
      logo_model.add(GlobalAveragePooling2D(name='logo_avg_pool'))
      logo_model.add(Dense(
          num_classes, activation='softmax', name='logo_fc'+str(num_classes)))

  return logo_model


def get_logores_model(width, height, num_classes, resnet_trainable = True, logo_trainable = False, logo_end_trainable=True):
  # Determine proper input shape
  if K.image_data_format() == 'channels_first':
    input_shape = (3, height, width)
    bn_axis = 1
  else:
    input_shape = (height, width, 3)
    bn_axis = 3

  img_input = layers.Input(shape=input_shape)
  #logo_model = get_logo_model(width, height, num_classes, output_layer = False)


  ## Freeze the weights of the logo model
  #for layer in logo_model.layers:
  #    layer.trainable = False

  #logo_x = logo_model(img_input)

  logo_x = Conv2D(32, 3, padding='same', input_shape=input_shape, activation='elu', name='logo_conv1', trainable=logo_trainable)(img_input)
  logo_x = Conv2D(32, 3, padding='same', use_bias = False, activation='elu', name='logo_conv2', trainable=logo_trainable)(logo_x)
  logo_x = Conv2D(64, 3, padding='same', strides=2, use_bias = False, activation='elu', name='logo_conv3', trainable=logo_trainable)(logo_x)
  logo_x = Conv2D(64, 3, padding='same', use_bias = False, activation='elu', name='logo_conv4', trainable=logo_trainable)(logo_x)
  logo_x = Conv2D(128, 3, padding='same', strides=2,  use_bias = False, activation='elu', name='logo_conv5', trainable=logo_trainable)(logo_x)
  logo_x = Conv2D(128, 3, padding='same', use_bias = False, activation='elu', name='logo_conv6', trainable=logo_end_trainable)(logo_x)
  logo_x = Dropout(0.3, trainable=logo_trainable)(logo_x)
  logo_x = Conv2D(128, 8, padding='same', use_bias = False, activation='elu', name='logo_conv7', trainable=logo_end_trainable)(logo_x)

  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
  x = layers.Conv2D(64, (7, 7),
                    strides=(2, 2),
                    padding='valid',
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    trainable=resnet_trainable,
                    name='conv1')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                trainable=resnet_trainable,
                                name='bn_conv1')(x)
  x = layers.Activation('relu')(x)
  x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
  x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable=resnet_trainable)
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable=resnet_trainable)
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable=resnet_trainable)

  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable=resnet_trainable)
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable=resnet_trainable)
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable=resnet_trainable)
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable=resnet_trainable)

  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable=resnet_trainable)
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable=resnet_trainable)
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable=resnet_trainable)
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable=resnet_trainable)
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable=resnet_trainable)
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable=resnet_trainable)

  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', trainable=resnet_trainable)
  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', trainable=resnet_trainable)
  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', trainable=resnet_trainable)

  x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
  logo_x = layers.GlobalAveragePooling2D(name='logo_avg_pool')(logo_x)

  x = layers.concatenate([x, logo_x])

  # When loading weights by name the last layer won't actually be loaded because
  # the name depends on the number of classes

  x = layers.Dense(
      num_classes, activation='softmax',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name='fc'+str(num_classes))(x)

  return models.Model(img_input, x, name='resnet50+logo')