| field | value | date |
|---|---|---|
| author | Vasil Zlatanov <v@skozl.com> | 2019-06-24 21:31:41 +0100 |
| committer | Vasil Zlatanov <v@skozl.com> | 2019-06-24 21:31:41 +0100 |
| commit | 02f4425ceadec0c10c1e9903286c1121a9313357 (patch) | |
| tree | cc8b2447ec0a5fa8d38926fd516823c62341c3d8 | |
| mode | file | lines |
|---|---|---|
| -rwxr-xr-x | classifier-logo.py | 51 |
| -rwxr-xr-x | classifier.py | 48 |
| -rwxr-xr-x | densenet.py | 256 |
| -rw-r--r-- | eval_utils.py | 293 |
| -rw-r--r-- | imagenet_input.py | 166 |
| -rw-r--r-- | logo_input.py | 143 |
| -rw-r--r-- | models.py | 387 |
| -rwxr-xr-x | resnet50.py | 348 |
| -rw-r--r-- | resnet_preprocessing.py | 87 |
| -rw-r--r-- | util/__pycache__/dataset_utils.cpython-37.pyc | bin (0 -> 4355 bytes) |
| -rw-r--r-- | util/dataset_utils.py | 150 |
| -rwxr-xr-x | util/generate_smaller.sh | 22 |
| -rwxr-xr-x | util/make-tfrecords.py | 203 |
| -rwxr-xr-x | util/plot-report | 102 |
| -rwxr-xr-x | util/plot-softmax | 94 |
| -rwxr-xr-x | util/splitter | 30 |
| -rwxr-xr-x | util/splitter-man | 30 |
| -rw-r--r-- | util/test.dump | 0 |

18 files changed, 2410 insertions(+), 0 deletions(-)
```diff
diff --git a/classifier-logo.py b/classifier-logo.py
new file mode 100755
index 0000000..87e5779
--- /dev/null
+++ b/classifier-logo.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+
+from absl import flags
+from absl import logging
+
+import os
+import sys
+import numpy as np
+import tensorflow as tf
+import models
+import resnet_preprocessing
+from tensorflow.keras.utils import plot_model
+
+
+DEF_IMAGE_WIDTH  = None
+DEF_IMAGE_HEIGHT = None
+DEF_WEIGHTS = 'weights.h5'
+
+flags.DEFINE_integer('image_width', DEF_IMAGE_WIDTH, '')
+flags.DEFINE_integer('image_height', DEF_IMAGE_HEIGHT, '')
+flags.DEFINE_string('weights', DEF_WEIGHTS, 'Weights of the model')
+flags.DEFINE_integer('num_classes', 100, 'Number of classes the weights were trained for')
+
+FLAGS = flags.FLAGS
+
+FLAGS(sys.argv)
+
+print(FLAGS.weights)
+
+classes = [ "absa_logo", "adobe_logo", "airbnb_logo", "alibaba_logo", "amazon_logo", "americanas_logo", "americanexpress_logo", "aol_logo", "apple_logo", "argenta_logo", "att_logo", "bancodechile_logo", "bancodecredito_logo", "bancodobrasil_logo", "bancosantander_logo", "bankofamerica_logo", "barclaysuk_logo", "bestchangecom_logo", "bet365_logo", "binance_logo", "blockchain_logo", "bnpparibas_logo", "bradesco_logo", "britishtelecom_logo", "caixabrazil_logo", "canadapharmacy_logo", "capitalone_logo", "casasbahia_logo", "chase_logo", "cibc_logo", "citicorp_logo", "coinbase_logo", "dailymirror_logo", "dhl_logo", "docusign_logo", "dropbox_logo", "ebay_logo", "ethereum_logo", "facebook_logo", "federalexpress_logo", "fonbetru_logo", "freefr_logo", "genericbankfraud_logo", "godaddy_logo", "google_logo", "grandlisboamacau_logo", "halkbankas_logo", "hmrevenuecustoms_logo", "hsbceub_logo", "impotsgouvfr_logo", "inggroup_logo", "instagram_logo", "interac_logo", "itauunibanco_logo", "lacaixaes_logo", "lassurancemaladie_logo", "lasvegassands_logo", "linkedin_logo", "lloydsbank_logo", "logos.txt", "luno_logo", "mcafee_logo", "mercadolibre_logo", "mercadopago_logo", "metrobank_uk_logo", "microsoft_logo", "mostbet_logo", "myetherwallet_logo", "nationalaustraliabank_logo", "natwest_logo", "netease_logo", "netflix_logo", "netseu_logo", "orange_logo", "ourtime_logo", "ovh_logo", "paypal_logo", "phpshell_logo", "posteitaliane_logo", "postmaster_logo", "rayban_logo", "rbc_logo", "scotiabank_logo", "standardchartered_logo", "steam_logo", "suntrust_logo", "swisscom_logo", "torontodominion_logo", "uber_logo", "unicredit_logo", "usaa_logo", "usbank_logo", "visa_logo", "vkontakte_logo", "walmart_logo", "wellsfargo_logo", "wetransfer_logo", "whatsapp_logo", "xfinity_logo", "yahoo_logo" ]
+
+model = models.get_logo_model(width=FLAGS.image_width, height=FLAGS.image_height, num_classes=FLAGS.num_classes)
+
+weights_file = os.path.join(FLAGS.weights)
+model.load_weights(weights_file)
+model.save_weights('new.hdf5')
+
+image_bytes = tf.read_file('my_logo.png')
+
+images_raw = resnet_preprocessing.preprocess_image(image_bytes, FLAGS.image_width, FLAGS.image_height, resize=False, is_training=True)
+
+#plot_model(model, to_file='model.pdf', show_layer_names=False, show_shapes=True)
+
+images_expanded = tf.expand_dims(images_raw, 0)
+predictions = model.predict_on_batch(images_expanded)
+
+for prediction in predictions:
+    largest_ind = np.argpartition(prediction, -5)[-5:]
+
+    for i in largest_ind[np.argsort(-prediction[largest_ind])]:
+        print(classes[i]+" :   \t\t"+str(prediction[i]))
```

```diff
diff --git a/classifier.py b/classifier.py
new file mode 100755
index 0000000..b8e2b6a
--- /dev/null
+++ b/classifier.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+
+from absl import flags
+from absl import logging
+
+import os
+import sys
+import numpy as np
+import tensorflow as tf
+import models
+
+import resnet_preprocessing
+
+
+DEF_IMAGE_WIDTH  = 320
+DEF_IMAGE_HEIGHT = 240
+DEF_WEIGHTS = 'weights.h5'
+
+flags.DEFINE_integer('image_width', DEF_IMAGE_WIDTH, '')
+flags.DEFINE_integer('image_height', DEF_IMAGE_HEIGHT, '')
+flags.DEFINE_string('weights', DEF_WEIGHTS, 'Weights of the model')
+flags.DEFINE_integer('num_classes', 39, 'Number of classes the weights were trained for')
+
+FLAGS = flags.FLAGS
+
+FLAGS(sys.argv)
+
+print(FLAGS.weights)
+
+classes = [ "adnetwork", "adobe", "airbnb", "amazon", "applecomputer", "applecomputer_scam", "bancosantander", "bankofamerica", "bnbankru", "bnpparibas", "chase", "craigslist", "dhl", "docusign", "dropbox", "facebook", "genericwebmailphishing", "godaddy", "google", "holding", "ingdirect", "linkedin", "microsoft", "microsoft_scam", "navyfederalcreditunion", "netflix", "orange", "paypal", "phpshell", "posteitaliane", "postmaster", "squarespace", "unicreditgroup", "visa", "vkontakte", "wellsfargo", "wetransfer", "windowslive", "yahoo"]
+
+model = models.ResNet50(width=FLAGS.image_width, height=FLAGS.image_height, num_classes=FLAGS.num_classes)
+
+weights_file = os.path.join(FLAGS.weights)
+model.load_weights(weights_file)
+
+image_bytes = tf.read_file('my_file.png')
+
+images_raw = resnet_preprocessing.preprocess_image(image_bytes, FLAGS.image_width, FLAGS.image_height, resize=True, is_training=True)
+
+images_expanded = tf.expand_dims(images_raw, 0)
+predictions = model.predict_on_batch(images_expanded)
+
+for prediction in predictions:
+    largest_ind = np.argpartition(prediction, -5)[-5:]
+
+    for i in largest_ind[np.argsort(-prediction[largest_ind])]:
+        print(classes[i]+" :   \t\t"+str(prediction[i]))
```
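Both classifier scripts rank the softmax output with `np.argpartition` followed by an `argsort` over only the selected slice, which avoids sorting the full class vector. A minimal standalone sketch of that top-5 extraction (toy `prediction` vector and class names assumed):

```python
import numpy as np

# Toy stand-ins for the scripts' softmax output and class list.
classes = np.array(["a", "b", "c", "d", "e", "f", "g"])
prediction = np.array([0.05, 0.30, 0.02, 0.25, 0.10, 0.20, 0.08])

# argpartition finds the 5 largest entries in O(n) without a full sort...
largest_ind = np.argpartition(prediction, -5)[-5:]
# ...then only those 5 entries are sorted, in descending order.
for i in largest_ind[np.argsort(-prediction[largest_ind])]:
    print(classes[i], prediction[i])
```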
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from absl import app +from absl import flags +from absl import logging +import numpy as np +import tensorflow as tf + +import eval_utils +import imagenet_input +from models.densenet import DenseNetImageNet121 +from tensorflow.python.keras import backend as K +from tensorflow.python.keras.optimizer_v2 import gradient_descent, adam + +try: +  import h5py as _  # pylint: disable=g-import-not-at-top +  HAS_H5PY = True +except ImportError: +  logging.warning('`h5py` is not installed. Please consider installing it ' +                  'to save weights for long-running training.') +  HAS_H5PY = False + + +# Imagenet training and test data sets. + +IMAGE_WIDTH  = 320 +IMAGE_HEIGHT = 240 +EPOCHS = 90  # Standard imagenet training regime. + +# Training hyperparameters. +NUM_CORES = 8 +PER_CORE_BATCH_SIZE = 64 +CPU_BATCH_SIZE = 4 +BATCH_SIZE = NUM_CORES * PER_CORE_BATCH_SIZE +BASE_LEARNING_RATE = 0.4 +# Learning rate schedule +LR_SCHEDULE = [    # (multiplier, epoch to start) tuples +    (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80) +] + +DEFAULT_WEIGHTS_H5 = 'resnet50_weights.h5' +DEFAULT_LOG_DIR = '/tmp/netcraft' +DEFAULT_BUCKET = 'gs://netcraft/' + +flags.DEFINE_integer('epochs', EPOCHS, '') +flags.DEFINE_string('weights', None, 'Use saved weights') +flags.DEFINE_string('bucket', DEFAULT_BUCKET, 'Bucket to use') +flags.DEFINE_string('tpu', None, 'Name of the TPU to use.') +flags.DEFINE_string('data', None, 'Path to training and testing data.') +flags.DEFINE_string( +    'log', DEFAULT_LOG_DIR, +    ('The directory where the model weights and training/evaluation summaries ' +     'are stored. If not specified, save to /tmp/netcraft.')) +flags.DEFINE_bool( +    'complete_eval', True, +    'Eval both top 1 and top 5 accuracy. Otherwise, only eval top 1 accuracy. ' +    'Furthemore generate confusion matrixes and save softmax values in log_dir') +flags.DEFINE_bool('evalonly', False, 'Only run eval with given weights, do not train') + +FLAGS = flags.FLAGS + +def learning_rate_schedule(current_epoch, current_batch): +  """Handles linear scaling rule, gradual warmup, and LR decay. + +  The learning rate starts at 0, then it increases linearly per step. +  After 5 epochs we reach the base learning rate (scaled to account +    for batch size). +  After 30, 60 and 80 epochs the learning rate is divided by 10. +  After 90 epochs training stops and the LR is set to 0. This ensures +    that we train for exactly 90 epochs for reproducibility. + +  Args: +    current_epoch: integer, current epoch indexed from 0. +    current_batch: integer, current batch in the current epoch, indexed from 0. + +  Returns: +    Adjusted learning rate. +  """ +  return 0.0 +  epoch = current_epoch + float(current_batch) / train_steps_per_epoch +  warmup_lr_multiplier, warmup_end_epoch = LR_SCHEDULE[0] +  if epoch < warmup_end_epoch: +    # Learning rate increases linearly per step. +    return BASE_LEARNING_RATE * warmup_lr_multiplier * epoch / warmup_end_epoch +  for mult, start_epoch in LR_SCHEDULE: +    if epoch >= start_epoch: +      learning_rate = BASE_LEARNING_RATE * mult +    else: +      break +  return learning_rate + + +class LearningRateBatchScheduler(tf.keras.callbacks.Callback): +  """Callback to update learning rate on every batch (not epoch boundaries). + +  N.B. Only support Keras optimizers, not TF optimizers. 
+
+  Args:
+      schedule: a function that takes an epoch index and a batch index as input
+          (both integer, indexed from 0) and returns a new learning rate as
+          output (float).
+  """
+
+  def __init__(self, schedule):
+    super(LearningRateBatchScheduler, self).__init__()
+    self.schedule = schedule
+    self.epochs = -1
+    self.prev_lr = -1
+
+  def on_epoch_begin(self, epoch, logs=None):
+    if not hasattr(self.model.optimizer, 'lr'):
+      raise ValueError('Optimizer must have a "lr" attribute.')
+    self.epochs += 1
+
+  def on_batch_begin(self, batch, logs=None):
+    lr = self.schedule(self.epochs, batch)
+    if not isinstance(lr, (float, np.float32, np.float64)):
+      raise ValueError('The output of the "schedule" function should be float.')
+    if lr != self.prev_lr:
+      K.set_value(self.model.optimizer.lr, lr)
+      self.prev_lr = lr
+      logging.debug('Epoch %05d Batch %05d: LearningRateBatchScheduler change '
+                    'learning rate to %s.', self.epochs, batch, lr)
+
+
+def main(argv):
+  dinfo = np.load(os.path.join(FLAGS.data, 'dinfo.npz'))
+  classes = dinfo['classes']
+  NUM_CLASSES = len(classes)
+  train_cnt = dinfo['train_cnt'] # 1141 # 50273 # Approximate number of images.
+  val_cnt = dinfo['val_cnt'] # 488 # 12560  # Number of images.
+  train_steps_per_epoch = int(train_cnt / BATCH_SIZE)
+  val_steps = int(val_cnt // BATCH_SIZE)
+
+  print("Using", train_cnt, "training images and", val_cnt, "for testing")
+
+  logging.info('Building Keras DenseNet model')
+  model = DenseNetImageNet121(classes=NUM_CLASSES, weights=None)
+
+  if FLAGS.tpu:
+    logging.info('Converting from CPU to TPU model.')
+    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu)
+    strategy = tf.contrib.tpu.TPUDistributionStrategy(resolver)
+    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)
+
+  logging.info('Compiling model.')
+  model.compile(
+      optimizer=gradient_descent.SGD(learning_rate=BASE_LEARNING_RATE, momentum=0.9, nesterov=True),
+      loss='sparse_categorical_crossentropy',
+      metrics=['sparse_categorical_accuracy'])
+
+  if FLAGS.data is None:  # NOTE: unreachable as written; the np.load above already dereferences FLAGS.data.
+    training_images = np.random.randn(
+        BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 3).astype(np.float32)
+    training_labels = np.random.randint(NUM_CLASSES, size=BATCH_SIZE,
+                                        dtype=np.int32)
+    logging.info('Training model using synthetic data.')
+    model.fit(
+        training_images,
+        training_labels,
+        epochs=EPOCHS,
+        batch_size=BATCH_SIZE)
+    logging.info('Evaluating the model on synthetic data.')
+    model.evaluate(training_images, training_labels, verbose=0)
+  else:
+    per_core_batch_size = PER_CORE_BATCH_SIZE if FLAGS.tpu else CPU_BATCH_SIZE
+    imagenet_train = imagenet_input.ImageNetInput(
+        is_training=True,
+        data_dir=FLAGS.bucket+FLAGS.data if FLAGS.tpu else FLAGS.data,
+        per_core_batch_size=per_core_batch_size)
+    logging.info('Training model using real data in directory "%s".',
+                 FLAGS.data)
+    # If evaluating complete_eval, we feed the inputs from a Python generator,
+    # so we need to build a single batch for all of the cores, which will be
+    # split on TPU.
+    per_core_batch_size = (
+        BATCH_SIZE if FLAGS.complete_eval else PER_CORE_BATCH_SIZE)
+    imagenet_validation = imagenet_input.ImageNetInput(
+        is_training=False,
+        data_dir=FLAGS.bucket+FLAGS.data if FLAGS.tpu else FLAGS.data,
+        per_core_batch_size=per_core_batch_size)
+
+    eval_callback = eval_utils.TensorBoardWithValidation(
+            log_dir=FLAGS.log,
+            validation_imagenet_input=imagenet_validation,
+            validation_steps=val_steps,
+            validation_epochs=[3, 10, 30, 60, 90],
+            write_images=True,
+            write_graph=True,
+            plot_wrong=True,
+            plot_cm=True,
+            plot_pr=True,
+            classes=classes,
+            complete_eval=FLAGS.complete_eval)
+
+    callbacks = [
+        LearningRateBatchScheduler(schedule=learning_rate_schedule),
+        eval_callback
+    ]
+
+    if FLAGS.tpu:
+        model_in = imagenet_train.input_fn
+    else:
+        model_in = imagenet_train.input_fn()
+
+    if FLAGS.weights:
+        weights_file = os.path.join(FLAGS.weights)
+        logging.info('Loading model and weights from %s', weights_file)
+        model.load_weights(weights_file)
+    else:
+        weights_file = os.path.join(DEFAULT_WEIGHTS_H5)
+
+    if FLAGS.evalonly:
+        eval_callback.set_model(model)
+        eval_callback.on_epoch_end(420)
+    else:
+        model.fit(model_in,
+                  epochs=EPOCHS,
+                  steps_per_epoch=train_steps_per_epoch,
+                  callbacks=callbacks)
+
+    logging.info('Saving weights into %s', weights_file)
+    model.save_weights(weights_file, overwrite=True)
+
+
+if __name__ == '__main__':
+  tf.logging.set_verbosity(tf.logging.INFO)
+  app.run(main)
```
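The warm-up/decay policy encoded in `LR_SCHEDULE` is easier to see with concrete numbers. A minimal sketch of the same piecewise schedule, evaluated at a few epoch positions (the constants are copied from densenet.py; the helper name is illustrative):

```python
LR_SCHEDULE = [(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)]
BASE_LEARNING_RATE = 0.4

def lr_at(epoch_float):
    # Linear warm-up until epoch 5, then step decay per LR_SCHEDULE.
    mult, warmup_end = LR_SCHEDULE[0]
    if epoch_float < warmup_end:
        return BASE_LEARNING_RATE * mult * epoch_float / warmup_end
    lr = BASE_LEARNING_RATE
    for mult, start in LR_SCHEDULE:
        if epoch_float >= start:
            lr = BASE_LEARNING_RATE * mult
        else:
            break
    return lr

for e in (0.0, 2.5, 5.0, 29.0, 30.0, 60.0, 80.0):
    print(e, lr_at(e))  # 0.0, 0.2, 0.4, 0.4, 0.04, 0.004, 0.0004
```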
```diff
diff --git a/eval_utils.py b/eval_utils.py
new file mode 100644
index 0000000..bf00b0b
--- /dev/null
+++ b/eval_utils.py
@@ -0,0 +1,293 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Evaluation utils for `KerasTPUmodel`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from six.moves import xrange
+import sys
+
+import tensorflow as tf
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import optimizers
+from tensorflow.python.keras import callbacks
+from tensorflow.python.platform import tf_logging as logging
+from sklearn.metrics import confusion_matrix
+
+import matplotlib.pyplot as plt
+import io
+import os
+import itertools
+import scikitplot as skplt
+
+from tqdm import trange
+
+def save_softmax(log_dir, epoch, labels, predictions):
+    location = os.path.join(log_dir, 'softmax' + str(epoch) + '.npz')
+    np.savez(location, labels=labels, predictions=predictions)
+
+def draw_graphs(self, log_dir, classes, y_true, y_probas, epoch):
+    y_pred = np.argmax(y_probas, axis=1)
+    if self._plot_cm:
+        skplt.metrics.plot_confusion_matrix(y_true, y_pred, normalize=True)
+        plot_to_tensorboard(log_dir, epoch, "model_projections", "confusion_matrix")
+    if self._plot_pr:
+        skplt.metrics.plot_precision_recall(y_true, y_probas)
+        plot_to_tensorboard(log_dir, epoch, "model_projections", "pr_curve")
+
+def plot_to_tensorboard(log_dir, epoch, model_projection, family_name):
+    buf = io.BytesIO()
+    plt.rcParams.update({'font.size': 5})
+    plt.savefig(buf, dpi=250, format='png')
+    buf.seek(0)
+    image = tf.image.decode_png(buf.getvalue(), channels=3)
+    image = tf.expand_dims(image, 0)
+
+    summary_op = tf.summary.image(model_projection, image, max_outputs=1, family=family_name)
+    writer = tf.summary.FileWriter(log_dir)
+    writer.add_summary(summary_op.eval(session=K.get_session()), epoch)
+    writer.close()
+
+def draw_c_matrix(log_dir, c_matrix, classes, epoch, normalize=False):
+    if normalize:
+        c_matrix = c_matrix.astype('float') / c_matrix.sum(axis=1)[:, np.newaxis]
+    plt.figure()
+    plt.imshow(c_matrix, cmap=plt.cm.Blues)
+    plt.xlabel('Predicted')
+    plt.ylabel('True Label')
+    tick_marks = np.arange(len(classes))
+    plt.xticks(tick_marks, classes, rotation=45)
+    plt.yticks(tick_marks, classes)
+
+    fmt = '.2f'
+    thresh = c_matrix.max() / 2.
+    for i, j in itertools.product(range(c_matrix.shape[0]), range(c_matrix.shape[1])):
+        plt.text(j, i, format(c_matrix[i, j], fmt),
+             horizontalalignment="center",
+             color="white" if c_matrix[i, j] > thresh else "black")
+
+    buf = io.BytesIO()
+    plt.savefig(buf, dpi=500, format='png')
+    buf.seek(0)
+    image = tf.image.decode_png(buf.getvalue(), channels=4)
+    image = tf.expand_dims(image, 0)
+
+    summary_op = tf.summary.image("model_projections", image, max_outputs=1, family='family_name')
+    writer = tf.summary.FileWriter(log_dir)
+    writer.add_summary(summary_op.eval(session=K.get_session()), epoch)
+    writer.close()
+
+def multi_top_k_accuracy(self, log_dir, model, evaluation_generator, eval_steps, classes, epoch, ks=(1, 5)):
+  """Calculates top k accuracy for the given `k` values.
+
+  Args:
+    model: `KerasTPUModel` to evaluate.
+    evaluation_generator: a Python generator to generate (features, labels) for
+                          evaluation.
+    eval_steps: int, number of evaluation steps.
+    ks: a tuple of int, position values to calculate top k accuracy.
+
+  Returns:
+    A dictionary containing top k accuracy for the given `k` values.
+  """
+  def _count_matched(classes, predictions, labels, ks):
+    """Count number of pairs with label in any of top k predictions."""
+    top_k_matched = dict.fromkeys(ks, 0)
+    for prediction, label in zip(predictions, labels):
+      for k in ks:
+        top_k_predictions = np.argpartition(prediction, -k)[-k:]
+        if label in top_k_predictions:
+          top_k_matched[k] += 1
+
+    return top_k_matched
+
+  total = 0
+  top_k_matched = dict.fromkeys(ks, 0)
+  c_matrix = np.zeros((len(classes), len(classes)))
+  all_labels = np.zeros((0, 1))
+  all_predictions = np.zeros((0, len(classes)))
+  logging.info('There are %d validation steps', eval_steps)
+  t = trange(eval_steps)
+  for step in t:
+    try:
+      (features, labels) = next(evaluation_generator)
+    except Exception as e:
+      logging.debug(e)
+      break
+    predictions = model.predict_on_batch(features) # May be quicker
+    # predictions = model.predict(features, batch_size=8)
+    sorted_pred_args = np.flip(predictions.argsort(axis=1), axis=1)
+    flat_predictions = sorted_pred_args[:, 0]
+
+    # TODO: clean this function, it is a mess
+
+    # Print some falsely predicted images
+    if self._plot_wrong:  # and not (step+4) % 8:
+        # Squeeze labels into same dimension and type as predictions
+        sq_labels = np.squeeze(labels.astype(int))
+        # With a single image per batch, squeeze removes one dimension too many
+        if sq_labels.shape == ():
+            sq_labels = np.expand_dims(sq_labels, axis=0)
+        failed_indexes = np.where(np.not_equal(flat_predictions, sq_labels))[0]
+        limiter = 0
+        for idx in failed_indexes:
+            if limiter > 90:
+                break
+            limiter += 1
+            predicted_class_name = classes[flat_predictions[idx]]
+            true_class_name = classes[sq_labels[idx]]
+            proba_range = range(3) # Show softmax for top 3
+            top_cl = classes[sorted_pred_args[idx][proba_range]]
+            probas = predictions[idx][sorted_pred_args[idx][proba_range]]
+            if probas[0] > 0.9:
+                top_3 = '\n'.join(cl + ": " + proba for cl, proba in zip(top_cl, probas.astype(str)))
+                print("Predicted", flat_predictions[idx],
+                      "True:",      sq_labels[idx],
+                      "Proba:",     probas.astype(str))
+                plt.clf()
+                plt.imshow(features[idx].astype(int))
+                plt.text(0, 0, top_3, size=9, va="bottom", bbox=dict(boxstyle="square", ec=(1., 0.5, 0.5), fc=(1., 0.8, 0.8),))
+                plot_to_tensorboard(log_dir, epoch, "mislabeled_images", "P_"+predicted_class_name+"_Tr_"+true_class_name)
+
+    c_matrix += confusion_matrix(labels, flat_predictions, labels=range(len(classes)))
+    batch_top_k_matched = _count_matched(classes, predictions, labels, ks)
+    all_labels = np.vstack((all_labels, labels))
+    all_predictions = np.vstack((all_predictions, predictions))
+    for k, matched in batch_top_k_matched.items():
+      top_k_matched[k] += matched
+    total += len(labels)
+
+    t.set_description("Top 1: %f" % np.float_(top_k_matched[1]/float(total)))
+
+  logging.info("Confusion matrix:")
+  print(c_matrix)
+
+  try:
+      #draw_c_matrix(self._log_dir, c_matrix, self._targets, epoch, normalize=True)
+      #draw_c_matrix(self._log_dir, c_matrix, self._targets, epoch, normalize=False)
+      draw_graphs(self, log_dir, classes, all_labels, all_predictions, epoch)
+  except:
+      pass
+
+  save_softmax(log_dir, epoch, all_labels, all_predictions)
+  metrics = dict([('top_{0}_accuracy'.format(k), np.float_(matched / float(total)))
+               for k, matched in top_k_matched.items()])
+  print(metrics)
+  return metrics
+
+
+class TensorBoardWithValidation(callbacks.TensorBoard):
+  """Extends the TensorBoard callback with validation.
+
+  Validation is executed at the end of specified epochs, and the validation
+  metrics are exported to tensorboard for visualization.
+
+  Args:
+      log_dir: the path of the directory where to save the log
+          files to be parsed by TensorBoard.
+      validation_imagenet_input: ImageNetInput for validation.
+      validation_steps: total number of steps to validate.
+      validation_epochs: a list of integers, epochs to run validation.
+      eval_top_k_accuracy: boolean, if true, evaluate top k accuracies using
+          multi_top_k_accuracy(). Otherwise, use model.evaluate().
+          N.B. enabling this would significantly slow down the eval time due to
+          using python generator for evaluation input.
+      top_ks: a tuple of int, position values to calculate top k accuracy. It's
+          only used when eval_top_k_accuracy is true.
+  """
+
+  def __init__(self,
+               log_dir,
+               validation_imagenet_input,
+               validation_steps,
+               validation_epochs,
+               write_graph,
+               write_images,
+               plot_wrong,
+               plot_cm,
+               plot_pr,
+               classes,
+               complete_eval,
+               top_ks=(1, 5)):
+    super(TensorBoardWithValidation, self).__init__(log_dir)
+    self._validation_imagenet_input = validation_imagenet_input
+    self._validation_steps = validation_steps
+    self._validation_epochs = validation_epochs
+    self._write_graph = write_graph
+    self._write_images = write_images
+    self._plot_wrong = plot_wrong
+    self._plot_cm = plot_cm
+    self._plot_pr = plot_pr
+    self._complete_eval = complete_eval
+    self._top_ks = top_ks
+    self._targets = classes
+    self._log_dir = log_dir
+
+  def on_epoch_end(self, epoch, logs=None):
+    if epoch in self._validation_epochs:
+      logging.info('\nValidate in epoch %s', epoch)
+      if self._complete_eval:
+        logging.info("Running complete eval")
+        score = multi_top_k_accuracy(
+            self,
+            self._log_dir,
+            self.model,
+            self._validation_imagenet_input.evaluation_generator(
+                K.get_session()),
+            self._validation_steps,
+            self._targets,
+            epoch,
+            ks=self._top_ks)
+        for metric_name, metric_value in score.items():
+          logs['val_' + metric_name] = metric_value
+      else:
+        # evaluate() is executed as callbacks during the training. In this case,
+        # _numpy_to_infeed_manager_list is not empty, so save it for
+        # recovery at the end of evaluate call.
+        # TODO(jingli): remove this monkey patch hack once the fix is included
+        # in future TF release.
+        original_numpy_to_infeed_manager_list = []
+        if self.model._numpy_to_infeed_manager_list:
+          original_numpy_to_infeed_manager_list = (
+              self.model._numpy_to_infeed_manager_list)
+          self.model._numpy_to_infeed_manager_list = []
+        # Set _eval_function to None to enforce recompilation to use the newly
+        # created dataset in self._validation_imagenet_input.input_fn in
+        # evaluation.
+        # pylint: disable=bare-except
+        # pylint: disable=protected-access
+        try:
+          self.model._eval_function = None
+        except:
+          pass
+
+        try:
+          # In TF 1.12, _eval_function does not exist, only test_function
+          # existed.
+          self.model.test_function = None
+        except:
+          pass
+
+        scores = self.model.evaluate(self._validation_imagenet_input.input_fn,
+                                     steps=self._validation_steps)
+        self.model._numpy_to_infeed_manager_list = (
+            original_numpy_to_infeed_manager_list)
+        for metric_name, metric_value in zip(self.model.metrics_names, scores):
+          logging.info('Evaluation metric. %s: %s.', metric_name, metric_value)
+          logs['val_' + metric_name] = metric_value
+    # The parent callback is responsible for writing the logs to the events file.
+    super(TensorBoardWithValidation, self).on_epoch_end(epoch, logs)
```
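The `_count_matched` helper above is the core of the top-k metric: the true label counts as a hit if it appears among the k highest scores. A small self-contained check of that logic on toy data (the function body mirrors the source; the data is made up):

```python
import numpy as np

def count_matched(predictions, labels, ks=(1, 5)):
    # Count how often the true label appears in the top-k scores.
    top_k_matched = dict.fromkeys(ks, 0)
    for prediction, label in zip(predictions, labels):
        for k in ks:
            top_k_predictions = np.argpartition(prediction, -k)[-k:]
            if label in top_k_predictions:
                top_k_matched[k] += 1
    return top_k_matched

preds = np.array([[0.1, 0.7, 0.2],   # true label 2: top-1 miss, top-2 hit
                  [0.5, 0.3, 0.2]])  # true label 0: top-1 hit
labels = [2, 0]
print(count_matched(preds, labels, ks=(1, 2)))  # {1: 1, 2: 2}
```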
+  """ + +  def __init__(self, +               width, +               height, +               resize, +               is_training, +               data_dir, +               use_bfloat16=False, +               per_core_batch_size=128): +    self.image_preprocessing_fn = resnet_preprocessing.preprocess_image +    self.is_training = is_training +    self.width = width +    self.height = height +    self.resize = resize +    self.use_bfloat16 = use_bfloat16 +    self.data_dir = data_dir +    if self.data_dir == 'null' or self.data_dir == '': +      self.data_dir = None +    self.per_core_batch_size = per_core_batch_size + +  def dataset_parser(self, value): +    """Parse an ImageNet record from a serialized string Tensor.""" +    keys_to_features = { +        'image/encoded': +            tf.FixedLenFeature((), tf.string, ''), +        'image/format': +            tf.FixedLenFeature((), tf.string, 'png'), +        'image/class/label': +            tf.FixedLenFeature([], tf.int64, -1), +        'image/height': +            tf.FixedLenFeature([], tf.int64, -2), +        'image/width': +            tf.FixedLenFeature([], tf.int64, -3), + +    } + +    parsed = tf.parse_single_example(value, keys_to_features) +    image_bytes = tf.reshape(parsed['image/encoded'], shape=[]) + +    image = self.image_preprocessing_fn( +        image_bytes, +        width=self.width, height=self.height, +        resize=self.resize, +        is_training=self.is_training, +        use_bfloat16=self.use_bfloat16, +        ) + +    # Subtract one so that labels are in [0, 1000), and cast to float32 for +    # Keras model. +    label = tf.cast(tf.cast( +        tf.reshape(parsed['image/class/label'], shape=[1]), dtype=tf.int32), #  - 1, +                    dtype=tf.float32) + +    return image, label + +  def input_fn(self): +    """Input function which provides a single batch for train or eval. + +    Returns: +      A `tf.data.Dataset` object. +    """ +    # Shuffle the filenames to ensure better randomization. +    file_pattern = os.path.join( +        self.data_dir, 'websites_train*' if self.is_training else 'websites_validation*') +    dataset = tf.data.Dataset.list_files(file_pattern, shuffle=self.is_training) + +    if self.is_training: +      dataset = dataset.repeat() + +    def fetch_dataset(filename): +      buffer_size = 100 * 1024 * 1024     # 100 MiB per file +      dataset = tf.data.TFRecordDataset(filename, buffer_size=buffer_size) +      return dataset + +    # Read the data from disk in parallel +    dataset = dataset.interleave(fetch_dataset, cycle_length=16) + +    if self.is_training: +      dataset = dataset.shuffle(1024) + +    # Parse, pre-process, and batch the data in parallel +    dataset = dataset.apply( +        tf.data.experimental.map_and_batch( +            self.dataset_parser, +            batch_size=self.per_core_batch_size, +            num_parallel_batches=2, +            drop_remainder=True)) + +    # Prefetch overlaps in-feed with training +    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) +    return dataset + +  # TODO(xiejw): Remove this generator when we have support for top_k +  # evaluation. 
+  def evaluation_generator(self, sess):
+    """Creates a generator for evaluation."""
+    next_batch = self.input_fn().make_one_shot_iterator().get_next()
+    while True:
+      try:
+        yield sess.run(next_batch)
+      except tf.errors.OutOfRangeError:
+        return
+
+  def input_fn_null(self):
+    """Input function which provides null (black) images."""
+    dataset = tf.data.Dataset.range(1).repeat().map(self._get_null_input)
+    dataset = dataset.prefetch(self.per_core_batch_size)
+
+    dataset = dataset.batch(self.per_core_batch_size, drop_remainder=True)
+
+    dataset = dataset.prefetch(32)     # Prefetch overlaps in-feed with training
+    tf.logging.info('Input dataset: %s', str(dataset))
+    return dataset
+
+  def _get_null_input(self, _):
+    null_image = tf.zeros([320, 240, 3], tf.float32)
+    return null_image, tf.constant(0, tf.float32)
```
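For reference, a record that satisfies `dataset_parser`'s `keys_to_features` can be written like this. A minimal TF 1.x sketch with a dummy black PNG as the payload (the feature keys and the `websites_train*` naming come from the source; the helpers and values are illustrative):

```python
import tensorflow as tf

def _bytes(v): return tf.train.Feature(bytes_list=tf.train.BytesList(value=[v]))
def _int64(v): return tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))

# Encode a dummy 240x320 black PNG as the image payload (illustrative only).
with tf.Session() as sess:
    png = sess.run(tf.image.encode_png(tf.zeros([240, 320, 3], tf.uint8)))

example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': _bytes(png),
    'image/format': _bytes(b'png'),
    'image/class/label': _int64(7),
    'image/height': _int64(240),
    'image/width': _int64(320),
}))

with tf.python_io.TFRecordWriter('websites_train-00000-of-00001') as w:
    w.write(example.SerializeToString())
```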
```diff
diff --git a/logo_input.py b/logo_input.py
new file mode 100644
index 0000000..1e017a4
--- /dev/null
+++ b/logo_input.py
@@ -0,0 +1,143 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Efficient ImageNet input pipeline using tf.data.Dataset."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import tensorflow as tf
+
+import resnet_preprocessing
+
+class ImageNetInput(object):
+  def __init__(self,
+               width,
+               height,
+               resize,
+               is_training,
+               data_dir,
+               use_bfloat16=False,
+               per_core_batch_size=128):
+    self.image_preprocessing_fn = resnet_preprocessing.preprocess_image
+    self.is_training = is_training
+    self.width = width
+    self.height = height
+    self.resize = resize
+    self.use_bfloat16 = use_bfloat16
+    self.data_dir = data_dir
+    if self.data_dir == 'null' or self.data_dir == '':
+      self.data_dir = None
+    self.per_core_batch_size = per_core_batch_size
+
+  def dataset_parser(self, value):
+    """Parse an ImageNet record from a serialized string Tensor."""
+    keys_to_features = {
+        'image/encoded':
+            tf.FixedLenFeature((), tf.string, ''),
+        'image/format':
+            tf.FixedLenFeature((), tf.string, 'png'),
+        'image/class/label':
+            tf.FixedLenFeature([], tf.int64, -1),
+        'image/height':
+            tf.FixedLenFeature([], tf.int64, -2),
+        'image/width':
+            tf.FixedLenFeature([], tf.int64, -3),
+    }
+
+    parsed = tf.parse_single_example(value, keys_to_features)
+    image_bytes = tf.reshape(parsed['image/encoded'], shape=[])
+
+    image = self.image_preprocessing_fn(
+        image_bytes,
+        width=self.width, height=self.height,
+        resize=self.resize,
+        is_training=self.is_training,
+        use_bfloat16=self.use_bfloat16,
+        )
+
+    # The ImageNet convention would subtract one so labels land in [0, 1000);
+    # that shift is disabled here. Cast to float32 for the Keras model.
+    label = tf.cast(tf.cast(
+        tf.reshape(parsed['image/class/label'], shape=[1]), dtype=tf.int32), #  - 1,
+                    dtype=tf.float32)
+
+    return image, label
+
+  def input_fn(self):
+    """Input function which provides a single batch for train or eval.
+
+    Returns:
+      A `tf.data.Dataset` object.
+    """
+    # Shuffle the filenames to ensure better randomization.
+    file_pattern = os.path.join(
+        self.data_dir, 'websites_train*' if self.is_training else 'websites_validation*')
+    dataset = tf.data.Dataset.list_files(file_pattern, shuffle=self.is_training)
+
+    if self.is_training:
+      dataset = dataset.repeat()
+
+    def fetch_dataset(filename):
+      buffer_size = 100 * 1024 * 1024     # 100 MiB per file
+      dataset = tf.data.TFRecordDataset(filename, buffer_size=buffer_size)
+      return dataset
+
+    # Read the data from disk in parallel
+    dataset = dataset.interleave(fetch_dataset, cycle_length=16)
+
+    if self.is_training:
+      dataset = dataset.shuffle(1024)
+
+    # Parse, pre-process, and batch the data in parallel
+    dataset = dataset.apply(
+        tf.data.experimental.map_and_batch(
+            self.dataset_parser,
+            batch_size=self.per_core_batch_size,
+            num_parallel_batches=2,
+            drop_remainder=True))
+
+    # Prefetch overlaps in-feed with training
+    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
+    return dataset
+
+  # TODO(xiejw): Remove this generator when we have support for top_k
+  # evaluation.
+  def evaluation_generator(self, sess):
+    """Creates a generator for evaluation."""
+    next_batch = self.input_fn().make_one_shot_iterator().get_next()
+    while True:
+      try:
+        yield sess.run(next_batch)
+      except tf.errors.OutOfRangeError:
+        return
+
+  def input_fn_null(self):
+    """Input function which provides null (black) images."""
+    dataset = tf.data.Dataset.range(1).repeat().map(self._get_null_input)
+    dataset = dataset.prefetch(self.per_core_batch_size)
+
+    dataset = dataset.batch(self.per_core_batch_size, drop_remainder=True)
+
+    dataset = dataset.prefetch(32)     # Prefetch overlaps in-feed with training
+    tf.logging.info('Input dataset: %s', str(dataset))
+    return dataset
+
+  def _get_null_input(self, _):
+    null_image = tf.zeros([320, 240, 3], tf.float32)
+    return null_image, tf.constant(0, tf.float32)
```
```diff
diff --git a/models.py b/models.py
new file mode 100644
index 0000000..61cc26c
--- /dev/null
+++ b/models.py
@@ -0,0 +1,387 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""ResNet50 model for Keras.
+
+Adapted from tf.keras.applications.resnet50.ResNet50().
+
+Related papers/blogs:
+- https://arxiv.org/abs/1512.03385
+- https://arxiv.org/pdf/1603.05027v2.pdf
+- http://torch.ch/blog/2016/02/04/resnets.html
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import warnings
+
+import tensorflow as tf
+from tensorflow.python.keras import layers
+from tensorflow.python.keras import models
+from tensorflow.python.keras import regularizers
+from tensorflow.python.keras import utils
+
+import tensorflow.keras
+from tensorflow.keras import backend as K
+from tensorflow.keras.models import Sequential, Model
+from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda, Reshape
+from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Conv2DTranspose, GlobalAveragePooling2D
+from tensorflow.keras.layers import Input, concatenate
+
+
+L2_WEIGHT_DECAY = 1e-4
+BATCH_NORM_DECAY = 0.9
+BATCH_NORM_EPSILON = 1e-5
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):
+  """The identity block is the block that has no conv layer at shortcut.
+
+  # Arguments
+      input_tensor: input tensor
+      kernel_size: default 3, the kernel size of
+          middle conv layer at main path
+      filters: list of integers, the filters of the 3 conv layers in the main path
+      stage: integer, current stage label, used for generating layer names
+      block: 'a','b'..., current block label, used for generating layer names
+
+  # Returns
+      Output tensor for the block.
+  """
+  filters1, filters2, filters3 = filters
+  if K.image_data_format() == 'channels_last':
+    bn_axis = 3
+  else:
+    bn_axis = 1
+  conv_name_base = 'res' + str(stage) + block + '_branch'
+  bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+  x = layers.Conv2D(filters1, (1, 1),
+                    trainable=trainable,
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name=conv_name_base + '2a')(input_tensor)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                trainable=trainable,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name=bn_name_base + '2a')(x)
+  x = layers.Activation('relu')(x)
+
+  x = layers.Conv2D(filters2, kernel_size,
+                    trainable=trainable,
+                    padding='same',
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name=conv_name_base + '2b')(x)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                trainable=trainable,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name=bn_name_base + '2b')(x)
+  x = layers.Activation('relu')(x)
+
+  x = layers.Conv2D(filters3, (1, 1),
+                    trainable=trainable,
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name=conv_name_base + '2c')(x)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                trainable=trainable,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name=bn_name_base + '2c')(x)
+
+  x = layers.add([x, input_tensor])
+  x = layers.Activation('relu')(x)
+  return x
+
+
+def conv_block(input_tensor,
+               kernel_size,
+               filters,
+               stage,
+               block,
+               strides=(2, 2),
+               trainable=True):
+  """A block that has a conv layer at shortcut.
+
+  # Arguments
+      input_tensor: input tensor
+      kernel_size: default 3, the kernel size of
+          middle conv layer at main path
+      filters: list of integers, the filters of the 3 conv layers in the main path
+      stage: integer, current stage label, used for generating layer names
+      block: 'a','b'..., current block label, used for generating layer names
+      strides: Strides for the second conv layer in the block.
+
+  # Returns
+      Output tensor for the block.
+
+  Note that from stage 3, the second conv layer in the main path uses
+  strides=(2, 2), and the shortcut does as well.
+  """
+  filters1, filters2, filters3 = filters
+  if K.image_data_format() == 'channels_last':
+    bn_axis = 3
+  else:
+    bn_axis = 1
+  conv_name_base = 'res' + str(stage) + block + '_branch'
+  bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+  x = layers.Conv2D(filters1, (1, 1), kernel_initializer='he_normal',
+                    trainable=trainable,
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name=conv_name_base + '2a')(input_tensor)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                trainable=trainable,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name=bn_name_base + '2a')(x)
+  x = layers.Activation('relu')(x)
+
+  x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
+                    trainable=trainable,
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name=conv_name_base + '2b')(x)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                trainable=trainable,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name=bn_name_base + '2b')(x)
+  x = layers.Activation('relu')(x)
+
+  x = layers.Conv2D(filters3, (1, 1),
+                    trainable=trainable,
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name=conv_name_base + '2c')(x)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                trainable=trainable,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name=bn_name_base + '2c')(x)
+
+  shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
+                           trainable=trainable,
+                           kernel_initializer='he_normal',
+                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                           bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                           name=conv_name_base + '1')(input_tensor)
+  shortcut = layers.BatchNormalization(axis=bn_axis,
+                                       trainable=trainable,
+                                       momentum=BATCH_NORM_DECAY,
+                                       epsilon=BATCH_NORM_EPSILON,
+                                       name=bn_name_base + '1')(shortcut)
+
+  x = layers.add([x, shortcut])
+  x = layers.Activation('relu')(x)
+  return x
+
+
+def ResNet50(width, height, num_classes):
+  """Instantiates the ResNet50 architecture.
+
+  Args:
+    width: `int` input image width.
+    height: `int` input image height.
+    num_classes: `int` number of classes for image classification.
+
+  Returns:
+      A Keras model instance.
+  """
+  # Determine proper input shape
+  if K.image_data_format() == 'channels_first':
+    input_shape = (3, height, width)
+    bn_axis = 1
+  else:
+    input_shape = (height, width, 3)
+    bn_axis = 3
+
+  img_input = layers.Input(shape=input_shape)
+  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
+  x = layers.Conv2D(64, (7, 7),
+                    strides=(2, 2),
+                    padding='valid',
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    name='conv1')(x)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                name='bn_conv1')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
+  x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
+  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
+
+  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
+  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
+  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
+  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
+
+  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
+
+  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
+  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
+  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+
+  x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
+
+  # When loading weights by name, the last layer won't actually be loaded
+  # because the name depends on the number of classes
+
+  x = layers.Dense(
+      num_classes, activation='softmax',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name='fc'+str(num_classes))(x)
+
+  # Create model.
+  return models.Model(img_input, x, name='resnet50')
+
+def get_logo_model(width, height, num_classes, output_layer = True, base_trainable = False):
+  if K.image_data_format() == 'channels_first':
+    input_shape = (3, height, width)
+    bn_axis = 1
+  else:
+    input_shape = (height, width, 3)
+    bn_axis = 3
+
+  init_weights = tf.keras.initializers.he_normal()
+
+  logo_model = Sequential()
+  logo_model.add(Conv2D(32, 3, padding='same', trainable = base_trainable, input_shape=input_shape, activation='elu', name='logo_conv1'))
+
+  logo_model.add(Conv2D(32, 3, padding='same', trainable = base_trainable, use_bias = False, activation='elu', name='logo_conv2'))
+
+  logo_model.add(Conv2D(64, 3, padding='same', trainable = base_trainable, strides=2, use_bias = False, activation='elu', name='logo_conv3'))
+
+  logo_model.add(Conv2D(64, 3, padding='same', trainable = base_trainable, use_bias = False, activation='elu', name='logo_conv4'))
+
+  logo_model.add(Conv2D(128, 3, padding='same', trainable = base_trainable, strides=2, use_bias = False, activation='elu', name='logo_conv5'))
+
+  logo_model.add(Conv2D(128, 3, padding='same', trainable = True, use_bias = False, activation='elu', name='logo_conv6'))
+  logo_model.add(Dropout(0.3))
+
+  logo_model.add(Conv2D(128, 8, padding='same', trainable = True, use_bias = False, activation='elu', name='logo_conv7'))
+
+  if output_layer:
+      # These two layers are only used in training
+      logo_model.add(GlobalAveragePooling2D(name='logo_avg_pool'))
+      logo_model.add(Dense(
+          num_classes, activation='softmax', name='logo_fc'+str(num_classes)))
+
+  return logo_model
+
+
+def get_logores_model(width, height, num_classes, resnet_trainable = True, logo_trainable = False, logo_end_trainable=True):
+  # Determine proper input shape
+  if K.image_data_format() == 'channels_first':
+    input_shape = (3, height, width)
+    bn_axis = 1
+  else:
+    input_shape = (height, width, 3)
+    bn_axis = 3
+
+  img_input = layers.Input(shape=input_shape)
+  #logo_model = get_logo_model(width, height, num_classes, output_layer = False)
+
+
+  ## Freeze the weights of the logo model
+  #for layer in logo_model.layers:
+  #    layer.trainable = False
+
+  #logo_x = logo_model(img_input)
+
+  logo_x = Conv2D(32, 3, padding='same', input_shape=input_shape, activation='elu', name='logo_conv1', trainable=logo_trainable)(img_input)
+  logo_x = Conv2D(32, 3, padding='same', use_bias = False, activation='elu', name='logo_conv2', trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(64, 3, padding='same', strides=2, use_bias = False, activation='elu', name='logo_conv3', trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(64, 3, padding='same', use_bias = False, activation='elu', name='logo_conv4', trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(128, 3, padding='same', strides=2, use_bias = False, activation='elu', name='logo_conv5', trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(128, 3, padding='same', use_bias = False, activation='elu', name='logo_conv6', trainable=logo_end_trainable)(logo_x)
+  logo_x = Dropout(0.3, trainable=logo_trainable)(logo_x)
+  logo_x = Conv2D(128, 8, padding='same', use_bias = False, activation='elu', name='logo_conv7', trainable=logo_end_trainable)(logo_x)
+
+  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
+  x = layers.Conv2D(64, (7, 7),
+                    strides=(2, 2),
+                    padding='valid',
+                    kernel_initializer='he_normal',
+                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+                    trainable=resnet_trainable,
+                    name='conv1')(x)
+  x = layers.BatchNormalization(axis=bn_axis,
+                                momentum=BATCH_NORM_DECAY,
+                                epsilon=BATCH_NORM_EPSILON,
+                                trainable=resnet_trainable,
+                                name='bn_conv1')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
+  x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable=resnet_trainable)
+  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable=resnet_trainable)
+  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable=resnet_trainable)
+
+  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable=resnet_trainable)
+  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable=resnet_trainable)
+  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable=resnet_trainable)
+  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable=resnet_trainable)
+
+  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable=resnet_trainable)
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable=resnet_trainable)
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable=resnet_trainable)
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable=resnet_trainable)
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable=resnet_trainable)
+  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable=resnet_trainable)
+
+  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', trainable=resnet_trainable)
+  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', trainable=resnet_trainable)
+  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', trainable=resnet_trainable)
+
+  x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
+  logo_x = layers.GlobalAveragePooling2D(name='logo_avg_pool')(logo_x)
+
+  x = layers.concatenate([x, logo_x])
+
+  # When loading weights by name, the last layer won't actually be loaded
+  # because the name depends on the number of classes
+
+  x = layers.Dense(
+      num_classes, activation='softmax',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name='fc'+str(num_classes))(x)
+
+  return models.Model(img_input, x, name='resnet50+logo')
```
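A quick sketch of how these constructors are meant to be driven, mirroring classifier.py and the training scripts (the sizes and class counts below are placeholders, not values from this commit):

```python
import tensorflow as tf
import models

# Hypothetical sizes; the training scripts above use 320x240 inputs and
# dataset-specific class counts (39 and 100 in the classifier scripts).
model = models.ResNet50(width=320, height=240, num_classes=39)
model.compile(optimizer=tf.keras.optimizers.SGD(0.001, momentum=0.9),
              loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy'])
model.summary()

# The logo tower can also be built standalone, with its base layers frozen:
logo = models.get_logo_model(width=64, height=64, num_classes=100,
                             output_layer=True, base_trainable=False)
```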
+# ==============================================================================
+
+"""ResNet-50 implemented with Keras running on Cloud TPUs.
+
+This file shows how you can run ResNet-50 on a Cloud TPU using the TensorFlow
+Keras support. This is configured for ImageNet (i.e. 1000 classes), but you can
+easily adapt it to your own datasets by changing the code appropriately.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from absl import app
+from absl import flags
+from absl import logging
+import numpy as np
+import tensorflow as tf
+
+import eval_utils
+import imagenet_input
+import models
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras.optimizer_v2 import gradient_descent, adam
+
+try:
+  import h5py as _  # pylint: disable=g-import-not-at-top
+  HAS_H5PY = True
+except ImportError:
+  logging.warning('`h5py` is not installed. Please consider installing it '
+                  'to save weights for long-running training.')
+  HAS_H5PY = False
+
+
+# Imagenet training and test data sets.
+
+DEF_IMAGE_WIDTH  = 320
+DEF_IMAGE_HEIGHT = 240
+DEF_EPOCHS = 90  # Standard ImageNet training regime.
+
+# Training hyperparameters.
+NUM_CORES = 8
+PER_CORE_BATCH_SIZE = 4
+CPU_BATCH_SIZE = 1
+BASE_LEARNING_RATE = 1e-3
+# Learning rate schedule
+LR_SCHEDULE = [    # (multiplier, epoch to start) tuples
+    (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)
+]
+
+DEFAULT_WEIGHTS_H5 = 'resnet50_weights.h5'
+DEFAULT_LOG_DIR = '/tmp/netcraft'
+DEFAULT_BUCKET = 'gs://netcraft/'
+
+flags.DEFINE_float('lr', BASE_LEARNING_RATE, 'Defines the step size when training')
+flags.DEFINE_integer('epochs', DEF_EPOCHS, 'Number of epochs until which to train')
+flags.DEFINE_integer('split_epochs', 1, 'Split epochs into smaller bits, helps save weights')
+flags.DEFINE_integer('initial_epoch', 0, 'Epoch from which to start, useful when resuming training')
+flags.DEFINE_integer('image_width', DEF_IMAGE_WIDTH, 'Input image width in pixels')
+flags.DEFINE_integer('image_height', DEF_IMAGE_HEIGHT, 'Input image height in pixels')
+flags.DEFINE_string('weights', None, 'Use saved weights')
+flags.DEFINE_string('weights2', None, 'Use another set of saved weights')
+flags.DEFINE_string('bucket', DEFAULT_BUCKET, 'Bucket to use')
+flags.DEFINE_string('tpu', None, 'Name of the TPU to use.')
+flags.DEFINE_string('data', None, 'Path to training and testing data.')
+flags.DEFINE_string('model', "resnet", 'Which model to use (resnet, combined, combined_trainable, logo, logo_extended, logo_new or logo_extended_trainable)')
+flags.DEFINE_string(
+    'log', DEFAULT_LOG_DIR,
+    ('The directory where the model weights and training/evaluation summaries '
+     'are stored. If not specified, save to /tmp/netcraft.'))
+flags.DEFINE_bool(
+    'complete_eval', True,
+    'Eval both top 1 and top 5 accuracy. Otherwise, only eval top 1 accuracy. '
+    'Furthermore generate confusion matrices and save softmax values in log_dir')
+flags.DEFINE_bool('evalonly', False, 'Only run eval with given weights, do not train')
+flags.DEFINE_bool('class_weights', False, 'Use class weights to deal with imbalanced dataset')
+flags.DEFINE_integer('benign_multiplier', 1, 'Multiplier for weight of benign class')
+flags.DEFINE_bool('plot_wrong', False, 'Plot misclassified images in tensorboard, makes eval slower')
+flags.DEFINE_bool('plot_cm', True, 'Plot confusion matrix in tensorboard')
+flags.DEFINE_bool('plot_pr', True, 'Plot precision recall in tensorboard')
+flags.DEFINE_bool('weights_by_name', False, 'Load weights by name, this allows loading weights with an incompatible fully '
+                  'connected layer, i.e. a different number of targets. The FC layer is randomly initialized and needs to be trained.')
+
+FLAGS = flags.FLAGS
+
+def learning_rate_schedule(current_epoch, current_batch, train_steps_per_epoch, base_learning_rate):
+  """Handles linear scaling rule, gradual warmup, and LR decay.
+
+  The learning rate starts at 0, then increases linearly per step.
+  After 5 epochs it reaches the base learning rate (scaled to account
+    for batch size).
+  After 30, 60 and 80 epochs the learning rate is divided by 10.
+  After 90 epochs training stops and the LR is set to 0. This ensures
+    that we train for exactly 90 epochs for reproducibility.
+
+  Args:
+    current_epoch: integer, current epoch indexed from 0.
+    current_batch: integer, current batch in the current epoch, indexed from 0.
+    train_steps_per_epoch: integer, number of batches in one training epoch.
+    base_learning_rate: float, the unscaled base learning rate.
+
+  Returns:
+    Adjusted learning rate.
+  """
+  epoch = current_epoch + float(current_batch) / train_steps_per_epoch
+  warmup_lr_multiplier, warmup_end_epoch = LR_SCHEDULE[0]
+  if epoch < warmup_end_epoch:
+    # Learning rate increases linearly per step.
+    return base_learning_rate * warmup_lr_multiplier * epoch / warmup_end_epoch
+  for mult, start_epoch in LR_SCHEDULE:
+    if epoch >= start_epoch:
+      learning_rate = base_learning_rate * mult
+    else:
+      break
+  return learning_rate
+
+
+class LearningRateBatchScheduler(tf.keras.callbacks.Callback):
+  """Callback to update learning rate on every batch (not epoch boundaries).
+
+  N.B. Only supports Keras optimizers, not TF optimizers.
+
+  Args:
+      schedule: a function that takes an epoch index and a batch index as input
+          (both integer, indexed from 0) and returns a new learning rate as
+          output (float).
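+
+  Illustrative example (values assumed, not from a real run): with
+  learning_rate_schedule as the schedule, train_steps_per_epoch=100 and
+  base_learning_rate=1e-3, learning_rate_schedule(2, 50, 100, 1e-3)
+  falls in the warmup phase (epoch 2.5 of 5) and returns
+  1e-3 * 2.5 / 5 = 5e-4; from epoch 30 the 0.1 multiplier applies and
+  the rate becomes 1e-4.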
+  """ + +  def __init__(self, schedule, train_steps_per_epoch, base_learning_rate): +    super(LearningRateBatchScheduler, self).__init__() +    self.base_lr = base_learning_rate +    self.schedule = schedule +    self.train_steps_per_epoch = train_steps_per_epoch +    self.epochs = -1 +    self.prev_lr = -1 + +  def on_epoch_begin(self, epoch, logs=None): +    if not hasattr(self.model.optimizer, 'lr'): +      raise ValueError('Optimizer must have a "lr" attribute.') +    self.epochs += 1 + +  def on_batch_begin(self, batch, logs=None): +    lr = self.schedule(self.epochs, batch, self.train_steps_per_epoch, self.base_lr) +    if not isinstance(lr, (float, np.float32, np.float64)): +      raise ValueError('The output of the "schedule" function should be float.') +    if lr != self.prev_lr: +      K.set_value(self.model.optimizer.lr, lr) +      self.prev_lr = lr +      logging.debug('Epoch %05d Batch %05d: LearningRateBatchScheduler change ' +                    'learning rate to %s.', self.epochs, batch, lr) + + +def main(argv): +  if FLAGS.data: +      dinfo = np.load(os.path.join(FLAGS.data, 'dinfo.npz'), allow_pickle=True) +      classes = dinfo['classes'] +      num_classes = len(classes) +      train_cnt = dinfo['train_cnt'] # 1141 # 50273 # Approximate number of images. +      val_cnt = dinfo['val_cnt'] # 488 # 12560  # Number of images. +      class_weights = dinfo['class_weights'].tolist() +      #class_weights = class_weights[()] # Unpack 0d np.array + +      if FLAGS.class_weights and FLAGS.benign_multiplier != 1: +          benign_class = np.squeeze(np.where(classes=='benign')) +          if benign_class: +              benign_class = np.asscalar(benign_class) +              class_weights[benign_class] *= FLAGS.benign_multiplier +          else: +              logging.warning("Could not find benign class. 
Ignoring benign multiplier.") +  else: +      train_cnt = 10e5 +      val_cnt = 10e4 +      num_classes = 10e2 + +  if FLAGS.tpu: +      batch_size = NUM_CORES * PER_CORE_BATCH_SIZE +  else: +      batch_size = CPU_BATCH_SIZE + +  train_steps_per_epoch = int(train_cnt / (batch_size * FLAGS.split_epochs)) +  val_steps = int(val_cnt // batch_size ) + +  logging.info("Using %d training images and %d for validation", train_cnt, val_cnt) + +  if FLAGS.model == 'resnet': +      logging.info('Building Keras ResNet-50 model') +      model = models.ResNet50(width=FLAGS.image_width, height=FLAGS.image_height, num_classes=num_classes) +  elif FLAGS.model == 'combined': +      logging.info('Building Keras ResNet-50 + LOGO model') +      model = models.get_logores_model(width=FLAGS.image_width, height=FLAGS.image_height, num_classes=num_classes, resnet_trainable=False) +  elif FLAGS.model == 'combined_trainable': +      logging.info('Building Keras ResNet-50 + LOGO model') +      model = models.get_logores_model(width=FLAGS.image_width, height=FLAGS.image_height, num_classes=num_classes, resnet_trainable=True) +  elif FLAGS.model == 'logo': +      logging.info('Building LogoNet model') +      model = models.get_logo_model(width=None, height=None, num_classes=num_classes, base_trainable=True) +  elif FLAGS.model == 'logo_extended': +      logging.info('Building LogoNet model') +      model = models.get_logo_model(width=FLAGS.image_width, height=FLAGS.image_height, base_trainable=False, num_classes=num_classes) +  elif FLAGS.model == 'logo_new': +      logging.info('Building LogoNet model') +      model = models.get_logo_model_new(width=FLAGS.image_width, height=FLAGS.image_height, base_trainable=False, num_classes=num_classes) +  elif FLAGS.model == 'logo_extended_trainable': +      logging.info('Building LogoNet model') +      model = models.get_logo_model(width=FLAGS.image_width, height=FLAGS.image_height, base_trainable=True, num_classes=num_classes) +  else: +      return 'Only valid models are resnet and logo' + +  if FLAGS.tpu: +    logging.info('Converting from CPU to TPU model.') +    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu) +    strategy = tf.contrib.tpu.TPUDistributionStrategy(resolver) +    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy) + +  logging.info('Compiling model.') +  model.compile( +      optimizer=adam.Adam(learning_rate=FLAGS.lr), +      loss='sparse_categorical_crossentropy', +      metrics=['sparse_categorical_accuracy']) + +  if FLAGS.data is None: +    training_images = np.random.randn( +        batch_size, FLAGS.image_height, FLAGS.image_width, 3).astype(np.float32) +    training_labels = np.random.randint(num_classes, size=batch_size, +                                        dtype=np.int32) +    logging.info('Training model using synthetica data, use --data flag to provided real data.') +    model.fit( +        training_images, +        training_labels, +        epochs=FLAGS.epochs, +        initial_epoch=FLAGS.initial_epoch, +        batch_size=batch_size) +    logging.info('Evaluating the model on synthetic data.') +    model.evaluate(training_images, training_labels, verbose=0) +  else: +    per_core_batch_size = PER_CORE_BATCH_SIZE if FLAGS.tpu else CPU_BATCH_SIZE +    imagenet_train = imagenet_input.ImageNetInput( +        width=FLAGS.image_width, +        height=FLAGS.image_height, +        resize=False if (FLAGS.model == 'logo') else True, +        is_training=True, +        data_dir=FLAGS.bucket+FLAGS.data if 
FLAGS.tpu else FLAGS.data,
+        per_core_batch_size=per_core_batch_size)
+    logging.info('Training model using real data in directory "%s".',
+                 FLAGS.data)
+    # If evaluating complete_eval, we feed the inputs from a Python generator,
+    # so we need to build a single batch for all of the cores, which will be
+    # split on TPU.
+    per_core_batch_size = (
+        batch_size if (FLAGS.complete_eval or not FLAGS.tpu) else PER_CORE_BATCH_SIZE)
+    imagenet_validation = imagenet_input.ImageNetInput(
+        FLAGS.image_width, FLAGS.image_height,
+        resize=(FLAGS.model != 'logo'),
+        is_training=False,
+        data_dir=FLAGS.bucket+FLAGS.data if FLAGS.tpu else FLAGS.data,
+        per_core_batch_size=per_core_batch_size)
+
+    if FLAGS.evalonly:
+        validation_epochs = [420]  # Arbitrary epoch number for the single eval run.
+        logging.info("Only running a single validation epoch")
+    else:
+        validation_epochs = [3, 10, 30, 60, 90]
+        logging.info("Validation will be run on epochs %s", str(validation_epochs))
+
+    eval_callback = eval_utils.TensorBoardWithValidation(
+            log_dir=FLAGS.log,
+            validation_imagenet_input=imagenet_validation,
+            validation_steps=val_steps,
+            validation_epochs=validation_epochs,
+            write_images=True,
+            write_graph=True,
+            plot_wrong=FLAGS.plot_wrong,
+            plot_cm=FLAGS.plot_cm,
+            plot_pr=FLAGS.plot_pr,
+            classes=classes,
+            complete_eval=FLAGS.complete_eval)
+
+    callbacks = [
+        tf.keras.callbacks.ModelCheckpoint(FLAGS.log+"/weights.{epoch:02d}-{sparse_categorical_accuracy:.2f}.hdf5",
+            monitor='sparse_categorical_accuracy', verbose=1,
+            save_best_only=True, save_weights_only=True, mode='auto'),
+        LearningRateBatchScheduler(schedule=learning_rate_schedule, train_steps_per_epoch=train_steps_per_epoch, base_learning_rate=FLAGS.lr),
+        eval_callback
+    ]
+
+    # On TPU, Keras is handed the input_fn itself; on CPU we pass the dataset
+    # it returns.
+    if FLAGS.tpu:
+        model_in = imagenet_train.input_fn
+    else:
+        model_in = imagenet_train.input_fn()
+
+    preloaded_weights = []
+    for layer in model.layers:
+        preloaded_weights.append(layer.get_weights())
+
+    if FLAGS.weights:
+        weights_file = os.path.join(FLAGS.weights)
+        logging.info('Loading trained weights from %s', weights_file)
+        model.load_weights(weights_file, by_name=FLAGS.weights_by_name)
+        if FLAGS.weights2:
+            weights2_file = os.path.join(FLAGS.weights2)
+            logging.info('Loading secondary trained weights from %s', weights2_file)
+            model.load_weights(weights2_file, by_name=FLAGS.weights_by_name)
+    else:
+        if FLAGS.weights2:
+            logging.debug("Ignoring --weights2 flag as no --weights")
+        weights_file = os.path.join(DEFAULT_WEIGHTS_H5)
+
+    # Heuristic check that loading actually changed the weights: warn about
+    # any layer whose parameters are identical to their pre-load values.
+    for layer, pre in zip(model.layers, preloaded_weights):
+        weights = layer.get_weights()
+
+        populated = True
+        if weights:
+          for weight, pr in zip(weights, pre):
+            if np.array_equal(weight, pr):
+                populated = False
+
+        if not populated:
+            logging.warning('Layer %s not populated with weights!', layer.name)
+
+    if FLAGS.evalonly:
+        eval_callback.set_model(model)
+        eval_callback.on_epoch_end(420)
+    else:
+        model.fit(model_in,
+                  epochs=FLAGS.epochs,
+                  
initial_epoch=FLAGS.initial_epoch,
+                  class_weight=class_weights if FLAGS.class_weights else None,
+                  steps_per_epoch=train_steps_per_epoch,
+                  callbacks=callbacks)
+
+if __name__ == '__main__':
+  tf.logging.set_verbosity(tf.logging.INFO)
+  app.run(main)
diff --git a/resnet_preprocessing.py b/resnet_preprocessing.py
new file mode 100644
index 0000000..72c799a
--- /dev/null
+++ b/resnet_preprocessing.py
@@ -0,0 +1,87 @@
+# Trimmed by Vasil Zlatanov
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""ImageNet preprocessing for ResNet."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+def resize_or_crop_image(image, target_height, target_width):
+  image_height = tf.shape(image)[0]
+  image_width = tf.shape(image)[1]
+  # If the viewport is long but the width is right, simply crop the length of
+  # the page; otherwise resize the image bilinearly.
+  image = tf.cond(
+          tf.logical_and(tf.greater(image_height, target_height), tf.equal(target_width, image_width)),
+          lambda: tf.cast(tf.image.crop_to_bounding_box(image, 0, 0, target_height, target_width), dtype=tf.float32),
+          lambda: tf.image.resize_images(image, [target_height, target_width], align_corners=True)
+          )
+  return image
+
+def preprocess_for_train(image_bytes, target_width, target_height, resize, use_bfloat16):
+  """Preprocesses the given image for training.
+
+  Currently identical to preprocess_for_eval; no training-time augmentation
+  is applied here.
+
+  Args:
+    image_bytes: `Tensor` representing an image binary of arbitrary size.
+    target_width: desired image width.
+    target_height: desired image height.
+    resize: `bool` for whether to resize or crop the image to the target size.
+    use_bfloat16: `bool` for whether to use bfloat16.
+
+  Returns:
+    A preprocessed image `Tensor`.
+  """
+  image = tf.image.decode_png(image_bytes, channels=3)
+  if resize:
+      image = resize_or_crop_image(image, target_height, target_width)
+  else:
+      image = tf.cast(image, tf.float32)
+
+  return image
+
+
+def preprocess_for_eval(image_bytes, target_width, target_height, resize, use_bfloat16):
+  """Preprocesses the given image for evaluation.
+
+  Args:
+    image_bytes: `Tensor` representing an image binary of arbitrary size.
+    target_width: desired image width.
+    target_height: desired image height.
+    resize: `bool` for whether to resize or crop the image to the target size.
+    use_bfloat16: `bool` for whether to use bfloat16.
+
+  Returns:
+    A preprocessed image `Tensor`.
+  """
+  image = tf.image.decode_png(image_bytes, channels=3)
+  if resize:
+      image = resize_or_crop_image(image, target_height, target_width)
+  else:
+      image = tf.cast(image, tf.float32)
+
+  return image
+
+
+def preprocess_image(image_bytes, width, height, resize, is_training=False, use_bfloat16=False):
+  """Preprocesses the given image.
+
+  Args:
+    image_bytes: `Tensor` representing an image binary of arbitrary size.
+    width: desired image width.
+    height: desired image height.
+    resize: `bool` for whether to resize or crop the image to the target size.
+    is_training: `bool` for whether the preprocessing is for training.
+    use_bfloat16: `bool` for whether to use bfloat16.
+
+  Returns:
+    A preprocessed image `Tensor`.
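+
+  Example (illustrative; assumes `raw` holds PNG bytes from tf.read_file):
+    preprocess_image(raw, 320, 240, resize=True) returns a float32
+    tensor of shape [240, 320, 3].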
+  """ +  if is_training: +    return preprocess_for_train(image_bytes, width, height, resize, use_bfloat16) +  else: +    return preprocess_for_eval(image_bytes, width, height, resize, use_bfloat16) diff --git a/util/__pycache__/dataset_utils.cpython-37.pyc b/util/__pycache__/dataset_utils.cpython-37.pycBinary files differ new file mode 100644 index 0000000..7ed2852 --- /dev/null +++ b/util/__pycache__/dataset_utils.cpython-37.pyc diff --git a/util/dataset_utils.py b/util/dataset_utils.py new file mode 100644 index 0000000..fdaefca --- /dev/null +++ b/util/dataset_utils.py @@ -0,0 +1,150 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +#     http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains utilities for downloading and converting datasets.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import tarfile + +from six.moves import urllib +import tensorflow as tf + +LABELS_FILENAME = 'labels.txt' + + +def int64_feature(values): +  """Returns a TF-Feature of int64s. + +  Args: +    values: A scalar or list of values. + +  Returns: +    A TF-Feature. +  """ +  if not isinstance(values, (tuple, list)): +    values = [values] +  return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) + + +def bytes_feature(values): +  """Returns a TF-Feature of bytes. + +  Args: +    values: A string. + +  Returns: +    A TF-Feature. +  """ +  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) + + +def float_feature(values): +  """Returns a TF-Feature of floats. + +  Args: +    values: A scalar of list of values. + +  Returns: +    A TF-Feature. +  """ +  if not isinstance(values, (tuple, list)): +    values = [values] +  return tf.train.Feature(float_list=tf.train.FloatList(value=values)) + + +def image_to_tfexample(image_data, image_format, height, width, class_id): +  return tf.train.Example(features=tf.train.Features(feature={ +      'image/encoded': bytes_feature(image_data), +      'image/format': bytes_feature(image_format), +      'image/class/label': int64_feature(class_id), +      'image/height': int64_feature(height), +      'image/width': int64_feature(width), +  })) + + +def download_and_uncompress_tarball(tarball_url, dataset_dir): +  """Downloads the `tarball_url` and uncompresses it locally. + +  Args: +    tarball_url: The URL of a tarball file. +    dataset_dir: The directory where the temporary files are stored. 
+  """ +  filename = tarball_url.split('/')[-1] +  filepath = os.path.join(dataset_dir, filename) + +  def _progress(count, block_size, total_size): +    sys.stdout.write('\r>> Downloading %s %.1f%%' % ( +        filename, float(count * block_size) / float(total_size) * 100.0)) +    sys.stdout.flush() +  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) +  print() +  statinfo = os.stat(filepath) +  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') +  tarfile.open(filepath, 'r:gz').extractall(dataset_dir) + + +def write_label_file(labels_to_class_names, dataset_dir, +                     filename=LABELS_FILENAME): +  """Writes a file with the list of class names. + +  Args: +    labels_to_class_names: A map of (integer) labels to class names. +    dataset_dir: The directory in which the labels file should be written. +    filename: The filename where the class names are written. +  """ +  labels_filename = os.path.join(dataset_dir, filename) +  with tf.gfile.Open(labels_filename, 'w') as f: +    for label in labels_to_class_names: +      class_name = labels_to_class_names[label] +      f.write('%d:%s\n' % (label, class_name)) + + +def has_labels(dataset_dir, filename=LABELS_FILENAME): +  """Specifies whether or not the dataset directory contains a label map file. + +  Args: +    dataset_dir: The directory in which the labels file is found. +    filename: The filename where the class names are written. + +  Returns: +    `True` if the labels file exists and `False` otherwise. +  """ +  return tf.gfile.Exists(os.path.join(dataset_dir, filename)) + + +def read_label_file(dataset_dir, filename=LABELS_FILENAME): +  """Reads the labels file and returns a mapping from ID to class name. + +  Args: +    dataset_dir: The directory in which the labels file is found. +    filename: The filename where the class names are written. + +  Returns: +    A map from a label (integer) to class name. 
+  """ +  labels_filename = os.path.join(dataset_dir, filename) +  with tf.gfile.Open(labels_filename, 'rb') as f: +    lines = f.read().decode() +  lines = lines.split('\n') +  lines = filter(None, lines) + +  labels_to_class_names = {} +  for line in lines: +    index = line.index(':') +    labels_to_class_names[int(line[:index])] = line[index+1:] +  return labels_to_class_names diff --git a/util/generate_smaller.sh b/util/generate_smaller.sh new file mode 100755 index 0000000..68769b4 --- /dev/null +++ b/util/generate_smaller.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +OUTPUT_DIR='better_noise_logos' + +for width in $(seq 16 1 96);do  +  echo "Making width of $width" +  for brand in $1/*;do +    mkdir -p "$OUTPUT_DIR/$brand" + +    find $brand -iname '*.png' | while read image;do +      BRI=$((80 + RANDOM % 40)) +      SAT=$((80 + RANDOM % 40)) +      HUE=$((60 + RANDOM % 80)) +      width_off=$((RANDOM % (320-width))) +      height_off=$((RANDOM % (240-width))) +      convert -size 320x240 xc: +noise Random $image -modulate $BRI,$SAT,$HUE -geometry ${width}x+${width_off}+${height_off} -composite "$OUTPUT_DIR/$brand/`basename $image`.$width.png" +      sleep 1 +    done +  done + +done + diff --git a/util/make-tfrecords.py b/util/make-tfrecords.py new file mode 100755 index 0000000..f9f3e71 --- /dev/null +++ b/util/make-tfrecords.py @@ -0,0 +1,203 @@ +#!/usr/bin/python +import random +import os +import sys +import math +import tensorflow as tf +import dataset_utils +import numpy as np + +#===============DEFINE YOUR ARGUMENTS============== +flags = tf.app.flags + +#State your dataset directory +flags.DEFINE_string('data', None, 'String: Your dataset directory') + +# The number of images in the validation set. You would have to know the total number of examples in advance. This is essentially your evaluation dataset. +flags.DEFINE_float('validation_size', 0.25, 'Float: The proportion of examples in the dataset to be used for validation') + +# The number of shards per dataset split. +flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files') + +# Seed for repeatability. +flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.') +flags.DEFINE_bool('overwrite', False, 'Overwrite prevoiusly generated files') + +FLAGS = flags.FLAGS + +class ImageReader(object): +  """Helper class that provides TensorFlow image coding utilities.""" + +  def __init__(self): +    # Initializes function that decodes RGB JPEG data. +    self._decode_png_data = tf.placeholder(dtype=tf.string) +    self._decode_png = tf.image.decode_png(self._decode_png_data, channels=0) + +  def read_image_dims(self, sess, image_data): +    image = self.decode_png(sess, image_data) +    return image.shape[0], image.shape[1] + +  def decode_png(self, sess, image_data): +    image = sess.run(self._decode_png, +                     feed_dict={self._decode_png_data: image_data}) +    assert len(image.shape) == 3 +    return image + +def _get_filenames_and_classes(data): +  """Returns a list of filenames and inferred class names. + +  Args: +    data: A directory containing a set of subdirectories representing +      class names. Each subdirectory should contain PNG or JPG encoded images. + +  Returns: +    A list of image file paths, relative to `data` and the list of +    subdirectories, representing class names. 
+  """ +  directories = [] +  class_names = [] +  for filename in os.listdir(data): +    path = os.path.join(data, filename) +    if os.path.isdir(path): +      print(path) +      directories.append(path) +      class_names.append(filename) + +  photo_filenames = [] +  for directory in directories: +    for filename in os.listdir(directory): +      path = os.path.join(directory, filename) +      photo_filenames.append(path) + +  return photo_filenames, sorted(class_names) + + +def _get_dataset_filename(data, split_name, shard_id, _NUM_SHARDS): +  output_filename = 'websites_%s_%05d-of-%05d.tfrecord' % ( +      split_name, shard_id, _NUM_SHARDS) +  return os.path.join(data, output_filename) + + +def _convert_dataset(split_name, filenames, class_names_to_ids, data, _NUM_SHARDS): +  """Converts the given filenames to a TFRecord dataset. + +  Args: +    split_name: The name of the dataset, either 'train' or 'validation'. +    filenames: A list of absolute paths to png or jpg images. +    class_names_to_ids: A dictionary from class names (strings) to ids +      (integers). +    data: The directory where the converted datasets are stored. +  """ +  assert split_name in ['train', 'validation'] + +  failed = 0 +  success = 0 +  # class_cnts is used for balancing training through class_weights +  class_cnts = [0] * len(class_names_to_ids) +  num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS))) + +  with tf.Graph().as_default(): +    image_reader = ImageReader() + +    with tf.Session('') as sess: + +      for shard_id in range(_NUM_SHARDS): +        output_filename = _get_dataset_filename( +            data, split_name, shard_id, _NUM_SHARDS) + +        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: +          start_ndx = shard_id * num_per_shard +          end_ndx = min((shard_id+1) * num_per_shard, len(filenames)) +          for i in range(start_ndx, end_ndx): +#            sys.stdout.write('\r>> Converting image %d/%d shard %d: %s' % ( +#                i+1, len(filenames), shard_id, filenames[i])) +#            sys.stdout.flush() + +            # Read the filename: +            image_data = tf.gfile.FastGFile(filenames[i], 'rb').read() +            try: +                height, width = image_reader.read_image_dims(sess, image_data) +                class_name = os.path.basename(os.path.dirname(filenames[i])) +                class_id = class_names_to_ids[class_name] + +                example = dataset_utils.image_to_tfexample( +                    image_data, b'png', height, width, class_id) +                tfrecord_writer.write(example.SerializeToString()) +                success += 1; +                class_cnts[class_id] += 1; +            except: +                failed = failed + 1; + + + +   +  sys.stdout.write('%d in total failed!\n' % failed) +  sys.stdout.write('%d in total were written successfuly!\n' % success) +  sys.stdout.flush() +  return class_cnts + + +def _dataset_exists(data, _NUM_SHARDS): +  for split_name in ['train', 'validation']: +    for shard_id in range(_NUM_SHARDS): +      output_filename = _get_dataset_filename( +          data, split_name, shard_id, _NUM_SHARDS) +      if not tf.gfile.Exists(output_filename): +        return False +  return True + +def main(): + +    #=============CHECKS============== +    #Check if there is a dataset directory entered +    if not FLAGS.data: +        raise ValueError('data is empty. 
Please state a data argument.') + +    #If the TFRecord files already exist in the directory, then exit without creating the files again +    if not FLAGS.overwrite and _dataset_exists(data = FLAGS.data, _NUM_SHARDS = FLAGS.num_shards): +        print('Dataset files already exist. Exiting without re-creating them.') +        print('Use --overwrite flag or remove them') +        return None +    #==========END OF CHECKS============ + +    #Get a list of photo_filenames like ['123.jpg', '456.jpg'...] and a list of sorted class names from parsing the subdirectories. +    photo_filenames, class_names = _get_filenames_and_classes(FLAGS.data) + +    #Refer each of the class name to a specific integer number for predictions later +    class_names_to_ids = dict(zip(class_names, range(len(class_names)))) + +    #Find the number of validation examples we need +    num_validation = int(FLAGS.validation_size * len(photo_filenames)) + +    # Divide the training datasets into train and test: +    random.seed(FLAGS.random_seed) +    random.shuffle(photo_filenames) +    training_filenames = photo_filenames[num_validation:] +    validation_filenames = photo_filenames[:num_validation] + +    # First, convert the training and validation sets. +    train_cnts = _convert_dataset('train', training_filenames, class_names_to_ids, +                     data = FLAGS.data, _NUM_SHARDS = 1) +    val_cnts = _convert_dataset('validation', validation_filenames, class_names_to_ids, +                     data = FLAGS.data, _NUM_SHARDS = 1) + +    # Finally, write the labels file: +    labels_to_class_names = dict(zip(range(len(class_names)), class_names)) +    dataset_utils.write_label_file(labels_to_class_names, FLAGS.data) + +    total_train_cnt = sum(train_cnts) +    class_cnt  = len(train_cnts) +    class_weights = [ total_train_cnt/(train_cnts[i]*class_cnt+1e-10) for i in range(class_cnt) ] + +    data_info = os.path.join(FLAGS.data, 'dinfo.npz') +    np.savez(data_info, train_cnt=total_train_cnt, +                        val_cnt=sum(val_cnts), +                        class_weights=class_weights, +                        classes=class_names +                        ) + +    print('\nFinished converting the dataset!') + +if __name__ == "__main__": +    main() + diff --git a/util/plot-report b/util/plot-report new file mode 100755 index 0000000..927437f --- /dev/null +++ b/util/plot-report @@ -0,0 +1,102 @@ +#!/usr/bin/python +import tensorflow as tf +import numpy as np +import matplotlib.pyplot as plt +import scikitplot as skplt + +from sklearn.preprocessing import label_binarize +from sklearn.preprocessing import LabelEncoder +from sklearn.metrics import auc, confusion_matrix +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import average_precision_score +from sklearn.metrics import classification_report + +flags = tf.app.flags + +flags.DEFINE_string('softmax', None, 'The softmax.npz file contained labels and probas') +flags.DEFINE_string('dinfo', None, 'The dinfo.npz file') +flags.DEFINE_integer('chunks', 4, 'The number of plots to produce') + + +FLAGS = flags.FLAGS + + +def plot_classification_report(classification_report, title='Classification report ', cmap='RdBu'): +    ''' +    Plot scikit-learn classification report. 
+    Extension based on https://stackoverflow.com/a/31689645/395857
+    N.B. this relies on a heatmap() helper (e.g. from the same Stack
+    Overflow thread) which is not defined in this script.
+    '''
+    lines = classification_report.split('\n')
+
+    classes = []
+    plotMat = []
+    support = []
+    class_names = []
+    for line in lines[2 : (len(lines) - 2)]:
+        t = line.strip().split()
+        if len(t) < 2: continue
+        classes.append(t[0])
+        v = [float(x) for x in t[1: len(t) - 1]]
+        support.append(int(t[-1]))
+        class_names.append(t[0])
+        print(v)
+        plotMat.append(v)
+
+    print('plotMat: {0}'.format(plotMat))
+    print('support: {0}'.format(support))
+
+    xlabel = 'Metrics'
+    ylabel = 'Classes'
+    xticklabels = ['Precision', 'Recall', 'F1-score']
+    yticklabels = ['{0} ({1})'.format(class_names[idx], sup) for idx, sup in enumerate(support)]
+    figure_width = 25
+    figure_height = len(class_names) + 7
+    correct_orientation = False
+    heatmap(np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels, figure_width, figure_height, correct_orientation, cmap=cmap)
+
+softmax = np.load(FLAGS.softmax)
+dinfo = np.load(FLAGS.dinfo)
+
+class_names = dinfo['classes']
+
+y_true = softmax['labels']
+y_proba = softmax['predictions']
+
+y_true_sparse = label_binarize(y_true, classes=np.unique(y_true))
+y_pred = np.argmax(y_proba, axis=1)
+
+cl_report = classification_report(y_true, y_pred, target_names=class_names, labels=np.arange(len(class_names)))
+print(cl_report)
+
+cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
+print(cm)
+
+def top_wrong(cm, N=150):
+    a = cm
+    idx = np.argsort(a.ravel())[-N:][::-1]  # single slicing: `[:N-2:-1]`
+    topN_val = a.ravel()[idx]
+    row_col = np.c_[np.unravel_index(idx, a.shape)]
+    return row_col
+
+#print(top_wrong(cm))
+for idxs in top_wrong(cm):
+    if idxs[0] != idxs[1]:
+        print(class_names[idxs[0]], "\t", class_names[idxs[1]], "\t", cm[idxs[0], idxs[1]])
+
+benign_class = np.where(class_names == 'benign')
+
+benign_pages, _ = np.where(y_true == benign_class)
+
+cnt = 0
+cnt9 = 0
+for benign_page in benign_pages:
+    guess = y_pred[benign_page]
+    if guess != benign_class:
+        softmax_val = y_proba[benign_page][guess]
+        cnt += 1
+        if softmax_val > 0.95:
+            print("B: " + class_names[guess] + "\t" + str(softmax_val))
+            cnt9 += 1
+
+print('We have ' + str(cnt9) + ' false-positives with softmax > 0.95 out of ' + str(cnt) + '/' + str(benign_pages.size))
diff --git a/util/plot-softmax b/util/plot-softmax
new file mode 100755
index 0000000..c6c2774
--- /dev/null
+++ b/util/plot-softmax
@@ -0,0 +1,94 @@
+#!/usr/bin/python
+import tensorflow as tf
+import numpy as np
+import matplotlib.pyplot as plt
+import scikitplot as skplt
+
+from sklearn.preprocessing import label_binarize
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import auc
+from sklearn.metrics import precision_recall_curve
+from sklearn.metrics import average_precision_score
+
+flags = tf.app.flags
+
+flags.DEFINE_string('softmax', None, 'The softmax.npz file containing labels and probas')
+flags.DEFINE_string('dinfo', None, 'The dinfo.npz file')
+flags.DEFINE_integer('chunks', 4, 'The number of plots to produce')
+
+
+FLAGS = flags.FLAGS
+
+softmax = np.load(FLAGS.softmax)
+dinfo = np.load(FLAGS.dinfo)
+
+class_names = dinfo['classes']
+
+y_true = softmax['labels']
+y_proba = softmax['predictions']
+
+
+def plot_precision_recall(y_true, y_probas,
+                          plot_micro=True,
+                          classes_to_plot=None, 
ax=None,
+                          figsize=None, cmap='nipy_spectral',
+                          text_fontsize="medium"):
+
+    y_true = np.array(y_true)
+    y_probas = np.array(y_probas)
+
+    classes = np.unique(y_true)
+    probas = y_probas
+
+    if classes_to_plot is None:
+        classes_to_plot = classes
+
+    binarized_y_true = label_binarize(y_true, classes=classes)
+    if len(classes) == 2:
+        binarized_y_true = np.hstack(
+            (1 - binarized_y_true, binarized_y_true))
+
+    # The `ax` argument is ignored; a fresh chunks/2 x 2 grid of subplots is
+    # always created.
+    fig, ax = plt.subplots(int(FLAGS.chunks/2), 2, figsize=figsize)
+    chunk_size = int(len(classes)/FLAGS.chunks) + int(len(classes) % FLAGS.chunks > 0)
+    print('Chunk size', chunk_size)
+
+    # NOTE: indices_to_plot is currently unused; every class is plotted and
+    # spread across the subplot grid chunk by chunk.
+    indices_to_plot = np.in1d(classes, classes_to_plot)
+
+    for i, img_class in enumerate(classes):
+            average_precision = average_precision_score(
+                binarized_y_true[:, i],
+                probas[:, i])
+            precision, recall, _ = precision_recall_curve(
+                y_true, probas[:, i], pos_label=img_class)
+            color = plt.cm.get_cmap(cmap)(float(i%chunk_size) / chunk_size)
+            ax[int(i/(chunk_size*2)), int(i%(chunk_size*2) > chunk_size)].plot(recall, precision, lw=2,
+                    label='{0} '
+                          '(area = {1:0.3f})'.format(class_names[int(img_class)],
+                                                     average_precision),
+                    color=color)
+
+    if plot_micro:
+        precision, recall, _ = precision_recall_curve(
+            binarized_y_true.ravel(), probas.ravel())
+        average_precision = average_precision_score(binarized_y_true,
+                                                    probas,
+                                                    average='micro')
+        ax[int(FLAGS.chunks/2)-1,1].plot(recall, precision,
+                label='micro-average PR '
+                      '(area = {0:0.3f})'.format(average_precision),
+                color='navy', linestyle=':', linewidth=4)
+
+    for x in range(int(FLAGS.chunks/2)):
+       for y in range(2):
+         ax[x,y].set_xlim([0.0, 1.0])
+         ax[x,y].set_ylim([0.0, 1.05])
+         ax[x,y].set_xlabel('Recall')
+         ax[x,y].set_ylabel('Precision')
+         ax[x,y].tick_params(labelsize=text_fontsize)
+         ax[x,y].legend(loc='lower left', fontsize=text_fontsize)
+    return ax
+
+plot_precision_recall(y_true, y_proba, text_fontsize="xx-small", classes_to_plot=[3,16,41,70,77,82])
+plt.show()
diff --git a/util/splitter b/util/splitter
new file mode 100755
index 0000000..0373669
--- /dev/null
+++ b/util/splitter
@@ -0,0 +1,30 @@
+#!/usr/bin/perl
+my $target;
+my $md5_hash;
+my $png;
+my $count = 0;
+
+
+while (<>){
+  if (/(.*),,,vas,,,(.*),,,vas,,,(.*)/) {
+    if($target){
+      mkdir "images-man/$target" unless -d "images-man/$target";
+      open(my $fh, '>', "images-man/$target/$target-$md5_hash.png") or die "could not write";
+      print $fh $png;
+      close $fh;
+    }
+    $count++;
+    $target = $1;
+    $md5_hash = $2;
+    $png = $3."\n";
+  } else {
+    $png.=$_;
+  }
+}
+
+mkdir "images-man/$target" unless -d "images-man/$target";
+open(my $fh, '>', "images-man/$target/$target-$md5_hash.png") or die;
+print $fh $png;
+close $fh;
+
+print($count, " images written\n");
diff --git a/util/splitter-man b/util/splitter-man
new file mode 100755
index 0000000..fbdef17
--- /dev/null
+++ b/util/splitter-man
@@ -0,0 +1,30 @@
+#!/usr/bin/perl
+my $target;
+my $md5_hash;
+my $png;
+my $count = 0;
+
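+# Splits a concatenated dump into one PNG per record. Input records have
+# the form <target>,,,vas,,,<md5>,,,<png data...>; every line up to the
+# next delimiter line is appended to the current PNG. (Descriptive
+# comment; format inferred from the regex below.)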
+
+while (<>){
+  if (/(.*),,,vas,,,(.*),,,vas,,,(.*)/) {
+    if($target){
+      mkdir "images/$target" unless -d "images/$target";
+      open(my $fh, '>', "images/$target/$target-$md5_hash.png") or die "could not write";
+      print $fh $png;
+      close $fh;
+    }
+    $count++;
+    $target = $1;
+    $md5_hash = $2;
+    $png = $3."\n";
+  } else {
+    $png.=$_;
+  }
+}
+
+mkdir "images/$target" unless -d "images/$target";
+open(my $fh, '>', "images/$target/$target-$md5_hash.png") or die;
+print $fh $png;
+close $fh;
+
+print($count, " images written\n");
diff --git a/util/test.dump b/util/test.dump
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/util/test.dump
