From 740e1b0c6a02a7bec20008758373f0dd80baade4 Mon Sep 17 00:00:00 2001
From: Vasil Zlatanov
Date: Tue, 5 Mar 2019 14:29:29 +0000
Subject: Add virtual_batch support

---
 cgan.py                                          |  28 +-
 dcgan.py                                         |  34 ++-
 lib/__pycache__/virtual_batch.cpython-37.pyc     | Bin 0 -> 1758 bytes
 .../virtual_batchnorm_impl.cpython-37.pyc        | Bin 0 -> 8723 bytes
 lib/virtual_batch.py                             |  39 +++
 lib/virtual_batchnorm_impl.py                    | 306 +++++++++++++++++++++
 6 files changed, 386 insertions(+), 21 deletions(-)
 mode change 100755 => 100644 cgan.py
 create mode 100644 lib/__pycache__/virtual_batch.cpython-37.pyc
 create mode 100644 lib/__pycache__/virtual_batchnorm_impl.cpython-37.pyc
 create mode 100644 lib/virtual_batch.py
 create mode 100644 lib/virtual_batchnorm_impl.py

diff --git a/cgan.py b/cgan.py
old mode 100755
new mode 100644
index 5ab0c10..b9928f0
--- a/cgan.py
+++ b/cgan.py
@@ -1,21 +1,23 @@
 from __future__ import print_function, division
 import tensorflow.keras as keras
 import tensorflow as tf
-from keras.datasets import mnist
-from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply
-from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
-from keras.layers.advanced_activations import LeakyReLU
-from keras.layers.convolutional import UpSampling2D, Conv2D
-from keras.models import Sequential, Model
-from keras.optimizers import Adam
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply
+from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
+from tensorflow.keras.layers import LeakyReLU
+from tensorflow.keras.layers import UpSampling2D, Conv2D
+from tensorflow.keras.models import Sequential, Model
+from tensorflow.keras.optimizers import Adam
 
 import matplotlib.pyplot as plt
 from IPython.display import clear_output
 from tqdm import tqdm
+from lib.virtual_batch import VirtualBatchNormalization
+
 import numpy as np
 
 class CGAN():
-    def __init__(self, dense_layers = 3):
+    def __init__(self, dense_layers = 3, virtual_batch_normalization=False):
         # Input shape
         self.img_rows = 28
         self.img_cols = 28
@@ -24,6 +26,7 @@ class CGAN():
         self.num_classes = 10
         self.latent_dim = 100
         self.dense_layers = dense_layers
+        self.virtual_batch_normalization = virtual_batch_normalization
 
         optimizer = Adam(0.0002, 0.5)
 
@@ -63,7 +66,10 @@ class CGAN():
             output_size = 2**(8+i)
             model.add(Dense(output_size, input_dim=self.latent_dim))
             model.add(LeakyReLU(alpha=0.2))
-            model.add(BatchNormalization(momentum=0.8))
+            if self.virtual_batch_normalization:
+                model.add(VirtualBatchNormalization(momentum=0.8))
+            else:
+                model.add(BatchNormalization(momentum=0.8))
 
         model.add(Dense(np.prod(self.img_shape), activation='tanh'))
         model.add(Reshape(self.img_shape))
@@ -136,6 +142,7 @@ class CGAN():
 
             # Sample noise as generator input
             noise = np.random.normal(0, 1, (batch_size, 100))
+            tf.keras.backend.get_session().run(tf.global_variables_initializer())
 
             # Generate a half batch of new images
             gen_imgs = self.generator.predict([noise, labels])
@@ -217,10 +224,9 @@ class CGAN():
 
         return train_data, test_data, val_data, labels_train, labels_test, labels_val
 
-'''
 if __name__ == '__main__':
-    cgan = CGAN(dense_layers=1)
+    cgan = CGAN(dense_layers=1, virtual_batch_normalization=True)
     cgan.train(epochs=7000, batch_size=32, sample_interval=200)
     train, test, tr_labels, te_labels = cgan.generate_data()
     print(train.shape, test.shape)
diff --git a/dcgan.py b/dcgan.py
index bc7e14e..eca1852 100644
--- a/dcgan.py
+++ b/dcgan.py
@@ -1,11 +1,15 @@
 from __future__ import print_function, division
-from keras.datasets import mnist
-from keras.layers import Input, Dense, Reshape, Flatten, Dropout
-from keras.layers import BatchNormalization, Activation, ZeroPadding2D
-from keras.layers.advanced_activations import LeakyReLU
-from keras.layers.convolutional import UpSampling2D, Conv2D
-from keras.models import Sequential, Model
-from keras.optimizers import Adam
+import tensorflow as tf
+
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout
+from tensorflow.keras.layers import BatchNormalization, Activation, ZeroPadding2D
+from tensorflow.keras.layers import LeakyReLU
+from tensorflow.keras.layers import UpSampling2D, Conv2D
+from tensorflow.keras.models import Sequential, Model
+from tensorflow.keras.optimizers import Adam
+
+from lib.virtual_batch import VirtualBatchNormalization
 
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
@@ -17,7 +21,7 @@ import sys
 import numpy as np
 
 class DCGAN():
-    def __init__(self, conv_layers = 1):
+    def __init__(self, conv_layers = 1, virtual_batch_normalization=False):
         # Input shape
         self.img_rows = 28
         self.img_cols = 28
@@ -25,6 +29,7 @@ class DCGAN():
         self.img_shape = (self.img_rows, self.img_cols, self.channels)
         self.latent_dim = 100
         self.conv_layers = conv_layers
+        self.virtual_batch_normalization = virtual_batch_normalization
 
         optimizer = Adam(0.002, 0.5)
 
@@ -62,14 +67,21 @@ class DCGAN():
 
         for i in range(self.conv_layers):
             model.add(Conv2D(128, kernel_size=3, padding="same"))
-            model.add(BatchNormalization())
+            if self.virtual_batch_normalization:
+                model.add(VirtualBatchNormalization())
+            else:
+                model.add(BatchNormalization())
             model.add(Activation("relu"))
             model.add(UpSampling2D())
 
         for i in range(self.conv_layers):
             model.add(Conv2D(64, kernel_size=3, padding="same"))
-            model.add(BatchNormalization())
+            if self.virtual_batch_normalization:
+                model.add(VirtualBatchNormalization())
+            else:
+                model.add(BatchNormalization())
+
             model.add(Activation("relu"))
 
         model.add(Conv2D(self.channels, kernel_size=3, padding="same"))
 
@@ -137,6 +149,8 @@ class DCGAN():
             idx = np.random.randint(0, X_train.shape[0], batch_size)
             imgs = X_train[idx]
 
+            tf.keras.backend.get_session().run(tf.global_variables_initializer())
+
             # Sample noise and generate a batch of new images
             noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
             gen_imgs = self.generator.predict(noise)
diff --git a/lib/__pycache__/virtual_batch.cpython-37.pyc b/lib/__pycache__/virtual_batch.cpython-37.pyc
new file mode 100644
index 0000000..1ca89c1
Binary files /dev/null and b/lib/__pycache__/virtual_batch.cpython-37.pyc differ
diff --git a/lib/__pycache__/virtual_batchnorm_impl.cpython-37.pyc b/lib/__pycache__/virtual_batchnorm_impl.cpython-37.pyc
new file mode 100644
index 0000000..1d41d7f
Binary files /dev/null and b/lib/__pycache__/virtual_batchnorm_impl.cpython-37.pyc differ
diff --git a/lib/virtual_batch.py b/lib/virtual_batch.py
new file mode 100644
index 0000000..dab0419
--- /dev/null
+++ b/lib/virtual_batch.py
@@ -0,0 +1,39 @@
+import tensorflow as tf
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import Layer
+from lib.virtual_batchnorm_impl import VBN
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.keras.engine.base_layer import InputSpec
+from tensorflow.python.keras import initializers
+
+class VirtualBatchNormalization(Layer):
+    def __init__(self,
+                 momentum=0.99,
+                 center=True,
+                 scale=True,
+                 beta_initializer='zeros',
+                 gamma_initializer='ones',
+                 beta_regularizer=None,
+                 gamma_regularizer=None,
+                 **kwargs):
+
+        self.beta_initializer = initializers.get(beta_initializer)
+        self.gamma_initializer = initializers.get(gamma_initializer)
+
+        super(VirtualBatchNormalization, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        input_shape = tensor_shape.TensorShape(input_shape)
+        if not input_shape.ndims:
+            raise ValueError('Input has undefined rank:', input_shape)
+        ndims = len(input_shape)
+        self.input_spec = InputSpec(ndim=ndims)
+        # super(VirtualBatchNormalization, self).build(input_shape)  # Be sure to call this at the end
+
+    def call(self, x):
+        outputs = VBN(x, gamma_initializer=self.gamma_initializer, beta_initializer=self.beta_initializer)(x)
+        outputs.set_shape(x.get_shape())
+        return outputs
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
diff --git a/lib/virtual_batchnorm_impl.py b/lib/virtual_batchnorm_impl.py
new file mode 100644
index 0000000..650eab9
--- /dev/null
+++ b/lib/virtual_batchnorm_impl.py
@@ -0,0 +1,306 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Virtual batch normalization.
+
+This technique was first introduced in `Improved Techniques for Training GANs`
+(Salimans et al, https://arxiv.org/abs/1606.03498). Instead of using batch
+normalization on a minibatch, it fixes a reference subset of the data to use
+for calculating normalization statistics.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import variable_scope
+
+__all__ = [
+    'VBN',
+]
+
+
+def _static_or_dynamic_batch_size(tensor, batch_axis):
+  """Returns the static or dynamic batch size."""
+  batch_size = array_ops.shape(tensor)[batch_axis]
+  static_batch_size = tensor_util.constant_value(batch_size)
+  return static_batch_size or batch_size
+
+
+def _statistics(x, axes):
+  """Calculate the mean and mean square of `x`.
+
+  Modified from the implementation of `tf.nn.moments`.
+
+  Args:
+    x: A `Tensor`.
+    axes: Array of ints. Axes along which to compute mean and variance.
+
+  Returns:
+    Two `Tensor` objects: `mean` and `square mean`.
+  """
+  # The dynamic range of fp16 is too limited to support the collection of
+  # sufficient statistics. As a workaround we simply perform the operations
+  # on 32-bit floats before converting the mean and variance back to fp16
+  y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
+
+  # Compute true mean while keeping the dims for proper broadcasting.
+  shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keepdims=True))
+
+  shifted_mean = math_ops.reduce_mean(y - shift, axes, keepdims=True)
+  mean = shifted_mean + shift
+  mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keepdims=True)
+
+  mean = array_ops.squeeze(mean, axes)
+  mean_squared = array_ops.squeeze(mean_squared, axes)
+  if x.dtype == dtypes.float16:
+    return (math_ops.cast(mean, dtypes.float16),
+            math_ops.cast(mean_squared, dtypes.float16))
+  else:
+    return (mean, mean_squared)
+
+
+def _validate_init_input_and_get_axis(reference_batch, axis):
+  """Validate input and return the used axis value."""
+  if reference_batch.shape.ndims is None:
+    raise ValueError('`reference_batch` has unknown dimensions.')
+
+  ndims = reference_batch.shape.ndims
+  if axis < 0:
+    used_axis = ndims + axis
+  else:
+    used_axis = axis
+  if used_axis < 0 or used_axis >= ndims:
+    raise ValueError('Value of `axis` argument ' + str(used_axis) +
+                     ' is out of range for input with rank ' + str(ndims))
+  return used_axis
+
+
+def _validate_call_input(tensor_list, batch_dim):
+  """Verifies that tensor shapes are compatible, except for `batch_dim`."""
+  def _get_shape(tensor):
+    shape = tensor.shape.as_list()
+    del shape[batch_dim]
+    return shape
+  base_shape = tensor_shape.TensorShape(_get_shape(tensor_list[0]))
+  for tensor in tensor_list:
+    base_shape.assert_is_compatible_with(_get_shape(tensor))
+
+
+class VBN(object):
+  """A class to perform virtual batch normalization.
+
+  This technique was first introduced in `Improved Techniques for Training
+  GANs` (Salimans et al, https://arxiv.org/abs/1606.03498). Instead of using
+  batch normalization on a minibatch, it fixes a reference subset of the data
+  to use for calculating normalization statistics.
+
+  To do this, we calculate the reference batch mean and mean square, and modify
+  those statistics for each example. We use mean square instead of variance,
+  since it is linear.
+
+  Note that if `center` or `scale` variables are created, they are shared
+  between all calls to this object.
+
+  The `__init__` API is intended to mimic `tf.layers.batch_normalization` as
+  closely as possible.
+  """
+
+  def __init__(self,
+               reference_batch,
+               axis=-1,
+               epsilon=1e-3,
+               center=True,
+               scale=True,
+               beta_initializer=init_ops.zeros_initializer(),
+               gamma_initializer=init_ops.ones_initializer(),
+               beta_regularizer=None,
+               gamma_regularizer=None,
+               trainable=True,
+               name=None,
+               batch_axis=0):
+    """Initialize virtual batch normalization object.
+
+    We precompute the 'mean' and 'mean squared' of the reference batch, so that
+    `__call__` is efficient. This means that the axis must be supplied when the
+    object is created, not when it is called.
+
+    We precompute 'square mean' instead of 'variance', because the square mean
+    can be easily adjusted on a per-example basis.
+
+    Args:
+      reference_batch: A minibatch of tensors. This will form the reference
+        data from which the normalization statistics are calculated. See
+        https://arxiv.org/abs/1606.03498 for more details.
+      axis: Integer, the axis that should be normalized (typically the features
+        axis). For instance, after a `Convolution2D` layer with
+        `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
+      epsilon: Small float added to variance to avoid dividing by zero.
+      center: If True, add offset of `beta` to normalized tensor. If False,
+        `beta` is ignored.
+      scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
+        the next layer is linear (also e.g. `nn.relu`), this can be disabled
+        since the scaling can be done by the next layer.
+      beta_initializer: Initializer for the beta weight.
+      gamma_initializer: Initializer for the gamma weight.
+      beta_regularizer: Optional regularizer for the beta weight.
+      gamma_regularizer: Optional regularizer for the gamma weight.
+      trainable: Boolean, if `True` also add variables to the graph collection
+        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
+      name: String, the name of the ops.
+      batch_axis: The axis of the batch dimension. This dimension is treated
+        differently in `virtual batch normalization` vs `batch normalization`.
+
+    Raises:
+      ValueError: If `reference_batch` has unknown dimensions at graph
+        construction.
+      ValueError: If `batch_axis` is the same as `axis`.
+    """
+    axis = _validate_init_input_and_get_axis(reference_batch, axis)
+    self._epsilon = epsilon
+    self._beta = 0
+    self._gamma = 1
+    self._batch_axis = _validate_init_input_and_get_axis(
+        reference_batch, batch_axis)
+
+    if axis == self._batch_axis:
+      raise ValueError('`axis` and `batch_axis` cannot be the same.')
+
+    with variable_scope.variable_scope(name, 'VBN',
+                                       values=[reference_batch]) as self._vs:
+      self._reference_batch = reference_batch
+
+      # Calculate important shapes:
+      #  1) Reduction axes for the reference batch
+      #  2) Broadcast shape, if necessary
+      #  3) Reduction axes for the virtual batchnormed batch
+      #  4) Shape for optional parameters
+      input_shape = self._reference_batch.shape
+      ndims = input_shape.ndims
+      reduction_axes = list(range(ndims))
+      del reduction_axes[axis]
+
+      self._broadcast_shape = [1] * len(input_shape)
+      self._broadcast_shape[axis] = input_shape[axis].value
+
+      self._example_reduction_axes = list(range(ndims))
+      del self._example_reduction_axes[max(axis, self._batch_axis)]
+      del self._example_reduction_axes[min(axis, self._batch_axis)]
+
+      params_shape = self._reference_batch.shape[axis]
+
+      # Determines whether broadcasting is needed. This is slightly different
+      # than in the `nn.batch_normalization` case, due to `batch_dim`.
+      self._needs_broadcasting = (
+          sorted(self._example_reduction_axes) != list(range(ndims))[:-2])
+
+      # Calculate the sufficient statistics for the reference batch in a way
+      # that can be easily modified by additional examples.
+      self._ref_mean, self._ref_mean_squares = _statistics(
+          self._reference_batch, reduction_axes)
+      self._ref_variance = (self._ref_mean_squares -
+                            math_ops.square(self._ref_mean))
+
+      # Virtual batch normalization uses a weighted average between example
+      # statistics and the reference batch statistics.
+      ref_batch_size = _static_or_dynamic_batch_size(
+          self._reference_batch, self._batch_axis)
+      self._example_weight = 1. / (math_ops.to_float(ref_batch_size) + 1.)
+      self._ref_weight = 1. - self._example_weight
+
+      # Make the variables, if necessary.
+      if center:
+        self._beta = variable_scope.get_variable(
+            name='beta',
+            shape=(params_shape,),
+            initializer=beta_initializer,
+            regularizer=beta_regularizer,
+            trainable=trainable)
+      if scale:
+        self._gamma = variable_scope.get_variable(
+            name='gamma',
+            shape=(params_shape,),
+            initializer=gamma_initializer,
+            regularizer=gamma_regularizer,
+            trainable=trainable)
+
+  def _virtual_statistics(self, inputs, reduction_axes):
+    """Compute the statistics needed for virtual batch normalization."""
+    cur_mean, cur_mean_sq = _statistics(inputs, reduction_axes)
+    vb_mean = (self._example_weight * cur_mean +
+               self._ref_weight * self._ref_mean)
+    vb_mean_sq = (self._example_weight * cur_mean_sq +
+                  self._ref_weight * self._ref_mean_squares)
+    return (vb_mean, vb_mean_sq)
+
+  def _broadcast(self, v, broadcast_shape=None):
+    # The exact broadcast shape depends on the current batch, not the
+    # reference batch, unless we're calculating the batch normalization of the
+    # reference batch.
+    b_shape = broadcast_shape or self._broadcast_shape
+    if self._needs_broadcasting and v is not None:
+      return array_ops.reshape(v, b_shape)
+    return v
+
+  def reference_batch_normalization(self):
+    """Return the reference batch, but batch normalized."""
+    with ops.name_scope(self._vs.name):
+      return nn.batch_normalization(self._reference_batch,
+                                    self._broadcast(self._ref_mean),
+                                    self._broadcast(self._ref_variance),
+                                    self._broadcast(self._beta),
+                                    self._broadcast(self._gamma),
+                                    self._epsilon)
+
+  def __call__(self, inputs):
+    """Run virtual batch normalization on inputs.
+
+    Args:
+      inputs: Tensor input.
+
+    Returns:
+      A virtual batch normalized version of `inputs`.
+
+    Raises:
+      ValueError: If `inputs` shape isn't compatible with the reference batch.
+    """
+    _validate_call_input([inputs, self._reference_batch], self._batch_axis)
+
+    with ops.name_scope(self._vs.name, values=[inputs, self._reference_batch]):
+      # Calculate the statistics on the current input on a per-example basis.
+      vb_mean, vb_mean_sq = self._virtual_statistics(
+          inputs, self._example_reduction_axes)
+      vb_variance = vb_mean_sq - math_ops.square(vb_mean)
+
+      # The exact broadcast shape of the input statistic Tensors depends on the
+      # current batch, not the reference batch. The parameter broadcast shape
+      # is independent of the shape of the input statistic Tensor dimensions.
+      b_shape = self._broadcast_shape[:]  # deep copy
+      b_shape[self._batch_axis] = _static_or_dynamic_batch_size(
+          inputs, self._batch_axis)
+      return nn.batch_normalization(
+          inputs,
+          self._broadcast(vb_mean, b_shape),
+          self._broadcast(vb_variance, b_shape),
+          self._broadcast(self._beta, self._broadcast_shape),
+          self._broadcast(self._gamma, self._broadcast_shape),
+          self._epsilon)
-- 
cgit v1.2.3
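
Usage sketch (not part of the patch): with this change applied, VirtualBatchNormalization is intended as a drop-in replacement for BatchNormalization, selected by the new virtual_batch_normalization flag. Assuming TF 1.x graph mode and the lib/ package added above, a minimal hypothetical example mirroring one generator block from cgan.py would be:

    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, LeakyReLU
    from lib.virtual_batch import VirtualBatchNormalization

    # Hypothetical generator block, following cgan.py's build_generator
    model = Sequential()
    model.add(Dense(256, input_dim=100))
    model.add(LeakyReLU(alpha=0.2))
    model.add(VirtualBatchNormalization(momentum=0.8))  # in place of BatchNormalization(momentum=0.8)

Because the VBN variables are created through variable_scope rather than Keras, they still need an explicit tf.global_variables_initializer() run in the session, as the training loops above do.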