diff options
-rw-r--r-- | classifier_metrics_impl.py | 1114 | ||||
-rw-r--r-- | lenet.py | 8 |
2 files changed, 1120 insertions, 2 deletions
diff --git a/classifier_metrics_impl.py b/classifier_metrics_impl.py new file mode 100644 index 0000000..2334d29 --- /dev/null +++ b/classifier_metrics_impl.py @@ -0,0 +1,1114 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Model evaluation tools for TFGAN. + +These methods come from https://arxiv.org/abs/1606.03498, +https://arxiv.org/abs/1706.08500, and https://arxiv.org/abs/1801.01401. + +NOTE: This implementation uses the same weights as in +https://github.com/openai/improved-gan/blob/master/inception_score/model.py, +but is more numerically stable and is an unbiased estimator of the true +Inception score even when splitting the inputs into batches. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os +import sys +import tarfile + +from six.moves import urllib + +from tensorflow.contrib.layers.python.layers import layers +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import image_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_impl +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import gfile +from tensorflow.python.platform import resource_loader + + +__all__ = [ + 'get_graph_def_from_disk', + 'get_graph_def_from_resource', + 'get_graph_def_from_url_tarball', + 'preprocess_image', + 'run_image_classifier', + 'run_inception', + 'inception_score', + 'classifier_score', + 'classifier_score_from_logits', + 'frechet_inception_distance', + 'frechet_classifier_distance', + 'frechet_classifier_distance_from_activations', + 'mean_only_frechet_classifier_distance_from_activations', + 'diagonal_only_frechet_classifier_distance_from_activations', + 'kernel_inception_distance', + 'kernel_inception_distance_and_std', + 'kernel_classifier_distance', + 'kernel_classifier_distance_and_std', + 'kernel_classifier_distance_from_activations', + 'kernel_classifier_distance_and_std_from_activations', + 'INCEPTION_DEFAULT_IMAGE_SIZE', +] + +INCEPTION_URL = 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz' +INCEPTION_FROZEN_GRAPH = 'inceptionv1_for_inception_score.pb' +INCEPTION_INPUT = 'Mul:0' +INCEPTION_OUTPUT = 'logits:0' +INCEPTION_FINAL_POOL = 'pool_3:0' +INCEPTION_DEFAULT_IMAGE_SIZE 
def _symmetric_matrix_square_root(mat, eps=1e-10):
  """Returns the matrix square root of a symmetric matrix.

  The result M' satisfies M' * M' = mat; this is not an elementwise square
  root. The construction is only valid for symmetric inputs.

  Args:
    mat: Symmetric matrix whose square root is wanted.
    eps: Threshold below which a singular value is left as-is instead of being
      square rooted, to guard against numerical instability.

  Returns:
    Matrix square root of `mat`.
  """
  # TensorFlow's svd returns (s, u, v), unlike numpy's (u, s, v^T).
  singular_values, left_vecs, right_vecs = linalg_ops.svd(mat)

  # sqrt is unstable around 0, so singular values below `eps` are passed
  # through unchanged rather than square rooted.
  below_eps = math_ops.less(singular_values, eps)
  root_values = array_ops.where(
      below_eps, singular_values, math_ops.sqrt(singular_values))

  # With A = U S V^T, TensorFlow hands back V itself (numpy hands back V^T),
  # hence the explicit transpose of the right factor.
  scaled_left = math_ops.matmul(left_vecs, array_ops.diag(root_values))
  return math_ops.matmul(scaled_left, right_vecs, transpose_b=True)
+ """ + is_single = images.shape.ndims == 3 + with ops.name_scope(scope, 'preprocess', [images, height, width]): + if not images.dtype.is_floating: + images = math_ops.to_float(images) + if is_single: + images = array_ops.expand_dims(images, axis=0) + resized = image_ops.resize_bilinear(images, [height, width]) + resized = (resized - 128.0) / 128.0 + if is_single: + resized = array_ops.squeeze(resized, axis=0) + return resized + + +def _kl_divergence(p, p_logits, q): + """Computes the Kullback-Liebler divergence between p and q. + + This function uses p's logits in some places to improve numerical stability. + + Specifically: + + KL(p || q) = sum[ p * log(p / q) ] + = sum[ p * ( log(p) - log(q) ) ] + = sum[ p * ( log_softmax(p_logits) - log(q) ) ] + + Args: + p: A 2-D floating-point Tensor p_ij, where `i` corresponds to the minibatch + example and `j` corresponds to the probability of being in class `j`. + p_logits: A 2-D floating-point Tensor corresponding to logits for `p`. + q: A 1-D floating-point Tensor, where q_j corresponds to the probability + of class `j`. + + Returns: + KL divergence between two distributions. Output dimension is 1D, one entry + per distribution in `p`. + + Raises: + ValueError: If any of the inputs aren't floating-point. + ValueError: If p or p_logits aren't 2D. + ValueError: If q isn't 1D. 
+ """ + for tensor in [p, p_logits, q]: + if not tensor.dtype.is_floating: + raise ValueError('Input %s must be floating type.', tensor.name) + p.shape.assert_has_rank(2) + p_logits.shape.assert_has_rank(2) + q.shape.assert_has_rank(1) + return math_ops.reduce_sum( + p * (nn_ops.log_softmax(p_logits) - math_ops.log(q)), axis=1) + + +def get_graph_def_from_disk(filename): + """Get a GraphDef proto from a disk location.""" + with gfile.FastGFile(filename, 'rb') as f: + return graph_pb2.GraphDef.FromString(f.read()) + + +def get_graph_def_from_resource(filename): + """Get a GraphDef proto from within a .par file.""" + return graph_pb2.GraphDef.FromString(resource_loader.load_resource(filename)) + + +def get_graph_def_from_url_tarball(url, filename, tar_filename=None): + """Get a GraphDef proto from a tarball on the web. + + Args: + url: Web address of tarball + filename: Filename of graph definition within tarball + tar_filename: Temporary download filename (None = always download) + + Returns: + A GraphDef loaded from a file in the downloaded tarball. + """ + if not (tar_filename and os.path.exists(tar_filename)): + + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % + (url, + float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + + tar_filename, _ = urllib.request.urlretrieve(url, tar_filename, _progress) + with tarfile.open(tar_filename, 'r:gz') as tar: + proto_str = tar.extractfile(filename).read() + return graph_pb2.GraphDef.FromString(proto_str) + + +def _default_graph_def_fn(): + return get_graph_def_from_url_tarball(INCEPTION_URL, INCEPTION_FROZEN_GRAPH, + os.path.basename(INCEPTION_URL)) + + +def run_inception(images, + graph_def=None, + default_graph_def_fn=_default_graph_def_fn, + image_size=INCEPTION_DEFAULT_IMAGE_SIZE, + input_tensor=INCEPTION_INPUT, + output_tensor=INCEPTION_OUTPUT): + """Run images through a pretrained Inception classifier. + + Args: + images: Input tensors. 
def run_image_classifier(tensor,
                         graph_def,
                         input_tensor,
                         output_tensor,
                         scope='RunClassifier'):
  """Feeds `tensor` through a frozen graph and fetches the requested outputs.

  Args:
    tensor: An Input tensor.
    graph_def: A GraphDef proto.
    input_tensor: Name of input tensor in graph def.
    output_tensor: A tensor name or list of tensor names in graph def.
    scope: Name scope for classifier.

  Returns:
    Classifier output if `output_tensor` is a string, or a list of outputs if
    `output_tensor` is a list.

  Raises:
    ValueError: If `input_tensor` or `output_tensor` aren't in the graph_def.
  """
  # A bare string means "one output"; remember that so the result can be
  # unwrapped again after the import.
  single_output = isinstance(output_tensor, str)
  requested = [output_tensor] if single_output else output_tensor

  outputs = importer.import_graph_def(
      graph_def, {input_tensor: tensor}, requested, name=scope)

  return outputs[0] if single_output else outputs
def classifier_score_from_logits(logits):
  """Classifier score for evaluating a generative model from logits.

  Computes the classifier score directly from a set of precomputed logits.
  This is useful independently of classifier_score(), for example when
  evaluating with large batches where all logits are computed up front.

  The technique is described in detail in https://arxiv.org/abs/1606.03498. In
  summary, this function calculates:

  exp( E[ KL(p(y|x) || p(y)) ] )

  which captures how different the network's classification prediction is from
  the prior distribution over classes.

  Args:
    logits: Precomputed 2D tensor of logits that will be used to
      compute the classifier score.

  Returns:
    The classifier score. A floating-point scalar of the same type as the output
    of `logits`.
  """
  logits.shape.assert_has_rank(2)

  # Work in float64 for maximum precision; cast back to the caller's dtype at
  # the very end.
  original_dtype = logits.dtype
  needs_cast = original_dtype != dtypes.float64
  if needs_cast:
    logits = math_ops.to_double(logits)

  class_probs = nn_ops.softmax(logits)
  # Marginal over classes: the mean prediction across the batch.
  marginal = math_ops.reduce_mean(class_probs, axis=0)
  per_example_kl = _kl_divergence(class_probs, logits, marginal)
  per_example_kl.shape.assert_has_rank(1)

  score = math_ops.exp(math_ops.reduce_mean(per_example_kl))
  return math_ops.cast(score, original_dtype) if needs_cast else score
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
  """Classifier distance for evaluating a generative model.

  This is the Frechet Inception distance generalized to an arbitrary
  classifier, as described in https://arxiv.org/abs/1706.08500. Given two
  Gaussian distributions with means m and m_w and covariance matrices C and
  C_w, this function calculates

  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Unlike the Inception
  score, this is a true distance and utilizes information about real world
  images.

  When computed from sample means and sample covariance matrices the Frechet
  distance is biased, and more so for small sample sizes (even for identical
  distributions, the expected distance is large when the sample is small). Use
  the same sample size when comparing two generative models.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier distance. To precompute activations for real and
  generated images in large batches, use
  frechet_classifier_distance_from_activations(), which this function also
  uses.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations
      based on a classifier.
    num_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar of the same type
    as the output of `classifier_fn`.
  """
  # Cut each input into `num_batches` pieces and stack them, so `map_fn` can
  # iterate over the leading (batch-index) axis.
  real_pieces = array_ops.split(real_images, num_or_size_splits=num_batches)
  generated_pieces = array_ops.split(
      generated_images, num_or_size_splits=num_batches)
  stacked_real = array_ops.stack(real_pieces)
  stacked_generated = array_ops.stack(generated_pieces)

  # Run the classifier one batch at a time using the memory-efficient `map_fn`.
  classify_batches = functools.partial(
      functional_ops.map_fn,
      fn=classifier_fn,
      parallel_iterations=1,
      back_prop=False,
      swap_memory=True,
      name='RunClassifier')
  real_batched = classify_batches(elems=stacked_real)
  generated_batched = classify_batches(elems=stacked_generated)

  # Merge the per-batch activations back along the example axis.
  real_flat = array_ops.concat(array_ops.unstack(real_batched), 0)
  generated_flat = array_ops.concat(array_ops.unstack(generated_batched), 0)

  return frechet_classifier_distance_from_activations(real_flat,
                                                      generated_flat)
def diagonal_only_frechet_classifier_distance_from_activations(
    real_activations, generated_activations):
  """Diagonal-covariance classifier distance for evaluating a generative model.

  This is based on the Frechet Inception distance, but for an arbitrary
  classifier, as described in https://arxiv.org/abs/1706.08500. Given two
  Gaussian distributions with means m and m_w and per-dimension variances
  sigma and sigma_w, this function calculates

  |m - m_w|^2 + (sigma + sigma_w - 2(sigma x sigma_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Unlike the Inception
  score, this is a true distance and utilizes information about real world
  images. Because only the diagonal of each covariance matrix is used, the
  expensive matrix square root is replaced by an elementwise one, giving O(n)
  instead of O(n^2) space complexity.

  When computed from sample means and sample variances the Frechet distance is
  biased, and more so for small sample sizes. Use the same sample size when
  comparing two generative models.

  Args:
    real_activations: Real images to use to compute Frechet Inception distance.
    generated_activations: Generated images to use to compute Frechet Inception
      distance.

  Returns:
    The diagonal-only Frechet Inception distance. A floating-point scalar of
    the same type as the output of the activations.

  Raises:
    ValueError: If the shape of the variance and mean vectors are not equal.
  """
  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)

  # Do the arithmetic in float64 and cast back at the end if needed.
  input_dtype = real_activations.dtype
  needs_cast = input_dtype != dtypes.float64
  if needs_cast:
    real_activations = math_ops.to_double(real_activations)
    generated_activations = math_ops.to_double(generated_activations)

  # Per-dimension mean and variance over the example axis.
  m, var = nn_impl.moments(real_activations, axes=[0])
  m_w, var_w = nn_impl.moments(generated_activations, axes=[0])

  actual_shape = var.get_shape()
  expected_shape = m.get_shape()
  if actual_shape != expected_shape:
    raise ValueError('shape: {} must match expected shape: {}'.format(
        actual_shape, expected_shape))

  # Covariance component. With diagonal covariances the trace reduces to a
  # plain sum over dimensions.
  variance_sum = var + var_w
  geometric_mean = math_ops.sqrt(math_ops.multiply(var, var_w))
  trace = math_ops.reduce_sum(variance_sum - 2.0 * geometric_mean)

  # Mean component. Equivalent to the squared L2 norm but more stable.
  mean = math_ops.reduce_sum(math_ops.squared_difference(m, m_w))

  dofid = trace + mean
  return math_ops.cast(dofid, input_dtype) if needs_cast else dofid
+ + """ + real_activations.shape.assert_has_rank(2) + generated_activations.shape.assert_has_rank(2) + + activations_dtype = real_activations.dtype + if activations_dtype != dtypes.float64: + real_activations = math_ops.to_double(real_activations) + generated_activations = math_ops.to_double(generated_activations) + + # Compute mean and covariance matrices of activations. + m = math_ops.reduce_mean(real_activations, 0) + m_w = math_ops.reduce_mean(generated_activations, 0) + num_examples_real = math_ops.to_double(array_ops.shape(real_activations)[0]) + num_examples_generated = math_ops.to_double( + array_ops.shape(generated_activations)[0]) + + # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T + real_centered = real_activations - m + sigma = math_ops.matmul( + real_centered, real_centered, transpose_a=True) / ( + num_examples_real - 1) + + gen_centered = generated_activations - m_w + sigma_w = math_ops.matmul( + gen_centered, gen_centered, transpose_a=True) / ( + num_examples_generated - 1) + + # Find the Tr(sqrt(sigma sigma_w)) component of FID + sqrt_trace_component = trace_sqrt_product(sigma, sigma_w) + + # Compute the two components of FID. + + # First the covariance component. + # Here, note that trace(A + B) = trace(A) + trace(B) + trace = math_ops.trace(sigma + sigma_w) - 2.0 * sqrt_trace_component + + # Next the distance between means. + mean = math_ops.reduce_sum( + math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable. + fid = trace + mean + if activations_dtype != dtypes.float64: + fid = math_ops.cast(fid, activations_dtype) + + return fid + +frechet_inception_distance = functools.partial( + frechet_classifier_distance, + classifier_fn=functools.partial( + run_inception, output_tensor=INCEPTION_FINAL_POOL)) + + +def kernel_classifier_distance(real_images, + generated_images, + classifier_fn, + num_classifier_batches=1, + max_block_size=1024, + dtype=None): + """Kernel "classifier" distance for evaluating a generative model. 
+ + This is based on the Kernel Inception distance, but for an arbitrary + embedding. + + This technique is described in detail in https://arxiv.org/abs/1801.01401. + Given two distributions P and Q of activations, this function calculates + + E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')] + - 2 E_{X ~ P, Y ~ Q}[k(X, Y)] + + where k is the polynomial kernel + + k(x, y) = ( x^T y / dimension + 1 )^3. + + This captures how different the distributions of real and generated images' + visual features are. Like the Frechet distance (and unlike the Inception + score), this is a true distance and incorporates information about the + target images. Unlike the Frechet score, this function computes an + *unbiased* and asymptotically normal estimator, which makes comparing + estimates across models much more intuitive. + + The estimator used takes time quadratic in max_block_size. Larger values of + max_block_size will decrease the variance of the estimator but increase the + computational cost. This differs slightly from the estimator used by the + original paper; it is the block estimator of https://arxiv.org/abs/1307.1954. + + NOTE: the blocking code assumes that real_activations and + generated_activations are both in random order. If either is sorted in a + meaningful order, the estimator will behave poorly. + + NOTE: This function consumes images, computes their activations, and then + computes the classifier score. If you would like to precompute many + activations for real and generated images for large batches, or to compute + multiple scores based on the same images, please use + kernel_clasifier_distance_from_activations(), which this method also uses. + + Args: + real_images: Real images to use to compute Kernel Inception distance. + generated_images: Generated images to use to compute Kernel Inception + distance. + classifier_fn: A function that takes images and produces activations based + on a classifier. 
def kernel_classifier_distance_and_std(real_images,
                                       generated_images,
                                       classifier_fn,
                                       num_classifier_batches=1,
                                       max_block_size=1024,
                                       dtype=None):
  """Kernel "classifier" distance for evaluating a generative model.

  This is based on the Kernel Inception distance, but for an arbitrary
  embedding. Also returns an estimate of the standard error of the distance
  estimator.

  This technique is described in detail in https://arxiv.org/abs/1801.01401.
  Given two distributions P and Q of activations, this function calculates

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  where k is the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  This captures how different the distributions of real and generated images'
  visual features are. Like the Frechet distance (and unlike the Inception
  score), this is a true distance and incorporates information about the
  target images. Unlike the Frechet score, this function computes an
  *unbiased* and asymptotically normal estimator, which makes comparing
  estimates across models much more intuitive.

  The estimator used takes time quadratic in max_block_size. Larger values of
  max_block_size will decrease the variance of the estimator but increase the
  computational cost. This differs slightly from the estimator used by the
  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier score. If you would like to precompute many
  activations for real and generated images for large batches, or to compute
  multiple scores based on the same images, please use
  kernel_classifier_distance_and_std_from_activations(), which this method
  also uses.

  Args:
    real_images: Real images to use to compute Kernel Inception distance.
    generated_images: Generated images to use to compute Kernel Inception
      distance.
    classifier_fn: A function that takes images and produces activations based
      on a classifier.
    num_classifier_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.
    max_block_size: integer, default 1024. The distance estimator splits
      samples into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: if not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
      as the output of the activations.
    An estimate of the standard error of the distance estimator (a scalar of
      the same type).
  """
  real_images_list = array_ops.split(
      real_images, num_or_size_splits=num_classifier_batches)
  generated_images_list = array_ops.split(
      generated_images, num_or_size_splits=num_classifier_batches)

  # Stack the pieces so `map_fn` iterates over the leading batch-index axis.
  real_imgs = array_ops.stack(real_images_list)
  generated_imgs = array_ops.stack(generated_images_list)

  # Compute the activations using the memory-efficient `map_fn`.
  def compute_activations(elems):
    return functional_ops.map_fn(
        fn=classifier_fn,
        elems=elems,
        parallel_iterations=1,
        back_prop=False,
        swap_memory=True,
        name='RunClassifier')

  real_a = compute_activations(real_imgs)
  gen_a = compute_activations(generated_imgs)

  # Ensure the activations have the right shapes.
  real_a = array_ops.concat(array_ops.unstack(real_a), 0)
  gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)

  # Forward `dtype` as well; it was previously accepted but silently dropped,
  # so the documented coercion never happened.
  # NOTE(review): assumes the callee accepts `dtype` as a keyword, matching the
  # sibling kernel_classifier_distance_from_activations signature -- confirm.
  return kernel_classifier_distance_and_std_from_activations(
      real_a, gen_a, max_block_size=max_block_size, dtype=dtype)
+ Given two distributions P and Q of activations, this function calculates + + E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')] + - 2 E_{X ~ P, Y ~ Q}[k(X, Y)] + + where k is the polynomial kernel + + k(x, y) = ( x^T y / dimension + 1 )^3. + + This captures how different the distributions of real and generated images' + visual features are. Like the Frechet distance (and unlike the Inception + score), this is a true distance and incorporates information about the + target images. Unlike the Frechet score, this function computes an + *unbiased* and asymptotically normal estimator, which makes comparing + estimates across models much more intuitive. + + The estimator used takes time quadratic in max_block_size. Larger values of + max_block_size will decrease the variance of the estimator but increase the + computational cost. This differs slightly from the estimator used by the + original paper; it is the block estimator of https://arxiv.org/abs/1307.1954. + + NOTE: the blocking code assumes that real_activations and + generated_activations are both in random order. If either is sorted in a + meaningful order, the estimator will behave poorly. + + Args: + real_activations: 2D Tensor containing activations of real data. Shape is + [batch_size, activation_size]. + generated_activations: 2D Tensor containing activations of generated data. + Shape is [batch_size, activation_size]. + max_block_size: integer, default 1024. The distance estimator splits samples + into blocks for computational efficiency. Larger values are more + computationally expensive but decrease the variance of the distance + estimate. + dtype: if not None, coerce activations to this dtype before computations. + + Returns: + The Kernel Inception Distance. A floating-point scalar of the same type + as the output of the activations. 
+ """ + return kernel_classifier_distance_and_std_from_activations( + real_activations, generated_activations, max_block_size=max_block_size)[0] + + +def kernel_classifier_distance_and_std_from_activations(real_activations, + generated_activations, + max_block_size=1024, + dtype=None): + """Kernel "classifier" distance for evaluating a generative model. + + This methods computes the kernel classifier distance from activations of + real images and generated images. This can be used independently of the + kernel_classifier_distance() method, especially in the case of using large + batches during evaluation where we would like to precompute all of the + activations before computing the classifier distance, or if we want to + compute multiple metrics based on the same images. It also returns a rough + estimate of the standard error of the estimator. + + This technique is described in detail in https://arxiv.org/abs/1801.01401. + Given two distributions P and Q of activations, this function calculates + + E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')] + - 2 E_{X ~ P, Y ~ Q}[k(X, Y)] + + where k is the polynomial kernel + + k(x, y) = ( x^T y / dimension + 1 )^3. + + This captures how different the distributions of real and generated images' + visual features are. Like the Frechet distance (and unlike the Inception + score), this is a true distance and incorporates information about the + target images. Unlike the Frechet score, this function computes an + *unbiased* and asymptotically normal estimator, which makes comparing + estimates across models much more intuitive. + + The estimator used takes time quadratic in max_block_size. Larger values of + max_block_size will decrease the variance of the estimator but increase the + computational cost. This differs slightly from the estimator used by the + original paper; it is the block estimator of https://arxiv.org/abs/1307.1954. 
+ The estimate of the standard error will also be more reliable when there are + more blocks, i.e. when max_block_size is smaller. + + NOTE: the blocking code assumes that real_activations and + generated_activations are both in random order. If either is sorted in a + meaningful order, the estimator will behave poorly. + + Args: + real_activations: 2D Tensor containing activations of real data. Shape is + [batch_size, activation_size]. + generated_activations: 2D Tensor containing activations of generated data. + Shape is [batch_size, activation_size]. + max_block_size: integer, default 1024. The distance estimator splits samples + into blocks for computational efficiency. Larger values are more + computationally expensive but decrease the variance of the distance + estimate. Having a smaller block size also gives a better estimate of the + standard error. + dtype: if not None, coerce activations to this dtype before computations. + + Returns: + The Kernel Inception Distance. A floating-point scalar of the same type + as the output of the activations. + An estimate of the standard error of the distance estimator (a scalar of + the same type). + """ + + real_activations.shape.assert_has_rank(2) + generated_activations.shape.assert_has_rank(2) + real_activations.shape[1].assert_is_compatible_with( + generated_activations.shape[1]) + + if dtype is None: + dtype = real_activations.dtype + assert generated_activations.dtype == dtype + else: + real_activations = math_ops.cast(real_activations, dtype) + generated_activations = math_ops.cast(generated_activations, dtype) + + # Figure out how to split the activations into blocks of approximately + # equal size, with none larger than max_block_size. 
+ n_r = array_ops.shape(real_activations)[0] + n_g = array_ops.shape(generated_activations)[0] + + n_bigger = math_ops.maximum(n_r, n_g) + n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size)) + + v_r = n_r // n_blocks + v_g = n_g // n_blocks + + n_plusone_r = n_r - v_r * n_blocks + n_plusone_g = n_g - v_g * n_blocks + + sizes_r = array_ops.concat([ + array_ops.fill([n_blocks - n_plusone_r], v_r), + array_ops.fill([n_plusone_r], v_r + 1), + ], 0) + sizes_g = array_ops.concat([ + array_ops.fill([n_blocks - n_plusone_g], v_g), + array_ops.fill([n_plusone_g], v_g + 1), + ], 0) + + zero = array_ops.zeros([1], dtype=dtypes.int32) + inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0) + inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0) + + dim = math_ops.cast(real_activations.shape[1], dtype) + + def compute_kid_block(i): + 'Compute the ith block of the KID estimate.' + r_s = inds_r[i] + r_e = inds_r[i + 1] + r = real_activations[r_s:r_e] + m = math_ops.cast(r_e - r_s, dtype) + + g_s = inds_g[i] + g_e = inds_g[i + 1] + g = generated_activations[g_s:g_e] + n = math_ops.cast(g_e - g_s, dtype) + + k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3 + k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3 + k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3 + return (-2 * math_ops.reduce_mean(k_rg) + + (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) + + (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1))) + + ests = functional_ops.map_fn( + compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False) + + mn = math_ops.reduce_mean(ests) + + # nn_impl.moments doesn't use the Bessel correction, which we want here + n_blocks_ = math_ops.cast(n_blocks, dtype) + var = control_flow_ops.cond( + math_ops.less_equal(n_blocks, 1), + lambda: array_ops.constant(float('nan'), dtype=dtype), + lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1)) + + 
return mn, math_ops.sqrt(var / n_blocks_) @@ -13,6 +13,8 @@ import random from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split +from classifier_metrics_impl import classifier_score_from_logits + def import_mnist(): from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/", reshape=False) @@ -126,10 +128,12 @@ def train_classifier(x_train, y_train, x_val, y_val, batch_size=128, epochs=100, def test_classifier(model, x_test, y_true): x_test = np.pad(x_test, ((0,0),(2,2),(2,2),(0,0)), 'constant') y_pred = model.predict(x_test) + logits = tf.convert_to_tensor(y_pred, dtype=tf.float32) + inception_score = tf.keras.backend.eval(classifier_score_from_logits(logits)) y_pred = np.argmax(y_pred, axis=1) y_true = np.argmax(y_true, axis=1) plot_example_errors(y_pred, y_true, x_test) - return accuracy_score(y_true, y_pred) + return accuracy_score(y_true, y_pred), inception_score def mix_data(X_train, y_train, X_validation, y_validation, train_gen, tr_labels_gen, val_gen, val_labels_gen, split=0): @@ -162,4 +166,4 @@ if __name__ == '__main__': x_train, y_train, x_val, y_val, x_t, y_t = import_mnist() print(y_t.shape) model = train_classifier(x_train[:100], y_train[:100], x_val, y_val, epochs=3) - test_classifier(model, x_t, y_t) + print(test_classifier(model, x_t, y_t)) |