aboutsummaryrefslogtreecommitdiff
path: root/classifier_metrics_impl.py
diff options
context:
space:
mode:
Diffstat (limited to 'classifier_metrics_impl.py')
-rw-r--r--classifier_metrics_impl.py1114
1 files changed, 1114 insertions, 0 deletions
diff --git a/classifier_metrics_impl.py b/classifier_metrics_impl.py
new file mode 100644
index 0000000..2334d29
--- /dev/null
+++ b/classifier_metrics_impl.py
@@ -0,0 +1,1114 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model evaluation tools for TFGAN.
+
+These methods come from https://arxiv.org/abs/1606.03498,
+https://arxiv.org/abs/1706.08500, and https://arxiv.org/abs/1801.01401.
+
+NOTE: This implementation uses the same weights as in
+https://github.com/openai/improved-gan/blob/master/inception_score/model.py,
+but is more numerically stable and is an unbiased estimator of the true
+Inception score even when splitting the inputs into batches.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import os
+import sys
+import tarfile
+
+from six.moves import urllib
+
+from tensorflow.contrib.layers.python.layers import layers
+from tensorflow.core.framework import graph_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import importer
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import image_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import resource_loader
+
+
+__all__ = [
+ 'get_graph_def_from_disk',
+ 'get_graph_def_from_resource',
+ 'get_graph_def_from_url_tarball',
+ 'preprocess_image',
+ 'run_image_classifier',
+ 'run_inception',
+ 'inception_score',
+ 'classifier_score',
+ 'classifier_score_from_logits',
+ 'frechet_inception_distance',
+ 'frechet_classifier_distance',
+ 'frechet_classifier_distance_from_activations',
+ 'mean_only_frechet_classifier_distance_from_activations',
+ 'diagonal_only_frechet_classifier_distance_from_activations',
+ 'kernel_inception_distance',
+ 'kernel_inception_distance_and_std',
+ 'kernel_classifier_distance',
+ 'kernel_classifier_distance_and_std',
+ 'kernel_classifier_distance_from_activations',
+ 'kernel_classifier_distance_and_std_from_activations',
+ 'INCEPTION_DEFAULT_IMAGE_SIZE',
+]
+
+INCEPTION_URL = 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz'
+INCEPTION_FROZEN_GRAPH = 'inceptionv1_for_inception_score.pb'
+INCEPTION_INPUT = 'Mul:0'
+INCEPTION_OUTPUT = 'logits:0'
+INCEPTION_FINAL_POOL = 'pool_3:0'
+INCEPTION_DEFAULT_IMAGE_SIZE = 299
+
+
+def _validate_images(images, image_size):
+ images = ops.convert_to_tensor(images)
+ images.shape.with_rank(4)
+ images.shape.assert_is_compatible_with([None, image_size, image_size, None])
+ return images
+
+
+def _symmetric_matrix_square_root(mat, eps=1e-10):
+ """Compute square root of a symmetric matrix.
+
+ Note that this is different from an elementwise square root. We want to
+ compute M' where M' = sqrt(mat) such that M' * M' = mat.
+
+ Also note that this method **only** works for symmetric matrices.
+
+ Args:
+ mat: Matrix to take the square root of.
+ eps: Small epsilon such that any element less than eps will not be square
+ rooted to guard against numerical instability.
+
+ Returns:
+ Matrix square root of mat.
+ """
+ # Unlike numpy, tensorflow's return order is (s, u, v)
+ s, u, v = linalg_ops.svd(mat)
+ # sqrt is unstable around 0, just use 0 in such case
+ si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s))
+ # Note that the v returned by Tensorflow is v = V
+ # (when referencing the equation A = U S V^T)
+ # This is unlike Numpy which returns v = V^T
+ return math_ops.matmul(
+ math_ops.matmul(u, array_ops.diag(si)), v, transpose_b=True)
+
+
+def preprocess_image(images,
+ height=INCEPTION_DEFAULT_IMAGE_SIZE,
+ width=INCEPTION_DEFAULT_IMAGE_SIZE,
+ scope=None):
+ """Prepare a batch of images for evaluation.
+
+ This is the preprocessing portion of the graph from
+ http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz.
+
+ Note that it expects Tensors in [0, 255]. This function maps pixel values to
+ [-1, 1] and resizes to match the InceptionV1 network.
+
+ Args:
+ images: 3-D or 4-D Tensor of images. Values are in [0, 255].
+ height: Integer. Height of resized output image.
+ width: Integer. Width of resized output image.
+ scope: Optional scope for name_scope.
+
+ Returns:
+ 3-D or 4-D float Tensor of prepared image(s). Values are in [-1, 1].
+ """
+ is_single = images.shape.ndims == 3
+ with ops.name_scope(scope, 'preprocess', [images, height, width]):
+ if not images.dtype.is_floating:
+ images = math_ops.to_float(images)
+ if is_single:
+ images = array_ops.expand_dims(images, axis=0)
+ resized = image_ops.resize_bilinear(images, [height, width])
+ resized = (resized - 128.0) / 128.0
+ if is_single:
+ resized = array_ops.squeeze(resized, axis=0)
+ return resized
+
+
+def _kl_divergence(p, p_logits, q):
+ """Computes the Kullback-Liebler divergence between p and q.
+
+ This function uses p's logits in some places to improve numerical stability.
+
+ Specifically:
+
+ KL(p || q) = sum[ p * log(p / q) ]
+ = sum[ p * ( log(p) - log(q) ) ]
+ = sum[ p * ( log_softmax(p_logits) - log(q) ) ]
+
+ Args:
+ p: A 2-D floating-point Tensor p_ij, where `i` corresponds to the minibatch
+ example and `j` corresponds to the probability of being in class `j`.
+ p_logits: A 2-D floating-point Tensor corresponding to logits for `p`.
+ q: A 1-D floating-point Tensor, where q_j corresponds to the probability
+ of class `j`.
+
+ Returns:
+ KL divergence between two distributions. Output dimension is 1D, one entry
+ per distribution in `p`.
+
+ Raises:
+ ValueError: If any of the inputs aren't floating-point.
+ ValueError: If p or p_logits aren't 2D.
+ ValueError: If q isn't 1D.
+ """
+ for tensor in [p, p_logits, q]:
+ if not tensor.dtype.is_floating:
+ raise ValueError('Input %s must be floating type.', tensor.name)
+ p.shape.assert_has_rank(2)
+ p_logits.shape.assert_has_rank(2)
+ q.shape.assert_has_rank(1)
+ return math_ops.reduce_sum(
+ p * (nn_ops.log_softmax(p_logits) - math_ops.log(q)), axis=1)
+
+
+def get_graph_def_from_disk(filename):
+ """Get a GraphDef proto from a disk location."""
+ with gfile.FastGFile(filename, 'rb') as f:
+ return graph_pb2.GraphDef.FromString(f.read())
+
+
+def get_graph_def_from_resource(filename):
+ """Get a GraphDef proto from within a .par file."""
+ return graph_pb2.GraphDef.FromString(resource_loader.load_resource(filename))
+
+
+def get_graph_def_from_url_tarball(url, filename, tar_filename=None):
+ """Get a GraphDef proto from a tarball on the web.
+
+ Args:
+ url: Web address of tarball
+ filename: Filename of graph definition within tarball
+ tar_filename: Temporary download filename (None = always download)
+
+ Returns:
+ A GraphDef loaded from a file in the downloaded tarball.
+ """
+ if not (tar_filename and os.path.exists(tar_filename)):
+
+ def _progress(count, block_size, total_size):
+ sys.stdout.write('\r>> Downloading %s %.1f%%' %
+ (url,
+ float(count * block_size) / float(total_size) * 100.0))
+ sys.stdout.flush()
+
+ tar_filename, _ = urllib.request.urlretrieve(url, tar_filename, _progress)
+ with tarfile.open(tar_filename, 'r:gz') as tar:
+ proto_str = tar.extractfile(filename).read()
+ return graph_pb2.GraphDef.FromString(proto_str)
+
+
+def _default_graph_def_fn():
+ return get_graph_def_from_url_tarball(INCEPTION_URL, INCEPTION_FROZEN_GRAPH,
+ os.path.basename(INCEPTION_URL))
+
+
+def run_inception(images,
+ graph_def=None,
+ default_graph_def_fn=_default_graph_def_fn,
+ image_size=INCEPTION_DEFAULT_IMAGE_SIZE,
+ input_tensor=INCEPTION_INPUT,
+ output_tensor=INCEPTION_OUTPUT):
+ """Run images through a pretrained Inception classifier.
+
+ Args:
+ images: Input tensors. Must be [batch, height, width, channels]. Input shape
+ and values must be in [-1, 1], which can be achieved using
+ `preprocess_image`.
+ graph_def: A GraphDef proto of a pretrained Inception graph. If `None`,
+ call `default_graph_def_fn` to get GraphDef.
+ default_graph_def_fn: A function that returns a GraphDef. Used if
+ `graph_def` is `None. By default, returns a pretrained InceptionV3 graph.
+ image_size: Required image width and height. See unit tests for the default
+ values.
+ input_tensor: Name of input Tensor.
+ output_tensor: Name or list of output Tensors. This function will compute
+ activations at the specified layer. Examples include INCEPTION_V3_OUTPUT
+ and INCEPTION_V3_FINAL_POOL which would result in this function computing
+ the final logits or the penultimate pooling layer.
+
+ Returns:
+ Tensor or Tensors corresponding to computed `output_tensor`.
+
+ Raises:
+ ValueError: If images are not the correct size.
+ ValueError: If neither `graph_def` nor `default_graph_def_fn` are provided.
+ """
+ images = _validate_images(images, image_size)
+
+ if graph_def is None:
+ if default_graph_def_fn is None:
+ raise ValueError('If `graph_def` is `None`, must provide '
+ '`default_graph_def_fn`.')
+ graph_def = default_graph_def_fn()
+
+ activations = run_image_classifier(images, graph_def, input_tensor,
+ output_tensor)
+ if isinstance(activations, list):
+ for i, activation in enumerate(activations):
+ if array_ops.rank(activation) != 2:
+ activations[i] = layers.flatten(activation)
+ else:
+ if array_ops.rank(activations) != 2:
+ activations = layers.flatten(activations)
+
+ return activations
+
+
+def run_image_classifier(tensor,
+ graph_def,
+ input_tensor,
+ output_tensor,
+ scope='RunClassifier'):
+ """Runs a network from a frozen graph.
+
+ Args:
+ tensor: An Input tensor.
+ graph_def: A GraphDef proto.
+ input_tensor: Name of input tensor in graph def.
+ output_tensor: A tensor name or list of tensor names in graph def.
+ scope: Name scope for classifier.
+
+ Returns:
+ Classifier output if `output_tensor` is a string, or a list of outputs if
+ `output_tensor` is a list.
+
+ Raises:
+ ValueError: If `input_tensor` or `output_tensor` aren't in the graph_def.
+ """
+ input_map = {input_tensor: tensor}
+ is_singleton = isinstance(output_tensor, str)
+ if is_singleton:
+ output_tensor = [output_tensor]
+ classifier_outputs = importer.import_graph_def(
+ graph_def, input_map, output_tensor, name=scope)
+ if is_singleton:
+ classifier_outputs = classifier_outputs[0]
+
+ return classifier_outputs
+
+
+def classifier_score(images, classifier_fn, num_batches=1):
+ """Classifier score for evaluating a conditional generative model.
+
+ This is based on the Inception Score, but for an arbitrary classifier.
+
+ This technique is described in detail in https://arxiv.org/abs/1606.03498. In
+ summary, this function calculates
+
+ exp( E[ KL(p(y|x) || p(y)) ] )
+
+ which captures how different the network's classification prediction is from
+ the prior distribution over classes.
+
+ NOTE: This function consumes images, computes their logits, and then
+ computes the classifier score. If you would like to precompute many logits for
+ large batches, use classifier_score_from_logits(), which this method also
+ uses.
+
+ Args:
+ images: Images to calculate the classifier score for.
+ classifier_fn: A function that takes images and produces logits based on a
+ classifier.
+ num_batches: Number of batches to split `generated_images` in to in order to
+ efficiently run them through the classifier network.
+
+ Returns:
+ The classifier score. A floating-point scalar of the same type as the output
+ of `classifier_fn`.
+ """
+ generated_images_list = array_ops.split(
+ images, num_or_size_splits=num_batches)
+
+ # Compute the classifier splits using the memory-efficient `map_fn`.
+ logits = functional_ops.map_fn(
+ fn=classifier_fn,
+ elems=array_ops.stack(generated_images_list),
+ parallel_iterations=1,
+ back_prop=False,
+ swap_memory=True,
+ name='RunClassifier')
+ logits = array_ops.concat(array_ops.unstack(logits), 0)
+
+ return classifier_score_from_logits(logits)
+
+
+def classifier_score_from_logits(logits):
+ """Classifier score for evaluating a generative model from logits.
+
+ This method computes the classifier score for a set of logits. This can be
+ used independently of the classifier_score() method, especially in the case
+ of using large batches during evaluation where we would like precompute all
+ of the logits before computing the classifier score.
+
+ This technique is described in detail in https://arxiv.org/abs/1606.03498. In
+ summary, this function calculates:
+
+ exp( E[ KL(p(y|x) || p(y)) ] )
+
+ which captures how different the network's classification prediction is from
+ the prior distribution over classes.
+
+ Args:
+ logits: Precomputed 2D tensor of logits that will be used to
+ compute the classifier score.
+
+ Returns:
+ The classifier score. A floating-point scalar of the same type as the output
+ of `logits`.
+ """
+ logits.shape.assert_has_rank(2)
+
+ # Use maximum precision for best results.
+ logits_dtype = logits.dtype
+ if logits_dtype != dtypes.float64:
+ logits = math_ops.to_double(logits)
+
+ p = nn_ops.softmax(logits)
+ q = math_ops.reduce_mean(p, axis=0)
+ kl = _kl_divergence(p, logits, q)
+ kl.shape.assert_has_rank(1)
+ log_score = math_ops.reduce_mean(kl)
+ final_score = math_ops.exp(log_score)
+
+ if logits_dtype != dtypes.float64:
+ final_score = math_ops.cast(final_score, logits_dtype)
+
+ return final_score
+
+
+inception_score = functools.partial(
+ classifier_score,
+ classifier_fn=functools.partial(
+ run_inception, output_tensor=INCEPTION_OUTPUT))
+
+
+def trace_sqrt_product(sigma, sigma_v):
+ """Find the trace of the positive sqrt of product of covariance matrices.
+
+ '_symmetric_matrix_square_root' only works for symmetric matrices, so we
+ cannot just take _symmetric_matrix_square_root(sigma * sigma_v).
+ ('sigma' and 'sigma_v' are symmetric, but their product is not necessarily).
+
+ Let sigma = A A so A = sqrt(sigma), and sigma_v = B B.
+ We want to find trace(sqrt(sigma sigma_v)) = trace(sqrt(A A B B))
+ Note the following properties:
+ (i) forall M1, M2: eigenvalues(M1 M2) = eigenvalues(M2 M1)
+ => eigenvalues(A A B B) = eigenvalues (A B B A)
+ (ii) if M1 = sqrt(M2), then eigenvalues(M1) = sqrt(eigenvalues(M2))
+ => eigenvalues(sqrt(sigma sigma_v)) = sqrt(eigenvalues(A B B A))
+ (iii) forall M: trace(M) = sum(eigenvalues(M))
+ => trace(sqrt(sigma sigma_v)) = sum(eigenvalues(sqrt(sigma sigma_v)))
+ = sum(sqrt(eigenvalues(A B B A)))
+ = sum(eigenvalues(sqrt(A B B A)))
+ = trace(sqrt(A B B A))
+ = trace(sqrt(A sigma_v A))
+ A = sqrt(sigma). Both sigma and A sigma_v A are symmetric, so we **can**
+ use the _symmetric_matrix_square_root function to find the roots of these
+ matrices.
+
+ Args:
+ sigma: a square, symmetric, real, positive semi-definite covariance matrix
+ sigma_v: same as sigma
+
+ Returns:
+ The trace of the positive square root of sigma*sigma_v
+ """
+
+ # Note sqrt_sigma is called "A" in the proof above
+ sqrt_sigma = _symmetric_matrix_square_root(sigma)
+
+ # This is sqrt(A sigma_v A) above
+ sqrt_a_sigmav_a = math_ops.matmul(sqrt_sigma,
+ math_ops.matmul(sigma_v, sqrt_sigma))
+
+ return math_ops.trace(_symmetric_matrix_square_root(sqrt_a_sigmav_a))
+
+
+def frechet_classifier_distance(real_images,
+ generated_images,
+ classifier_fn,
+ num_batches=1):
+ """Classifier distance for evaluating a generative model.
+
+ This is based on the Frechet Inception distance, but for an arbitrary
+ classifier.
+
+ This technique is described in detail in https://arxiv.org/abs/1706.08500.
+ Given two Gaussian distribution with means m and m_w and covariance matrices
+ C and C_w, this function calculates
+
+ |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))
+
+ which captures how different the distributions of real images and generated
+ images (or more accurately, their visual features) are. Note that unlike the
+ Inception score, this is a true distance and utilizes information about real
+ world images.
+
+ Note that when computed using sample means and sample covariance matrices,
+ Frechet distance is biased. It is more biased for small sample sizes. (e.g.
+ even if the two distributions are the same, for a small sample size, the
+ expected Frechet distance is large). It is important to use the same
+ sample size to compute Frechet classifier distance when comparing two
+ generative models.
+
+ NOTE: This function consumes images, computes their activations, and then
+ computes the classifier score. If you would like to precompute many
+ activations for real and generated images for large batches, please use
+ frechet_clasifier_distance_from_activations(), which this method also uses.
+
+ Args:
+ real_images: Real images to use to compute Frechet Inception distance.
+ generated_images: Generated images to use to compute Frechet Inception
+ distance.
+ classifier_fn: A function that takes images and produces activations
+ based on a classifier.
+ num_batches: Number of batches to split images in to in order to
+ efficiently run them through the classifier network.
+
+ Returns:
+ The Frechet Inception distance. A floating-point scalar of the same type
+ as the output of `classifier_fn`.
+ """
+ real_images_list = array_ops.split(
+ real_images, num_or_size_splits=num_batches)
+ generated_images_list = array_ops.split(
+ generated_images, num_or_size_splits=num_batches)
+
+ real_imgs = array_ops.stack(real_images_list)
+ generated_imgs = array_ops.stack(generated_images_list)
+
+ # Compute the activations using the memory-efficient `map_fn`.
+ def compute_activations(elems):
+ return functional_ops.map_fn(fn=classifier_fn,
+ elems=elems,
+ parallel_iterations=1,
+ back_prop=False,
+ swap_memory=True,
+ name='RunClassifier')
+
+ real_a = compute_activations(real_imgs)
+ gen_a = compute_activations(generated_imgs)
+
+ # Ensure the activations have the right shapes.
+ real_a = array_ops.concat(array_ops.unstack(real_a), 0)
+ gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)
+
+ return frechet_classifier_distance_from_activations(real_a, gen_a)
+
+
+def mean_only_frechet_classifier_distance_from_activations(
+ real_activations, generated_activations):
+ """Classifier distance for evaluating a generative model from activations.
+
+ Given two Gaussian distribution with means m and m_w and covariance matrices
+ C and C_w, this function calcuates
+
+ |m - m_w|^2
+
+ which captures how different the distributions of real images and generated
+ images (or more accurately, their visual features) are. Note that unlike the
+ Inception score, this is a true distance and utilizes information about real
+ world images.
+
+ Note that when computed using sample means and sample covariance matrices,
+ Frechet distance is biased. It is more biased for small sample sizes. (e.g.
+ even if the two distributions are the same, for a small sample size, the
+ expected Frechet distance is large). It is important to use the same
+ sample size to compute frechet classifier distance when comparing two
+ generative models.
+
+ In this variant, we only compute the difference between the means of the
+ fitted Gaussians. The computation leads to O(n) vs. O(n^2) memory usage, yet
+ still retains much of the same information as FID.
+
+ Args:
+ real_activations: 2D array of activations of real images of size
+ [num_images, num_dims] to use to compute Frechet Inception distance.
+ generated_activations: 2D array of activations of generated images of size
+ [num_images, num_dims] to use to compute Frechet Inception distance.
+
+ Returns:
+ The mean-only Frechet Inception distance. A floating-point scalar of the
+ same type as the output of the activations.
+ """
+ real_activations.shape.assert_has_rank(2)
+ generated_activations.shape.assert_has_rank(2)
+
+ activations_dtype = real_activations.dtype
+ if activations_dtype != dtypes.float64:
+ real_activations = math_ops.to_double(real_activations)
+ generated_activations = math_ops.to_double(generated_activations)
+
+ # Compute means of activations.
+ m = math_ops.reduce_mean(real_activations, 0)
+ m_w = math_ops.reduce_mean(generated_activations, 0)
+
+ # Next the distance between means.
+ mean = math_ops.reduce_sum(
+ math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable.
+ mofid = mean
+ if activations_dtype != dtypes.float64:
+ mofid = math_ops.cast(mofid, activations_dtype)
+
+ return mofid
+
+
+def diagonal_only_frechet_classifier_distance_from_activations(
+ real_activations, generated_activations):
+ """Classifier distance for evaluating a generative model.
+
+ This is based on the Frechet Inception distance, but for an arbitrary
+ classifier.
+
+ This technique is described in detail in https://arxiv.org/abs/1706.08500.
+ Given two Gaussian distribution with means m and m_w and covariance matrices
+ C and C_w, this function calcuates
+
+ |m - m_w|^2 + (sigma + sigma_w - 2(sigma x sigma_w)^(1/2))
+
+ which captures how different the distributions of real images and generated
+ images (or more accurately, their visual features) are. Note that unlike the
+ Inception score, this is a true distance and utilizes information about real
+ world images. In this variant, we compute diagonal-only covariance matrices.
+ As a result, instead of computing an expensive matrix square root, we can do
+ something much simpler, and has O(n) vs O(n^2) space complexity.
+
+ Note that when computed using sample means and sample covariance matrices,
+ Frechet distance is biased. It is more biased for small sample sizes. (e.g.
+ even if the two distributions are the same, for a small sample size, the
+ expected Frechet distance is large). It is important to use the same
+ sample size to compute frechet classifier distance when comparing two
+ generative models.
+
+ Args:
+ real_activations: Real images to use to compute Frechet Inception distance.
+ generated_activations: Generated images to use to compute Frechet Inception
+ distance.
+
+ Returns:
+ The diagonal-only Frechet Inception distance. A floating-point scalar of
+ the same type as the output of the activations.
+
+ Raises:
+ ValueError: If the shape of the variance and mean vectors are not equal.
+ """
+ real_activations.shape.assert_has_rank(2)
+ generated_activations.shape.assert_has_rank(2)
+
+ activations_dtype = real_activations.dtype
+ if activations_dtype != dtypes.float64:
+ real_activations = math_ops.to_double(real_activations)
+ generated_activations = math_ops.to_double(generated_activations)
+
+ # Compute mean and covariance matrices of activations.
+ m, var = nn_impl.moments(real_activations, axes=[0])
+ m_w, var_w = nn_impl.moments(generated_activations, axes=[0])
+
+ actual_shape = var.get_shape()
+ expected_shape = m.get_shape()
+
+ if actual_shape != expected_shape:
+ raise ValueError('shape: {} must match expected shape: {}'.format(
+ actual_shape, expected_shape))
+
+ # Compute the two components of FID.
+
+ # First the covariance component.
+ # Here, note that trace(A + B) = trace(A) + trace(B)
+ trace = math_ops.reduce_sum(
+ (var + var_w) - 2.0 * math_ops.sqrt(math_ops.multiply(var, var_w)))
+
+ # Next the distance between means.
+ mean = math_ops.reduce_sum(
+ math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable.
+ dofid = trace + mean
+ if activations_dtype != dtypes.float64:
+ dofid = math_ops.cast(dofid, activations_dtype)
+
+ return dofid
+
+
+def frechet_classifier_distance_from_activations(real_activations,
+ generated_activations):
+ """Classifier distance for evaluating a generative model.
+
+ This methods computes the Frechet classifier distance from activations of
+ real images and generated images. This can be used independently of the
+ frechet_classifier_distance() method, especially in the case of using large
+ batches during evaluation where we would like precompute all of the
+ activations before computing the classifier distance.
+
+ This technique is described in detail in https://arxiv.org/abs/1706.08500.
+ Given two Gaussian distribution with means m and m_w and covariance matrices
+ C and C_w, this function calculates
+
+ |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))
+
+ which captures how different the distributions of real images and generated
+ images (or more accurately, their visual features) are. Note that unlike the
+ Inception score, this is a true distance and utilizes information about real
+ world images.
+
+ Note that when computed using sample means and sample covariance matrices,
+ Frechet distance is biased. It is more biased for small sample sizes. (e.g.
+ even if the two distributions are the same, for a small sample size, the
+ expected Frechet distance is large). It is important to use the same
+ sample size to compute frechet classifier distance when comparing two
+ generative models.
+
+ Args:
+ real_activations: 2D Tensor containing activations of real data. Shape is
+ [batch_size, activation_size].
+ generated_activations: 2D Tensor containing activations of generated data.
+ Shape is [batch_size, activation_size].
+
+ Returns:
+ The Frechet Inception distance. A floating-point scalar of the same type
+ as the output of the activations.
+
+ """
+ real_activations.shape.assert_has_rank(2)
+ generated_activations.shape.assert_has_rank(2)
+
+ activations_dtype = real_activations.dtype
+ if activations_dtype != dtypes.float64:
+ real_activations = math_ops.to_double(real_activations)
+ generated_activations = math_ops.to_double(generated_activations)
+
+ # Compute mean and covariance matrices of activations.
+ m = math_ops.reduce_mean(real_activations, 0)
+ m_w = math_ops.reduce_mean(generated_activations, 0)
+ num_examples_real = math_ops.to_double(array_ops.shape(real_activations)[0])
+ num_examples_generated = math_ops.to_double(
+ array_ops.shape(generated_activations)[0])
+
+ # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T
+ real_centered = real_activations - m
+ sigma = math_ops.matmul(
+ real_centered, real_centered, transpose_a=True) / (
+ num_examples_real - 1)
+
+ gen_centered = generated_activations - m_w
+ sigma_w = math_ops.matmul(
+ gen_centered, gen_centered, transpose_a=True) / (
+ num_examples_generated - 1)
+
+ # Find the Tr(sqrt(sigma sigma_w)) component of FID
+ sqrt_trace_component = trace_sqrt_product(sigma, sigma_w)
+
+ # Compute the two components of FID.
+
+ # First the covariance component.
+ # Here, note that trace(A + B) = trace(A) + trace(B)
+ trace = math_ops.trace(sigma + sigma_w) - 2.0 * sqrt_trace_component
+
+ # Next the distance between means.
+ mean = math_ops.reduce_sum(
+ math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable.
+ fid = trace + mean
+ if activations_dtype != dtypes.float64:
+ fid = math_ops.cast(fid, activations_dtype)
+
+ return fid
+
+frechet_inception_distance = functools.partial(
+ frechet_classifier_distance,
+ classifier_fn=functools.partial(
+ run_inception, output_tensor=INCEPTION_FINAL_POOL))
+
+
+def kernel_classifier_distance(real_images,
+ generated_images,
+ classifier_fn,
+ num_classifier_batches=1,
+ max_block_size=1024,
+ dtype=None):
+ """Kernel "classifier" distance for evaluating a generative model.
+
+ This is based on the Kernel Inception distance, but for an arbitrary
+ embedding.
+
+ This technique is described in detail in https://arxiv.org/abs/1801.01401.
+ Given two distributions P and Q of activations, this function calculates
+
+ E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+ - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+ where k is the polynomial kernel
+
+ k(x, y) = ( x^T y / dimension + 1 )^3.
+
+ This captures how different the distributions of real and generated images'
+ visual features are. Like the Frechet distance (and unlike the Inception
+ score), this is a true distance and incorporates information about the
+ target images. Unlike the Frechet score, this function computes an
+ *unbiased* and asymptotically normal estimator, which makes comparing
+ estimates across models much more intuitive.
+
+ The estimator used takes time quadratic in max_block_size. Larger values of
+ max_block_size will decrease the variance of the estimator but increase the
+ computational cost. This differs slightly from the estimator used by the
+ original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+
+ NOTE: the blocking code assumes that real_activations and
+ generated_activations are both in random order. If either is sorted in a
+ meaningful order, the estimator will behave poorly.
+
+ NOTE: This function consumes images, computes their activations, and then
+ computes the classifier score. If you would like to precompute many
+ activations for real and generated images for large batches, or to compute
+ multiple scores based on the same images, please use
+ kernel_clasifier_distance_from_activations(), which this method also uses.
+
+ Args:
+ real_images: Real images to use to compute Kernel Inception distance.
+ generated_images: Generated images to use to compute Kernel Inception
+ distance.
+ classifier_fn: A function that takes images and produces activations based
+ on a classifier.
+ num_classifier_batches: Number of batches to split images in to in order to
+ efficiently run them through the classifier network.
+ max_estimator_block_size: integer, default 1024. The distance estimator
+ splits samples into blocks for computational efficiency. Larger values are
+ more computationally expensive but decrease the variance of the distance
+ estimate.
+ dtype: if not None, coerce activations to this dtype before computations.
+
+ Returns:
+ The Kernel Inception Distance. A floating-point scalar of the same type
+ as the output of the activations.
+ """
+ return kernel_classifier_distance_and_std(
+ real_images,
+ generated_images,
+ classifier_fn,
+ num_classifier_batches=num_classifier_batches,
+ max_block_size=max_block_size,
+ dtype=dtype)[0]
+
+
+kernel_inception_distance = functools.partial(
+ kernel_classifier_distance,
+ classifier_fn=functools.partial(
+ run_inception, output_tensor=INCEPTION_FINAL_POOL))
+
+
+def kernel_classifier_distance_and_std(real_images,
+ generated_images,
+ classifier_fn,
+ num_classifier_batches=1,
+ max_block_size=1024,
+ dtype=None):
+ """Kernel "classifier" distance for evaluating a generative model.
+
+ This is based on the Kernel Inception distance, but for an arbitrary
+ embedding. Also returns an estimate of the standard error of the distance
+ estimator.
+
+ This technique is described in detail in https://arxiv.org/abs/1801.01401.
+ Given two distributions P and Q of activations, this function calculates
+
+ E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+ - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+ where k is the polynomial kernel
+
+ k(x, y) = ( x^T y / dimension + 1 )^3.
+
+ This captures how different the distributions of real and generated images'
+ visual features are. Like the Frechet distance (and unlike the Inception
+ score), this is a true distance and incorporates information about the
+ target images. Unlike the Frechet score, this function computes an
+ *unbiased* and asymptotically normal estimator, which makes comparing
+ estimates across models much more intuitive.
+
+ The estimator used takes time quadratic in max_block_size. Larger values of
+ max_block_size will decrease the variance of the estimator but increase the
+ computational cost. This differs slightly from the estimator used by the
+ original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+
+ NOTE: the blocking code assumes that real_activations and
+ generated_activations are both in random order. If either is sorted in a
+ meaningful order, the estimator will behave poorly.
+
+ NOTE: This function consumes images, computes their activations, and then
+ computes the classifier score. If you would like to precompute many
+ activations for real and generated images for large batches, or to compute
+ multiple scores based on the same images, please use
+ kernel_clasifier_distance_from_activations(), which this method also uses.
+
+ Args:
+ real_images: Real images to use to compute Kernel Inception distance.
+ generated_images: Generated images to use to compute Kernel Inception
+ distance.
+ classifier_fn: A function that takes images and produces activations based
+ on a classifier.
+ num_classifier_batches: Number of batches to split images in to in order to
+ efficiently run them through the classifier network.
+ max_estimator_block_size: integer, default 1024. The distance estimator
+ splits samples into blocks for computational efficiency. Larger values are
+ more computationally expensive but decrease the variance of the distance
+ estimate. Having a smaller block size also gives a better estimate of the
+ standard error.
+ dtype: if not None, coerce activations to this dtype before computations.
+
+ Returns:
+ The Kernel Inception Distance. A floating-point scalar of the same type
+ as the output of the activations.
+ An estimate of the standard error of the distance estimator (a scalar of
+ the same type).
+ """
+ real_images_list = array_ops.split(
+ real_images, num_or_size_splits=num_classifier_batches)
+ generated_images_list = array_ops.split(
+ generated_images, num_or_size_splits=num_classifier_batches)
+
+ real_imgs = array_ops.stack(real_images_list)
+ generated_imgs = array_ops.stack(generated_images_list)
+
+ # Compute the activations using the memory-efficient `map_fn`.
+ def compute_activations(elems):
+ return functional_ops.map_fn(
+ fn=classifier_fn,
+ elems=elems,
+ parallel_iterations=1,
+ back_prop=False,
+ swap_memory=True,
+ name='RunClassifier')
+
+ real_a = compute_activations(real_imgs)
+ gen_a = compute_activations(generated_imgs)
+
+ # Ensure the activations have the right shapes.
+ real_a = array_ops.concat(array_ops.unstack(real_a), 0)
+ gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)
+
+ return kernel_classifier_distance_and_std_from_activations(
+ real_a, gen_a, max_block_size=max_block_size)
+
+
+kernel_inception_distance_and_std = functools.partial(
+ kernel_classifier_distance_and_std,
+ classifier_fn=functools.partial(
+ run_inception, output_tensor=INCEPTION_FINAL_POOL))
+
+
+def kernel_classifier_distance_from_activations(real_activations,
+ generated_activations,
+ max_block_size=1024,
+ dtype=None):
+ """Kernel "classifier" distance for evaluating a generative model.
+
+ This methods computes the kernel classifier distance from activations of
+ real images and generated images. This can be used independently of the
+ kernel_classifier_distance() method, especially in the case of using large
+ batches during evaluation where we would like to precompute all of the
+ activations before computing the classifier distance, or if we want to
+ compute multiple metrics based on the same images.
+
+ This technique is described in detail in https://arxiv.org/abs/1801.01401.
+ Given two distributions P and Q of activations, this function calculates
+
+ E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+ - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+ where k is the polynomial kernel
+
+ k(x, y) = ( x^T y / dimension + 1 )^3.
+
+ This captures how different the distributions of real and generated images'
+ visual features are. Like the Frechet distance (and unlike the Inception
+ score), this is a true distance and incorporates information about the
+ target images. Unlike the Frechet score, this function computes an
+ *unbiased* and asymptotically normal estimator, which makes comparing
+ estimates across models much more intuitive.
+
+ The estimator used takes time quadratic in max_block_size. Larger values of
+ max_block_size will decrease the variance of the estimator but increase the
+ computational cost. This differs slightly from the estimator used by the
+ original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+
+ NOTE: the blocking code assumes that real_activations and
+ generated_activations are both in random order. If either is sorted in a
+ meaningful order, the estimator will behave poorly.
+
+ Args:
+ real_activations: 2D Tensor containing activations of real data. Shape is
+ [batch_size, activation_size].
+ generated_activations: 2D Tensor containing activations of generated data.
+ Shape is [batch_size, activation_size].
+ max_block_size: integer, default 1024. The distance estimator splits samples
+ into blocks for computational efficiency. Larger values are more
+ computationally expensive but decrease the variance of the distance
+ estimate.
+ dtype: if not None, coerce activations to this dtype before computations.
+
+ Returns:
+ The Kernel Inception Distance. A floating-point scalar of the same type
+ as the output of the activations.
+ """
+ return kernel_classifier_distance_and_std_from_activations(
+ real_activations, generated_activations, max_block_size=max_block_size)[0]
+
+
+def kernel_classifier_distance_and_std_from_activations(real_activations,
+ generated_activations,
+ max_block_size=1024,
+ dtype=None):
+ """Kernel "classifier" distance for evaluating a generative model.
+
+ This methods computes the kernel classifier distance from activations of
+ real images and generated images. This can be used independently of the
+ kernel_classifier_distance() method, especially in the case of using large
+ batches during evaluation where we would like to precompute all of the
+ activations before computing the classifier distance, or if we want to
+ compute multiple metrics based on the same images. It also returns a rough
+ estimate of the standard error of the estimator.
+
+ This technique is described in detail in https://arxiv.org/abs/1801.01401.
+ Given two distributions P and Q of activations, this function calculates
+
+ E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+ - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+ where k is the polynomial kernel
+
+ k(x, y) = ( x^T y / dimension + 1 )^3.
+
+ This captures how different the distributions of real and generated images'
+ visual features are. Like the Frechet distance (and unlike the Inception
+ score), this is a true distance and incorporates information about the
+ target images. Unlike the Frechet score, this function computes an
+ *unbiased* and asymptotically normal estimator, which makes comparing
+ estimates across models much more intuitive.
+
+ The estimator used takes time quadratic in max_block_size. Larger values of
+ max_block_size will decrease the variance of the estimator but increase the
+ computational cost. This differs slightly from the estimator used by the
+ original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+ The estimate of the standard error will also be more reliable when there are
+ more blocks, i.e. when max_block_size is smaller.
+
+ NOTE: the blocking code assumes that real_activations and
+ generated_activations are both in random order. If either is sorted in a
+ meaningful order, the estimator will behave poorly.
+
+ Args:
+ real_activations: 2D Tensor containing activations of real data. Shape is
+ [batch_size, activation_size].
+ generated_activations: 2D Tensor containing activations of generated data.
+ Shape is [batch_size, activation_size].
+ max_block_size: integer, default 1024. The distance estimator splits samples
+ into blocks for computational efficiency. Larger values are more
+ computationally expensive but decrease the variance of the distance
+ estimate. Having a smaller block size also gives a better estimate of the
+ standard error.
+ dtype: if not None, coerce activations to this dtype before computations.
+
+ Returns:
+ The Kernel Inception Distance. A floating-point scalar of the same type
+ as the output of the activations.
+ An estimate of the standard error of the distance estimator (a scalar of
+ the same type).
+ """
+
+ real_activations.shape.assert_has_rank(2)
+ generated_activations.shape.assert_has_rank(2)
+ real_activations.shape[1].assert_is_compatible_with(
+ generated_activations.shape[1])
+
+ if dtype is None:
+ dtype = real_activations.dtype
+ assert generated_activations.dtype == dtype
+ else:
+ real_activations = math_ops.cast(real_activations, dtype)
+ generated_activations = math_ops.cast(generated_activations, dtype)
+
+ # Figure out how to split the activations into blocks of approximately
+ # equal size, with none larger than max_block_size.
+ n_r = array_ops.shape(real_activations)[0]
+ n_g = array_ops.shape(generated_activations)[0]
+
+ n_bigger = math_ops.maximum(n_r, n_g)
+ n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))
+
+ v_r = n_r // n_blocks
+ v_g = n_g // n_blocks
+
+ n_plusone_r = n_r - v_r * n_blocks
+ n_plusone_g = n_g - v_g * n_blocks
+
+ sizes_r = array_ops.concat([
+ array_ops.fill([n_blocks - n_plusone_r], v_r),
+ array_ops.fill([n_plusone_r], v_r + 1),
+ ], 0)
+ sizes_g = array_ops.concat([
+ array_ops.fill([n_blocks - n_plusone_g], v_g),
+ array_ops.fill([n_plusone_g], v_g + 1),
+ ], 0)
+
+ zero = array_ops.zeros([1], dtype=dtypes.int32)
+ inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
+ inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)
+
+ dim = math_ops.cast(real_activations.shape[1], dtype)
+
+ def compute_kid_block(i):
+ 'Compute the ith block of the KID estimate.'
+ r_s = inds_r[i]
+ r_e = inds_r[i + 1]
+ r = real_activations[r_s:r_e]
+ m = math_ops.cast(r_e - r_s, dtype)
+
+ g_s = inds_g[i]
+ g_e = inds_g[i + 1]
+ g = generated_activations[g_s:g_e]
+ n = math_ops.cast(g_e - g_s, dtype)
+
+ k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
+ k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
+ k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
+ return (-2 * math_ops.reduce_mean(k_rg) +
+ (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1)) +
+ (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1)))
+
+ ests = functional_ops.map_fn(
+ compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False)
+
+ mn = math_ops.reduce_mean(ests)
+
+ # nn_impl.moments doesn't use the Bessel correction, which we want here
+ n_blocks_ = math_ops.cast(n_blocks, dtype)
+ var = control_flow_ops.cond(
+ math_ops.less_equal(n_blocks, 1),
+ lambda: array_ops.constant(float('nan'), dtype=dtype),
+ lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1))
+
+ return mn, math_ops.sqrt(var / n_blocks_)