author    nunzip <np.scarh@gmail.com>    2019-03-14 17:01:33 +0000
committer nunzip <np.scarh@gmail.com>    2019-03-14 17:01:33 +0000
commit    c917a6b70fa66f33a533edd90e8800105bc110ae (patch)
tree      58cbda5da9179e09af61c35dc047006899fec6b5
parent    a4ce2edb09f2c8d0b200b9f77f8df3fd89643b38 (diff)
Add final figures and rewrite part 4
-rw-r--r--  report/fig/added_generated_data.png   bin  21511 -> 24835 bytes
-rw-r--r--  report/fig/bal1.png                   bin      0 -> 13340 bytes
-rw-r--r--  report/fig/bal2.png                   bin      0 -> 14250 bytes
-rw-r--r--  report/fig/bal3.png                   bin      0 -> 13953 bytes
-rw-r--r--  report/fig/bal4.png                   bin      0 -> 48548 bytes
-rw-r--r--  report/fig/cdc1.png                   bin      0 -> 12711 bytes
-rw-r--r--  report/fig/cdc2.png                   bin      0 -> 12143 bytes
-rw-r--r--  report/fig/cdc3.png                   bin      0 -> 12822 bytes
-rw-r--r--  report/fig/cdcloss1.png               bin      0 -> 28847 bytes
-rw-r--r--  report/fig/cdcloss2.png               bin      0 -> 27520 bytes
-rw-r--r--  report/fig/cdcsmooth.png              bin      0 -> 11821 bytes
-rw-r--r--  report/fig/fake_only.png              bin  14446 -> 26010 bytes
-rw-r--r--  report/fig/mix_zoom.png               bin  23682 -> 25640 bytes
-rw-r--r--  report/fig/pca-cdc.png                bin      0 -> 80062 bytes
-rw-r--r--  report/fig/pr-cdc.png                 bin      0 -> 73889 bytes
-rw-r--r--  report/fig/pr-retrain.png             bin      0 -> 88390 bytes
-rw-r--r--  report/fig/retrain_fail.png           bin  12925 -> 12401 bytes
-rw-r--r--  report/fig/tsne-cdc.png               bin      0 -> 74235 bytes
-rw-r--r--  report/paper.md                       155
19 files changed, 92 insertions, 63 deletions
diff --git a/report/fig/added_generated_data.png b/report/fig/added_generated_data.png
index 37c3e1e..9268555 100644
--- a/report/fig/added_generated_data.png
+++ b/report/fig/added_generated_data.png
Binary files differ
diff --git a/report/fig/bal1.png b/report/fig/bal1.png
new file mode 100644
index 0000000..7b25003
--- /dev/null
+++ b/report/fig/bal1.png
Binary files differ
diff --git a/report/fig/bal2.png b/report/fig/bal2.png
new file mode 100644
index 0000000..6bed173
--- /dev/null
+++ b/report/fig/bal2.png
Binary files differ
diff --git a/report/fig/bal3.png b/report/fig/bal3.png
new file mode 100644
index 0000000..8f9e528
--- /dev/null
+++ b/report/fig/bal3.png
Binary files differ
diff --git a/report/fig/bal4.png b/report/fig/bal4.png
new file mode 100644
index 0000000..3b65f91
--- /dev/null
+++ b/report/fig/bal4.png
Binary files differ
diff --git a/report/fig/cdc1.png b/report/fig/cdc1.png
new file mode 100644
index 0000000..ba79852
--- /dev/null
+++ b/report/fig/cdc1.png
Binary files differ
diff --git a/report/fig/cdc2.png b/report/fig/cdc2.png
new file mode 100644
index 0000000..8dfdb71
--- /dev/null
+++ b/report/fig/cdc2.png
Binary files differ
diff --git a/report/fig/cdc3.png b/report/fig/cdc3.png
new file mode 100644
index 0000000..ff64302
--- /dev/null
+++ b/report/fig/cdc3.png
Binary files differ
diff --git a/report/fig/cdcloss1.png b/report/fig/cdcloss1.png
new file mode 100644
index 0000000..4ecd326
--- /dev/null
+++ b/report/fig/cdcloss1.png
Binary files differ
diff --git a/report/fig/cdcloss2.png b/report/fig/cdcloss2.png
new file mode 100644
index 0000000..22d479f
--- /dev/null
+++ b/report/fig/cdcloss2.png
Binary files differ
diff --git a/report/fig/cdcsmooth.png b/report/fig/cdcsmooth.png
new file mode 100644
index 0000000..43afa96
--- /dev/null
+++ b/report/fig/cdcsmooth.png
Binary files differ
diff --git a/report/fig/fake_only.png b/report/fig/fake_only.png
index 27ceba1..60ce5ba 100644
--- a/report/fig/fake_only.png
+++ b/report/fig/fake_only.png
Binary files differ
diff --git a/report/fig/mix_zoom.png b/report/fig/mix_zoom.png
index b88ce7d..598d5e2 100644
--- a/report/fig/mix_zoom.png
+++ b/report/fig/mix_zoom.png
Binary files differ
diff --git a/report/fig/pca-cdc.png b/report/fig/pca-cdc.png
new file mode 100644
index 0000000..310719b
--- /dev/null
+++ b/report/fig/pca-cdc.png
Binary files differ
diff --git a/report/fig/pr-cdc.png b/report/fig/pr-cdc.png
new file mode 100644
index 0000000..4ab42a9
--- /dev/null
+++ b/report/fig/pr-cdc.png
Binary files differ
diff --git a/report/fig/pr-retrain.png b/report/fig/pr-retrain.png
new file mode 100644
index 0000000..526f6d7
--- /dev/null
+++ b/report/fig/pr-retrain.png
Binary files differ
diff --git a/report/fig/retrain_fail.png b/report/fig/retrain_fail.png
index 2a71fd4..daaa3f3 100644
--- a/report/fig/retrain_fail.png
+++ b/report/fig/retrain_fail.png
Binary files differ
diff --git a/report/fig/tsne-cdc.png b/report/fig/tsne-cdc.png
new file mode 100644
index 0000000..a037380
--- /dev/null
+++ b/report/fig/tsne-cdc.png
Binary files differ
diff --git a/report/paper.md b/report/paper.md
index f104cf0..f1ebe7f 100644
--- a/report/paper.md
+++ b/report/paper.md
@@ -4,7 +4,7 @@ In this coursework we present two variants of the GAN architecture - DCGAN and C
Generative Adversarial Networks present a system of models which learn to output data similar to their training data. A trained GAN takes noise as an input and is able to provide an output with the same dimensions and relevant features as the samples it has been trained with.
-GAN's employ two neural networks - a *discriminator* and a *generator* which contest in a zero-sum game. The task of the *discriminator* is to distinguish generated images from real images, while the task of the generator is to produce realistic images which are able to fool the discriminator.
+GANs employ two neural networks - a *discriminator* and a *generator* which contest in a zero-sum game. The task of the *discriminator* is to distinguish generated images from real images, while the task of the generator is to produce realistic images which are able to fool the discriminator.
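Formally, the zero-sum game the two networks play can be summarised by the standard GAN minimax objective, where $G$ maps noise $z \sim p_z$ to images and $D(x)$ is the estimated probability that $x$ is real:

$$ \min_G \max_D \; \mathbb{E}_{x \sim p_{\textrm{data}}} \left[ \log D(x) \right] + \mathbb{E}_{z \sim p_z} \left[ \log ( 1 - D(G(z)) ) \right] $$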
Training a shallow GAN with no convolutional layers poses problems such as mode collapse and unbalanced G-D losses which lead to low quality image output.
@@ -16,8 +16,7 @@ Training a shallow GAN with no convolutional layers poses problems such as mode
\end{center}
\end{figure}
-
-Mode collapse is achieved with our naive *vanilla GAN* (Appendix-\ref{fig:vanilla_gan}) implementation after 200,000 batches. The generated images observed during a mode collapse can be seen on figure \ref{fig:mode_collapse}. The output of the generator only represents few of the labels originally fed. When mode collapse is reached loss function of the generator stops improving as shown in figure \ref{fig:vanilla_loss}. We observe, the discriminator loss tends to zero as the discriminator learns to assume and classify the fake 1's, while the generator is stuck producing 1 and hence not able to improve.
+Our naive *vanilla GAN* implementation (Appendix-\ref{fig:vanilla_gan}) reaches mode collapse after 200,000 batches. The generated images observed during mode collapse can be seen in figure \ref{fig:mode_collapse}. The output of the generator represents only a few of the labels originally fed, and once mode collapse is reached the loss function of the generator stops improving, as shown in figure \ref{fig:vanilla_loss}. We observe that the discriminator loss tends to zero as the discriminator learns to recognise and classify the fake 1s, while the generator is stuck producing 1s and is hence unable to improve.
A significant improvement to this vanilla architecture is Deep Convolutional Generative Adversarial Networks (DCGAN).
@@ -71,14 +70,6 @@ between figures \ref{fig:dcmed}, \ref{fig:dcshort} and \ref{fig:dclong}.
Applying Virtual Batch Normalization to our Medium DCGAN does not provide observable changes in G-D balancing, but reduces within-batch correlation. Although it is difficult to qualitatively assess the improvements, figure \ref{fig:vbn_dc} shows the results of introducing this technique.
-\begin{figure}
-\begin{center}
-\includegraphics[width=24em]{fig/vbn_dc.pdf}
-\caption{DCGAN Virtual Batch Normalization}
-\label{fig:vbn_dc}
-\end{center}
-\end{figure}
-
We evaluated the effect of different dropout rates (results in appendix figures \ref{fig:dcdrop1_1}, \ref{fig:dcdrop1_2}, \ref{fig:dcdrop2_1}, \ref{fig:dcdrop2_2}) and concluded that optimisation
of the dropout hyper-parameter is essential for maximising performance. A high dropout rate results in DCGAN producing only artifacts that do not match any specific class, due to the generator overpowering the discriminator. Conversely, a low dropout rate leads to an initial stabilisation of the G-D losses, but ultimately results in instability in the form of oscillation when training for a large number of batches.
@@ -103,7 +94,7 @@ While training the different proposed DCGAN architectures, we did not observe mo
CGAN is a conditional version of a GAN which utilises labeled data. Unlike DCGAN, CGAN is trained with explicitly provided labels, which allows CGAN to associate features with specific labels. This has the intrinsic advantage of allowing us to specify the label of generated data. The baseline CGAN which we evaluate is visible in figure \ref{fig:cganarc}. The baseline CGAN architecture presents a series of blocks, each containing a Dense layer, a LeakyReLU layer (slope=0.2) and a Batch Normalisation layer. The baseline discriminator uses Dense layers, followed by LeakyReLU (slope=0.2) and a Dropout layer.
The optimizer used for training is `Adam`(`learning_rate=0.002`, `beta=0.5`).
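As an illustration, a minimal Keras sketch of the two baseline block types described above (layer widths and the dropout rate are placeholder assumptions, not the exact values of figure \ref{fig:cganarc}):

```python
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

def generator_block(x, units):
    # Baseline CGAN generator block: Dense -> LeakyReLU(0.2) -> BatchNorm
    x = Dense(units)(x)
    x = LeakyReLU(alpha=0.2)(x)
    return BatchNormalization()(x)

def discriminator_block(x, units, rate=0.3):
    # Baseline discriminator block: Dense -> LeakyReLU(0.2) -> Dropout
    x = Dense(units)(x)
    x = LeakyReLU(alpha=0.2)(x)
    return Dropout(rate)(x)

optimizer = Adam(learning_rate=0.002, beta_1=0.5)
```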
-The Convolutional CGAN analysed follows the structure presented in the relevant Appendix section. It uses TODO ADD BRIEF DESCRIPTION
+The Convolutional CGAN (CDCGAN) analysed follows the structure presented in the relevant Appendix section. It combines the label conditioning of the baseline CGAN with convolutional generator and discriminator blocks similar to those used in DCGAN.
We evaluate permutations of the architecture involving:
@@ -175,10 +166,10 @@ the same classes, indicating that mode collapse still did not occur.
\end{center}
\end{figure}
-The best performing architecture was Convolutional CGAN. It is difficult to assess any potential improvement at this stage, since the samples produced
-after around 10,000 batches are indistinguishable from the ones of the MNIST dataset (as it can be seen in figure \ref{fig:cdc}). Training CDCGAN for more than
+The best performing architecture was CDCGAN. It is difficult to assess any potential improvement at this stage, since the samples produced
+between 8,000 and 13,000 batches are indistinguishable from those of the MNIST dataset (as can be seen in figure \ref{fig:cdc}, middle). Training CDCGAN for more than
15,000 batches is however not beneficial, as the discriminator will keep improving, leading the generator to produce bad samples as shown in the reported example.
-We find a good balance for 12,000 batches.
+We find a good balance at 12,000 batches.
\begin{figure}
\begin{center}
@@ -186,19 +177,31 @@ We find a good balance for 12,000 batches.
\includegraphics[width=8em]{fig/cdc2.png}
\includegraphics[width=8em]{fig/cdc3.png}
\caption{CDCGAN outputs; 1000 batches - 12000 batches - 20000 batches}
-\label{fig:cbalance}
+\label{fig:cdc}
\end{center}
\end{figure}
+Oscillation of the generator loss is noticeable in figure \ref{fig:cdcloss} due to the discriminator loss approaching zero. One possible
+adjustment to tackle this issue was balancing G-D training steps, opting for G/D=3 and allowing the generator to gain some advantage over the discriminator. This
+technique allowed us to smooth the oscillation while producing images of similar quality. A quantitative performance assessment is performed in the following section.
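A sketch of this balancing schedule; the `d_step` and `g_step` callables stand in for our actual discriminator and generator batch updates and are assumptions:

```python
def train_balanced(d_step, g_step, n_batches, gd_ratio=3):
    # For every discriminator update run `gd_ratio` generator updates
    # (G/D=3), giving the generator an advantage over the discriminator.
    history = []
    for _ in range(n_batches):
        d_loss = d_step()
        for _ in range(gd_ratio):
            g_loss = g_step()
        history.append((d_loss, g_loss))
    return history
```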
+
+\begin{figure}
+\begin{center}
+\includegraphics[width=12em]{fig/cdcloss1.png}
+\includegraphics[width=12em]{fig/cdcloss2.png}
+\caption{CDCGAN G-D loss; Left G/D=1; Right G/D=3}
+\label{fig:cdcloss}
+\end{center}
+\end{figure}
Virtual Batch Normalization was not attempted on this architecture as it significantly
increased the training time (roughly doubling it).
-Introducing one-sided label smoothing produced very similar results, hence a quantitative performance assessment will need to
+Introducing one-sided label smoothing produced very similar results (figure \ref{fig:cdcsmooth}), hence a quantitative performance assessment will need to
be performed in the next section through the introduction of Inception Scores.
# Inception Score
-Inception score is calculated as introduced by Tim Salimans et. al [@improved]. However as we are evaluating MNIST, we use LeNet-5 [@lenet] as the basis of the inceptioen score.
+Inception score is calculated as introduced by Tim Salimans et al. [@improved]. However, as we are evaluating MNIST, we use LeNet-5 [@lenet] as the basis of the inception score.
We use the logits extracted from LeNet:
$$ \textrm{IS}(x) = \exp(\mathbb{E}_x \left( \textrm{KL} ( p(y\mid x) \| p(y) ) \right) ) $$
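A direct numpy sketch of this score, assuming `p_yx` holds the softmax outputs of LeNet for the generated samples:

```python
import numpy as np

def inception_score(p_yx, eps=1e-16):
    # p_yx: (n_samples, n_classes) array of class probabilities p(y|x).
    p_y = p_yx.mean(axis=0, keepdims=True)                   # marginal p(y)
    kl = (p_yx * (np.log(p_yx + eps) - np.log(p_y + eps))).sum(axis=1)
    return float(np.exp(kl.mean()))                          # exp(E_x[KL])
```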
@@ -212,9 +215,10 @@ calculated training the LeNet classifier under the same conditions across all ex
Shallow CGAN & 0.645 & 3.57 & 8:14 \\
Medium CGAN & 0.715 & 3.79 & 10:23 \\
Deep CGAN & 0.739 & 3.85 & 16:27 \\
-Convolutional CGAN & 0.899 & 7.41 & 1:05:27 \\
+\textbf{CDCGAN} & \textbf{0.899} & \textbf{7.41} & 1:05:27 \\
Medium CGAN+LS & 0.749 & 3.643 & 10:42 \\
-Convolutional CGAN+LS & & & 1:12:39 \\
+CDCGAN+LS & 0.846 & 6.63 & 1:12:39 \\
+CDCGAN G/D=3 & 0.849 & 6.59 & 1:04:11 \\
Medium CGAN DO=0.1 & 0.761 & 3.836 & 10:36 \\
Medium CGAN DO=0.5 & 0.725 & 3.677 & 10:36 \\
Medium CGAN+VBN & 0.735 & 3.82 & 19:38 \\
@@ -227,22 +231,30 @@ Medium CGAN+VBN+LS & 0.763 & 3.91 & 19:43 \\
### Architecture
-We observe increased accruacy as we increase the depth of the GAN arhitecture at the cost of the training time. There appears to be diminishing returns with the deeper networks, and larger improvements are achievable with specific optimisation techniques. Despite the initial considerations about G-D losses for the Convolutional CGAN, there seems to be an improvement in inception score and test accuracy with respect to the other analysed cases. One sided label smoothing however did not improve this performanc any further, suggesting that reinforcing discriminator behaviour does not benefit the system in this case.
+We observe increased accuracy as we increase the depth of the GAN architecture, at the cost of training time. There appear to be diminishing returns with the deeper networks, and larger improvements are achievable with specific optimisation techniques. CDCGAN achieves improved performance in comparison to the other cases analysed, as we expected from the results obtained in the previous section, since the samples produced are almost identical to those of the original MNIST dataset.
### One-Sided Label Smoothing
-One sided label smoothing involves relaxing our confidence on the labels in our data. Tim Salimans et. al. [@improved] show smoothing of the positive labels reduces the vulnerability of the neural network to adversarial examples. We observe significant improvements to the Inception score and classification accuracy in the case of our baseline (Medium CGAN).
+One-sided label smoothing involves relaxing our confidence in the labels of our data. Tim Salimans et al. [@improved] show that smoothing the positive labels reduces the vulnerability of the neural network to adversarial examples. We observe significant improvements to the Inception score and classification accuracy in the case of our baseline (Medium CGAN). This technique however did not improve the performance of the CDCGAN any further, suggesting that reinforcing discriminator behaviour does not benefit the system in this case.
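In practice one-sided smoothing only requires softening the discriminator's targets for real samples; a minimal sketch, assuming the usual 0.9 target value:

```python
import numpy as np

batch_size = 64
real_targets = np.full((batch_size, 1), 0.9)  # smoothed positive labels
fake_targets = np.zeros((batch_size, 1))      # negatives left untouched
```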
### Virtual Batch Normalisation
-Virtual Batch Noramlisation is a further optimisation technique proposed by Tim Salimans et. al. [@improved]. Virtual batch normalisation is a modification to the batch normalisation layer, which performs normalisation based on statistics from a reference batch. We observe that VBN improved the classification accuracy and the Inception score due to the provided reduction in intra-batch correlation.
+Virtual Batch Normalisation is a further optimisation technique proposed by Tim Salimans et al. [@improved]. Virtual batch normalisation is a modification to the batch normalisation layer which performs normalisation based on the statistics of a reference batch. We observe that VBN improved the classification accuracy and the Inception score due to the reduction it provides in intra-batch correlation.
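A simplified sketch of the idea, assuming a reference batch fixed once at the start of training (the learned scale and shift of a full implementation are omitted):

```python
import numpy as np

def virtual_batch_norm(x, ref_batch, eps=1e-5):
    # Normalise activations with the statistics of a fixed reference batch
    # instead of the current batch, reducing intra-batch correlation.
    mu = ref_batch.mean(axis=0)
    var = ref_batch.var(axis=0)
    return (x - mu) / np.sqrt(var + eps)
```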
### Dropout
Dropout for the non-convolutional CGAN architecture does not affect performance as much as in DCGAN, nor does it seem to affect the quality of the images produced; the G-D losses also remain almost unchanged. Ultimately, judging from the inception scores, it is preferable to use a low dropout rate (in our case 0.1 achieves the best results).
+### G-D Balancing on CDCGAN
+
+Despite achieving lower oscillation of the losses, using G/D=3 to incentivise generator training did not improve the performance of CDCGAN, as observed from
+the inception score and testing accuracy. We in fact obtain 5% lower testing accuracy, meaning that using this technique in our architecture produces on
+average lower quality images when compared to our standard CDCGAN.
+
# Re-training the handwritten digit classifier
+*In the following section the generated data we use will be produced exclusively by our CDCGAN architecture.*
+
## Results
In this section we analyze the effect of retraining the classification network using a mix of real and generated data, highlighting the benefits of
@@ -262,7 +274,7 @@ As observed in figure \ref{fig:mix1} we performed two experiments for performanc
\end{center}
\end{figure}
-Both experiments show that an optimal amount of data to boost testing accuracy on the original MNIST dataset is around 30% generated data as in both cases we observe an increase in accuracy by around 0.3%. In absence of original data the testing accuracy drops significantly to around 20% for both cases.
+Both experiments show that training the classification network with injected generated data (making up between 40% and 90% of the training set) causes on average a small increase in accuracy, of up to 0.2%. In the absence of original data the testing accuracy drops significantly, to around 40% in both cases.
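A sketch of how such a mixed training set can be assembled; array names and the generated fraction are assumptions mirroring the sweep above:

```python
import numpy as np

def mix_training_set(x_real, y_real, x_gen, y_gen, gen_fraction=0.4, seed=0):
    # Keep the training-set size fixed while replacing `gen_fraction`
    # of the real samples with CDCGAN-generated ones.
    rng = np.random.default_rng(seed)
    n = len(x_real)
    n_gen = int(gen_fraction * n)
    r = rng.choice(n, n - n_gen, replace=False)
    g = rng.choice(len(x_gen), n_gen, replace=False)
    x = np.concatenate([x_real[r], x_gen[g]])
    y = np.concatenate([y_real[r], y_gen[g]])
    p = rng.permutation(len(x))
    return x[p], y[p]
```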
## Adapted Training Strategy
@@ -272,29 +284,12 @@ Training for 100 epochs, similarly to the previous section, is clearly not enoug
is only 62%, while training for 300 epochs we can reach up to 88%. The learning curve in figure \ref{fig:few_real} suggests
we cannot achieve much better with this very small amount of data, since the validation accuracy plateaus, while the training accuracy almost reaches 100%.
-\begin{figure}
-\begin{center}
-\includegraphics[width=24em]{fig/train_few_real.png}
-\caption{Training with few real samples}
-\label{fig:few_real}
-\end{center}
-\end{figure}
-
We conduct one experiment, feeding the test set to a LeNet trained exclusively on data generated from our CGAN. It is noticeable that training
-for the first 5 epochs gives good results (figure \ref{fig:fake_only}) when compared to the learning curve obtained when training the network with only the few real samples. This
+for the first 20 epochs gives good results, before reaching a plateau (figure \ref{fig:fake_only}), when compared to the learning curve obtained when training the network with only the few real samples. This
indicates that we can use the generated data to train the first steps of the network (initial weights) and then apply the real samples for 300 epochs to obtain
a finer tuning. As observed in figure \ref{fig:few_init} the first steps of retraining show oscillation, since the fine tuning tries to adapt to the newly fed data. The maximum accuracy reached before the validation curve plateaus is 88.6%, indicating that this strategy proved to be somewhat successful at
improving testing accuracy.
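The strategy reduces to two successive training stages; a sketch where the model and arrays are assumed to exist and the epoch counts follow the text:

```python
# Stage 1: initialise the weights by training on generated data only
# (useful for roughly the first 20 epochs before the plateau).
lenet.fit(x_generated, y_generated, epochs=20)

# Stage 2: fine-tune on the few real samples for 300 epochs.
lenet.fit(x_real_few, y_real_few, epochs=300)
```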
-\begin{figure}
-\begin{center}
-\includegraphics[width=24em]{fig/initialization.png}
-\caption{Retraining with initialization from generated samples}
-\label{fig:few_init}
-\end{center}
-\end{figure}
-
-
We try to improve the results obtained earlier by retraining LeNet with mixed data: few real samples and plenty of generated samples (160,000;
learning curve shown in figure \ref{fig:training_mixed}). The peak accuracy reached is 91%. We then try to remove the generated
samples to apply fine tuning, using only the real samples. After 300 more epochs (figure \ref{fig:training_mixed}) the test accuracy is
@@ -309,8 +304,18 @@ boosted to 92%, making this technique the most successful attempt of improvement
\end{center}
\end{figure}
-Failures classification examples are displayed in figure \ref{fig:retrain_fail}. The results showed indicate that the network we trained is actually performing quite well,
-as most of the testing images that got misclassified (mainly nines and fours) show ambiguities.
+Examples of misclassification are displayed in figure \ref{fig:retrain_fail}. A cross-comparison between these results and the precision-recall
+curve displayed in figure \ref{fig:pr-retrain} shows that the network we trained performs well for most of the digits, but low confidence on the digit $8$ lowers
+the overall performance.
+
+\begin{figure}
+\begin{center}
+\includegraphics[width=24em]{fig/pr-retrain.png}
+\caption{Retraining; Precision-Recall Curve}
+\label{fig:pr-retrain}
+\end{center}
+\end{figure}
+
\newpage
@@ -318,11 +323,19 @@ as most of the testing images that got misclassified (mainly nines and fours) sh
## Relation to PCA
-Similarly to GAN's, PCA can be used to formulate **generative** models of a system. While GAN's are trained neural networks, PCA is a definite statistical procedure which perform orthogonal transformations of the data. Both attempt to identify the most important or *variant* features of the data (which we may then use to generate new data), but PCA by itself is only able to extract linearly related features. In a purely linear system, a GAN would be converging to PCA. In a more complicated system, we would indeed to identify relevant kernels in order to extract relevant features with PCA, while a GAN is able to leverage dense and convolutional neural network layers which may be trained to perform relevant transformations.
+Similarly to GANs, PCA can be used to formulate **generative** models of a system. While GANs are trained neural networks, PCA is a deterministic statistical procedure which performs orthogonal transformations of the data. Both attempt to identify the most important or *variant* features of the data (which we may then use to generate new data), but PCA by itself is only able to extract linearly related features. In a purely linear system, a GAN would converge to PCA. In a more complicated system, we would need to identify relevant kernels in order to extract relevant features with PCA, while a GAN is able to leverage dense and convolutional neural network layers which may be trained to perform relevant transformations.
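As a concrete example of PCA used generatively, one can perturb the codes of real digits in the space of the top principal components and map them back to pixel space; a sketch with an assumed MNIST array `x_train`, and purely linear unlike a GAN:

```python
import numpy as np
from sklearn.decomposition import PCA

pca = PCA(n_components=32).fit(x_train.reshape(len(x_train), -1))
codes = pca.transform(x_train[:16].reshape(16, -1))      # encode real digits
codes += np.random.normal(scale=0.5, size=codes.shape)   # perturb the codes
generated = pca.inverse_transform(codes).reshape(-1, 28, 28)
```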
## Data representation
-TODO EXPLAIN WHAT WE HAVE DONE HERE
+Using the classifier pre-trained on real training examples, we extract embeddings of 10,000 randomly sampled real
+test examples and 10,000 randomly sampled synthetic examples, generated with both CGAN and CDCGAN, from the different classes.
+We obtain both a PCA and a t-SNE representation of our data in two dimensions in figure \ref{fig:features}.
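A sketch of this extraction step, assuming `lenet` is the pre-trained classifier, the sampled arrays exist as described, and the penultimate layer is used as the embedding:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tensorflow.keras.models import Model

embedder = Model(lenet.input, lenet.layers[-2].output)   # drop the softmax
feats = embedder.predict(np.concatenate([x_real_test, x_cgan, x_cdcgan]))
feats_pca = PCA(n_components=2).fit_transform(feats)     # linear projection
feats_tsne = TSNE(n_components=2).fit_transform(feats)   # non-linear map
```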
+
+It is observable that the network that achieved a good inception score (CDCGAN) produces embeddings that are very similar
+to those obtained from the original MNIST dataset, further strengthening our hypothesis about the performance of this
+specific model. On the other hand, with the plain CGAN we notice higher correlation between the two represented features
+for the different classes, meaning that good data separation was not achieved. This is probably due to the additional blur
+produced around the images by our simple CGAN model.
\begin{figure}
\centering
@@ -336,19 +349,21 @@ TODO EXPLAIN WHAT WE HAVE DONE HERE
\label{fig:features}
\end{figure}
+TODO COMMENT ON PR CURVES
\begin{figure}
\centering
\subfloat[][]{\includegraphics[width=.22\textwidth]{fig/pr-mnist.png}}\quad
- \subfloat[][]{\includegraphics[width=.22\textwidth]{fig/pr-cgan.png}}
- \caption{Precisional Recall Curves a) MNIST : b) CGAN output}
+ \subfloat[][]{\includegraphics[width=.22\textwidth]{fig/pr-cgan.png}}\\
+ \subfloat[][]{\includegraphics[width=.22\textwidth]{fig/pr-cdc.png}}
+ \caption{Precision-Recall Curves: a) MNIST; b) CGAN output; c) CDCGAN output}
\label{fig:rocpr}
\end{figure}
## Factoring in classification loss into GAN
Classification accuracy and Inception score can be factored into the GAN to attempt to produce more realistic images. Shane Barratt and Rishi Sharma are able to indirectly optimise the inception score to over 900, and note that directly optimising for maximised Inception score produces adversarial examples [@inception-note].
-Nevertheless, a pretrained static classifier may be added to the GAN model, and it's loss incorporated into the loss added too the loss of the GAN.
+Nevertheless, a pre-trained static classifier may be added to the GAN model, and its loss incorporated into the total loss of the GAN.
$$ L_{\textrm{total}} = \alpha L_{\textrm{LeNet}} + \beta L_{\textrm{generator}} $$
@@ -399,6 +414,14 @@ $$ L_{\textrm{total}} = \alpha L_{\textrm{LeNet}} + \beta L_{\textrm{generator}}
\begin{figure}[H]
\begin{center}
+\includegraphics[width=24em]{fig/vbn_dc.pdf}
+\caption{DCGAN Virtual Batch Normalization}
+\label{fig:vbn_dc}
+\end{center}
+\end{figure}
+
+\begin{figure}[H]
+\begin{center}
\includegraphics[width=24em]{fig/dcgan_dropout01_gd.png}
\caption{DCGAN Dropout 0.1 G-D Losses}
\label{fig:dcdrop1_1}
@@ -433,14 +456,6 @@ $$ L_{\textrm{total}} = \alpha L_{\textrm{LeNet}} + \beta L_{\textrm{generator}}
\begin{figure}[H]
\begin{center}
-\includegraphics[width=24em]{fig/CDCGAN_arch.pdf}
-\caption{Deep Convolutional CGAN Architecture}
-\label{fig:cdcganarc}
-\end{center}
-\end{figure}
-
-\begin{figure}[H]
-\begin{center}
\includegraphics[width=24em]{fig/short_cgan_ex.png}
\includegraphics[width=24em]{fig/short_cgan.png}
\caption{Shallow CGAN}
@@ -489,13 +504,11 @@ $$ L_{\textrm{total}} = \alpha L_{\textrm{LeNet}} + \beta L_{\textrm{generator}}
\end{center}
\end{figure}
-\begin{figure}[H]
+\begin{figure}
\begin{center}
-\includegraphics[width=12em]{fig/good_ex.png}
-\includegraphics[width=12em]{fig/bad_ex.png}
-\includegraphics[width=24em]{fig/cdcgan.png}
-\caption{Convolutional CGAN+LS}
-\label{fig:cdcloss}
+\includegraphics[width=8em]{fig/cdcsmooth.png}
+\caption{CDCGAN+LS outputs after 12,000 batches}
+\label{fig:cdcsmooth}
\end{center}
\end{figure}
@@ -640,6 +653,14 @@ ________________________________________________________________________________
## Retrain-Appendix
+\begin{figure}
+\begin{center}
+\includegraphics[width=24em]{fig/train_few_real.png}
+\caption{Training with few real samples}
+\label{fig:few_real}
+\end{center}
+\end{figure}
+
\begin{figure}[H]
\begin{center}
\includegraphics[width=24em]{fig/fake_only.png}
@@ -650,6 +671,14 @@ ________________________________________________________________________________
\begin{figure}[H]
\begin{center}
+\includegraphics[width=24em]{fig/initialization.png}
+\caption{Retraining with initialization from generated samples}
+\label{fig:few_init}
+\end{center}
+\end{figure}
+
+\begin{figure}[H]
+\begin{center}
\includegraphics[width=12em]{fig/retrain_fail.png}
\caption{Retraining failures}
\label{fig:retrain_fail}