From 8e997dd9bf12ce35d6c56a9da1c85bd8ef2d0f8c Mon Sep 17 00:00:00 2001 From: nunzip Date: Wed, 13 Mar 2019 21:37:12 +0000 Subject: Fix part 3 --- report/paper.md | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/report/paper.md b/report/paper.md index c2c1a56..b76ba5b 100644 --- a/report/paper.md +++ b/report/paper.md @@ -24,8 +24,6 @@ A significant improvement to this vanilla architecture is Deep Convolutional Gen It is possible to artificially balance the number of steps between G and D backpropagation, however we think with a solid GAN structure this step is not really needed. Updating D more frequently than G resulted in additional cases of mode collapse due to the vanishing gradient issue. Updating G more frequently has not proved to be beneficial either, as the discriminator did not learn how to distinguish real samples from fake samples quickly enough. -For this reasons the following sections will not present any artificial balancing of G-D training steps, opting for a standard single step update for both -discriminator and generator. # DCGAN @@ -84,6 +82,18 @@ Applying Virtual Batch Normalization our Medium DCGAN does not provide observabl We evaluated the effect of different dropout rates (results in appendix figures \ref{fig:dcdrop1_1}, \ref{fig:dcdrop1_2}, \ref{fig:dcdrop2_1}, \ref{fig:dcdrop2_2}) and concluded that the optimisation of the dropout hyper-parameter is essential for maximising performance. A high dropout rate results in DCGAN producing only artifacts that do not match any specific class due to the generator performing better than the discriminator. Conversely a low dropout rate leads to an initial stabilisation of G-D losses, but ultimately results in instability under the form of oscillation when training for a large number of batches. 
+Trying different parameters for artificial G-D balancing in the training stage did not achieve any significant benefits as discussed in section I, +exclusively leading to the generation of more artifacts (figure \ref{fig:baldc}). We also attempted to increase the D training steps with respect to G, +but no mode collapse was observed even with the shallow model. + +\begin{figure} +\begin{center} +\includegraphics[width=12em]{fig/bal4.png} +\caption{DCGAN Balancing G-D; D/G=3} +\label{fig:baldc} +\end{center} +\end{figure} + While training the different proposed DCGAN architectures, we did not observe mode collapse, indicating the DCGAN is less prone to a collapse compared to our *vanilla GAN*. # CGAN @@ -118,8 +128,7 @@ When comparing the three levels of depth for the architectures it is possible to a shallow architecture we notice a high oscillation of the generator loss (figure \ref{fig:cshort}), which is being overpowered by the discriminator. Despite this we don't experience any issues with vanishing gradient, hence no mode collapse is reached. Similarly, with a deep architecture the discriminator still overpowers the generator, and an equilibrium between the two losses is not achieved. The image quality in both cases is not really high: we can see that even after 20,000 batches the some pictures appear to be slightly blurry (figure \ref{fig:clong}). -The best compromise is reached for 3 Dense-LeakyReLu-BN blocks as shown in figure \ref{fig:cmed}. It is possible to observe that G-D losses are perfectly balanced, -and their value goes below 1, meaning the GAN is approaching the theoretical Nash Equilibrium of 0.5. +The best compromise is reached for 3 Dense-LeakyReLu-BN blocks as shown in figure \ref{fig:cmed}. It is possible to observe that G-D losses are perfectly balanced, and their value goes below 1. The image quality is better than the two examples reported earlier, proving that this Medium-depth architecture is the best compromise. 
\begin{figure} @@ -135,13 +144,12 @@ The three levels of dropout rates attempted do not affect the performance signif image quality and G-D losses are comparable. The biggest improvement in performance is obtained through one-sided label smoothing, shifting the true labels form 1 to 0.9 to reinforce discriminator behaviour. -Using 0.1 instead of zero for the fake labels does not improve performance, as the discriminator loses incentive to do better (generator behaviour is reinforced). Performance results for -one-sided labels smoothing with true labels = 0.9 are shown in figure \ref{fig:smooth}. +Using 0.1 instead of zero for the fake labels does not improve performance, as the discriminator loses incentive to do better (generator behaviour is reinforced). +Performance results for one-sided labels smoothing with true labels = 0.9 are shown in figure \ref{fig:smooth}. \begin{figure} \begin{center} \includegraphics[width=24em]{fig/smoothing_ex.png} -\includegraphics[width=24em]{fig/smoothing.png} \caption{One sided label smoothing} \label{fig:smooth} \end{center} @@ -152,11 +160,29 @@ mostly unchanged. The biggest change we expect to see is a lower correlation bet performance when training a classifier with the generated images from CGAN, as we will obtain more diverse images. Training with a larger batch size would show more significant results, but since we set this parameter to 128 the issue of within-batch correlation is limited. +Similarly to DCGAN, changing the G-D steps did not lead to good quality results as it can be seen in figure \ref{fig:cbalance}, in which we tried to train +with D/G=15 for 10,000 batches, trying to initialize good discriminator weights, to then revert to a D/G=1, aiming to balance the losses of the two networks. +Even in the case of a shallow network, in which mode collapse should have been more likely, we observed diversity between the samples produced for +the same classes, indicating that mode collapse still did not occur. 
+
+\begin{figure}
+\begin{center}
+\includegraphics[width=8em]{fig/bal1.png}
+\includegraphics[width=8em]{fig/bal2.png}
+\includegraphics[width=8em]{fig/bal3.png}
+\caption{CGAN G-D balancing results}
+\label{fig:cbalance}
+\end{center}
+\end{figure}
+
+<!-- TODO(review): revise the paragraph below -->
+
 Convolutional CGAN did not achieve better results than our baseline approach for the architecture analyzed, although we believe that it is possible to achieve a better performance by finer tuning of the Convolutional CGAN parameters. Figure \ref{fig:cdcloss} shows a very high oscillation of the generator loss, hence the image quality varies a lot at each training step. Attempting LS on this architecture achieved a similar outcome when compared to the non-convolutional counterpart.
 
+<!-- TODO(review): add figure of Convolutional CGAN generated samples -->
+
 # Inception Score
 
 Inception score is calculated as introduced by Tim Salimans et. al [@improved]. However as we are evaluating MNIST, we use LeNet-5 [@lenet] as the basis of the inceptioen score.
@@ -165,7 +191,7 @@ We use the logits extracted from LeNet:
 $$ \textrm{IS}(x) = \exp(\mathbb{E}_x \left( \textrm{KL} ( p(y\mid x) \| p(y) ) \right) ) $$
 
 We further report the classification accuracy as found with LeNet. For coherence purposes the inception scores were
-calculated training the LeNet classifier under the same conditions across all experiments (100 epochs with SGD optimizer, learning rate = 0.001).
+calculated training the LeNet classifier under the same conditions across all experiments (100 epochs with `SGD`, `learning rate=0.001`). 
\begin{table}[H] \begin{tabular}{llll} @@ -458,6 +484,14 @@ $$ L_{\textrm{total}} = \alpha L_{\textrm{LeNet}} + \beta L_{\textrm{generator}} \end{center} \end{figure} +\begin{figure}[H] +\begin{center} +\includegraphics[width=24em]{fig/smoothing.png} +\caption{CGAN+LS G-D Losses} +\label{fig:smoothgd} +\end{center} +\end{figure} + ## Retrain-Appendix \begin{figure}[H] -- cgit v1.2.3-54-g00ecf From e33168c1471e651527e6d4ae15faaebbcf6fa5d9 Mon Sep 17 00:00:00 2001 From: nunzip Date: Wed, 13 Mar 2019 22:25:16 +0000 Subject: Normalize labels --- ncdcgan.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ncdcgan.py b/ncdcgan.py index ccb99d3..ce6b907 100755 --- a/ncdcgan.py +++ b/ncdcgan.py @@ -157,6 +157,7 @@ class nCDCGAN(): X_train = (X_train.astype(np.float32) - 127.5) / 127.5 X_train = np.expand_dims(X_train, axis=3) y_train = y_train.reshape(-1, 1) + y_train = (y_train.astype(np.float32)-4.5)/4.5 # Adversarial ground truths valid = np.ones((batch_size, 1)) @@ -214,6 +215,7 @@ class nCDCGAN(): r, c = 2, 5 noise = np.random.normal(0, 1, (r * c, 100)) sampled_labels = np.arange(0, 10).reshape(-1, 1) + sampled_labels = (sampled_labels.astype(np.float32)-4.5)/4.5 #using dummy_labels would just print zeros to help identify image quality #dummy_labels = np.zeros(32).reshape(-1, 1) @@ -239,6 +241,7 @@ class nCDCGAN(): noise_test = np.random.normal(0, 1, (10000, 100)) noise_val = np.random.normal(0, 1, (5000, 100)) + ((labels_val.astype(np.float32)-4.5)/4.5) labels_train = np.zeros(55000).reshape(-1, 1) labels_test = np.zeros(10000).reshape(-1, 1) labels_val = np.zeros(5000).reshape(-1, 1) @@ -248,9 +251,9 @@ class nCDCGAN(): labels_test[i*1000:-1] = i labels_val[i*500:-1] = i - train_data = self.generator.predict([noise_train, labels_train]) - test_data = self.generator.predict([noise_test, labels_test]) - val_data = self.generator.predict([noise_val, labels_val]) + train_data = self.generator.predict([noise_train, 
((labels_train.astype(np.float32)-4.5)/4.5)]) + test_data = self.generator.predict([noise_test, ((labels_test.astype(np.float32)-4.5)/4.5)]) + val_data = self.generator.predict([noise_val,((labels_val.astype(np.float32)-4.5)/4.5)]) labels_train = keras.utils.to_categorical(labels_train, 10) labels_test = keras.utils.to_categorical(labels_test, 10) -- cgit v1.2.3-54-g00ecf From 5dabb5d0ba596539901ca7521402618a3b595e5f Mon Sep 17 00:00:00 2001 From: nunzip Date: Wed, 13 Mar 2019 22:33:36 +0000 Subject: Revert "Normalize labels" This reverts commit e33168c1471e651527e6d4ae15faaebbcf6fa5d9. --- ncdcgan.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ncdcgan.py b/ncdcgan.py index ce6b907..ccb99d3 100755 --- a/ncdcgan.py +++ b/ncdcgan.py @@ -157,7 +157,6 @@ class nCDCGAN(): X_train = (X_train.astype(np.float32) - 127.5) / 127.5 X_train = np.expand_dims(X_train, axis=3) y_train = y_train.reshape(-1, 1) - y_train = (y_train.astype(np.float32)-4.5)/4.5 # Adversarial ground truths valid = np.ones((batch_size, 1)) @@ -215,7 +214,6 @@ class nCDCGAN(): r, c = 2, 5 noise = np.random.normal(0, 1, (r * c, 100)) sampled_labels = np.arange(0, 10).reshape(-1, 1) - sampled_labels = (sampled_labels.astype(np.float32)-4.5)/4.5 #using dummy_labels would just print zeros to help identify image quality #dummy_labels = np.zeros(32).reshape(-1, 1) @@ -241,7 +239,6 @@ class nCDCGAN(): noise_test = np.random.normal(0, 1, (10000, 100)) noise_val = np.random.normal(0, 1, (5000, 100)) - ((labels_val.astype(np.float32)-4.5)/4.5) labels_train = np.zeros(55000).reshape(-1, 1) labels_test = np.zeros(10000).reshape(-1, 1) labels_val = np.zeros(5000).reshape(-1, 1) @@ -251,9 +248,9 @@ class nCDCGAN(): labels_test[i*1000:-1] = i labels_val[i*500:-1] = i - train_data = self.generator.predict([noise_train, ((labels_train.astype(np.float32)-4.5)/4.5)]) - test_data = self.generator.predict([noise_test, ((labels_test.astype(np.float32)-4.5)/4.5)]) - val_data = 
self.generator.predict([noise_val,((labels_val.astype(np.float32)-4.5)/4.5)]) + train_data = self.generator.predict([noise_train, labels_train]) + test_data = self.generator.predict([noise_test, labels_test]) + val_data = self.generator.predict([noise_val, labels_val]) labels_train = keras.utils.to_categorical(labels_train, 10) labels_test = keras.utils.to_categorical(labels_test, 10) -- cgit v1.2.3-54-g00ecf