Generative Adversarial Networks (GANs)

Formulation

\begin{aligned} &G(\cdot,\theta_G):\mathbb{R}^z\rightarrow\mathcal{X}\subseteq\mathbb{R}^n\\ &D(\cdot,\theta_D):\mathcal{X}\rightarrow\mathbb{R}\\ &\text{Alternately optimize } \theta_G,\theta_D\\ &\hat\theta_D=\arg\max_{\theta_D}\mathbb{E}_{x\sim \mathbb{P}_{real}}\big[\log(D(x,\theta_D))\big]+\mathbb{E}_{x\sim \mathbb{P}_{fake}}\big[\log(1-D(x,\theta_D))\big]\\ &\hat\theta_G=\arg\max_{\theta_G}\mathbb{E}_{x\sim \mathbb{P}_{fake}}\big[\log(D(x,\hat\theta_D))\big] \end{aligned}

Spectral Normalized GANs

$$\text{Let Discriminator }D(\cdot,\theta_D):\mathbb{R}^n\rightarrow\mathbb{R}\text{ be an }L\text{-layer network}$$

\begin{aligned} &D(x,\theta_D)=W_L\bigg(\phi_{L-1}\Big(W_{L-1}\big(\cdots \phi_1(W_1 x)\big)\Big)\bigg)\\ &W_k\in\mathbb{R}^{h_k\times h_{k-1}},\forall k\in[1\cdots L],h_0=n,h_L=1\\ &\phi_k\in\{\text{ReLU},\text{ LeakyReLU}\},\forall k\in[1\cdots L-1] \end{aligned}

$$\text{Normalize each layer by spectral norm}$$

\begin{aligned} &\bar{W_k}=W_k/\sigma(W_k)\\ &\bar{D}(x,\theta_D)=\bar{W}_L\bigg(\phi_{L-1}\Big(\bar{W}_{L-1}\big(\cdots \phi_1(\bar{W}_1 x)\big)\Big)\bigg)\\ \end{aligned}

Spectral Normalized GANs

$$\text{Normalize each layer by spectral norm}$$

\begin{aligned} &\bar{W}_k=W_k/\sigma(W_k)\Rightarrow\sigma(\bar{W}_k)=\Vert \bar{W}_k\Vert_{Lip}=1\\ &\bar{D}(x,\theta_D)=\bar{W}_L\bigg(\phi_{L-1}\Big(\bar{W}_{L-1}\big(\cdots \phi_1(\bar{W}_1 x)\big)\Big)\bigg)\\ &\Rightarrow\Vert \bar{D}(\cdot,\theta_D)\Vert_{Lip}\le 1 \end{aligned}

$$\text{ReLU}$$

$$\text{LeakyReLU}$$

y= \begin{dcases} x,& \text{if } x\geq 0\\ 0,& \text{otherwise} \end{dcases}
y= \begin{dcases} x,& \text{if } x\geq 0\\ \alpha x,& \text{otherwise} \end{dcases}

Spectral Normalized GANs

\begin{aligned} \Vert \bar{D}(\cdot,\theta_D)\Vert_{Lip}\le 1 \end{aligned}
\begin{aligned} &\text{Conclusion:}\\ &\text{A finite Lipschitz constant is the most important characteristic}\\ &\text{of the Discriminator}\\ \end{aligned}
\Vert W_k\Vert_{Lip}\le K\Leftrightarrow\frac{\Vert W_kx-W_ky\Vert}{\Vert x-y\Vert}\le K,\forall x\neq y\in\mathcal{X}

Gradient Normalized GANs

\begin{aligned} &\text{Suppose Discriminator }f:\mathcal{X}\subseteq\mathbb{R}^n\rightarrow\mathbb{R}\text{ is continuously differentiable,}\\ &\text{then}\\ \end{aligned}
\Vert f\Vert_{Lip}\le 1\Leftrightarrow\Vert\nabla f\Vert\le 1
\begin{aligned} \Vert f(x)-f(y)\Vert&=\Big\Vert\int_y^x\nabla f(r)\cdot dr\Big\Vert\\ &=\Big\Vert\int_0^1\langle \nabla f(x\cdot t+y\cdot(1-t)),x-y\rangle dt\Big\Vert\\ &\le\Big\Vert\int_0^1\Vert \nabla f(x\cdot t+y\cdot(1-t))\Vert\cdot\Vert x-y\Vert\cdot dt\Big\Vert\\ &\le\Big\Vert\int_0^1\Vert x-y\Vert\cdot dt\Big\Vert\\ &=\Vert x-y\Vert \end{aligned}
\begin{aligned} &\text{Proof: }(\Leftarrow)\\ &f\text{ is continuously differentiable}\\ &\Rightarrow\nabla f\text{ is a conservative vector field}\\ &\Rightarrow\text{Path independence of the line integral} \end{aligned}

Gradient Normalized GANs

\begin{aligned} &\text{Define Gradient Normalized Discriminator }\bar{f}=\frac{f}{\Vert\nabla f\Vert_2},\\ &\text{and let} \end{aligned}
f(x)=W_L\bigg(\phi_{L-1}\Big(W_{L-1}\big(\cdots \phi_1(W_1 x)\big)\Big)\bigg)
\begin{aligned} &\text{Suppose }\phi_k\in\{\text{ReLU, LeakyReLU}\},\forall k\in[1\cdots L-1]\text{, then} \end{aligned}
\begin{aligned} &\nabla^2 f=\mathbf{0}\\ &\Rightarrow\Vert\nabla \bar{f}\Vert=\Big\Vert\frac{\nabla f\Vert\nabla f\Vert-f\frac{\nabla^2f\nabla f}{\Vert\nabla f\Vert}}{\Vert\nabla f\Vert^2}\Big\Vert=1\\ \end{aligned}
\begin{aligned} &\text{for almost all points in }\mathcal{X}.\\ &\text{However} \end{aligned}
\nRightarrow\Vert\bar{f}\Vert_{Lip}=1

Generative Adversarial Networks

By w86763777

Generative Adversarial Networks

  • 295