Derivare funzioni di matrici

f:\mathbb{R}\to\mathbb{R}\\ f'(x) = \lim_{h\to 0}\frac{f(x+h)-f(x)}{h}\in\mathbb{R}

LIVELLO 1

f:\mathbb{R}^n\to\mathbb{R}\\ \nabla f(x) = [\partial_{x_1}f(x),...,\partial_{x_n}f(x)]\in\mathbb{R}^n\\ \partial_{x_i} f(x) = \lim_{h\to 0}\frac{f(x_1,...,x_{i-1},x_{i}+h,x_{i+1},...,x_n)-f(x)}{h}

LIVELLO 2

f:\mathbb{R}^n\to\mathbb{R}^m\\ f(x) = \begin{bmatrix} f_1(x) & \cdots & f_m(x)\end{bmatrix}\\ \,\\ Jf(x) = \begin{bmatrix} \nabla f_1(x)\\ \vdots \\ \nabla f_m(x)\end{bmatrix}\in \mathbb{R}^{m\times n}

LIVELLO 3

f:\mathbb{R}^{m\times n}\to\mathbb{R}\\ \text{esempio : }f(A)=\frac{1}{2}\sum_{i=1}^m\sum_{j=1}^n A_{ij},\\ \,\\ A = \begin{bmatrix} a_{:1} & a_{:2} & \cdots & a_{:n} \end{bmatrix}\in\mathbb{R}^{m\times n},\\ a:=\mathrm{vec}(A)=\begin{bmatrix} a_{:1} \\ a_{:2} \\ \vdots \\ a_{:n}\end{bmatrix}\in\mathbb{R}^{mn}

LIVELLO 4

f:\mathbb{R}^{m\times n}\to\mathbb{R}\\ \text{esempio : }f(A)=\frac{1}{2}\sum_{i=1}^m\sum_{j=1}^n A_{ij},\\ a:=\mathrm{vec}(A)=\begin{bmatrix} a_{:1} \\ a_{:2} \\ \vdots \\ a_{:n}\end{bmatrix}\in\mathbb{R}^{mn}\\ \,\\ \nabla_a f(A) = \frac{1}{2}\begin{bmatrix} 1 & 1 & ... & 1 \end{bmatrix}\in\mathbb{R}^{mn}

LIVELLO 4

LIVELLO 5

f:\mathbb{R}^{m\times n}\to\mathbb{R}^{m\times n}\\ \text{esempio : }f(A)=UAV,\,\,U\in\mathbb{R}^{m\times m},\,V\in\mathbb{R}^{n\times n},\\ a:=\mathrm{vec}(A)=\begin{bmatrix} a_{:1} \\ a_{:2} \\ \vdots \\ a_{:n}\end{bmatrix}\in\mathbb{R}^{mn}\\ \mathrm{vec}(f(A))=(V^T\otimes U)a \in\mathbb{R}^{mn} \,\\ \frac{\partial\mathrm{vec}(f(A))}{\partial a}=V^T\otimes U\in\mathbb{R}^{mn\times mn}.

ESEMPIO

x\in\mathbb{R}^n,\sigma(x):=[\sin(x_1),...,\sin(x_n)]\\ F(x)=v^T\sigma(Ax+b)\in\mathbb{R},\,\,v\in\mathbb{R}^m,A\in\mathbb{R}^{m\times n},\,b\in\mathbb{R}^m\\ L(x)=(F(x)-y)^2
x
\sigma(Ax+b)
F(x)
y
L(x)

ESEMPIO

x\in\mathbb{R}^n,\sigma(x):=[\sin(x_1),...,\sin(x_n)]\\ F(x)=v^T\sigma(Ax+b)\in\mathbb{R},\,\,v\in\mathbb{R}^m,A\in\mathbb{R}^{m\times n},\,b\in\mathbb{R}^m\\ L(x)=(F(x)-y)^2
\frac{\partial L(x)}{\partial \mathrm{vec}(A)}=?
\frac{\partial L(x)}{\partial \mathrm{vec}(A)} = 2(z-y)\vert_{z=F(x)}\frac{\partial F(x)}{\partial \mathrm{vec}(A)} =2(F(x)-y)\frac{\partial F(x)}{\partial \mathrm{vec}(A)}

ESEMPIO

x\in\mathbb{R}^n,\sigma(x):=[\sin(x_1),...,\sin(x_n)]\\ F(x)=v^T\sigma(Ax+b)\in\mathbb{R},\,\,v\in\mathbb{R}^m,A\in\mathbb{R}^{m\times n},\,b\in\mathbb{R}^m\\ L(x)=(F(x)-y)^2
\frac{\partial L(x)}{\partial \mathrm{vec}(A)} = 2(z-y)\vert_{z=F(x)}\frac{\partial F(x)}{\partial \mathrm{vec}(A)} =2(F(x)-y)\frac{\partial F(x)}{\partial \mathrm{vec}(A)}
\frac{\partial F(x)}{\partial \mathrm{vec}(A)}=v^T\frac{\partial \sigma(Ax+b)}{\partial \mathrm{vec}(A)}\\ \,\\ =v^T\mathrm{diag}(\sigma'(Ax+b))\frac{\partial (Ax+b)}{\partial \mathrm{vec}(A)}

ESEMPIO

x\in\mathbb{R}^n,\sigma(x):=[\sin(x_1),...,\sin(x_n)]\\ F(x)=v^T\sigma(Ax+b)\in\mathbb{R},\,\,v\in\mathbb{R}^m,A\in\mathbb{R}^{m\times n},\,b\in\mathbb{R}^m\\ L(x)=(F(x)-y)^2
\frac{\partial L(x)}{\partial \mathrm{vec}(A)} = 2(z-y)\vert_{z=F(x)}\frac{\partial F(x)}{\partial \mathrm{vec}(A)} =2(F(x)-y)\frac{\partial F(x)}{\partial \mathrm{vec}(A)}
\frac{\partial F(x)}{\partial \mathrm{vec}(A)}=v^T\mathrm{diag}(\sigma'(Ax+b))\frac{\partial (Ax+b)}{\partial \mathrm{vec}(A)}
Ax = (x^T\otimes I_m)\mathrm{vec}(A),\,\,(x^T\otimes I_m)\in\mathbb{R}^{m\times mn}
\frac{\partial (Ax+b)}{\partial \mathrm{vec}(A)} = x^T\otimes I_m

ESEMPIO

x\in\mathbb{R}^n,\sigma(x):=[\sin(x_1),...,\sin(x_n)]\\ F(x)=v^T\sigma(Ax+b)\in\mathbb{R},\,\,v\in\mathbb{R}^m,A\in\mathbb{R}^{m\times n},\,b\in\mathbb{R}^m\\ L(x)=(F(x)-y)^2
\frac{\partial L(x)}{\partial \mathrm{vec}(A)} = 2(F(x)-y)\left[v^T\mathrm{diag}(\sigma'(Ax+b))(x^T\otimes I_m)\right]

ESEMPIO

deck

By Davide Murari

deck

  • 104