Automatic animations between code
Dr. Srijith Rajamohan
Xarray - uses NetCDF - out-of-core performance extension with Dask
conda install -c conda-forge cupy
@vectorize([float64(float64, float64)],
nopython=True, target='cpu')
def f(x, y):
return x + y
@guvectorize([(int64[:], int64, int64[:])], '(n),()->(n)')
def g(x, y, res):
for i in range(x.shape[0]):
res[i] = x[i] + y
dum_var = np.array([0.0,0.0])
res = numba_svd_derivative(j, self.nrow, self.ncol, self.projection_components, \
self.u, self.v, self.Xorig, self.X, d_sq, dum_var)
Derivative[:,:,:] = res
@guvectorize(
["u8, u8, u8, u8, f8[:,:], f8[:,:], f8[:,:], f8[:,:], f8[:], f8[:], f8[:,:,:]"],
"(), (), (), (), (nrow, nrow), (ncol, ncol), (nrow, ncol), (nrow, ncol), (nrow), (ndim)
-> (ncol, nrow, ndim)", nopython=True, target="cpu")
def numba_svd_derivative(j, nrow, ncol, ncomp, u, v, Xorig, X, d_sq, dum_var, result):
jval = j
for j in prange(0, ncol):
contrib = np.zeros((ncol, ncomp))
loopindex = min(nrow, ncol)
for i in range(0,nrow):
sigma_v = np.zeros((ncol, ncomp))
...
contrib = np.dot(v,contrib)
dX_dw = np.zeros((nrow,ncol))
dX_dw[:,j] = Xorig[:,j]
res_elem1 = np.dot(dX_dw, v[:,0:2])
res_elem2 = np.dot(X, contrib[:,0:2])
result[j,:,:] = res_elem1 + res_elem2
Accelerating SVD with Numba
Python(s) | Numba Object Mode(s) | GUVectorize |
---|---|---|
0.68 | Same as regular Python | 0.008 |
clang++ -I/usr/local/include/eigen3/ test_eigen.cxx -o test_eigen
typedef struct{
int highdim;
int lowdim;
int numrows;
} params;
params parameters;
MatrixXd w_ext;
X = load_csv<MatrixXd>("X.csv"); // templatized function
z = load_csv<MatrixXd>("z.csv"); // templatized function
parameters.highdim = X.cols();
parameters.lowdim = z.cols();
parameters.numrows = X.rows();
X_t = X.transpose();
z_t = z.transpose();
z_product.noalias() = z*z_t;
...
Eigen for MDS Optimization
double nlopt_unconstrained_function_form1(const double *wd)
{
...
// Loop assignment
for(i = 0; i < parameters.numrows ; i++)
{
term1.row(i) = X.row(i).array() * w_ext.row(0).array();
}
//term1 = X.array() * w_ext.row(0).replicate(parameters.numrows,1).array();
amat.noalias() = term1*X_t;
res.noalias() = amat - z_product;
return (res.array()*res.array()).sum();
}
Nlopt Function
double nlopt_unconstrained_function_form1(const double *wd)
{
...
/*for(i = 0; i < parameters.numrows ; i++)
{
term1.row(i) = X.row(i).array() * w_ext.row(0).array();
}*/
// Optimized version
term1 = X.array() * w_ext.row(0).replicate(parameters.numrows,1).array();
amat.noalias() = term1*X_t;
res.noalias() = amat - z_product;
return (res.array()*res.array()).sum();
}
Nlopt Function - Optimized
void nlopt_unconstrained_gradient_form1(const double *wd, double *df_dw)
{
...
term1 = X.array() * w_ext.row(0).replicate(parameters.numrows,1).array();
amat.noalias() = term1*X_t;
res.noalias() = amat - z_product;
sub_term = 2.0 * res;
#pragma omp parallel
{
for(i = 0 ; i < parameters.highdim; i++)
{
weight = wd[i];
intermediate = MatrixXd::Zero(parameters.numrows,parameters.numrows);
for(j = 0; j < parameters.numrows; j++)
{
intermediate.row(j) = X(j,i) * X.col(i);
}
df_dw[i] = (sub_term.array() * intermediate.array()).sum();
}}}
Nlopt Gradient
Dataset | Python(s) | C++(s) | Speedup |
---|---|---|---|
70 x 100 | 0.0896 | 0.00388 | 23x |
40 x 45 | 0.01861 | 0.000624 | 29x |
20 x 25 | 0.0032 | 0.000099 | 32x |
Xtensor-python
Python bindings from the Pybind11 C++ library
Can use the NumPy data structures in C++ using the Python Buffer Protocol
Compile Xtensor C++
g++ -I /PATH_TO_ANACONDA/envs/xtensor_env/include -std=c++17 test.cpp -o test
export CPLUS_INCLUDE_PATH=PATH_TO_ANACONDA/envs/xtensor_env/include/:
PATH_TO_EIGEN/eigen3/
g++ -std=c++14 `python3-config --ldflags` -shared -fPIC -Wl,-undefined,dynamic_lookup
xtensor_python_test.cpp -o xtensor_python_test.so
Python caller
import xtensor_python_test as xt
# Call function from C++ library by passing numpy objects
derivative = xt.imds_wrapper(X,z,np.sqrt(weights),u,s,v)
PYBIND11_MODULE(xtensor_python_test, m)
{
xt::import_numpy();
m.doc() = "Test module for xtensor python bindings";
m.def("imds_wrapper", imds_wrapper, "Wrapper for IMDS in C++");
}
Declare the entry point in C++
xt::xarray<double> imds_wrapper(xt::pyarray<double>& X, xt::pyarray<double>& z, xt::pyarray<double>& weights)
{
Matrix_SVD obj(X, weights, 0, 2);
obj.Compute_SVD_derivative();
}
Xtensor Wrapper in C++
void Optimized_Compute_SVD_derivative()
{
auto contrib = xt::zeros<double>({ncol, projection_components});
auto sigma_v = xt::zeros<double>({ncol, projection_components});
auto Derivative = xt::xarray<double>::from_shape({nrow, ncol, 2});
...
};
void Optimized_Compute_SVD_derivative()
{
...
contrib.setZero(ncol*ncol, projection_components); // Make one big matrix for all 'ncol' weights
for(i = 0; i < nrow; i++)
{
sigma_v.setZero(ncol*ncol,projection_components); // Make one big sigma_v matrix for all 'ncol' weights
for(k = 0; k < k_upper; k++)
{
for(l = 0; l < projection_components; l++)
{
if(k == l) continue;
# block of size 'ncol' x one (or single column), starting at k*ncol, 1
sigma_v.block(k*ncol, l, ncol, 1) = -1.0 * Xorig.row(i).transpose().array() *
(( u(i,k)*v.col(l) + u(i,l)*v.col(k) ) / ( d_sq(k) - d_sq(l))).array();
}
}
contrib += sigma_v;
}
...
Map<Matrix<double,Dynamic,Dynamic>,0,Stride<Dynamic,Dynamic> > M4(contrib.data() + j,
ncol, projection_components, Stride<Dynamic,Dynamic>(contrib.rows(),ncol));
res_elem1 = dX_dw * v.leftCols<2>() + X_V * M4; }}};
Eigen Derivative of SVD
void Optimized_Compute_SVD_derivative()
{
...
contrib.setZero(ncol*ncol, projection_components); // Make one big matrix for all 'ncol' weights
for(i = 0; i < nrow; i++)
{
sigma_v.setZero(ncol*ncol,projection_components); // Make one big sigma_v matrix for all 'ncol' weights
for(k = 0; k < k_upper; k++)
{
for(l = 0; l < projection_components; l++)
{
if(k == l) continue;
sigma_v.block(k*ncol, l, ncol, 1) = -1.0 * Xorig.row(i).transpose().array() *(( u(i,k)*v.col(l) + u(i,l)*v.col(k) ) / ( d_sq(k) - d_sq(l))).array();
}
}
contrib += sigma_v;
}
...
Map<Matrix<double,Dynamic,Dynamic>,0,Stride<Dynamic,Dynamic> > M4(contrib.data() + j,
ncol, projection_components, Stride<Dynamic,Dynamic>(contrib.rows(),ncol));
res_elem1 = dX_dw * v.leftCols<2>() + X_V * M4;
}}};
Eigen Derivative of SVD
void Optimized_Compute_SVD_derivative()
{
...
auto temp = xt::tile(s_in,(1,nrow));
temp = temp.reshape(nrow, s_size);
u_in = xt::linalg::dot(u_in, temp);
contrib.fill(0);
for(i = 0; i < nrow; i++)
{
sigma_v.fill(0); // Make one big sigma_v matrix for all 'ncol' weights
for(k = 0; k < k_upper; k++)
{
for(l = 0; l < projection_components; l++)
{
if(k == l) continue;
lhs = xt::view(arr1, xt::range(k*ncol, k*ncol + ncol), xt::range(l, l + 1))
rhs_term1 = -1.0 * xt::transpose(xt::row(Xorig, i))
rhs_term2 = u(i, k) * xt::col(v, l) + u(i, l) * xt::col(v, k)
/ ( d_sq(k) - d_sq(l) )
lhs = xt::linalg::dot(rhs_term1, rhs_term2)
...
Xtensor Derivative of SVD