def get_model():
    """Build the feature-extraction and regression pipeline.

    The steps are constructed in the order they run:

    1. ``reshape``      - view every sample as a 51 x 51 x 51 volume.
    2. ``discritize``   - ``PrimitiveTransformer`` with two states on [0, 1].
    3. ``correlations`` - ``TwoPointCorrelation`` for the (0, 0) pair with
       periodic boundaries.
    4. ``flatten``      - collapse each sample back to a single row.
    5. ``pca``          - keep three components (randomized SVD solver).
    6. ``poly``         - degree-4 polynomial features.
    7. ``regressor``    - linear regression.

    Returns:
        A newly constructed ``Pipeline`` holding the seven steps above.
    """
    to_volume = GenericTransformer(
        lambda batch: batch.reshape(batch.shape[0], 51, 51, 51)
    )
    discretizer = PrimitiveTransformer(n_state=2, min_=0.0, max_=1.0)
    two_point = TwoPointCorrelation(
        periodic_boundary=True,
        correlations=[(0, 0)],
    )
    to_rows = GenericTransformer(
        lambda batch: batch.reshape(batch.shape[0], -1)
    )
    reducer = PCA(n_components=3, svd_solver='randomized')
    expander = PolynomialFeatures(degree=4)
    regressor = LinearRegression(solver_kwargs={"normalize": False})

    # NOTE: the step name 'discritize' is kept exactly as spelled; step names
    # are runtime strings that callers may rely on (presumably for parameter
    # lookup on the pipeline), so renaming it would not be backward-compatible.
    steps = [
        ('reshape', to_volume),
        ('discritize', discretizer),
        ('correlations', two_point),
        ('flatten', to_rows),
        ('pca', reducer),
        ('poly', expander),
        ('regressor', regressor),
    ]
    return Pipeline(steps)
def prepare_data(n_sample, n_chunk):
    """Write a re-chunked subset of the source data to ``x_data.zarr``.

    Opens ``../notebooks/x_data.zarr``, keeps the leading ``n_sample`` rows,
    re-chunks them along the first axis only (every other axis stays in a
    single chunk), and stores the result at ``x_data.zarr`` relative to the
    current working directory, overwriting whatever is already there.

    Args:
        n_sample: Number of leading rows to keep.
        n_chunk: Desired number of chunks along the first axis. Must be at
            least 1. The written array has at most this many chunks.

    Raises:
        ValueError: If ``n_chunk`` is smaller than 1.

    Returns:
        None. The function is called for its side effect on disk.
    """
    if n_chunk < 1:
        raise ValueError(
            "n_chunk must be a positive integer, got {!r}".format(n_chunk)
        )

    x_data = da.from_zarr("../notebooks/x_data.zarr", chunks=(100, -1))
    x_data = x_data[:n_sample]

    # Size the chunks from the rows actually selected, not from the request:
    # the slice above may return fewer than n_sample rows.
    n_rows = x_data.shape[0]

    # Ceiling division keeps the chunk count at or below n_chunk when n_rows
    # is not evenly divisible (floor division would spill into an extra
    # chunk). The max() guards against a zero chunk size when n_chunk
    # exceeds n_rows. For evenly divisible inputs this equals
    # n_sample // n_chunk, so existing callers see the same layout.
    rows_per_chunk = max(1, -(-n_rows // n_chunk))

    x_data = x_data.rechunk((rows_per_chunk,) + x_data.shape[1:])
    x_data.to_zarr('x_data.zarr', overwrite=True)
Single Chunk
20 Chunks