From 39ba9904a68a71cd744651ba68002de2a787f627 Mon Sep 17 00:00:00 2001 From: daxiongshu Date: Sat, 10 Oct 2020 11:35:12 -0400 Subject: [PATCH 1/7] lbfgs works with cupy --- dask_glm/algorithms.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py index 1320e7b..695d97b 100644 --- a/dask_glm/algorithms.py +++ b/dask_glm/algorithms.py @@ -9,7 +9,7 @@ import numpy as np import dask.array as da from scipy.optimize import fmin_l_bfgs_b - +from dask.array.utils import normalize_to_array from dask_glm.utils import dot, normalize, scatter_array, get_distributed_client from dask_glm.families import Logistic @@ -339,19 +339,27 @@ def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4, beta0 = np.zeros(p) def compute_loss_grad(beta, X, y): + beta = _maybe_to_cupy(beta, X) scatter_beta = scatter_array( beta, dask_distributed_client) if dask_distributed_client else beta loss_fn = pointwise_loss(scatter_beta, X, y) gradient_fn = pointwise_gradient(scatter_beta, X, y) loss, gradient = compute(loss_fn, gradient_fn) - return loss, gradient.copy() + return normalize_to_array(loss), normalize_to_array(gradient.copy()) with dask.config.set(fuse_ave_width=0): # optimizations slows this down beta, loss, info = fmin_l_bfgs_b( compute_loss_grad, beta0, fprime=None, args=(X, y), iprint=(verbose > 0) - 1, pgtol=tol, maxiter=max_iter) + beta = _maybe_to_cupy(beta, X) + return beta + +def _maybe_to_cupy(beta, X): + if 'cupy' in str(type(X._meta)): + import cupy + return cupy.asarray(beta) return beta From a429ca3ef519e0d24b623aa09290784fed95ff55 Mon Sep 17 00:00:00 2001 From: Jiwei Liu Date: Wed, 11 Nov 2020 09:07:13 -0800 Subject: [PATCH 2/7] basic works --- dask_glm/algorithms.py | 18 +++++++++++++----- dask_glm/utils.py | 4 ++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py index 695d97b..49ed956 100644 --- a/dask_glm/algorithms.py +++ b/dask_glm/algorithms.py @@ -225,14 +225,22 @@ def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1, def create_local_gradient(func): @functools.wraps(func) def wrapped(beta, X, y, z, u, rho): - return func(beta, X, y) + rho * (beta - z + u) + beta = _maybe_to_cupy(beta, X) + z = _maybe_to_cupy(z, X) + u = _maybe_to_cupy(u, X) + res = func(beta, X, y) + rho * (beta - z + u) + return normalize_to_array(res) return wrapped def create_local_f(func): @functools.wraps(func) def wrapped(beta, X, y, z, u, rho): - return func(beta, X, y) + (rho / 2) * np.dot(beta - z + u, - beta - z + u) + beta = _maybe_to_cupy(beta, X) + z = _maybe_to_cupy(z, X) + u = _maybe_to_cupy(u, X) + res = func(beta, X, y) + (rho / 2) * np.dot(beta - z + u, + beta - z + u) + return normalize_to_array(res) return wrapped f = create_local_f(pointwise_loss) @@ -286,7 +294,7 @@ def wrapped(beta, X, y, z, u, rho): if primal_res < eps_pri and dual_res < eps_dual: break - return z + return _maybe_to_cupy(z, X) def local_update(X, y, beta, z, u, rho, f, fprime, solver=fmin_l_bfgs_b): @@ -357,7 +365,7 @@ def compute_loss_grad(beta, X, y): def _maybe_to_cupy(beta, X): - if 'cupy' in str(type(X._meta)): + if "cupy" in str(type(X)) or 'cupy' in str(type(X._meta)): import cupy return cupy.asarray(beta) return beta diff --git a/dask_glm/utils.py b/dask_glm/utils.py index 0fe3429..aa3572c 100644 --- a/dask_glm/utils.py +++ b/dask_glm/utils.py @@ -41,7 +41,7 @@ def sigmoid(x): @dispatch(object) def exp(A): - return A.exp() + return np.exp(A) @dispatch(float) @@ -91,7 +91,7 @@ def sign(A): @dispatch(object) def log1p(A): - return A.log1p() + return np.log1p(A) @dispatch(np.ndarray) From ef41c89b296815ce454f4f6676e49c25eac6a211 Mon Sep 17 00:00:00 2001 From: Jiwei Liu Date: Wed, 11 Nov 2020 10:12:09 -0800 Subject: [PATCH 3/7] original test passed --- dask_glm/algorithms.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py index 49ed956..958c7e9 100644 --- a/dask_glm/algorithms.py +++ b/dask_glm/algorithms.py @@ -365,7 +365,11 @@ def compute_loss_grad(beta, X, y): def _maybe_to_cupy(beta, X): - if "cupy" in str(type(X)) or 'cupy' in str(type(X._meta)): + """ convert beta, a numpy array, to a cupy array + if X is a cupy array or dask cupy array + """ + if "cupy" in str(type(X)) or \ + hasattr(X, '_meta') and 'cupy' in str(type(X._meta)): import cupy return cupy.asarray(beta) return beta From 2244636728c76e8ee476597b5be7ccdc3ac5e503 Mon Sep 17 00:00:00 2001 From: Jiwei Liu Date: Wed, 11 Nov 2020 11:18:44 -0800 Subject: [PATCH 4/7] add cupy to test_admm --- dask_glm/tests/test_admm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dask_glm/tests/test_admm.py b/dask_glm/tests/test_admm.py index 7b0373a..71b72a2 100644 --- a/dask_glm/tests/test_admm.py +++ b/dask_glm/tests/test_admm.py @@ -46,11 +46,18 @@ def wrapped(beta, X, y, z, u, rho): @pytest.mark.parametrize('N', [1000, 10000]) @pytest.mark.parametrize('nchunks', [5, 10]) @pytest.mark.parametrize('p', [1, 5, 10]) -def test_admm_with_large_lamduh(N, p, nchunks): +@pytest.mark.parametrize('is_cupy', [True, False]) +def test_admm_with_large_lamduh(N, p, nchunks, is_cupy): X = da.random.random((N, p), chunks=(N // nchunks, p)) beta = np.random.random(p) y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,)) + if is_cupy: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) X, y = persist(X, y) z = admm(X, y, regularizer=L1(), lamduh=1e5, rho=20, max_iter=500) From 519b7f7b72afdc92450a841703849e400aaf489e Mon Sep 17 00:00:00 2001 From: Jiwei Liu Date: Thu, 12 Nov 2020 06:03:42 -0800 Subject: [PATCH 5/7] cupy tests passed for lbfgs & admm --- dask_glm/algorithms.py | 31 +++++---------- dask_glm/tests/test_algos_families.py | 41 +++++++++++++++++--- dask_glm/tests/test_estimators.py | 54 +++++++++++++++++++++++---- dask_glm/utils.py | 11 ++++++ 4 files changed, 103 insertions(+), 34 deletions(-) diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py index 958c7e9..e2c4c96 100644 --- a/dask_glm/algorithms.py +++ b/dask_glm/algorithms.py @@ -11,7 +11,7 @@ from scipy.optimize import fmin_l_bfgs_b from dask.array.utils import normalize_to_array -from dask_glm.utils import dot, normalize, scatter_array, get_distributed_client +from dask_glm.utils import dot, normalize, scatter_array, get_distributed_client, maybe_to_cupy from dask_glm.families import Logistic from dask_glm.regularizers import Regularizer @@ -225,9 +225,9 @@ def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1, def create_local_gradient(func): @functools.wraps(func) def wrapped(beta, X, y, z, u, rho): - beta = _maybe_to_cupy(beta, X) - z = _maybe_to_cupy(z, X) - u = _maybe_to_cupy(u, X) + beta = maybe_to_cupy(beta, X) + z = maybe_to_cupy(z, X) + u = maybe_to_cupy(u, X) res = func(beta, X, y) + rho * (beta - z + u) return normalize_to_array(res) return wrapped @@ -235,9 +235,9 @@ def wrapped(beta, X, y, z, u, rho): def create_local_f(func): @functools.wraps(func) def wrapped(beta, X, y, z, u, rho): - beta = _maybe_to_cupy(beta, X) - z = _maybe_to_cupy(z, X) - u = _maybe_to_cupy(u, X) + beta = maybe_to_cupy(beta, X) + z = maybe_to_cupy(z, X) + u = maybe_to_cupy(u, X) res = func(beta, X, y) + (rho / 2) * np.dot(beta - z + u, beta - z + u) return normalize_to_array(res) @@ -294,7 +294,7 @@ def wrapped(beta, X, y, z, u, rho): if primal_res < eps_pri and dual_res < eps_dual: break - return _maybe_to_cupy(z, X) + return maybe_to_cupy(z, X) def local_update(X, y, beta, z, u, rho, f, fprime, solver=fmin_l_bfgs_b): @@ -347,7 +347,7 @@ def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4, beta0 = np.zeros(p) def compute_loss_grad(beta, X, y): - beta = _maybe_to_cupy(beta, X) + beta = maybe_to_cupy(beta, X) scatter_beta = scatter_array( beta, dask_distributed_client) if dask_distributed_client else beta loss_fn = pointwise_loss(scatter_beta, X, y) @@ -360,18 +360,7 @@ def compute_loss_grad(beta, X, y): compute_loss_grad, beta0, fprime=None, args=(X, y), iprint=(verbose > 0) - 1, pgtol=tol, maxiter=max_iter) - beta = _maybe_to_cupy(beta, X) - return beta - - -def _maybe_to_cupy(beta, X): - """ convert beta, a numpy array, to a cupy array - if X is a cupy array or dask cupy array - """ - if "cupy" in str(type(X)) or \ - hasattr(X, '_meta') and 'cupy' in str(type(X._meta)): - import cupy - return cupy.asarray(beta) + beta = maybe_to_cupy(beta, X) return beta diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index 1f1cfc4..f9a75aa 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -10,7 +10,7 @@ gradient_descent, admm) from dask_glm.families import Logistic, Normal, Poisson from dask_glm.regularizers import Regularizer -from dask_glm.utils import sigmoid, make_y +from dask_glm.utils import sigmoid, make_y, maybe_to_cupy def add_l1(f, lam): @@ -46,8 +46,15 @@ def make_intercept_data(N, p, seed=20009): [(100, 2, 20009), (250, 12, 90210), (95, 6, 70605)]) -def test_methods(N, p, seed, opt): +@pytest.mark.parametrize('is_cupy', [True, False]) +def test_methods(N, p, seed, opt, is_cupy): X, y = make_intercept_data(N, p, seed=seed) + if is_cupy: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) coefs = opt(X, y) p = sigmoid(X.dot(coefs).compute()) @@ -64,16 +71,25 @@ def test_methods(N, p, seed, opt): @pytest.mark.parametrize('N', [1000]) @pytest.mark.parametrize('nchunks', [1, 10]) @pytest.mark.parametrize('family', [Logistic, Normal, Poisson]) -def test_basic_unreg_descent(func, kwargs, N, nchunks, family): +@pytest.mark.parametrize('is_cupy', [True, False]) +def test_basic_unreg_descent(func, kwargs, N, nchunks, family, is_cupy): beta = np.random.normal(size=2) M = len(beta) X = da.random.random((N, M), chunks=(N // nchunks, M)) y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,)) + if is_cupy: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = persist(X, y) result = func(X, y, family=family, **kwargs) test_vec = np.random.normal(size=2) + test_vec = maybe_to_cupy(test_vec, X) opt = family.pointwise_loss(result, X, y).compute() test_val = family.pointwise_loss(test_vec, X, y).compute() @@ -90,16 +106,24 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family): @pytest.mark.parametrize('family', [Logistic, Normal, Poisson]) @pytest.mark.parametrize('lam', [0.01, 1.2, 4.05]) @pytest.mark.parametrize('reg', [r() for r in Regularizer.__subclasses__()]) -def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg): +@pytest.mark.parametrize('is_cupy', [True, False]) +def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg, is_cupy): beta = np.random.normal(size=2) M = len(beta) X = da.random.random((N, M), chunks=(N // nchunks, M)) y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,)) + if is_cupy: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) X, y = persist(X, y) result = func(X, y, family=family, lamduh=lam, regularizer=reg, **kwargs) test_vec = np.random.normal(size=2) + test_vec = maybe_to_cupy(test_vec, X) f = reg.add_reg_f(family.pointwise_loss, lam) @@ -120,8 +144,15 @@ def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg): 'threading', 'multiprocessing' ]) -def test_determinism(func, kwargs, scheduler): +@pytest.mark.parametrize('is_cupy', [True, False]) +def test_determinism(func, kwargs, scheduler, is_cupy): X, y = make_intercept_data(1000, 10) + if is_cupy: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) with dask.config.set(scheduler=scheduler): a = func(X, y, **kwargs) diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py index d2212c4..ed84458 100644 --- a/dask_glm/tests/test_estimators.py +++ b/dask_glm/tests/test_estimators.py @@ -44,9 +44,20 @@ def test_pr_init(solver): @pytest.mark.parametrize('fit_intercept', [True, False]) -@pytest.mark.parametrize('is_sparse', [True, False]) -def test_fit(fit_intercept, is_sparse): +@pytest.mark.parametrize('is_sparse,is_cupy', [ + (True, False), + (False, False), + (False, True)]) +def test_fit(fit_intercept, is_sparse, is_cupy): X, y = make_classification(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse) + + if is_cupy and not is_sparse: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) + lr = LogisticRegression(fit_intercept=fit_intercept) lr.fit(X, y) lr.predict(X) @@ -54,9 +65,18 @@ def test_fit(fit_intercept, is_sparse): @pytest.mark.parametrize('fit_intercept', [True, False]) -@pytest.mark.parametrize('is_sparse', [True, False]) -def test_lm(fit_intercept, is_sparse): +@pytest.mark.parametrize('is_sparse,is_cupy', [ + (True, False), + (False, False), + (False, True)]) +def test_lm(fit_intercept, is_sparse, is_cupy): X, y = make_regression(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse) + if is_cupy and not is_sparse: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) lr = LinearRegression(fit_intercept=fit_intercept) lr.fit(X, y) lr.predict(X) @@ -65,10 +85,19 @@ def test_lm(fit_intercept, is_sparse): @pytest.mark.parametrize('fit_intercept', [True, False]) -@pytest.mark.parametrize('is_sparse', [True, False]) -def test_big(fit_intercept, is_sparse): +@pytest.mark.parametrize('is_sparse,is_cupy', [ + (True, False), + (False, False), + (False, True)]) +def test_big(fit_intercept, is_sparse, is_cupy): with dask.config.set(scheduler='synchronous'): X, y = make_classification(is_sparse=is_sparse) + if is_cupy and not is_sparse: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) lr = LogisticRegression(fit_intercept=fit_intercept) lr.fit(X, y) lr.predict(X) @@ -78,10 +107,19 @@ def test_big(fit_intercept, is_sparse): @pytest.mark.parametrize('fit_intercept', [True, False]) -@pytest.mark.parametrize('is_sparse', [True, False]) -def test_poisson_fit(fit_intercept, is_sparse): +@pytest.mark.parametrize('is_sparse,is_cupy', [ + (True, False), + (False, False), + (False, True)]) +def test_poisson_fit(fit_intercept, is_sparse, is_cupy): with dask.config.set(scheduler='synchronous'): X, y = make_poisson(is_sparse=is_sparse) + if is_cupy and not is_sparse: + cupy = pytest.importorskip('cupy') + X = X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + y = y.map_blocks(lambda x: cupy.asarray(x), + dtype=y.dtype, meta=cupy.asarray(y._meta)) pr = PoissonRegression(fit_intercept=fit_intercept) pr.fit(X, y) pr.predict(X) diff --git a/dask_glm/utils.py b/dask_glm/utils.py index aa3572c..c37f0f1 100644 --- a/dask_glm/utils.py +++ b/dask_glm/utils.py @@ -205,3 +205,14 @@ def get_distributed_client(): return get_client() except ValueError: return None + + +def maybe_to_cupy(beta, X): + """ convert beta, a numpy array, to a cupy array + if X is a cupy array or dask cupy array + """ + if "cupy" in str(type(X)) or \ + hasattr(X, '_meta') and 'cupy' in str(type(X._meta)): + import cupy + return cupy.asarray(beta) + return beta From d83f08144aa055d6ebf4d7b3b7c4dc47dd9147b3 Mon Sep 17 00:00:00 2001 From: Jiwei Liu Date: Fri, 13 Nov 2020 06:41:32 -0800 Subject: [PATCH 6/7] add to_dask_cupy_array util --- dask_glm/tests/test_admm.py | 8 +++----- dask_glm/tests/test_algos_families.py | 26 +++++++++----------------- dask_glm/tests/test_estimators.py | 21 +++++---------------- dask_glm/utils.py | 11 +++++++++++ 4 files changed, 28 insertions(+), 38 deletions(-) diff --git a/dask_glm/tests/test_admm.py b/dask_glm/tests/test_admm.py index 71b72a2..648053e 100644 --- a/dask_glm/tests/test_admm.py +++ b/dask_glm/tests/test_admm.py @@ -7,7 +7,7 @@ from dask_glm.algorithms import admm, local_update from dask_glm.families import Logistic, Normal from dask_glm.regularizers import L1 -from dask_glm.utils import make_y +from dask_glm.utils import make_y, to_dask_cupy_array_xy @pytest.mark.parametrize('N', [1000, 10000]) @@ -54,10 +54,8 @@ def test_admm_with_large_lamduh(N, p, nchunks, is_cupy): if is_cupy: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) + X, y = persist(X, y) z = admm(X, y, regularizer=L1(), lamduh=1e5, rho=20, max_iter=500) diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index f9a75aa..04d2690 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -10,7 +10,8 @@ gradient_descent, admm) from dask_glm.families import Logistic, Normal, Poisson from dask_glm.regularizers import Regularizer -from dask_glm.utils import sigmoid, make_y, maybe_to_cupy +from dask_glm.utils import (sigmoid, make_y, maybe_to_cupy, + to_dask_cupy_array_xy) def add_l1(f, lam): @@ -49,12 +50,11 @@ def make_intercept_data(N, p, seed=20009): @pytest.mark.parametrize('is_cupy', [True, False]) def test_methods(N, p, seed, opt, is_cupy): X, y = make_intercept_data(N, p, seed=seed) + if is_cupy: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) + coefs = opt(X, y) p = sigmoid(X.dot(coefs).compute()) @@ -80,10 +80,7 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family, is_cupy): if is_cupy: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) X, y = persist(X, y) @@ -112,12 +109,10 @@ def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg, is_cupy): M = len(beta) X = da.random.random((N, M), chunks=(N // nchunks, M)) y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,)) + if is_cupy: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) X, y = persist(X, y) @@ -149,10 +144,7 @@ def test_determinism(func, kwargs, scheduler, is_cupy): X, y = make_intercept_data(1000, 10) if is_cupy: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) with dask.config.set(scheduler=scheduler): a = func(X, y, **kwargs) diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py index ed84458..b3c13f8 100644 --- a/dask_glm/tests/test_estimators.py +++ b/dask_glm/tests/test_estimators.py @@ -4,6 +4,7 @@ from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression from dask_glm.datasets import make_classification, make_regression, make_poisson from dask_glm.regularizers import Regularizer +from dask_glm.utils import to_dask_cupy_array_xy @pytest.fixture(params=[r() for r in Regularizer.__subclasses__()]) @@ -53,10 +54,7 @@ def test_fit(fit_intercept, is_sparse, is_cupy): if is_cupy and not is_sparse: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) lr = LogisticRegression(fit_intercept=fit_intercept) lr.fit(X, y) @@ -73,10 +71,7 @@ def test_lm(fit_intercept, is_sparse, is_cupy): X, y = make_regression(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse) if is_cupy and not is_sparse: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) lr = LinearRegression(fit_intercept=fit_intercept) lr.fit(X, y) lr.predict(X) @@ -94,10 +89,7 @@ def test_big(fit_intercept, is_sparse, is_cupy): X, y = make_classification(is_sparse=is_sparse) if is_cupy and not is_sparse: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) lr = LogisticRegression(fit_intercept=fit_intercept) lr.fit(X, y) lr.predict(X) @@ -116,10 +108,7 @@ def test_poisson_fit(fit_intercept, is_sparse, is_cupy): X, y = make_poisson(is_sparse=is_sparse) if is_cupy and not is_sparse: cupy = pytest.importorskip('cupy') - X = X.map_blocks(lambda x: cupy.asarray(x), - dtype=X.dtype, meta=cupy.asarray(X._meta)) - y = y.map_blocks(lambda x: cupy.asarray(x), - dtype=y.dtype, meta=cupy.asarray(y._meta)) + X, y = to_dask_cupy_array_xy(X, y, cupy) pr = PoissonRegression(fit_intercept=fit_intercept) pr.fit(X, y) pr.predict(X) diff --git a/dask_glm/utils.py b/dask_glm/utils.py index c37f0f1..1c205de 100644 --- a/dask_glm/utils.py +++ b/dask_glm/utils.py @@ -216,3 +216,14 @@ def maybe_to_cupy(beta, X): import cupy return cupy.asarray(beta) return beta + + +def to_dask_cupy_array(X, cupy): + """ convert a dask numpy array to a dask cupy array + """ + return X.map_blocks(lambda x: cupy.asarray(x), + dtype=X.dtype, meta=cupy.asarray(X._meta)) + + +def to_dask_cupy_array_xy(X, y, cupy): + return dask_array_to_cupy(X, cupy), dask_array_to_cupy(y, cupy) From 3a4a262778622870fa795eabd6cec6b59e69104c Mon Sep 17 00:00:00 2001 From: Jiwei Liu Date: Fri, 13 Nov 2020 06:42:55 -0800 Subject: [PATCH 7/7] fix typo --- dask_glm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_glm/utils.py b/dask_glm/utils.py index 1c205de..0ae88eb 100644 --- a/dask_glm/utils.py +++ b/dask_glm/utils.py @@ -226,4 +226,4 @@ def to_dask_cupy_array(X, cupy): def to_dask_cupy_array_xy(X, y, cupy): - return dask_array_to_cupy(X, cupy), dask_array_to_cupy(y, cupy) + return to_dask_cupy_array(X, cupy), to_dask_cupy_array(y, cupy)