[Review] Allow lbfgs and admm with dask cupy inputs #89
```diff
@@ -10,7 +10,7 @@
                              gradient_descent, admm)
 from dask_glm.families import Logistic, Normal, Poisson
 from dask_glm.regularizers import Regularizer
-from dask_glm.utils import sigmoid, make_y
+from dask_glm.utils import sigmoid, make_y, maybe_to_cupy


 def add_l1(f, lam):
```
```diff
@@ -46,8 +46,15 @@ def make_intercept_data(N, p, seed=20009):
     [(100, 2, 20009),
      (250, 12, 90210),
      (95, 6, 70605)])
-def test_methods(N, p, seed, opt):
+@pytest.mark.parametrize('is_cupy', [True, False])
+def test_methods(N, p, seed, opt, is_cupy):
     X, y = make_intercept_data(N, p, seed=seed)
+    if is_cupy:
+        cupy = pytest.importorskip('cupy')
+        X = X.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=X.dtype, meta=cupy.asarray(X._meta))
+        y = y.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=y.dtype, meta=cupy.asarray(y._meta))

     coefs = opt(X, y)
     p = sigmoid(X.dot(coefs).compute())
```

Review comment on the is_cupy conversion block: Same as comment above.
```diff
@@ -64,16 +71,25 @@ def test_methods(N, p, seed, opt):
 @pytest.mark.parametrize('N', [1000])
 @pytest.mark.parametrize('nchunks', [1, 10])
 @pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
-def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
+@pytest.mark.parametrize('is_cupy', [True, False])
+def test_basic_unreg_descent(func, kwargs, N, nchunks, family, is_cupy):
     beta = np.random.normal(size=2)
     M = len(beta)
     X = da.random.random((N, M), chunks=(N // nchunks, M))
     y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,))

+    if is_cupy:
+        cupy = pytest.importorskip('cupy')
+        X = X.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=X.dtype, meta=cupy.asarray(X._meta))
+        y = y.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=y.dtype, meta=cupy.asarray(y._meta))
+
     X, y = persist(X, y)

     result = func(X, y, family=family, **kwargs)
     test_vec = np.random.normal(size=2)
+    test_vec = maybe_to_cupy(test_vec, X)

     opt = family.pointwise_loss(result, X, y).compute()
     test_val = family.pointwise_loss(test_vec, X, y).compute()
```

Review comment on the is_cupy conversion block: Ditto.
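`maybe_to_cupy` itself is not shown in these hunks. Judging only from how it is used here (`test_vec = maybe_to_cupy(test_vec, X)`), a plausible sketch of such a helper might look like the following; this is an assumption for illustration, not the actual dask_glm implementation:

```python
# Hypothetical sketch (not the real dask_glm.utils.maybe_to_cupy): move a
# NumPy array to the GPU only when the reference dask array is CuPy-backed.
def maybe_to_cupy(arr, reference):
    try:
        import cupy
    except ImportError:
        return arr
    # Dask arrays expose their block type through ._meta
    if isinstance(getattr(reference, "_meta", None), cupy.ndarray):
        return cupy.asarray(arr)
    return arr
```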
```diff
@@ -90,16 +106,24 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
 @pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
 @pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
 @pytest.mark.parametrize('reg', [r() for r in Regularizer.__subclasses__()])
-def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
+@pytest.mark.parametrize('is_cupy', [True, False])
+def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg, is_cupy):
     beta = np.random.normal(size=2)
     M = len(beta)
     X = da.random.random((N, M), chunks=(N // nchunks, M))
     y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,))
+    if is_cupy:
+        cupy = pytest.importorskip('cupy')
+        X = X.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=X.dtype, meta=cupy.asarray(X._meta))
+        y = y.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=y.dtype, meta=cupy.asarray(y._meta))

     X, y = persist(X, y)

     result = func(X, y, family=family, lamduh=lam, regularizer=reg, **kwargs)
     test_vec = np.random.normal(size=2)
+    test_vec = maybe_to_cupy(test_vec, X)

     f = reg.add_reg_f(family.pointwise_loss, lam)
```

Review comment on the is_cupy conversion block: Ditto.
```diff
@@ -120,8 +144,15 @@ def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
     'threading',
     'multiprocessing'
 ])
-def test_determinism(func, kwargs, scheduler):
+@pytest.mark.parametrize('is_cupy', [True, False])
+def test_determinism(func, kwargs, scheduler, is_cupy):
     X, y = make_intercept_data(1000, 10)
+    if is_cupy:
+        cupy = pytest.importorskip('cupy')
+        X = X.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=X.dtype, meta=cupy.asarray(X._meta))
+        y = y.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=y.dtype, meta=cupy.asarray(y._meta))

     with dask.config.set(scheduler=scheduler):
         a = func(X, y, **kwargs)
```

Review comment on the is_cupy conversion block: Ditto.
(second file in the diff)
```diff
@@ -44,19 +44,39 @@ def test_pr_init(solver):


 @pytest.mark.parametrize('fit_intercept', [True, False])
-@pytest.mark.parametrize('is_sparse', [True, False])
-def test_fit(fit_intercept, is_sparse):
+@pytest.mark.parametrize('is_sparse,is_cupy', [
+    (True, False),
+    (False, False),
+    (False, True)])
+def test_fit(fit_intercept, is_sparse, is_cupy):
     X, y = make_classification(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse)

+    if is_cupy and not is_sparse:
+        cupy = pytest.importorskip('cupy')
+        X = X.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=X.dtype, meta=cupy.asarray(X._meta))
+        y = y.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=y.dtype, meta=cupy.asarray(y._meta))
+
     lr = LogisticRegression(fit_intercept=fit_intercept)
     lr.fit(X, y)
     lr.predict(X)
     lr.predict_proba(X)


 @pytest.mark.parametrize('fit_intercept', [True, False])
-@pytest.mark.parametrize('is_sparse', [True, False])
-def test_lm(fit_intercept, is_sparse):
+@pytest.mark.parametrize('is_sparse,is_cupy', [
+    (True, False),
+    (False, False),
+    (False, True)])
+def test_lm(fit_intercept, is_sparse, is_cupy):
     X, y = make_regression(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse)
+    if is_cupy and not is_sparse:
+        cupy = pytest.importorskip('cupy')
+        X = X.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=X.dtype, meta=cupy.asarray(X._meta))
+        y = y.map_blocks(lambda x: cupy.asarray(x),
+                         dtype=y.dtype, meta=cupy.asarray(y._meta))
     lr = LinearRegression(fit_intercept=fit_intercept)
     lr.fit(X, y)
     lr.predict(X)
```

Review comment, repeated on each is_cupy conversion block: Ditto.
```diff
@@ -65,10 +85,19 @@ def test_lm(fit_intercept, is_sparse):


 @pytest.mark.parametrize('fit_intercept', [True, False])
-@pytest.mark.parametrize('is_sparse', [True, False])
-def test_big(fit_intercept, is_sparse):
+@pytest.mark.parametrize('is_sparse,is_cupy', [
+    (True, False),
+    (False, False),
+    (False, True)])
+def test_big(fit_intercept, is_sparse, is_cupy):
     with dask.config.set(scheduler='synchronous'):
         X, y = make_classification(is_sparse=is_sparse)
+        if is_cupy and not is_sparse:
+            cupy = pytest.importorskip('cupy')
+            X = X.map_blocks(lambda x: cupy.asarray(x),
+                             dtype=X.dtype, meta=cupy.asarray(X._meta))
+            y = y.map_blocks(lambda x: cupy.asarray(x),
+                             dtype=y.dtype, meta=cupy.asarray(y._meta))
         lr = LogisticRegression(fit_intercept=fit_intercept)
         lr.fit(X, y)
         lr.predict(X)
```

Review comment on the is_cupy conversion block: Ditto.
```diff
@@ -78,10 +107,19 @@ def test_big(fit_intercept, is_sparse):


 @pytest.mark.parametrize('fit_intercept', [True, False])
-@pytest.mark.parametrize('is_sparse', [True, False])
-def test_poisson_fit(fit_intercept, is_sparse):
+@pytest.mark.parametrize('is_sparse,is_cupy', [
+    (True, False),
+    (False, False),
+    (False, True)])
+def test_poisson_fit(fit_intercept, is_sparse, is_cupy):
     with dask.config.set(scheduler='synchronous'):
         X, y = make_poisson(is_sparse=is_sparse)
+        if is_cupy and not is_sparse:
+            cupy = pytest.importorskip('cupy')
+            X = X.map_blocks(lambda x: cupy.asarray(x),
+                             dtype=X.dtype, meta=cupy.asarray(X._meta))
+            y = y.map_blocks(lambda x: cupy.asarray(x),
+                             dtype=y.dtype, meta=cupy.asarray(y._meta))
         pr = PoissonRegression(fit_intercept=fit_intercept)
         pr.fit(X, y)
         pr.predict(X)
```

Review comment on the is_cupy conversion block: Ditto.
Reviewer: This looks very convoluted. The best way to generate `X`, a change that could also be made to `make_y`, and how you'd then call them were given as code snippets (not captured in this extract).

NOTE: I didn't actually test the code above, so it may need some fixing.
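The reviewer's snippets are not reproduced above. As an illustration only (the exact code proposed may have differed), generating a CuPy-backed dask array directly, rather than converting each block afterwards, usually looks like this:

```python
# Illustrative sketch: build CuPy-backed chunks at creation time by passing a
# CuPy RandomState to dask.array's RandomState wrapper.
import cupy
import dask.array as da

N, M, nchunks = 1000, 2, 10  # example values matching the test parameters

rs = da.random.RandomState(RandomState=cupy.random.RandomState)
X = rs.random_sample((N, M), chunks=(N // nchunks, M))  # blocks are cupy.ndarray
```

A `make_y` variant could then pick its array module (NumPy or CuPy) from `X`, instead of the arrays being converted after the fact.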
Reviewer: You seem to be using this in 10 or so different tests; it's probably a good idea to write a small util to avoid copying it everywhere.
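A minimal sketch of such a util, with a hypothetical name (this is not code from the PR), could be:

```python
import pytest


def to_dask_cupy(arr):
    # Skip the calling test when CuPy is unavailable, then rebuild the dask
    # array with CuPy-backed blocks and a matching meta.
    cupy = pytest.importorskip('cupy')
    return arr.map_blocks(lambda x: cupy.asarray(x),
                          dtype=arr.dtype, meta=cupy.asarray(arr._meta))
```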
Author: Thank you for the review! I notice that `X` and `y` are generated in different ways in the current tests. The following utils are used:

- X, y = make_intercept_data(N, p, seed=seed)
- X = da.random.random(...) and y = make_y(...)
- X, y = make_classification(...)
- X, y = make_regression(...)
- X, y = make_poisson(...)

I would like cupy inputs to be created using the same functions. Should I go into each of the utils above and add an `is_cupy` branch? What I currently do is just use one util function to convert the dask arrays to cupy after they are created.

Reviewer: I see your point; maybe this is indeed too much work for very little benefit. With that said, I don't necessarily oppose just leaving things as they are now; I'll defer to @mrocklin on whether he thinks it's acceptable. The only real downside I see is people eventually referring to this code and thinking it is a good way to handle such cases: for tests this is OK, but for a real-world application it is not.
Author: Thank you. I wrapped up the repeated code in a util function to make it look nicer.
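With a helper along the lines of the `to_dask_cupy` sketch above (name hypothetical), each test's conversion would collapse to something like:

```python
if is_cupy:
    X = to_dask_cupy(X)
    y = to_dask_cupy(y)
```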