diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py
index e959616..1320e7b 100644
--- a/dask_glm/algorithms.py
+++ b/dask_glm/algorithms.py
@@ -97,7 +97,7 @@ def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs):
     stepSize = 1.0
     recalcRate = 10
     backtrackMult = firstBacktrackMult
-    beta = np.zeros(p)
+    beta = np.zeros_like(X._meta, shape=p)
 
     for k in range(max_iter):
         # how necessary is this recalculation?
@@ -161,7 +161,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs):
     """
     gradient, hessian = family.gradient, family.hessian
     n, p = X.shape
-    beta = np.zeros(p)  # always init to zeros?
+    beta = np.zeros_like(X._meta, shape=p)
     Xbeta = dot(X, beta)
 
     iter_count = 0
@@ -387,7 +387,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
     stepSize = 1.0
     recalcRate = 10
     backtrackMult = firstBacktrackMult
-    beta = np.zeros(p)
+    beta = np.zeros_like(X._meta, shape=p)
     regularizer = Regularizer.get(regularizer)
 
     for k in range(max_iter):
@@ -406,8 +406,8 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
         # Compute the step size
         lf = func
         for ii in range(100):
-            beta = regularizer.proximal_operator(obeta - stepSize * gradient, stepSize * lamduh)
-            step = obeta - beta
+            beta = regularizer.proximal_operator(- stepSize * gradient + obeta, stepSize * lamduh)
+            step = - beta + obeta
             Xbeta = X.dot(beta)
 
             Xbeta, beta = persist(Xbeta, beta)
diff --git a/dask_glm/utils.py b/dask_glm/utils.py
index a189c6a..0fe3429 100644
--- a/dask_glm/utils.py
+++ b/dask_glm/utils.py
@@ -23,7 +23,7 @@ def normalize_inputs(X, y, *args, **kwargs):
                 raise ValueError('Multiple constant columns detected!')
             mean[intercept_idx] = 0
             std[intercept_idx] = 1
-            mean = mean if len(intercept_idx[0]) else np.zeros(mean.shape)
+            mean = mean if len(intercept_idx[0]) else np.zeros_like(X._meta, shape=mean.shape)
             Xn = (X - mean) / std
             out = algo(Xn, y, *args, **kwargs).copy()
             i_adj = np.sum(out * mean / std)
@@ -140,7 +140,7 @@ def add_intercept(X):
         raise NotImplementedError("Can not add intercept to array with "
                                   "unknown chunk shape")
     j, k = X.chunks
-    o = da.ones((X.shape[0], 1), chunks=(j, 1))
+    o = da.ones_like(X, shape=(X.shape[0], 1), chunks=(j, 1))
     if is_dask_array_sparse(X):
         o = o.map_blocks(sparse.COO)
     # TODO: Needed this `.rechunk` for the solver to work