Forward-merge branch-23.10 to branch-23.12 #5594

Merged 2 commits into branch-23.12 from branch-23.10 on Oct 2, 2023
2 changes: 1 addition & 1 deletion ci/test_wheel.sh
@@ -13,7 +13,7 @@ if [[ "$(arch)" == "aarch64" ]]; then
fi

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]
python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/[email protected]

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cuml*.whl)[test]
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -16,12 +16,12 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.7.1
- dask-core==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- dask-ml
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -18,12 +18,12 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.7.1
- dask-core==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- dask-ml
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
6 changes: 3 additions & 3 deletions conda/recipes/cuml/meta.yaml
@@ -76,9 +76,9 @@ requirements:
- cudf ={{ minor_version }}
- cupy >=12.0.0
- dask-cudf ={{ minor_version }}
- dask >=2023.7.1
- dask-core>=2023.7.1
- distributed >=2023.7.1
- dask ==2023.9.2
- dask-core==2023.9.2
- distributed ==2023.9.2
- joblib >=0.11
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
12 changes: 12 additions & 0 deletions cpp/include/cuml/linear_model/qn_mg.hpp
@@ -21,12 +21,24 @@

#include <cumlprims/opg/matrix/data.hpp>
#include <cumlprims/opg/matrix/part_descriptor.hpp>
#include <vector>
using namespace MLCommon;

namespace ML {
namespace GLM {
namespace opg {

/**
* @brief Calculate unique class labels across multiple GPUs in a multi-node environment.
* @param[in] handle: the internal cuml handle object
* @param[in] input_desc: PartDescriptor object for the input
* @param[in] labels: labels data
* @returns host vector that stores the distinct labels
*/
std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels);

/**
* @brief performs MNMG fit operation for the logistic regression using quasi newton methods
* @param[in] handle: the internal cuml handle object
6 changes: 6 additions & 0 deletions cpp/src/glm/qn/mg/qn_mg.cuh
@@ -103,6 +103,12 @@ inline void qn_fit_x_mg(const raft::handle_t& handle,
ML::GLM::opg::qn_fit_mg<T, decltype(loss)>(
handle, pams, loss, X, y, Z, w0_data, f, num_iters, n_samples, rank, n_ranks);
} break;
case QN_LOSS_SOFTMAX: {
ASSERT(C > 2, "qn_mg.cuh: softmax invalid C");
ML::GLM::detail::Softmax<T> loss(handle, D, C, pams.fit_intercept);
ML::GLM::opg::qn_fit_mg<T, decltype(loss)>(
handle, pams, loss, X, y, Z, w0_data, f, num_iters, n_samples, rank, n_ranks);
} break;
default: {
ASSERT(false, "qn_mg.cuh: unknown loss function type (id = %d).", pams.loss);
}
66 changes: 55 additions & 11 deletions cpp/src/glm/qn_mg.cu
@@ -21,15 +21,59 @@
#include <cuml/linear_model/qn.h>
#include <cuml/linear_model/qn_mg.hpp>
#include <raft/core/comms.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/error.hpp>
#include <raft/core/handle.hpp>
#include <raft/label/classlabels.cuh>
#include <raft/util/cudart_utils.hpp>
#include <vector>
using namespace MLCommon;

namespace ML {
namespace GLM {
namespace opg {

template <typename T>
std::vector<T> distinct_mg(const raft::handle_t& handle, T* y, size_t n)
{
cudaStream_t stream = handle.get_stream();
raft::comms::comms_t const& comm = raft::resource::get_comms(handle);
int rank = comm.get_rank();
int n_ranks = comm.get_size();

rmm::device_uvector<T> unique_y(0, stream);
raft::label::getUniquelabels(unique_y, y, n, stream);

rmm::device_uvector<size_t> recv_counts(n_ranks, stream);
auto send_count = raft::make_device_scalar<size_t>(handle, unique_y.size());
comm.allgather(send_count.data_handle(), recv_counts.data(), 1, stream);
comm.sync_stream(stream);

std::vector<size_t> recv_counts_host(n_ranks);
raft::copy(recv_counts_host.data(), recv_counts.data(), n_ranks, stream);

std::vector<size_t> displs(n_ranks);
size_t pos = 0;
for (int i = 0; i < n_ranks; ++i) {
displs[i] = pos;
pos += recv_counts_host[i];
}

rmm::device_uvector<T> recv_buff(displs.back() + recv_counts_host.back(), stream);
comm.allgatherv(
unique_y.data(), recv_buff.data(), recv_counts_host.data(), displs.data(), stream);
comm.sync_stream(stream);

rmm::device_uvector<T> global_unique_y(0, stream);
int n_distinct =
raft::label::getUniquelabels(global_unique_y, recv_buff.data(), recv_buff.size(), stream);

std::vector<T> global_unique_y_host(global_unique_y.size());
raft::copy(global_unique_y_host.data(), global_unique_y.data(), global_unique_y.size(), stream);

return global_unique_y_host;
}

template <typename T>
void qnFit_impl(const raft::handle_t& handle,
const qn_params& pams,
@@ -46,17 +90,6 @@ void qnFit_impl(const raft::handle_t& handle,
int rank,
int n_ranks)
{
switch (pams.loss) {
case QN_LOSS_LOGISTIC: {
RAFT_EXPECTS(
C == 2,
"qn_mg.cu: only the LOGISTIC loss is supported currently. The number of classes must be 2");
} break;
default: {
RAFT_EXPECTS(false, "qn_mg.cu: unknown loss function type (id = %d).", pams.loss);
}
}

auto X_simple = SimpleDenseMat<T>(X, N, D, X_col_major ? COL_MAJOR : ROW_MAJOR);

ML::GLM::opg::qn_fit_x_mg(handle,
@@ -113,6 +146,17 @@ void qnFit_impl(raft::handle_t& handle,
input_desc.uniqueRanks().size());
}

std::vector<float> getUniquelabelsMG(const raft::handle_t& handle,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels)
{
RAFT_EXPECTS(labels.size() == 1,
"getUniqueLabelsMG currently does not accept more than one data chunk");
Matrix::Data<float>* data_y = labels[0];
int n_rows = input_desc.totalElementsOwnedBy(input_desc.rank);
return distinct_mg<float>(handle, data_y->ptr, n_rows);
}

void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_data,
Matrix::PartDescriptor& input_desc,
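
The new distinct_mg helper above derives the global label set in three steps: each rank deduplicates its local labels with raft::label::getUniquelabels, the per-rank unique sets are exchanged with allgatherv (hence the per-rank counts and displacements), and the gathered buffer is deduplicated once more. A toy single-process sketch of that logic in NumPy; the function name and arrays are illustrative only, not part of the PR:

import numpy as np

def distinct_mg_sketch(per_rank_labels):
    """Toy analogue of distinct_mg: per_rank_labels is a list of 1-D arrays,
    one per rank, standing in for each rank's local label partition."""
    # Step 1: each rank computes its local unique labels (getUniquelabels).
    local_unique = [np.unique(y) for y in per_rank_labels]
    # Step 2: allgatherv, so every rank ends up with the concatenation of all
    # local unique sets (sizes differ per rank, hence counts and displacements).
    gathered = np.concatenate(local_unique)
    # Step 3: deduplicate the gathered buffer to get the global label set.
    return np.unique(gathered)

# Example: two ranks holding different (overlapping) label subsets.
print(distinct_mg_sketch([np.array([0.0, 1.0, 1.0]), np.array([1.0, 2.0])]))
# -> [0. 1. 2.]
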
6 changes: 3 additions & 3 deletions dependencies.yaml
@@ -175,10 +175,10 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- cudf==23.10.*
- dask>=2023.7.1
- dask==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- distributed>=2023.7.1
- distributed==2023.9.2
- joblib>=0.11
- numba>=0.57
# TODO: Is scipy really a hard dependency, or should
@@ -192,7 +192,7 @@
- cupy>=12.0.0
- output_types: conda
packages:
- dask-core>=2023.7.1
- dask-core==2023.9.2
- output_types: pyproject
packages:
- *treelite_runtime
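
Across the CI script, conda environments, recipe, and dependencies.yaml, dask, dask-core, and distributed move from a >=2023.7.1 floor to an exact ==2023.9.2 pin, keeping the three packages in lockstep for the 23.10 release; the same pin also lands in python/README.md below. A quick sanity check that an installed environment matches the pin (a sketch, not part of this PR):

import dask
import distributed

# The 23.10 packaging pins both packages to exactly 2023.9.2.
expected = "2023.9.2"
assert dask.__version__ == expected, dask.__version__
assert distributed.__version__ == expected, distributed.__version__
print("dask/distributed pinned at", expected)
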
4 changes: 2 additions & 2 deletions python/README.md
@@ -70,8 +70,8 @@ Packages required for multigpu algorithms*:
- ucx-py version matching the cuML version
- dask-cudf version matching the cuML version
- nccl>=2.5
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2

* this can be avoided with `--singlegpu` argument flag.

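
With the pins in place and the multinomial support added in the changes below (qn_mg and logistic_regression_mg.pyx), the dask estimator can be fit on more than two classes. A rough end-to-end usage sketch, assuming two visible GPUs, a local dask-CUDA cluster, and that cupy-backed dask arrays are accepted inputs for the MNMG estimator; which attributes the dask wrapper exposes afterwards (classes_ here) is also an assumption:

from dask_cuda import LocalCUDACluster
from dask.distributed import Client
import cupy as cp
import dask.array as da
from cuml.dask.linear_model import LogisticRegression

cluster = LocalCUDACluster(n_workers=2)  # assumes two visible GPUs
client = Client(cluster)

# Toy three-class problem, partitioned across the two workers.
X = da.from_array(cp.random.rand(600, 4, dtype=cp.float32), chunks=(300, 4))
y = da.from_array(cp.repeat(cp.arange(3, dtype=cp.float32), 200), chunks=(300,))

clf = LogisticRegression()
clf.fit(X, y)        # multinomial (softmax) fit spread across the workers
print(clf.classes_)  # expected: [0. 1. 2.], assuming classes_ is exposed

client.close()
cluster.close()
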
9 changes: 8 additions & 1 deletion python/cuml/dask/linear_model/logistic_regression.py
@@ -174,4 +174,11 @@ def _create_model(sessionId, datatype, **kwargs):
def _func_fit(f, data, n_rows, n_cols, partsToSizes, rank):
inp_X = concatenate([X for X, _ in data])
inp_y = concatenate([y for _, y in data])
return f.fit([(inp_X, inp_y)], n_rows, n_cols, partsToSizes, rank)
n_ranks = max([p[0] for p in partsToSizes]) + 1
aggregated_partsToSizes = [[i, 0] for i in range(n_ranks)]
for p in partsToSizes:
aggregated_partsToSizes[p[0]][1] += p[1]

return f.fit(
[(inp_X, inp_y)], n_rows, n_cols, aggregated_partsToSizes, rank
)
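
The new aggregation in _func_fit collapses the per-partition (rank, size) pairs into a single entry per rank, which matches the fact that each worker's partitions are concatenated into one (X, y) pair before being handed to fit. A standalone sketch of just that bookkeeping step; the helper name is made up:

def aggregate_parts_to_sizes(parts_to_sizes):
    """Collapse [(rank, size), ...] into [[rank, total_size], ...],
    one entry per rank, mirroring the aggregation done in _func_fit."""
    n_ranks = max(rank for rank, _ in parts_to_sizes) + 1
    aggregated = [[rank, 0] for rank in range(n_ranks)]
    for rank, size in parts_to_sizes:
        aggregated[rank][1] += size
    return aggregated

# Example: rank 0 holds two partitions of 100 and 50 rows, rank 1 holds 80.
print(aggregate_parts_to_sizes([(0, 100), (0, 50), (1, 80)]))
# -> [[0, 150], [1, 80]]
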
28 changes: 23 additions & 5 deletions python/cuml/linear_model/logistic_regression_mg.pyx
@@ -79,11 +79,18 @@ cdef extern from "cuml/linear_model/qn_mg.hpp" namespace "ML::GLM::opg" nogil:
float *f,
int *num_iters) except +

cdef vector[float] getUniquelabelsMG(
const handle_t& handle,
PartDescriptor &input_desc,
vector[floatData_t*] labels) except+


class LogisticRegressionMG(MGFitMixin, LogisticRegression):

def __init__(self, **kwargs):
super(LogisticRegressionMG, self).__init__(**kwargs)
if self.penalty != "l2" and self.penalty != "none":
assert False, "Currently only support 'l2' and 'none' penalty"

@property
@cuml.internals.api_base_return_array_skipall
@@ -102,8 +109,8 @@

self.solver_model.coef_ = value

def prepare_for_fit(self, n_classes):
self.solver_model.qnparams = QNParams(
def create_qnparams(self):
return QNParams(
loss=self.loss,
penalty_l1=self.l1_strength,
penalty_l2=self.l2_strength,
@@ -118,8 +125,11 @@
penalty_normalized=self.penalty_normalized
)

def prepare_for_fit(self, n_classes):
self.solver_model.qnparams = self.create_qnparams()

# modified
qnpams = self.qnparams.params
qnpams = self.solver_model.qnparams.params

# modified qnp
solves_classification = qnpams['loss'] in {
@@ -174,8 +184,14 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
cdef float objective32
cdef int num_iters

# TODO: calculate _num_classes at runtime
self._num_classes = 2
cdef vector[float] c_classes_
c_classes_ = getUniquelabelsMG(
handle_[0],
deref(<PartDescriptor*><uintptr_t>input_desc),
deref(<vector[floatData_t*]*><uintptr_t>y))
self.classes_ = np.sort(list(c_classes_)).astype('float32')

self._num_classes = len(self.classes_)
self.loss = "sigmoid" if self._num_classes <= 2 else "softmax"
self.prepare_for_fit(self._num_classes)
cdef uintptr_t mat_coef_ptr = self.coef_.ptr
Expand All @@ -194,6 +210,8 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
self._num_classes,
<float*> &objective32,
<int*> &num_iters)
else:
assert False, "dtypes other than float32 are currently not supported yet. See issue: https://github.com/rapidsai/cuml/issues/5589"

self.solver_model._calc_intercept()

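
Taken together, the Cython fit path now derives classes_ from the labels gathered across ranks by getUniquelabelsMG instead of hard-coding two classes, and chooses the loss from the class count. A rough Python illustration of that selection step, with made-up label values standing in for the gathered vector:

import numpy as np

# Stand-in for the labels returned by getUniquelabelsMG across all ranks.
c_classes = [1.0, 0.0, 2.0]

classes_ = np.sort(np.asarray(c_classes, dtype="float32"))
num_classes = len(classes_)
# Binary problems keep the sigmoid (logistic) loss; three or more classes
# switch to softmax, which maps to QN_LOSS_SOFTMAX on the C++ side.
loss = "sigmoid" if num_classes <= 2 else "softmax"
print(classes_, num_classes, loss)  # [0. 1. 2.] 3 softmax
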