Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make setup async to get rid of setup() #167

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
19 changes: 19 additions & 0 deletions .devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"build": {
"dockerfile": "docker/dev-cuda12.1.dockerfile",
"context": "."
},
"runArgs": ["--gpus", "all"],
"features": {
"ghcr.io/devcontainers/features/github-cli:1": {}
},
"customizations": {
"vscode": {
"extensions": ["ms-vscode.cmake-tools"]
}
},
"remoteEnv": {
"OMPI_ALLOW_RUN_AS_ROOT": "1",
"OMPI_ALLOW_RUN_AS_ROOT_CONFIRM": "1"
}
}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ __pycache__
.*.swp
.idea/
*.so
.venv/
3 changes: 2 additions & 1 deletion docker/dev-cuda12.1.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ WORKDIR ${MSCCLPP_SRC_DIR}
ENV CMAKE_HOME="/tmp/cmake-${CMAKE_VERSION}-linux-x86_64" \
CMAKE_URL="https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz"
RUN curl -L ${CMAKE_URL} -o ${CMAKE_HOME}.tar.gz && \
tar xzf ${CMAKE_HOME}.tar.gz -C /usr/local
tar xzf ${CMAKE_HOME}.tar.gz -C /usr/local && \
rm -rf ${CMAKE_HOME}.tar.gz
ENV PATH="/usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:${PATH}"

# Install pytest & dependencies
Expand Down
162 changes: 162 additions & 0 deletions docs/setup_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation.\n",
"Licensed under the MIT license.\n",
"\n",
"The following example demonstrates how to initialize the MSCCL++ library and perform necessary setup for communicating from GPU kernels. First we define a function for registering memory, making connections and creating channels."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import mscclpp\n",
"\n",
"def setup_channels(comm, memory, proxy_service):\n",
" \"\"\"Register `memory`, connect to every other rank, and build proxy channels.\n",
"\n",
" comm: mscclpp.Communicator shared by all ranks.\n",
" memory: GPU buffer (a cupy array here) to expose to the other ranks.\n",
" proxy_service: mscclpp.ProxyService that will own the semaphores and memories.\n",
"\n",
" Returns a list with one mscclpp.SimpleProxyChannel per remote rank, in the\n",
" order the ranks were connected (all ranks except this one, ascending).\n",
" \"\"\"\n",
" # Register the memory with the communicator\n",
" reg_mem = comm.register_memory(memory.data.ptr, memory.nbytes, mscclpp.Transport.CudaIpc)\n",
"\n",
" # Create connections to all other ranks and exchange registered memories\n",
" connections = []\n",
" remote_memories = []\n",
" for r in range(comm.bootstrap.size):\n",
" if r == comm.bootstrap.rank: # Don't connect to self\n",
" continue\n",
" connections.append(comm.connect(r, 0, mscclpp.Transport.CudaIpc))\n",
" comm.send_memory(reg_mem, r, 0)\n",
" remote_mem = comm.recv_memory(r, 0)\n",
" remote_memories.append(remote_mem)\n",
"\n",
" # Both connections and received remote memories are returned as futures,\n",
" # so we wait for them to complete and unwrap them.\n",
" connections = [conn.get() for conn in connections]\n",
" remote_memories = [mem.get() for mem in remote_memories]\n",
"\n",
" # Finally, create proxy channels for each connection\n",
" proxy_channels = [mscclpp.SimpleProxyChannel(\n",
" proxy_service.proxy_channel(proxy_service.build_and_add_semaphore(comm, conn)),\n",
" proxy_service.add_memory(remote_memories[i]),\n",
" proxy_service.add_memory(reg_mem),\n",
" ) for i, conn in enumerate(connections)]\n",
"\n",
" return proxy_channels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we are ready to write the top-level code for each rank."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import cupy as cp\n",
"\n",
"def run(rank, world_size, if_ip_port_trio):\n",
" \"\"\"Per-rank entry point: select a GPU, bootstrap, set up channels, run the proxy.\n",
"\n",
" rank: index of this process; also selects the CUDA device.\n",
" world_size: total number of participating processes.\n",
" if_ip_port_trio: bootstrap address string, e.g. \"eth0:localhost:50051\".\n",
" \"\"\"\n",
" # Use the right GPU for this rank\n",
" cp.cuda.Device(rank).use()\n",
"\n",
" # Allocate memory on the GPU\n",
" memory = cp.zeros(1024, dtype=cp.int32)\n",
"\n",
" # Initialize a bootstrapper using a known interface/IP/port trio for the root rank\n",
" boot = mscclpp.TcpBootstrap.create(rank, world_size)\n",
" boot.initialize(if_ip_port_trio)\n",
"\n",
" # Create a communicator for the processes in the bootstrapper\n",
" comm = mscclpp.Communicator(boot)\n",
"\n",
" # Create a proxy service, which enables GPU kernels to use connections\n",
" proxy_service = mscclpp.ProxyService()\n",
"\n",
" # Only rank 0 prints, to keep interleaved multi-process output readable\n",
" if rank == 0:\n",
" print(\"Setting up channels\")\n",
" proxy_channels = setup_channels(comm, memory, proxy_service)\n",
"\n",
" if rank == 0:\n",
" print(\"Starting proxy service\")\n",
" proxy_service.start_proxy()\n",
"\n",
" # This is where we could launch a GPU kernel that uses proxy_channels[i].device_handle\n",
" # to initiate communication. See include/mscclpp/proxy_channel_device.hpp for details.\n",
" if rank == 0:\n",
" print(\"GPU kernels that use the proxy go here.\")\n",
"\n",
" if rank == 0:\n",
" print(\"Stopping proxy service\") # was f\"...\" with no placeholders (flake8 F541)\n",
" proxy_service.stop_proxy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, to test the code we can run each process using the `multiprocessing` package."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Setting up channels\n",
"Starting proxy service\n",
"GPU kernels that use the proxy go here.\n",
"Stopping proxy service\n",
"\n",
"Starting proxy service\n",
"GPU kernels that use the proxy go here.\n",
"Stopping proxy service\n"
]
}
],
"source": [
"import multiprocessing as mp\n",
"\n",
"# Launch one process per rank; every rank is given the same bootstrap\n",
"# interface:ip:port trio so they can find each other.\n",
"# NOTE(review): `run` is defined in a notebook cell, so this presumably relies\n",
"# on the 'fork' start method (the Linux default) — 'spawn' cannot pickle\n",
"# notebook-defined functions. Confirm before running on macOS/Windows.\n",
"world_size = 2\n",
"processes = [mp.Process(target=run, args=(rank, world_size, \"eth0:localhost:50051\")) for rank in range(world_size)]\n",
"for p in processes:\n",
" p.start()\n",
"for p in processes:\n",
" p.join()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading
Loading