Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/notebook-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ permissions:
jobs:
run-openmpi:

runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v3
Expand All @@ -26,7 +26,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y openmpi-bin
sudo apt-get install -y openmpi-bin libopenmpi-dev
python -m pip install --upgrade pip
pip install .
pip install nbformat mpi4py
Expand All @@ -37,7 +37,7 @@ jobs:

run-local:

runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v3
Expand All @@ -56,7 +56,7 @@ jobs:

run-dask-cluster:

runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v3
Expand Down
89 changes: 89 additions & 0 deletions examples/osu_bw_cupy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Adapted from https://github.com/mpi4py/mpi4py/blob/master/demo/osu_bw.py
# and the CUDA-aware MPI example in the mpi4py tutorial:
# https://mpi4py.readthedocs.io/en/stable/tutorial.html#cuda-aware-mpi-python-gpu-arrays
# Original OSU micro-benchmarks: http://mvapich.cse.ohio-state.edu/benchmarks/

from mpi4py import MPI
import cupy as cp
import os


def osu_bw(
    BENCHMARH="MPI G2G Bandwidth Test",
    skip=10,
    loop=100,
    window_size=64,
    skip_large=2,
    loop_large=20,
    window_size_large=64,
    large_message_size=8192,
    MAX_MSG_SIZE=1 << 22,
):
    """Measure point-to-point bandwidth between two ranks using CuPy buffers.

    Rank 0 posts ``window_size`` non-blocking sends of ``size`` bytes per
    iteration and then waits for a 4-byte acknowledgement from rank 1; rank 1
    posts the matching receives and sends the acknowledgement. Message sizes
    are the powers of two from 1 up to ``MAX_MSG_SIZE`` bytes. Rank 0 prints
    one line per size with the bandwidth computed as
    ``size / 1e6 * loop * window_size / elapsed_seconds``.

    The send/receive buffers are CuPy arrays handed directly to mpi4py, so
    the underlying MPI library presumably has to be CUDA-aware (see the
    mpi4py tutorial on GPU arrays).

    Args:
        BENCHMARH: Title printed in the header line (the parameter name is
            kept as-is for backward compatibility).
        skip: Warm-up iterations excluded from timing for small messages.
        loop: Timed iterations for small messages.
        window_size: Messages in flight per iteration for small messages.
        skip_large: Warm-up iterations once ``size > large_message_size``.
        loop_large: Timed iterations once ``size > large_message_size``.
        window_size_large: Messages in flight per iteration once
            ``size > large_message_size``.
        large_message_size: Threshold in bytes above which the ``*_large``
            settings replace the small-message ones.
        MAX_MSG_SIZE: Largest message size in bytes; also the element count
            of the buffers allocated on the device.

    Raises:
        SystemExit: If the communicator does not have exactly two processes.
        RuntimeError: If, on rank 1, the bytes received for the last message
            size differ from the locally generated reference data.
    """
    comm = MPI.COMM_WORLD
    myid = comm.Get_rank()
    numprocs = comm.Get_size()

    # Check the process count before touching any GPU, so that a wrong launch
    # configuration produces this message rather than a CUDA device error.
    if numprocs != 2:
        if myid == 0:
            errmsg = "This test requires exactly two processes"
        else:
            errmsg = None
        raise SystemExit(errmsg)

    # Wrap the rank onto the devices visible to this process: using the raw
    # rank fails when a rank sees fewer GPUs than its index (e.g. two ranks
    # placed on two single-GPU nodes).
    device_count = cp.cuda.runtime.getDeviceCount()
    cp.cuda.Device(myid % device_count).use()

    s_buf = cp.arange(MAX_MSG_SIZE, dtype='i')
    r_buf = cp.empty_like(s_buf)
    # Make sure the buffers are fully initialised on the device before MPI
    # starts reading from / writing to them.
    cp.cuda.get_current_stream().synchronize()

    if myid == 0:
        print('# %s' % (BENCHMARH,))
        print('# %-8s%20s' % ("Size [B]", "Bandwidth [MB/s]"))

    last_size = 0  # last message size actually exchanged, in bytes
    for size in (2**i for i in range(30)):
        if size > MAX_MSG_SIZE:
            break
        if size > large_message_size:
            # Large messages use fewer iterations; the switch is permanent
            # because sizes only grow from here on.
            skip = skip_large
            loop = loop_large
            window_size = window_size_large

        requests = [MPI.REQUEST_NULL] * window_size

        comm.Barrier()
        if myid == 0:
            s_msg = [s_buf, size, MPI.BYTE]
            r_msg = [r_buf, 4, MPI.BYTE]
            for i in range(loop + skip):
                if i == skip:
                    # Warm-up finished: start the clock.
                    t_start = MPI.Wtime()
                for j in range(window_size):
                    requests[j] = comm.Isend(s_msg, 1, 100)
                MPI.Request.Waitall(requests)
                # 4-byte acknowledgement closes the window.
                comm.Recv(r_msg, 1, 101)
            t_end = MPI.Wtime()
        elif myid == 1:
            s_msg = [s_buf, 4, MPI.BYTE]
            r_msg = [r_buf, size, MPI.BYTE]
            for i in range(loop + skip):
                for j in range(window_size):
                    requests[j] = comm.Irecv(r_msg, 0, 100)
                MPI.Request.Waitall(requests)
                comm.Send(s_msg, 0, 101)

        if myid == 0:
            MB = size / 1e6 * loop * window_size
            s = t_end - t_start
            print('%-10d%20.2f' % (size, MB / s))

        last_size = size

    # Verify the payload on the receiving rank. Both ranks build the same
    # ``s_buf`` contents, and only the first ``last_size`` bytes of ``r_buf``
    # were ever written, so compare exactly that byte range. (The previous
    # whole-buffer ``cp.allclose`` call discarded its result and compared
    # uninitialised memory.)
    if myid == 1 and last_size:
        expected = s_buf.view(cp.uint8)[:last_size]
        received = r_buf.view(cp.uint8)[:last_size]
        if not bool(cp.array_equal(expected, received)):
            raise RuntimeError(
                "Received data does not match the data sent "
                "(%d bytes compared)" % last_size
            )


# Script entry point: run the benchmark with its default settings when the
# file is executed directly (e.g. under mpirun), but not when it is imported.
if __name__ == "__main__":
    osu_bw()
135 changes: 135 additions & 0 deletions examples/osu_bw_mpi4py-cupy.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "86b84834-b048-4832-bd49-f49c29d4fdfb",
"metadata": {
"tags": []
},
"source": [
"## OSU G2G Bandwidth Benchmark with MPI4Py\n",
"In this example we use [IPCMagic](https://github.com/eth-cscs/ipcluster_magic/tree/master) to run a test from the [OSU Bandwidth benchmark](http://mvapich.cse.ohio-state.edu/benchmarks/) with MPI4Py from a Jupyter notebook.\n",
"Using [this example](https://mpi4py.readthedocs.io/en/stable/tutorial.html#cuda-aware-mpi-python-gpu-arrays), we adapted the [osu_bw.py](https://github.com/mpi4py/mpi4py/blob/d0228f0397403ff73d8f41d90d97b411efda6128/demo/osu_bw.py) script from the MPI4Py repository so it uses arrays allocated on the GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b55135ce-b328-4d73-b498-d8369ead8380",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import ipcmagic"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64b6492d-1dbd-441d-95fa-7eb1717e0bdb",
"metadata": {},
"outputs": [],
"source": [
"# os.environ['MPICH_RDMA_ENABLED_CUDA'] = '1' # Enable direct communication between GPUs\n",
"os.environ['MPICH_GPU_SUPPORT_ENABLED'] = '1'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79928bb7-8091-458f-b438-b66e1af23a8b",
"metadata": {},
"outputs": [],
"source": [
"%ipcluster --version"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7d237d6-f385-49c6-9bad-323ba3c2d7a6",
"metadata": {},
"outputs": [],
"source": [
"%ipcluster start -n 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbaea7a2-4171-438d-8ad9-3a00411fd007",
"metadata": {},
"outputs": [],
"source": [
"# Disable IPyParallel's progress bar\n",
"%pxconfig --progress-after -1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12b975be-ceed-412d-9117-b3d8b7904549",
"metadata": {},
"outputs": [],
"source": [
"%%px\n",
"import socket\n",
"\n",
"socket.gethostname()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d81d6c89-d339-45ac-aff7-88a11558ce15",
"metadata": {},
"outputs": [],
"source": [
"%%px\n",
"from osu_bw_cupy import osu_bw"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ebf8f80-37b1-4d74-a911-33c1a32a461d",
"metadata": {},
"outputs": [],
"source": [
"%%px\n",
"osu_bw()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cb656f2-2367-405a-9173-d79fe53281a0",
"metadata": {},
"outputs": [],
"source": [
"%ipcluster stop"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "test env 2",
"language": "python",
"name": "test-env-2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}