在并行实现矩阵乘法等算法时,通常需要把进程映射到二维网格上,这时就要用到 MPI 的笛卡尔拓扑。本文用 Fortran 示例演示 MPI_CART_* 系列函数(MPI_CART_CREATE、MPI_CART_COORDS、MPI_CART_SHIFT、MPI_CART_SUB)的用法,以及网格中相邻进程之间的通信。
! Demonstrates an MPI Cartesian virtual topology on a 4x4 periodic process grid.
!
! Each of the 16 processes:
!   1. creates the Cartesian communicator and looks up its own grid coordinates,
!   2. finds its four neighbours with MPI_CART_SHIFT,
!   3. splits the grid into one sub-communicator per dimension with MPI_CART_SUB,
!   4. sends its rank to its "up" and "left" neighbours and receives the ranks
!      of its "down" and "right" neighbours using non-blocking point-to-point calls.
!
! Must be launched with exactly 16 MPI processes; otherwise every process prints
! a message and the program finalizes without doing any communication.
program cartesian
  implicit none
  include 'mpif.h'

  ! Grid description.
  integer, parameter :: NDIMS  = 2     ! number of Cartesian dimensions
  integer, parameter :: NPROCS = 16    ! required process count (4 x 4)

  ! Indices into nbrs(:).
  integer, parameter :: UP = 1, DOWN = 2, LEFT = 3, RIGHT = 4

  ! Two sends + two receives are posted, so exactly four requests are waited on.
  integer, parameter :: NREQS = 4

  ! Output formats (runtime text unchanged from the original labeled formats).
  character(len=*), parameter :: fmt_topology = &
    "('rank= ',I3,' coords= ',I2,I2,' subcoords= ',I2,I2," // &
    "' neighbors(u,d,l,r)= ',I3,I3,I3,I3)"
  character(len=*), parameter :: fmt_received = &
    "('rank= ',I3,' ',' inbuf(d,r)= ',I3,I3)"

  integer :: numtasks, rank, ierr, tag
  integer :: outbuf, inbuf(2)
  integer :: nbrs(4), dims(NDIMS), coords(NDIMS), subcoords(NDIMS)
  ! The MPI Fortran bindings require LOGICAL for these arguments.
  logical :: periods(NDIMS), reorder, keep_dim0(NDIMS), keep_dim1(NDIMS)
  integer :: reqs(NREQS), stats(MPI_STATUS_SIZE, NREQS)
  integer :: cartcomm, commX, commY

  dims      = [4, 4]
  periods   = [.true., .true.]     ! both dimensions wrap around
  reorder   = .false.              ! keep MPI_COMM_WORLD rank numbering
  keep_dim0 = [.true., .false.]    ! commX retains dimension 0 only
  keep_dim1 = [.false., .true.]    ! commY retains dimension 1 only
  tag       = 1
  inbuf     = MPI_PROC_NULL

  call MPI_INIT(ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, numtasks, ierr)

  if (numtasks == NPROCS) then
    ! MPI_CART_CREATE arguments: input communicator, number of dimensions,
    ! processes per dimension, periodicity per dimension, whether ranks may be
    ! reordered, output Cartesian communicator, error code.
    call MPI_CART_CREATE(MPI_COMM_WORLD, NDIMS, dims, periods, reorder, &
                         cartcomm, ierr)
    call MPI_COMM_RANK(cartcomm, rank, ierr)

    ! MPI_CART_COORDS arguments: Cartesian communicator, rank within it,
    ! number of dimensions, returned coordinates, error code.
    call MPI_CART_COORDS(cartcomm, rank, NDIMS, coords, ierr)

    ! MPI_CART_SHIFT arguments: Cartesian communicator, dimension to shift
    ! along, displacement, returned source rank, returned destination rank,
    ! error code. With displacement +1 the "source" is the neighbour at
    ! coordinate-1 (UP/LEFT) and the "destination" is at coordinate+1
    ! (DOWN/RIGHT).
    call MPI_CART_SHIFT(cartcomm, 0, 1, nbrs(UP),   nbrs(DOWN),  ierr)
    call MPI_CART_SHIFT(cartcomm, 1, 1, nbrs(LEFT), nbrs(RIGHT), ierr)

    ! MPI_CART_SUB arguments: Cartesian communicator, flags selecting which
    ! dimensions to keep, new sub-communicator, error code.
    call MPI_CART_SUB(cartcomm, keep_dim0, commX, ierr)
    call MPI_CART_SUB(cartcomm, keep_dim1, commY, ierr)

    ! The rank inside each one-dimensional sub-communicator is the coordinate
    ! along the retained dimension, so subcoords reproduces what
    ! MPI_CART_COORDS returned in coords (not what MPI_CART_SHIFT returns).
    call MPI_COMM_RANK(commX, subcoords(1), ierr)
    call MPI_COMM_RANK(commY, subcoords(2), ierr)

    write(*, fmt_topology) rank, coords(1), coords(2), &
                           subcoords(1), subcoords(2), &
                           nbrs(UP), nbrs(DOWN), nbrs(LEFT), nbrs(RIGHT)

    ! Exchange ranks with neighbours. The neighbour ranks were obtained from
    ! cartcomm, so the messages are sent on cartcomm as well; this stays
    ! correct even if reorder is later switched to .true.
    outbuf = rank

    ! Vertical: send to UP, receive from DOWN.
    call MPI_ISEND(outbuf,   1, MPI_INTEGER, nbrs(UP),   tag, &
                   cartcomm, reqs(1), ierr)
    call MPI_IRECV(inbuf(1), 1, MPI_INTEGER, nbrs(DOWN), tag, &
                   cartcomm, reqs(2), ierr)

    ! Horizontal: send to LEFT, receive from RIGHT.
    call MPI_ISEND(outbuf,   1, MPI_INTEGER, nbrs(LEFT),  tag, &
                   cartcomm, reqs(3), ierr)
    call MPI_IRECV(inbuf(2), 1, MPI_INTEGER, nbrs(RIGHT), tag, &
                   cartcomm, reqs(4), ierr)

    ! Wait only on the four requests that were actually posted.
    call MPI_WAITALL(NREQS, reqs, stats, ierr)

    write(*, fmt_received) rank, inbuf(1), inbuf(2)

    ! Release the communicators created above.
    call MPI_COMM_FREE(commX, ierr)
    call MPI_COMM_FREE(commY, ierr)
    call MPI_COMM_FREE(cartcomm, ierr)
  else
    print *, 'Must specify', NPROCS, ' processors. Terminating.'
  end if

  call MPI_FINALIZE(ierr)
end program cartesian
输出结果如下:
rank= 0 coords= 0 0 subcoords= 0 0 neighbors(u,d,l,r)= 12 4 3 1
rank= 1 coords= 0 1 subcoords= 0 1 neighbors(u,d,l,r)= 13 5 0 2
rank= 2 coords= 0 2 subcoords= 0 2 neighbors(u,d,l,r)= 14 6 1 3
rank= 3 coords= 0 3 subcoords= 0 3 neighbors(u,d,l,r)= 15 7 2 0
rank= 4 coords= 1 0 subcoords= 1 0 neighbors(u,d,l,r)= 0 8 7 5
rank= 5 coords= 1 1 subcoords= 1 1 neighbors(u,d,l,r)= 1 9 4 6
rank= 6 coords= 1 2 subcoords= 1 2 neighbors(u,d,l,r)= 2 10 5 7
rank= 7 coords= 1 3 subcoords= 1 3 neighbors(u,d,l,r)= 3 11 6 4
rank= 8 coords= 2 0 subcoords= 2 0 neighbors(u,d,l,r)= 4 12 11 9
rank= 9 coords= 2 1 subcoords= 2 1 neighbors(u,d,l,r)= 5 13 8 10
rank= 10 coords= 2 2 subcoords= 2 2 neighbors(u,d,l,r)= 6 14 9 11
rank= 11 coords= 2 3 subcoords= 2 3 neighbors(u,d,l,r)= 7 15 10 8
rank= 12 coords= 3 0 subcoords= 3 0 neighbors(u,d,l,r)= 8 0 15 13
rank= 13 coords= 3 1 subcoords= 3 1 neighbors(u,d,l,r)= 9 1 12 14
rank= 14 coords= 3 2 subcoords= 3 2 neighbors(u,d,l,r)= 10 2 13 15
rank= 15 coords= 3 3 subcoords= 3 3 neighbors(u,d,l,r)= 11 3 14 12
rank= 8 inbuf(d,r)= 12 9
rank= 10 inbuf(d,r)= 14 11
rank= 12 inbuf(d,r)= 0 13
rank= 0 inbuf(d,r)= 4 1
rank= 1 inbuf(d,r)= 5 2
rank= 2 inbuf(d,r)= 6 3
rank= 3 inbuf(d,r)= 7 0
rank= 4 inbuf(d,r)= 8 5
rank= 5 inbuf(d,r)= 9 6
rank= 6 inbuf(d,r)= 10 7
rank= 7 inbuf(d,r)= 11 4
rank= 9 inbuf(d,r)= 13 10
rank= 11 inbuf(d,r)= 15 8
rank= 13 inbuf(d,r)= 1 14
rank= 14 inbuf(d,r)= 2 15
rank= 15 inbuf(d,r)= 3 12