From 2a710755ee65fa3d7b952981a0652600a4162a07 Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Wed, 7 Jun 2023 16:06:55 +0300 Subject: [PATCH 1/8] WIP cuda --- fsgrid.hpp | 92 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 41 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index bd4a25d..aaa6ff0 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -22,6 +22,8 @@ #include #include #include +#include "cuda.h" +#include "cuda_runtime.h" #include #include #include @@ -136,15 +138,19 @@ template class FsGrid : public FsGridTools{ typedef int64_t LocalID; typedef int64_t GlobalID; + // Legacy constructor from coupling reference + FsGrid(std::array globalSize, MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation& coupling) : FsGrid(globalSize, parent_comm, isPeriodic, &coupling) {} + /*! Constructor for this grid. * \param globalSize Cell size of the global simulation domain. * \param MPI_Comm The MPI communicator this grid should use. * \param isPeriodic An array specifying, for each dimension, whether it is to be treated as periodic. */ - FsGrid(std::array globalSize, MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation& coupling) - : globalSize(globalSize),coupling(coupling) { + FsGrid(std::array globalSize, MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation* coupling) + : globalSize(globalSize), coupling(coupling) { int status; int size; + globalSizeTotal = globalSize[0] * globalSize[1] * globalSize[2]; status = MPI_Comm_size(parent_comm, &size); @@ -280,8 +286,9 @@ template class FsGrid : public FsGridTools{ } totalStorageSize *= storageSize[i]; } - data.resize(totalStorageSize); - coupling.setCouplingSize(totalStorageSize); + // data = new std::vector(totalStorageSize); + data = (T*) malloc(totalStorageSize * sizeof(T)); + coupling->setCouplingSize(totalStorageSize); MPI_Datatype mpiTypeT; MPI_Type_contiguous(sizeof(T), MPI_BYTE, &mpiTypeT); @@ -393,6 +400,20 @@ template class FsGrid : public FsGridTools{ } + /*! Sets the data pointer to the given vector + * \param data pointer to the data vector + */ + void setData(T *data) { + this->data = data; + } + + /*! Returns the data pointer + * \return pointer to the data vector + */ + T *getData() { + return data; + } + /*! Finalize instead of destructor, as the MPI calls fail after the main program called MPI_Finalize(). * Cleans up the cartesian communicator */ @@ -495,7 +516,7 @@ template class FsGrid : public FsGridTools{ * \param y The cell's task-local y coordinate * \param z The cell's task-local z coordinate */ - LocalID LocalIDForCoords(int x, int y, int z) { + __host__ __device__ LocalID LocalIDForCoords(int x, int y, int z) { LocalID index=0; if(globalSize[2] > 1) { index += storageSize[0]*storageSize[1]*(stencil+z); @@ -524,8 +545,8 @@ template class FsGrid : public FsGridTools{ numRequests=0; // If previous coupling information was present, remove it. - for(uint i=0; iexternalRank.size(); i++) { + coupling->externalRank[i] = MPI_PROC_NULL; } for(int z=0; z class FsGrid : public FsGridTools{ // Calculate LocalID for this cell LocalID thisCell = LocalIDForCoords(x,y,z); assert(numRequests < requests.size()); - assert(thisCell < coupling.externalRank.size()); - status = MPI_Irecv(&coupling.externalRank[thisCell], 1, MPI_INT, MPI_ANY_SOURCE, thisCell, comm3d, + assert(thisCell < coupling->externalRank.size()); + status = MPI_Irecv(&coupling->externalRank[thisCell], 1, MPI_INT, MPI_ANY_SOURCE, thisCell, comm3d, &requests[numRequests++]); if(status != MPI_SUCCESS) { std::cerr << "Error setting up MPI Irecv in FsGrid::setupForGridCoupling" << std::endl; @@ -593,7 +614,7 @@ template class FsGrid : public FsGridTools{ // Calculate LocalID for this cell LocalID thisCell = LocalIDForCoords(x,y,z); assert(numRequests < requests.size()); - status = MPI_Irecv(get(thisCell), sizeof(T), MPI_BYTE, coupling.externalRank[thisCell], + status = MPI_Irecv(get(thisCell), sizeof(T), MPI_BYTE, coupling->externalRank[thisCell], thisCell, comm3d, &requests[numRequests++]); if(status != MPI_SUCCESS) { std::cerr << "Error setting up MPI Irecv in FsGrid::setupForTransferIn" << std::endl; @@ -677,7 +698,7 @@ template class FsGrid : public FsGridTools{ // Calculate LocalID for this cell LocalID thisCell = LocalIDForCoords(x,y,z); assert(numRequests < requests.size()); - status = MPI_Isend(get(thisCell), sizeof(T), MPI_BYTE, coupling.externalRank[thisCell], thisCell, comm3d, + status = MPI_Isend(get(thisCell), sizeof(T), MPI_BYTE, coupling->externalRank[thisCell], thisCell, comm3d, &requests[numRequests++]); if(status != MPI_SUCCESS) { std::cerr << "Error setting up MPI Isend in FsGrid::setupForTransferOut" << std::endl; @@ -710,7 +731,8 @@ template class FsGrid : public FsGridTools{ int receiveId = (1 - x) * 9 + ( 1 - y) * 3 + ( 1 - z); if(neighbour[receiveId] != MPI_PROC_NULL && neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { - MPI_Irecv(data.data(), 1, neighbourReceiveType[shiftId], neighbour[receiveId], shiftId, comm3d, &(receiveRequests[shiftId])); + // MPI_Irecv(data.data(), 1, neighbourReceiveType[shiftId], neighbour[receiveId], shiftId, comm3d, &(receiveRequests[shiftId])); + MPI_Irecv(data, 1, neighbourReceiveType[shiftId], neighbour[receiveId], shiftId, comm3d, &(receiveRequests[shiftId])); } } } @@ -723,7 +745,8 @@ template class FsGrid : public FsGridTools{ int sendId = shiftId; if(neighbour[sendId] != MPI_PROC_NULL && neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { - MPI_Isend(data.data(), 1, neighbourSendType[shiftId], neighbour[sendId], shiftId, comm3d, &(sendRequests[shiftId])); + // MPI_Isend(data.data(), 1, neighbourSendType[shiftId], neighbour[sendId], shiftId, comm3d, &(sendRequests[shiftId])); + MPI_Isend(data, 1, neighbourSendType[shiftId], neighbour[sendId], shiftId, comm3d, &(sendRequests[shiftId])); } } } @@ -733,11 +756,14 @@ template class FsGrid : public FsGridTools{ } - + std::array getStorageSize() { + return storageSize; + } /*! Get the size of the local domain handled by this grid. */ - std::array& getLocalSize() { + int32_t* getLocalSize() { + // std::array& getLocalSize() { return localSize; } @@ -776,7 +802,7 @@ template class FsGrid : public FsGridTools{ * \param z z-Coordinate, in cells * \return A reference to cell data in the given cell */ - T* get(int x, int y, int z) { + __host__ __device__ T* get(int x, int y, int z) { // Keep track which neighbour this cell actually belongs to (13 = ourself) int isInNeighbourDomain=13; @@ -877,9 +903,9 @@ template class FsGrid : public FsGridTools{ } T* get(LocalID id) { - if(id < 0 || (unsigned int)id > data.size()) { + if(id < 0 || (unsigned int)id > globalSizeTotal) { std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl - << "(LocalID = " << id << ", but storage space is " << data.size() + << "(LocalID = " << id << ", but storage space is " << globalSizeTotal << ". Expect weirdness." << std::endl; return NULL; } @@ -961,25 +987,6 @@ template class FsGrid : public FsGridTools{ return MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm3d); } - - //! Copy the entire data from another FsGrid of the same signature over. - FsGrid& operator=(const FsGrid& other) { - - // Don't copy if sizes mismatch. - // (Should this instead crash the program?) - if(other.localSize[0] != localSize[0] || - other.localSize[1] != localSize[1] || - other.localSize[2] != localSize[2]) { - return *this; - } - data = other.data; - - return *this; - } - - - - private: //! MPI Cartesian communicator used in this grid MPI_Comm comm3d; @@ -987,7 +994,7 @@ template class FsGrid : public FsGridTools{ std::vector requests; uint numRequests; - std::array neighbour; //!< Tasks of the 26 neighbours (plus ourselves) + int neighbour[27]; //!< Tasks of the 26 neighbours (plus ourselves) std::vector neighbour_index; //!< Lookup table from rank to index in the neighbour array // We have, fundamentally, two different coordinate systems we're dealing with: @@ -998,13 +1005,15 @@ template class FsGrid : public FsGridTools{ std::array periodic; //!< Information about whether a given direction is periodic std::array globalSize; //!< Global size of the simulation space, in cells - std::array localSize; //!< Local size of simulation space handled by this task (without ghost cells) + int32_t globalSizeTotal; //!< Total number of cells in the simulation space + int32_t localSize[3]; + // std::array localSize; //!< Local size of simulation space handled by this task (without ghost cells) std::array storageSize; //!< Local size of simulation space handled by this task (including ghost cells) std::array localStart; //!< Offset of the local //!coordinate system against //!the global one - FsGridCouplingInformation& coupling; // Information required to couple to external grids + FsGridCouplingInformation* coupling; // Information required to couple to external grids std::array neighbourSendType; //!< Datatype for sending data std::array neighbourReceiveType; //!< Datatype for receiving data @@ -1012,7 +1021,8 @@ template class FsGrid : public FsGridTools{ //! Actual storage of field data - std::vector data; + // std::vector* data; + T* data; //! Helper function: given a global cellID, calculate the global cell coordinate from it. // This is then used do determine the task responsible for this cell, and the From 43289e782d03bf1566264a9a21f000c9c7aa4119 Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Mon, 26 Jun 2023 10:37:58 +0300 Subject: [PATCH 2/8] Convert fsgrid to C arrays --- fsgrid.hpp | 109 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index aaa6ff0..60aa698 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -66,7 +66,7 @@ struct FsGridTools{ //! Helper function to optimize decomposition of this grid over the given number of tasks - static void computeDomainDecomposition(const std::array& GlobalSize, int nProcs, std::array& processDomainDecomposition) { + static void computeDomainDecomposition(const int GlobalSize[3], int nProcs, std::array& processDomainDecomposition) { std::array systemDim; std::array processBox; double optimValue = std::numeric_limits::max(); @@ -131,23 +131,25 @@ struct FsGridCouplingInformation { * \param T datastructure containing the field in each cell which this grid manages * \param stencil ghost cell width of this grid */ -template class FsGrid : public FsGridTools{ +template class FsGrid : public FsGridTools{ public: typedef int64_t LocalID; typedef int64_t GlobalID; + int diocane; + T* data; // Legacy constructor from coupling reference - FsGrid(std::array globalSize, MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation& coupling) : FsGrid(globalSize, parent_comm, isPeriodic, &coupling) {} + FsGrid(int32_t globalSize[3], MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation& coupling) : FsGrid(globalSize, parent_comm, isPeriodic, &coupling) {} /*! Constructor for this grid. * \param globalSize Cell size of the global simulation domain. * \param MPI_Comm The MPI communicator this grid should use. * \param isPeriodic An array specifying, for each dimension, whether it is to be treated as periodic. */ - FsGrid(std::array globalSize, MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation* coupling) - : globalSize(globalSize), coupling(coupling) { + FsGrid(int32_t globalSize[3], MPI_Comm parent_comm, std::array isPeriodic, FsGridCouplingInformation* coupling) + : globalSize{globalSize[0], globalSize[1], globalSize[2]}, coupling(coupling) { int status; int size; globalSizeTotal = globalSize[0] * globalSize[1] * globalSize[2]; @@ -275,7 +277,7 @@ template class FsGrid : public FsGridTools{ } // Allocate local storage array - size_t totalStorageSize=1; + totalStorageSize=1; for(int i=0; i<3; i++) { if(globalSize[i] <= 1) { // Collapsed dimension => only one cell thick @@ -287,11 +289,11 @@ template class FsGrid : public FsGridTools{ totalStorageSize *= storageSize[i]; } // data = new std::vector(totalStorageSize); - data = (T*) malloc(totalStorageSize * sizeof(T)); + data = (T*) malloc(totalStorageSize * TDim * sizeof(T)); coupling->setCouplingSize(totalStorageSize); MPI_Datatype mpiTypeT; - MPI_Type_contiguous(sizeof(T), MPI_BYTE, &mpiTypeT); + MPI_Type_contiguous(TDim * sizeof(T), MPI_BYTE, &mpiTypeT); for(int x=-1; x<=1;x++) { for(int y=-1; y<=1;y++) { for(int z=-1; z<=1; z++) { @@ -307,8 +309,8 @@ template class FsGrid : public FsGridTools{ for(int x=-1; x<=1;x++) { for(int y=-1; y<=1;y++) { for(int z=-1; z<=1; z++) { - std::array subarraySize; - std::array subarrayStart; + int subarraySize[3]; + int subarrayStart[3]; const int shiftId = (x+1) * 9 + (y + 1) * 3 + (z + 1); @@ -343,14 +345,14 @@ template class FsGrid : public FsGridTools{ if(storageSize[i] == 1) subarrayStart[i] = 0; - std::array swappedStorageSize = storageSize; + int swappedStorageSize[3] = {storageSize[0], storageSize[1], storageSize[2]}; swapArray(swappedStorageSize); swapArray(subarraySize); swapArray(subarrayStart); MPI_Type_create_subarray(3, - swappedStorageSize.data(), - subarraySize.data(), - subarrayStart.data(), + swappedStorageSize, + subarraySize, + subarrayStart, MPI_ORDER_C, mpiTypeT, &(neighbourSendType[shiftId]) ); @@ -379,9 +381,9 @@ template class FsGrid : public FsGridTools{ swapArray(subarrayStart); MPI_Type_create_subarray(3, - swappedStorageSize.data(), - subarraySize.data(), - subarrayStart.data(), + swappedStorageSize, + subarraySize, + subarrayStart, MPI_ORDER_C, mpiTypeT, &(neighbourReceiveType[shiftId])); @@ -400,6 +402,39 @@ template class FsGrid : public FsGridTools{ } + + class Proxy3 { + public: + __host__ __device__ Proxy3(int x, int y, int z, const FsGrid &obj) : obj(obj), x(x), y(y), z(z) {} + + __host__ __device__ T& operator[](int j) { + return obj.get(x, y, z, j); + } + __host__ __device__ T& at(int j) { + return obj.get(x, y, z, j); + } + + private: + int x, y, z; + FsGrid obj; + }; + + class Proxy1 { + public: + __host__ __device__ Proxy1(int i, const FsGrid &obj) : i(i), obj(obj) {} + + __host__ __device__ T& operator[](int j) { + return obj.get(i, j); + } + __host__ __device__ T& at(int j) { + return obj.get(i, j); + } + + private: + int i; + FsGrid obj; + }; + /*! Sets the data pointer to the given vector * \param data pointer to the data vector */ @@ -756,7 +791,7 @@ template class FsGrid : public FsGridTools{ } - std::array getStorageSize() { + int32_t* getStorageSize() { return storageSize; } @@ -775,7 +810,7 @@ template class FsGrid : public FsGridTools{ /*! Get global size of the fsgrid domain */ - std::array& getGlobalSize() { + __host__ __device__ int* getGlobalSize() { return globalSize; } @@ -802,7 +837,7 @@ template class FsGrid : public FsGridTools{ * \param z z-Coordinate, in cells * \return A reference to cell data in the given cell */ - __host__ __device__ T* get(int x, int y, int z) { + __host__ __device__ T &get(int x, int y, int z, int offset) { // Keep track which neighbour this cell actually belongs to (13 = ourself) int isInNeighbourDomain=13; @@ -886,8 +921,6 @@ template class FsGrid : public FsGridTools{ if(neighbour[isInNeighbourDomain]==MPI_PROC_NULL) { // Neighbour doesn't exist, we must be an outer boundary cell // (or something is quite wrong) - return NULL; - } else if(neighbour[isInNeighbourDomain] == rank) { // For periodic boundaries, where the neighbour is actually ourself, // return our own actual cell instead of the ghost @@ -899,19 +932,28 @@ template class FsGrid : public FsGridTools{ } LocalID index = LocalIDForCoords(x,y,z); - return &data[index]; + return data[index * TDim + offset]; } - T* get(LocalID id) { + __host__ __device__ T &get(LocalID id, int offset) { if(id < 0 || (unsigned int)id > globalSizeTotal) { - std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl - << "(LocalID = " << id << ", but storage space is " << globalSizeTotal - << ". Expect weirdness." << std::endl; - return NULL; + #ifndef __CUDA_ARCH__ + std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl + << "(LocalID = " << id << ", but storage space is " << globalSizeTotal + << ". Expect weirdness." << std::endl; + #endif } - return &data[id]; + return data[id * TDim + offset]; + } + + __host__ __device__ Proxy1 get(int i) const { + return Proxy1(i, *this); } + __host__ __device__ Proxy3 get(int x, int y, int z) const { + return Proxy3(x, y, z, *this); + } + /*! Physical grid spacing and physical coordinate space start. * TODO: Should this be private and have accesor-functions? */ @@ -1004,11 +1046,12 @@ template class FsGrid : public FsGridTools{ // 2) Cell numbers in global and local view std::array periodic; //!< Information about whether a given direction is periodic - std::array globalSize; //!< Global size of the simulation space, in cells + int32_t totalStorageSize; //!< Total number of cells in the local storage, including ghost cells + int32_t globalSize[3]; //!< Global size of the simulation space, in cells int32_t globalSizeTotal; //!< Total number of cells in the simulation space int32_t localSize[3]; // std::array localSize; //!< Local size of simulation space handled by this task (without ghost cells) - std::array storageSize; //!< Local size of simulation space handled by this task (including ghost cells) + int32_t storageSize[3]; //!< Local size of simulation space handled by this task (including ghost cells) std::array localStart; //!< Offset of the local //!coordinate system against //!the global one @@ -1022,7 +1065,7 @@ template class FsGrid : public FsGridTools{ //! Actual storage of field data // std::vector* data; - T* data; + //! Helper function: given a global cellID, calculate the global cell coordinate from it. // This is then used do determine the task responsible for this cell, and the @@ -1044,7 +1087,7 @@ template class FsGrid : public FsGridTools{ return cell; } - void swapArray(std::array& array) { + void swapArray(int array[3]) { int a = array[0]; array[0] = array[2]; array[2] = a; From 737b8e9255e99f9b800be56a27c1a3ad47aa6680 Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Tue, 27 Jun 2023 16:49:54 +0300 Subject: [PATCH 3/8] Implement new get methods with proxy and error handling --- fsgrid.hpp | 57 +++++++++++++++++++----------------------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index 60aa698..63f0f92 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -403,36 +403,28 @@ template class FsGrid : public FsGridTools{ } - class Proxy3 { + class Proxy { public: - __host__ __device__ Proxy3(int x, int y, int z, const FsGrid &obj) : obj(obj), x(x), y(y), z(z) {} + __host__ __device__ Proxy() : valid(false) {} + __host__ __device__ Proxy(int i, const FsGrid &obj) : i(i), obj(&obj), valid(true) {} __host__ __device__ T& operator[](int j) { - return obj.get(x, y, z, j); + assert(valid); + return obj->getData(i + j); } __host__ __device__ T& at(int j) { - return obj.get(x, y, z, j); + assert(valid); + return obj->getData(i + j); } - private: - int x, y, z; - FsGrid obj; - }; - - class Proxy1 { - public: - __host__ __device__ Proxy1(int i, const FsGrid &obj) : i(i), obj(obj) {} - - __host__ __device__ T& operator[](int j) { - return obj.get(i, j); - } - __host__ __device__ T& at(int j) { - return obj.get(i, j); + __host__ __device__ bool isValid() { + return valid; } private: + bool valid; int i; - FsGrid obj; + const FsGrid* obj; }; /*! Sets the data pointer to the given vector @@ -442,13 +434,6 @@ template class FsGrid : public FsGridTools{ this->data = data; } - /*! Returns the data pointer - * \return pointer to the data vector - */ - T *getData() { - return data; - } - /*! Finalize instead of destructor, as the MPI calls fail after the main program called MPI_Finalize(). * Cleans up the cartesian communicator */ @@ -837,7 +822,7 @@ template class FsGrid : public FsGridTools{ * \param z z-Coordinate, in cells * \return A reference to cell data in the given cell */ - __host__ __device__ T &get(int x, int y, int z, int offset) { + __host__ __device__ Proxy get(int x, int y, int z) { // Keep track which neighbour this cell actually belongs to (13 = ourself) int isInNeighbourDomain=13; @@ -911,7 +896,7 @@ template class FsGrid : public FsGridTools{ } if(!inside) { std::cerr << "Out-of bounds access in FsGrid::get! Expect weirdness." << std::endl; - return NULL; + return Proxy(); } #endif // FSGRID_DEBUG @@ -921,6 +906,7 @@ template class FsGrid : public FsGridTools{ if(neighbour[isInNeighbourDomain]==MPI_PROC_NULL) { // Neighbour doesn't exist, we must be an outer boundary cell // (or something is quite wrong) + return Proxy(); } else if(neighbour[isInNeighbourDomain] == rank) { // For periodic boundaries, where the neighbour is actually ourself, // return our own actual cell instead of the ghost @@ -932,28 +918,25 @@ template class FsGrid : public FsGridTools{ } LocalID index = LocalIDForCoords(x,y,z); - return data[index * TDim + offset]; + return Proxy(index * TDim, *this); } - __host__ __device__ T &get(LocalID id, int offset) { + __host__ __device__ Proxy &get(LocalID id) { if(id < 0 || (unsigned int)id > globalSizeTotal) { #ifndef __CUDA_ARCH__ std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl << "(LocalID = " << id << ", but storage space is " << globalSizeTotal << ". Expect weirdness." << std::endl; #endif + return Proxy(); } - return data[id * TDim + offset]; + return Proxy(id * TDim, *this); } - __host__ __device__ Proxy1 get(int i) const { - return Proxy1(i, *this); + __host__ __device__ T& getData(int i=0) const { + return data[i]; } - __host__ __device__ Proxy3 get(int x, int y, int z) const { - return Proxy3(x, y, z, *this); - } - /*! Physical grid spacing and physical coordinate space start. * TODO: Should this be private and have accesor-functions? */ From 4f2c9f9ce7d541ba32ca6cb2c7bdaabb6579bdd5 Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Wed, 28 Jun 2023 14:52:32 +0300 Subject: [PATCH 4/8] Remove proxy, use plain arrays --- fsgrid.hpp | 43 +++++++++++-------------------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index 63f0f92..03cf8b7 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -402,31 +402,6 @@ template class FsGrid : public FsGridTools{ } - - class Proxy { - public: - __host__ __device__ Proxy() : valid(false) {} - __host__ __device__ Proxy(int i, const FsGrid &obj) : i(i), obj(&obj), valid(true) {} - - __host__ __device__ T& operator[](int j) { - assert(valid); - return obj->getData(i + j); - } - __host__ __device__ T& at(int j) { - assert(valid); - return obj->getData(i + j); - } - - __host__ __device__ bool isValid() { - return valid; - } - - private: - bool valid; - int i; - const FsGrid* obj; - }; - /*! Sets the data pointer to the given vector * \param data pointer to the data vector */ @@ -822,7 +797,7 @@ template class FsGrid : public FsGridTools{ * \param z z-Coordinate, in cells * \return A reference to cell data in the given cell */ - __host__ __device__ Proxy get(int x, int y, int z) { + __host__ __device__ T* get(int x, int y, int z) { // Keep track which neighbour this cell actually belongs to (13 = ourself) int isInNeighbourDomain=13; @@ -896,7 +871,7 @@ template class FsGrid : public FsGridTools{ } if(!inside) { std::cerr << "Out-of bounds access in FsGrid::get! Expect weirdness." << std::endl; - return Proxy(); + return NULL; } #endif // FSGRID_DEBUG @@ -906,7 +881,7 @@ template class FsGrid : public FsGridTools{ if(neighbour[isInNeighbourDomain]==MPI_PROC_NULL) { // Neighbour doesn't exist, we must be an outer boundary cell // (or something is quite wrong) - return Proxy(); + return NULL; } else if(neighbour[isInNeighbourDomain] == rank) { // For periodic boundaries, where the neighbour is actually ourself, // return our own actual cell instead of the ghost @@ -918,19 +893,23 @@ template class FsGrid : public FsGridTools{ } LocalID index = LocalIDForCoords(x,y,z); - return Proxy(index * TDim, *this); + return &data[index * TDim]; } - __host__ __device__ Proxy &get(LocalID id) { + __host__ __device__ T* get(LocalID id) { if(id < 0 || (unsigned int)id > globalSizeTotal) { #ifndef __CUDA_ARCH__ std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl << "(LocalID = " << id << ", but storage space is " << globalSizeTotal << ". Expect weirdness." << std::endl; #endif - return Proxy(); + return NULL; } - return Proxy(id * TDim, *this); + return &data[id * TDim]; + } + + __host__ __device__ T& get(int x, int y, int z, int i) { + return (get(x,y,z)[i]); } __host__ __device__ T& getData(int i=0) const { From e1ae108fc58d3329c55cda3c037d92a4146ee391 Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Thu, 29 Jun 2023 10:15:06 +0300 Subject: [PATCH 5/8] Fix CUDA annotations for cpu build --- fsgrid.hpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index 03cf8b7..eef4302 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -22,12 +22,18 @@ #include #include #include -#include "cuda.h" -#include "cuda_runtime.h" #include #include #include #include +#ifdef __CUDACC__ + #define ARCH_HOSTDEV __host__ __device__ + #include "cuda.h" + #include "cuda_runtime.h" +#else + #define ARCH_HOSTDEV +#endif + @@ -511,7 +517,7 @@ template class FsGrid : public FsGridTools{ * \param y The cell's task-local y coordinate * \param z The cell's task-local z coordinate */ - __host__ __device__ LocalID LocalIDForCoords(int x, int y, int z) { + ARCH_HOSTDEV LocalID LocalIDForCoords(int x, int y, int z) { LocalID index=0; if(globalSize[2] > 1) { index += storageSize[0]*storageSize[1]*(stencil+z); @@ -770,7 +776,7 @@ template class FsGrid : public FsGridTools{ /*! Get global size of the fsgrid domain */ - __host__ __device__ int* getGlobalSize() { + ARCH_HOSTDEV int* getGlobalSize() { return globalSize; } @@ -797,7 +803,7 @@ template class FsGrid : public FsGridTools{ * \param z z-Coordinate, in cells * \return A reference to cell data in the given cell */ - __host__ __device__ T* get(int x, int y, int z) { + ARCH_HOSTDEV T* get(int x, int y, int z) { // Keep track which neighbour this cell actually belongs to (13 = ourself) int isInNeighbourDomain=13; @@ -896,7 +902,7 @@ template class FsGrid : public FsGridTools{ return &data[index * TDim]; } - __host__ __device__ T* get(LocalID id) { + ARCH_HOSTDEV T* get(LocalID id) { if(id < 0 || (unsigned int)id > globalSizeTotal) { #ifndef __CUDA_ARCH__ std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl @@ -908,11 +914,11 @@ template class FsGrid : public FsGridTools{ return &data[id * TDim]; } - __host__ __device__ T& get(int x, int y, int z, int i) { + ARCH_HOSTDEV T& get(int x, int y, int z, int i) { return (get(x,y,z)[i]); } - __host__ __device__ T& getData(int i=0) const { + ARCH_HOSTDEV T& getData(int i=0) const { return data[i]; } From 1c0f3b534d5b4001d296e2221669d66380870ecf Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Thu, 29 Jun 2023 13:38:15 +0300 Subject: [PATCH 6/8] Fix get methods using wrong storage size information --- fsgrid.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index eef4302..1bc439d 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -158,7 +158,6 @@ template class FsGrid : public FsGridTools{ : globalSize{globalSize[0], globalSize[1], globalSize[2]}, coupling(coupling) { int status; int size; - globalSizeTotal = globalSize[0] * globalSize[1] * globalSize[2]; status = MPI_Comm_size(parent_comm, &size); @@ -903,10 +902,10 @@ template class FsGrid : public FsGridTools{ } ARCH_HOSTDEV T* get(LocalID id) { - if(id < 0 || (unsigned int)id > globalSizeTotal) { + if(id < 0 || (unsigned int)id > totalStorageSize) { #ifndef __CUDA_ARCH__ std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl - << "(LocalID = " << id << ", but storage space is " << globalSizeTotal + << "(LocalID = " << id << ", but storage space is " << totalStorageSize << ". Expect weirdness." << std::endl; #endif return NULL; @@ -1016,7 +1015,6 @@ template class FsGrid : public FsGridTools{ std::array periodic; //!< Information about whether a given direction is periodic int32_t totalStorageSize; //!< Total number of cells in the local storage, including ghost cells int32_t globalSize[3]; //!< Global size of the simulation space, in cells - int32_t globalSizeTotal; //!< Total number of cells in the simulation space int32_t localSize[3]; // std::array localSize; //!< Local size of simulation space handled by this task (without ghost cells) int32_t storageSize[3]; //!< Local size of simulation space handled by this task (including ghost cells) From 90127249a2b7de66bd66c4a950b8697d3f94c3bd Mon Sep 17 00:00:00 2001 From: Filippo Zonta Date: Thu, 10 Aug 2023 13:10:26 +0300 Subject: [PATCH 7/8] Fix std::array for device code --- fsgrid.hpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fsgrid.hpp b/fsgrid.hpp index 1bc439d..e8746c0 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -143,7 +143,6 @@ template class FsGrid : public FsGridTools{ typedef int64_t LocalID; typedef int64_t GlobalID; - int diocane; T* data; // Legacy constructor from coupling reference @@ -769,7 +768,7 @@ template class FsGrid : public FsGridTools{ /*! Get the sstart coordinates of the local domain handled by this grid. */ - std::array& getLocalStart() { + int32_t* getLocalStart() { return localStart; } @@ -787,13 +786,10 @@ template class FsGrid : public FsGridTools{ * * \return Global cell coordinates */ - std::array getGlobalIndices(int x, int y, int z) { - std::array retval; + ARCH_HOSTDEV void getGlobalIndices(int x, int y, int z, int32_t (&retval)[3]) { retval[0] = localStart[0] + x; retval[1] = localStart[1] + y; retval[2] = localStart[2] + z; - - return retval; } /*! Get a reference to the field data in a cell @@ -1018,9 +1014,7 @@ template class FsGrid : public FsGridTools{ int32_t localSize[3]; // std::array localSize; //!< Local size of simulation space handled by this task (without ghost cells) int32_t storageSize[3]; //!< Local size of simulation space handled by this task (including ghost cells) - std::array localStart; //!< Offset of the local - //!coordinate system against - //!the global one + int32_t localStart[3]; //!< Offset of the local coordinate system against the global one FsGridCouplingInformation* coupling; // Information required to couple to external grids From 9dfb6e9cee2c0bc57f361af58a540ba1371b039b Mon Sep 17 00:00:00 2001 From: Markus Battarbee Date: Mon, 28 Aug 2023 16:52:08 +0300 Subject: [PATCH 8/8] Added required memset to buffer creation --- fsgrid.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/fsgrid.hpp b/fsgrid.hpp index daa6444..5577537 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -296,6 +296,7 @@ template class FsGrid : public FsGridTools{ totalStorageSize *= storageSize[i]; } data = (T*) malloc(totalStorageSize * TDim * sizeof(T)); + memset(data, 0, totalStorageSize * TDim * sizeof(T)); coupling->setCouplingSize(totalStorageSize); MPI_Datatype mpiTypeT;