Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 32 additions & 32 deletions SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,26 +102,26 @@ extern "C" int cuda_GADDeviceSetup(){
cudaMemcpyToSymbol(numgridCells_away_d, &numgridCells_away, sizeof(int));

/*Device memory allocations and Host-to-Device memcopy for turbine arrays */
fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineType_d);
fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRank_d);
fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRefi_d);
fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRefj_d);
fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRefk_d);
fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineYawing_d);
fecuda_DeviceMallocInt((size_t)(GADNumTurbines), &GAD_turbineType_d);
fecuda_DeviceMallocInt((size_t)(GADNumTurbines), &GAD_turbineRank_d);
fecuda_DeviceMallocInt((size_t)(GADNumTurbines), &GAD_turbineRefi_d);
fecuda_DeviceMallocInt((size_t)(GADNumTurbines), &GAD_turbineRefj_d);
fecuda_DeviceMallocInt((size_t)(GADNumTurbines), &GAD_turbineRefk_d);
fecuda_DeviceMallocInt((size_t)(GADNumTurbines), &GAD_turbineYawing_d);
cudaMemcpy(GAD_turbineType_d, GAD_turbineType, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_turbineRank_d, GAD_turbineRank, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_turbineRefi_d, GAD_turbineRefi, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_turbineRefj_d, GAD_turbineRefj, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_turbineRefk_d, GAD_turbineRefk, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_turbineYawing_d, GAD_turbineYawing, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);

fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_turbineRefMag_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_turbineRefDir_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_Xcoords_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_Ycoords_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_rotorTheta_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_yawError_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_anFactor_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_turbineRefMag_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_turbineRefDir_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_Xcoords_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_Ycoords_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_rotorTheta_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_yawError_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &GAD_anFactor_d);
cudaMemcpy(GAD_turbineRefMag_d, GAD_turbineRefMag, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_turbineRefDir_d, GAD_turbineRefDir, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_Xcoords_d, GAD_Xcoords, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
Expand All @@ -136,10 +136,10 @@ extern "C" int cuda_GADDeviceSetup(){
cudaMemcpy(GAD_yawError_d, GAD_yawError, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_anFactor_d, GAD_anFactor, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);

fecuda_DeviceMalloc(GADNumTurbines*GADrefSeriesLength*sizeof(float), &GAD_turbineUseries_d);
fecuda_DeviceMalloc(GADNumTurbines*GADrefSeriesLength*sizeof(float), &GAD_turbineVseries_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &u_sampAvg_d);
fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &v_sampAvg_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines*GADrefSeriesLength), &GAD_turbineUseries_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines*GADrefSeriesLength), &GAD_turbineVseries_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &u_sampAvg_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbines), &v_sampAvg_d);

//Initialize u_sampAvg & GAD_turbineUseries as constant (per-turbine) then send down to the device
tmp_vector = (float *) malloc(GADrefSeriesLength*sizeof(float));
Expand Down Expand Up @@ -171,40 +171,40 @@ extern "C" int cuda_GADDeviceSetup(){
}
free(tmp_vector);

fecuda_DeviceMalloc(GADNumTurbineTypes*sizeof(float), &GAD_hubHeights_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*sizeof(float), &GAD_rotorD_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*sizeof(float), &GAD_nacelleD_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes), &GAD_hubHeights_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes), &GAD_rotorD_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes), &GAD_nacelleD_d);
cudaMemcpy(GAD_hubHeights_d, GAD_hubHeights, GADNumTurbineTypes*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_rotorD_d, GAD_rotorD, GADNumTurbineTypes*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_nacelleD_d, GAD_nacelleD, GADNumTurbineTypes*sizeof(float), cudaMemcpyHostToDevice);


fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyTwist_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyChord_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyPitch_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyOmega_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*turbinePolyOrderMax), &turbinePolyTwist_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*turbinePolyOrderMax), &turbinePolyChord_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*turbinePolyOrderMax), &turbinePolyPitch_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*turbinePolyOrderMax), &turbinePolyOmega_d);
cudaMemcpy(turbinePolyTwist_d, turbinePolyTwist, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(turbinePolyChord_d, turbinePolyChord, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(turbinePolyPitch_d, turbinePolyPitch, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(turbinePolyOmega_d, turbinePolyOmega, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);

fecuda_DeviceMalloc(GADNumTurbineTypes*(turbinePolyClCdrNormSegments+1)*sizeof(float), &rnorm_vect_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*alphaBounds*sizeof(float), &alpha_minmax_vect_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), &turbinePolyCl_d);
fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), &turbinePolyCd_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*(turbinePolyClCdrNormSegments+1)), &rnorm_vect_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*alphaBounds), &alpha_minmax_vect_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax), &turbinePolyCl_d);
fecuda_DeviceMalloc((size_t)(GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax), &turbinePolyCd_d);

cudaMemcpy(rnorm_vect_d, rnorm_vect, GADNumTurbineTypes*(turbinePolyClCdrNormSegments+1)*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(alpha_minmax_vect_d, alpha_minmax_vect, GADNumTurbineTypes*alphaBounds*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(turbinePolyCd_d, turbinePolyCd, GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(turbinePolyCl_d, turbinePolyCl, GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);

fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_turbineVolMask_d);
fecuda_DeviceMalloc((size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)), &GAD_turbineVolMask_d);
cudaMemcpy(GAD_turbineVolMask_d, GAD_turbineVolMask, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);

if (GADoutputForces == 1){
fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_forceX_d);
fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_forceY_d);
fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_forceZ_d);
fecuda_DeviceMalloc((size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)), &GAD_forceX_d);
fecuda_DeviceMalloc((size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)), &GAD_forceY_d);
fecuda_DeviceMalloc((size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)), &GAD_forceZ_d);
cudaMemcpy(GAD_forceX_d, GAD_forceX, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_forceY_d, GAD_forceY, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(GAD_forceZ_d, GAD_forceZ, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
Expand Down
8 changes: 4 additions & 4 deletions SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,21 @@ float *urban_heat_redis_d; /* Base Address of memory containing
*/
extern "C" int cuda_urbanDeviceSetup(){
int errorCode = CUDA_URBAN_SUCCESS;
int Nelems;
size_t Nelems;

cudaMemcpyToSymbol(urbanSelector_d, &urbanSelector, sizeof(int));
cudaMemcpyToSymbol(cd_build_d, &cd_build, sizeof(float));
cudaMemcpyToSymbol(ct_build_d, &ct_build, sizeof(float));

Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
fecuda_DeviceMalloc(Nelems*sizeof(float), &building_mask_d);
Nelems = (size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh));
fecuda_DeviceMalloc(Nelems, &building_mask_d);
cudaMemcpy(building_mask_d, building_mask, Nelems*sizeof(float), cudaMemcpyHostToDevice);

cudaMemcpyToSymbol(delta_aware_bdg_d, &delta_aware_bdg, sizeof(float));

if(urban_heatRedis > 0){
Nelems = (Nxp+2*Nh)*(Nyp+2*Nh);
fecuda_DeviceMalloc(Nelems*sizeof(float), &urban_heat_redis_d);
fecuda_DeviceMalloc(Nelems, &urban_heat_redis_d);
cudaMemcpy(urban_heat_redis_d, urban_heat_redis, Nelems*sizeof(float), cudaMemcpyHostToDevice);
}

Expand Down
5 changes: 0 additions & 5 deletions SRC/FECUDA/fecuda_Device_cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,6 @@ extern __constant__ int rankYid_d;
*/
extern "C" int fecuda_DeviceSetup(int tBx, int tBy, int tBz);

/*----->>>>> void fecuda_DeviceMallocInt(); -----------------------------------------------------------
* Used to allocate device memory integer blocks and set the host memory addresses of device memory pointers.
*/
extern "C" void fecuda_DeviceMallocInt(int Nelems, int** memBlock_d);

/*----->>>>> int fecuda_SetBlocksPerGrid(); ------------------------------------------------------------------
* Used to set the "dim3 grid" module variable that is passed to any device kernel
* to specify the number of blocks per grid in each dimension
Expand Down
12 changes: 6 additions & 6 deletions SRC/FECUDA/fecuda_Utils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -180,19 +180,19 @@ extern "C" int fecuda_UtilsDeallocateHaloBuffers(){
/*----->>>>> void fecuda_DeviceMalloc(); -----------------------------------------------------------
* Used to allocate device memory float blocks and set the host memory addresses of device memory pointers.
*/
extern "C" void fecuda_DeviceMalloc(int Nelems, float** memBlock_d) {
cudaMalloc((void**)memBlock_d,sizeof(float)*Nelems);
extern "C" void fecuda_DeviceMalloc(size_t Nelems, float** memBlock_d) {
cudaMalloc((void**)memBlock_d,(size_t)(sizeof(float))*Nelems);
gpuErrchk( cudaPeekAtLastError() );
cudaMemset(*memBlock_d,'\0',sizeof(float)*Nelems);
cudaMemset(*memBlock_d,'\0',(size_t)(sizeof(float))*Nelems);
gpuErrchk( cudaPeekAtLastError() );
#ifdef DEBUG
printf("New device memory allocation, device pointer is stored at host address %p as %p\n",memBlock_d, *memBlock_d);
#endif
}
extern "C" void fecuda_DeviceMallocInt(int Nelems, int** memBlock_d) {
cudaMalloc((void**)memBlock_d,sizeof(int)*Nelems);
extern "C" void fecuda_DeviceMallocInt(size_t Nelems, int** memBlock_d) {
cudaMalloc((void**)memBlock_d,(size_t)(sizeof(int))*Nelems);
gpuErrchk( cudaPeekAtLastError() );
cudaMemset(*memBlock_d,'\0',sizeof(int)*Nelems);
cudaMemset(*memBlock_d,'\0',(size_t)(sizeof(int))*Nelems);
gpuErrchk( cudaPeekAtLastError() );
#ifdef DEBUG
printf("New device memory allocation, device pointer is stored at host address %p as %p\n",memBlock_d, *memBlock_d);
Expand Down
7 changes: 6 additions & 1 deletion SRC/FECUDA/fecuda_Utils_cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@ extern "C" int fecuda_UtilsDeallocateHaloBuffers();
/*----->>>>> void fecuda_DeviceMalloc(); -----------------------------------------------------------
* Used to allocate device memory float blocks and set the host memory addresses of device memory pointers.
*/
extern "C" void fecuda_DeviceMalloc(int Nelems, float** memBlock_d);
extern "C" void fecuda_DeviceMalloc(size_t Nelems, float** memBlock_d);

/*----->>>>> void fecuda_DeviceMallocInt(); -----------------------------------------------------------
* Used to allocate device memory integer blocks and set the host memory addresses of device memory pointers.
*/
extern "C" void fecuda_DeviceMallocInt(size_t Nelems, int** memBlock_d);

/*----->>>>> int fecuda_SendRecvWestEast(); -------------------------------------------------------------------
Used to perform western/eastern device domain halo exchange for an arbitrary field.
Expand Down
26 changes: 13 additions & 13 deletions SRC/GRID/CUDA/cuda_gridDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ float *invD_Jac_d; //inverse Determinant of the Jacobian
*/
extern "C" int cuda_gridDeviceSetup(){
int errorCode = CUDA_GRID_SUCCESS;
int Nelems;
size_t Nelems;
#ifdef DEBUG
cudaEvent_t startE, stopE;
float elapsedTime;
Expand Down Expand Up @@ -100,21 +100,21 @@ extern "C" int cuda_gridDeviceSetup(){
gpuErrchk( cudaPeekAtLastError() ); /*Check for errors in the cudaMemCpy calls*/

/*Set the full memory block number of elements for grid fields*/
Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
Nelems = (size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh));
/* Allocate the GRID arrays */
/* Coordinate Arrays */
fecuda_DeviceMalloc(Nelems*sizeof(float), &xPos_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &yPos_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &zPos_d);
fecuda_DeviceMalloc(((Nxp+2*Nh)*(Nyp+2*Nh))*sizeof(float), &topoPos_d);
fecuda_DeviceMalloc(Nelems, &xPos_d);
fecuda_DeviceMalloc(Nelems, &yPos_d);
fecuda_DeviceMalloc(Nelems, &zPos_d);
fecuda_DeviceMalloc((size_t)((Nxp+2*Nh)*(Nyp+2*Nh)), &topoPos_d);
/* Metric Tensors Fields */
fecuda_DeviceMalloc(Nelems*sizeof(float), &J13_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J23_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J31_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J32_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J33_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &D_Jac_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &invD_Jac_d);
fecuda_DeviceMalloc(Nelems, &J13_d);
fecuda_DeviceMalloc(Nelems, &J23_d);
fecuda_DeviceMalloc(Nelems, &J31_d);
fecuda_DeviceMalloc(Nelems, &J32_d);
fecuda_DeviceMalloc(Nelems, &J33_d);
fecuda_DeviceMalloc(Nelems, &D_Jac_d);
fecuda_DeviceMalloc(Nelems, &invD_Jac_d);
gpuErrchk( cudaPeekAtLastError() ); /*Check for errors in the cudaMalloc calls*/

/* cudaMemcpy the GRID arrays from Host to Device*/
Expand Down
24 changes: 12 additions & 12 deletions SRC/HYDRO_CORE/CUDA/cuda_BCsDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -66,22 +66,22 @@ extern "C" int cuda_BCsDeviceSetup(){
/*Allocate arrays*/
if(hydroBCs==1){ //Using LAD BCs
if((rankYid == 0)||(rankYid == numProcsY-1)){
fecuda_DeviceMalloc(2*nBndyVars*(Nxp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &XZBdyPlanes_d);
fecuda_DeviceMalloc(2*nBndyVars*(Nxp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &XZBdyPlanesNext_d);
fecuda_DeviceMalloc(2*nBndyVars*(Nxp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &XZBdyPlanesBuffer_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nxp+2*Nh)*(Nzp+2*Nh)), &XZBdyPlanes_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nxp+2*Nh)*(Nzp+2*Nh)), &XZBdyPlanesNext_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nxp+2*Nh)*(Nzp+2*Nh)), &XZBdyPlanesBuffer_d);
}
if((rankXid == 0)||(rankXid == numProcsX-1)){
fecuda_DeviceMalloc(2*nBndyVars*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &YZBdyPlanes_d);
fecuda_DeviceMalloc(2*nBndyVars*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &YZBdyPlanesNext_d);
fecuda_DeviceMalloc(2*nBndyVars*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &YZBdyPlanesBuffer_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nyp+2*Nh)*(Nzp+2*Nh)), &YZBdyPlanes_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nyp+2*Nh)*(Nzp+2*Nh)), &YZBdyPlanesNext_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nyp+2*Nh)*(Nzp+2*Nh)), &YZBdyPlanesBuffer_d);
}
fecuda_DeviceMalloc(2*nBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)*sizeof(float), &XYBdyPlanes_d);
fecuda_DeviceMalloc(2*nBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)*sizeof(float), &XYBdyPlanesNext_d);
fecuda_DeviceMalloc(2*nBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)*sizeof(float), &XYBdyPlanesBuffer_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)), &XYBdyPlanes_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)), &XYBdyPlanesNext_d);
fecuda_DeviceMalloc((size_t)(2*nBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)), &XYBdyPlanesBuffer_d);
if(surflayerSelector == 3){
fecuda_DeviceMalloc(nSurfBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)*sizeof(float), &SURFBdyPlanes_d);
fecuda_DeviceMalloc(nSurfBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)*sizeof(float), &SURFBdyPlanesNext_d);
fecuda_DeviceMalloc(nSurfBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)*sizeof(float), &SURFBdyPlanesBuffer_d);
fecuda_DeviceMalloc((size_t)(nSurfBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)), &SURFBdyPlanes_d);
fecuda_DeviceMalloc((size_t)(nSurfBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)), &SURFBdyPlanesNext_d);
fecuda_DeviceMalloc((size_t)(nSurfBndyVars*(Nxp+2*Nh)*(Nyp+2*Nh)), &SURFBdyPlanesBuffer_d);
}
}//end if hydroBCs == 1

Expand Down
8 changes: 4 additions & 4 deletions SRC/HYDRO_CORE/CUDA/cuda_BaseStateDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ float *hydroBaseStatePres_d; /*Base Address of memory containing the diagnostic
*/
extern "C" int cuda_BaseStateDeviceSetup(){
int errorCode = CUDA_BASESTATE_SUCCESS;
int Nelems;
size_t Nelems;

/*Set the full memory block number of elements for base-state fields*/
Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
Nelems = (size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh));
/* Allocate the Base State arrays on the device */
fecuda_DeviceMalloc(Nelems*2*sizeof(float), &hydroBaseStateFlds_d); //Only rho and theta base-state variables
fecuda_DeviceMalloc(Nelems*sizeof(float), &hydroBaseStatePres_d); //Only base-state pressure
fecuda_DeviceMalloc(Nelems*2, &hydroBaseStateFlds_d); //Only rho and theta base-state variables
fecuda_DeviceMalloc(Nelems, &hydroBaseStatePres_d); //Only base-state pressure

/* Send the Base State arrays down to the device */
cudaMemcpy(hydroBaseStateFlds_d, hydroBaseStateFlds, Nelems*2*sizeof(float), cudaMemcpyHostToDevice);
Expand Down
6 changes: 3 additions & 3 deletions SRC/HYDRO_CORE/CUDA/cuda_advectionDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ __constant__ float b_hyb_d; /*hybrid advection scheme param
*/
/* cuda_advectionDeviceSetup(): copies three advection parameters (advectionSelector,
 * ceilingAdvectionBC, b_hyb) to their device-side symbols and requests the device block
 * for the cell-face velocities (3 values per grid element, halos included).
 * Returns errorCode, which is set to CUDA_ADVECTION_SUCCESS and not modified in this body.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped. Where two
 * near-identical statements are adjacent (the Nelems declaration, the Nelems assignment,
 * the fecuda_DeviceMalloc call), the first is presumably the removed line and the second
 * its replacement -- confirm against the actual file before compiling. */
extern "C" int cuda_advectionDeviceSetup(){
int errorCode = CUDA_ADVECTION_SUCCESS;
int Nelems;
size_t Nelems;

/* Return codes of these cudaMemcpyToSymbol calls are not inspected here. */
cudaMemcpyToSymbol(advectionSelector_d, &advectionSelector, sizeof(int));
cudaMemcpyToSymbol(ceilingAdvectionBC_d, &ceilingAdvectionBC, sizeof(int));
cudaMemcpyToSymbol(b_hyb_d, &b_hyb, sizeof(float));

/*Set the full memory block number of elements for hydroCore fields*/
/* Old form: element count held in an int and a byte count passed to the allocator. */
Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
fecuda_DeviceMalloc(Nelems*3*sizeof(float), &hydroFaceVels_d); /*Cell-face Velocities*/
/* New form: an element count (not bytes) is passed to the allocator.
 * NOTE(review): the (size_t) cast wraps the already-computed product. If Nxp, Nyp, Nzp
 * and Nh are int (their declarations are not visible here), the multiplication is still
 * evaluated in int and can overflow before being widened; casting one operand first,
 * e.g. (size_t)(Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh), would widen the whole product -- confirm. */
Nelems = (size_t)((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh));
fecuda_DeviceMalloc(Nelems*3, &hydroFaceVels_d); /*Cell-face Velocities*/

return(errorCode);
} //end cuda_advectionDeviceSetup()
Expand Down
Loading