Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions SRC/GRID/CUDA/cuda_gridDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ float *yPos_d; // Cell-center position in y (meters)
float *zPos_d; // Cell-center position in z (meters)
float *topoPos_d; //Topography elevation (z in meters) at the cell center position in x and y.

float *J13_d; // dx/d_zeta
float *J23_d; // dy/d_zeta
float *J31_d; // dz/d_xi
float *J32_d; // dz/d_eta
float *J33_d; // dz/d_zeta
Expand Down Expand Up @@ -106,6 +108,8 @@ extern "C" int cuda_gridDeviceSetup(){
fecuda_DeviceMalloc(Nelems*sizeof(float), &zPos_d);
fecuda_DeviceMalloc(((Nxp+2*Nh)*(Nyp+2*Nh))*sizeof(float), &topoPos_d);
/* Metric Tensors Fields */
fecuda_DeviceMalloc(Nelems*sizeof(float), &J13_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J23_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J31_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J32_d);
fecuda_DeviceMalloc(Nelems*sizeof(float), &J33_d);
Expand All @@ -119,6 +123,8 @@ extern "C" int cuda_gridDeviceSetup(){
cudaMemcpy(yPos_d, yPos, Nelems*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(zPos_d, zPos, Nelems*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(topoPos_d, topoPos, ((Nxp+2*Nh)*(Nyp+2*Nh))*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(J13_d, J13, Nelems*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(J23_d, J23, Nelems*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(J31_d, J31, Nelems*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(J32_d, J32, Nelems*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(J33_d, J33, Nelems*sizeof(float), cudaMemcpyHostToDevice);
Expand All @@ -134,7 +140,7 @@ extern "C" int cuda_gridDeviceSetup(){
printf("Calling cudaDevice_calculateJacobians...\n");
printf("grid = {%d, %d, %d}\n",grid.x, grid.y, grid.z);
printf("tBlock = {%d, %d, %d}\n",tBlock.x, tBlock.y, tBlock.z);
cudaDevice_calculateJacobians<<<grid, tBlock>>>(J31_d, J32_d, J33_d,
cudaDevice_calculateJacobians<<<grid, tBlock>>>(J13_d, J23_d, J31_d, J32_d, J33_d,
D_Jac_d, invD_Jac_d, xPos_d, yPos_d, zPos_d);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
Expand All @@ -145,6 +151,8 @@ extern "C" int cuda_gridDeviceSetup(){

/* cudaMemcpy the GPU-computed GRID arrays from Device Host*/
/* Coordinate Arrays */
cudaMemcpy(J13, J13_d, Nelems*sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(J23, J23_d, Nelems*sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(J31, J31_d, Nelems*sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(J32, J32_d, Nelems*sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(J33, J33_d, Nelems*sizeof(float), cudaMemcpyDeviceToHost);
Expand All @@ -166,6 +174,8 @@ extern "C" int cuda_gridDeviceCleanup(){

/* Free any GRID module arrays */
/* metric tensor fields */
cudaFree(J13_d);
cudaFree(J23_d);
cudaFree(J31_d);
cudaFree(J32_d);
cudaFree(J33_d);
Expand All @@ -185,7 +195,7 @@ extern "C" int cuda_gridDeviceCleanup(){
/*----->>>>> __global__ void cudaDevice_calculateJacobians(); --------------------------------------------------
This is the cuda version of the calculateJacobians routine from the GRID module
*/
__global__ void cudaDevice_calculateJacobians(float *J31_d, float *J32_d, float *J33_d,
__global__ void cudaDevice_calculateJacobians(float *J13_d, float *J23_d, float *J31_d, float *J32_d, float *J33_d,
float *D_Jac_d, float *invD_Jac_d,
float *xPos_d, float *yPos_d, float *zPos_d){
int i,j,k;
Expand Down Expand Up @@ -252,14 +262,18 @@ __global__ void cudaDevice_calculateJacobians(float *J31_d, float *J32_d, float
}//finite check */
#endif
/*Set the elements*/
/*d(x,y,z)/d_zetaa*/
/*dx/d_zeta*/
J13_d[ijk] = (T[1]*T[5] - T[2]*T[4])*invD_Jac_d[ijk];
/*dy/d_zeta*/
J23_d[ijk] = -(T[0]*T[5] - T[2]*T[3])*invD_Jac_d[ijk];
/*dz/d_(xi, eta, zeta)*/
J31_d[ijk] = (T[3]*T[7] - T[4]*T[6])*invD_Jac_d[ijk];
J32_d[ijk] = (T[0]*T[7] - T[1]*T[6])*invD_Jac_d[ijk];
J32_d[ijk] = -(T[0]*T[7] - T[1]*T[6])*invD_Jac_d[ijk];
J33_d[ijk] = (T[0]*T[4] - T[1]*T[3])*invD_Jac_d[ijk];
#ifdef DEBUG
if((i==64)&&(j==64)&&(k==64)){
printf("At (%d, %d, %d):\n\t\t J31=%f, J32=%f, J33=%f,\n\t\t D_Jac=%f, invD_Jac=%f\n",
i,j,k, J31_d[ijk],J32_d[ijk],J33_d[ijk],
printf("At (%d, %d, %d)::\n\t\t J13=%f, J23=%f,\n\t\t J31=%f, J32=%f, J33=%f,\n\t\t D_Jac=%f, invD_Jac=%f\n",
i,j,k, J13_d[ijk],J23_d[ijk],J31_d[ijk],J32_d[ijk],J33_d[ijk],
D_Jac_d[ijk],invD_Jac_d[ijk]);
}
#endif
Expand Down
4 changes: 3 additions & 1 deletion SRC/GRID/CUDA/cuda_gridDevice_cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ extern float *yPos_d; /* Cell-center position in y (meters) */
extern float *zPos_d; /* Cell-center position in z (meters) */
extern float *topoPos_d; /*Topography elevation (z in meters) at the cell center position in x and y. */

extern float *J13_d; // dx/d_zeta
extern float *J23_d; // dy/d_zeta
extern float *J31_d; // dz/d_xi
extern float *J32_d; // dz/d_eta
extern float *J33_d; // dz/d_zeta
Expand All @@ -70,7 +72,7 @@ extern "C" int cuda_gridDeviceCleanup();
/*----->>>>> __global__ void cudaDevice_calculateJacobians(); --------------------------------------------------
* This is the cuda version of the calculateJacobians routine from the GRID module
* */
__global__ void cudaDevice_calculateJacobians(float *J31_d, float *J32_d, float *J33_d,
__global__ void cudaDevice_calculateJacobians(float *J13_d, float *J23_d, float *J31_d, float *J32_d, float *J33_d,
float *D_Jac_d, float *invD_Jac_d,
float *xPos_d, float *yPos_d, float *zPos_d);

Expand Down
29 changes: 26 additions & 3 deletions SRC/GRID/grid.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ float *zPos; /* Cell-center position in z (meters) */
float *topoPosGlobal; /*Topography elevation (z in meters) at the cell center position in x and y. (Global domain) */
float *topoPos; /*Topography elevation (z in meters) at the cell center position in x and y. (per-rank domain) */

//float *J11; // dx/d_xi -- assumed = 1.0
//float *J12; // dx/d_eta -- assumed = 0.0

//float *J21; // dy/d_xi -- assumed = 0.0
//float *J22; // dy/d_eta -- assumed = 1.0

float *J13; // dx/d_zeta
float *J23; // dy/d_zeta

float *J31; // dz/d_xi
float *J32; // dz/d_eta
float *J33; // dz/d_zeta
Expand Down Expand Up @@ -240,6 +249,8 @@ int gridInit(){
topoPos = memAllocateFloat2DField(Nxp, Nyp, Nh, "topoPos");
topoPosGlobal = memAllocateFloat2DField(Nx, Ny, 0, "topoPos");
/* Metric Tensors Fields */
J13 = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "J13");
J23 = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "J23");
J31 = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "J31");
J32 = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "J32");
J33 = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "J33");
Expand All @@ -266,6 +277,8 @@ int gridInit(){
//#if 1
errorCode = ioRegisterVar("D_Jac", "float", 4, dims4d, D_Jac);
errorCode = ioRegisterVar("invD_Jac", "float", 4, dims4d, invD_Jac);
errorCode = ioRegisterVar("J13", "float", 4, dims4d, J13);
errorCode = ioRegisterVar("J23", "float", 4, dims4d, J23);
errorCode = ioRegisterVar("J31", "float", 4, dims4d, J31);
errorCode = ioRegisterVar("J32", "float", 4, dims4d, J32);
errorCode = ioRegisterVar("J33", "float", 4, dims4d, J33);
Expand Down Expand Up @@ -665,7 +678,11 @@ int calculateJacobians(){
/* Set the determinant and inverse determinant for this ijk location */
D_Jac[ijk] = determinant;
invD_Jac[ijk] = 1.0/determinant;
//d(x,y,z)/d_zeta
//dx/d_zeta
J13[ijk] = (T[0][1]*T[1][2] - T[0][2]*T[1][1])*invD_Jac[ijk];
//dy/d_zeta
J23[ijk] = -(T[0][0]*T[1][2] - T[0][2]*T[1][0])*invD_Jac[ijk];
//dz/d_(xi, eta, zeta)
J31[ijk] = (T[1][0]*T[2][1] - T[1][1]*T[2][0])*invD_Jac[ijk];
J32[ijk] = -(T[0][0]*T[2][1] - T[0][1]*T[2][0])*invD_Jac[ijk];
J33[ijk] = (T[0][0]*T[1][1] - T[0][1]*T[1][0])*invD_Jac[ijk];
Expand All @@ -685,7 +702,7 @@ int calculateJacobians(){
printf("mpi_rank_world--%d/%d coordHalos, calculateJacaobians()-Boundaries!\n",mpi_rank_world, mpi_size_world);
fflush(stdout);
#endif
/* TODO technically these only work for periodic domain. Since we currently use strictly constant horizontal and vertical spacing they work for now. */
/* TODO technically these only work for horizontally iconstant resolution domain. */
/*lower i-index halos*/
for(iBnd=0; iBnd < 6; iBnd++){
#ifdef DEBUG
Expand Down Expand Up @@ -755,7 +772,11 @@ int calculateJacobians(){
/* Set the determinant and inverse determinant for this ijk location */
D_Jac[ijkDest] = D_Jac[ijkTarget];
invD_Jac[ijkDest] = invD_Jac[ijkTarget];
//d(x,y,z)/d_zeta
//dx/d_zeta
J13[ijkDest] = J13[ijkTarget];
//dy/d_zeta
J23[ijkDest] = J23[ijkTarget];
//dz/d_(xi, eta, zeta)
J31[ijkDest] = J31[ijkTarget];
J32[ijkDest] = J32[ijkTarget];
J33[ijkDest] = J33[ijkTarget];
Expand Down Expand Up @@ -789,6 +810,8 @@ int calculateJacobians(){
printf("(xPos,yPos,zPos)[%d,%d,%d] = (%f,%f,%f)\n",i,j,k+1,xPos[ijkp1],yPos[ijkp1],zPos[ijkp1]);
printf("D_Jac[%d] = %f\n",ijk,D_Jac[ijk]);
printf("invD_Jac[%d] = %f\n",ijk,invD_Jac[ijk]);
printf("J13[%d] = %f\n",ijk,J13[ijk]);
printf("J23[%d] = %f\n",ijk,J23[ijk]);
printf("J31[%d] = %f\n",ijk,J31[ijk]);
printf("J32[%d] = %f\n",ijk,J32[ijk]);
printf("J33[%d] = %f\n",ijk,J33[ijk]);
Expand Down
9 changes: 9 additions & 0 deletions SRC/GRID/grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ extern float *zPos; /* Cell-center position in z (meters) */
extern float *topoPos; /*Topography elevation (z in meters) at the cell center position in x and y. */
extern float *topoPosGlobal; /*Topography elevation (z in meters) at the cell center position in x and y. (Global domain) */

//extern float *J11; // dx/d_xi -- assumed = 1.0
//extern float *J12; // dx/d_eta -- assumed = 0.0

//extern float *J21; // dy/d_xi -- assumed = 0.0
//extern float *J22; // dy/d_eta -- assumed = 1.0

extern float *J13; // dx/d_zeta
extern float *J23; // dy/d_zeta

extern float *J31; // dz/d_xi
extern float *J32; // dz/d_eta
extern float *J33; // dz/d_zeta
Expand Down
5 changes: 3 additions & 2 deletions SRC/HYDRO_CORE/CUDA/cuda_advectionDevice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ extern "C" int cuda_advectionDeviceCleanup(){
* This device function calculates the cell face velocities to prepare for use in the chosen advection scheme
*/
__device__ void cudaDevice_calcFaceVelocities(float* hydroFlds_d, float* hydroFaceVels_d,
float* J13_d, float* J23_d,
float* J31_d, float* J32_d, float* J33_d, float* D_Jac_d){
int i,j,k,ijk;
int im1jk,ijm1k,ijkm1;
Expand Down Expand Up @@ -97,8 +98,8 @@ __device__ void cudaDevice_calcFaceVelocities(float* hydroFlds_d, float* hydroFa
( (D_Jac_d[ijk]/rho[ijk])*(v[ijk] + w[ijk]*J32_d[ijk])
+(D_Jac_d[ijm1k]/rho[ijm1k])*(v[ijm1k] + w[ijm1k]*J32_d[ijm1k]));
w_cf[ijk] = 0.5*dZi_d*
( (D_Jac_d[ijk]/rho[ijk])*(w[ijk]*J33_d[ijk])
+(D_Jac_d[ijkm1]/rho[ijkm1])*(w[ijkm1]*J33_d[ijkm1]));
( (D_Jac_d[ijk]/rho[ijk])*(u[ijk]*J13_d[ijk] + v[ijk]*J23_d[ijk] + w[ijk]*J33_d[ijk])
+(D_Jac_d[ijkm1]/rho[ijkm1])*(u[ijkm1]*J13_d[ijkm1] + v[ijkm1]*J23_d[ijkm1] + w[ijkm1]*J33_d[ijkm1]));

//Ensure ground and ceiling face vertical velocity component is set to 0
if((k==kMin_d)||((k>=kMax_d))){
Expand Down
1 change: 1 addition & 0 deletions SRC/HYDRO_CORE/CUDA/cuda_advectionDevice_cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ extern "C" int cuda_advectionDeviceCleanup();
* This device function calculates the cell face velocities to prepare for use in the chosen advection scheme
*/
__device__ void cudaDevice_calcFaceVelocities(float* hydroFlds_d, float* hydroFaceVels_d,
float* J13_d, float* J23_d,
float* J31_d, float* J32_d, float* J33_d, float* D_Jac_d);

/*----->>>>> __device__ void cudaDevice_UpstreamDivAdvFlux(); --------------------------------------------------
Expand Down
Loading