diff --git a/src/Makefile b/src/Makefile index 86404973a..627cb27b3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -19,7 +19,7 @@ accelwattch_hw_power: rodinia-3.1_hw_power parboil_hw_power cuda_samples-11.0_hw #Disable clean for now, It has a bug! # clean_dragon-naive clean_pannotia clean_proxy-apps -clean: clean_mlperf_inference clean_rodinia_2.0-ft clean_dragon-cdp clean_ispass-2009 clean_lonestargpu-2.0 clean_custom_apps clean_parboil clean_cutlass clean_rodinia-3.1 clean_heterosync clean_UVMSmart_test clean_cuda_samples clean_huggingface +clean: clean_mlperf_inference clean_rodinia_2.0-ft clean_dragon-cdp clean_ispass-2009 clean_lonestargpu-2.0 clean_custom_apps clean_parboil clean_cutlass clean_rodinia-3.1 clean_heterosync clean_UVMSmart_test clean_cuda_samples clean_huggingface clean_GPU_Microbenchmark clean_accelwattch: clean_rodinia-3.1 clean_parboil clean_cutlass clean_cuda_samples-11.0 clean_cuda_samples_hw_power clean_rodinia-3.1_hw_power clean_parboil_hw_power clean_accelwattch_ubench clean_data: @@ -108,7 +108,7 @@ dragon-cdp: dragon-naive GPU_Microbenchmark: mkdir -p $(BINDIR)/$(BINSUBDIR)/ $(SETENV) $(MAKE) $(MAKE_ARGS) -C cuda/GPU_Microbenchmark - cp -r cuda/GPU_Microbenchmark/bin/* $(BINDIR)/$(BINSUBDIR)/ + mv cuda/GPU_Microbenchmark/bin/* $(BINDIR)/$(BINSUBDIR)/ clean_GPU_Microbenchmark: find cuda/GPU_Microbenchmark/ubench -type f -executable -delete diff --git a/src/cuda/GPU_Microbenchmark/common/common.mk b/src/cuda/GPU_Microbenchmark/common/common.mk index 70ac7864f..52a249e6f 100644 --- a/src/cuda/GPU_Microbenchmark/common/common.mk +++ b/src/cuda/GPU_Microbenchmark/common/common.mk @@ -11,11 +11,11 @@ LIB := release: $(CC) $(NVCC_FLAGS) $(CUOPTS) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(LIB) -lcudart - cp $(EXE) $(BIN_DIR) + mv $(EXE) $(BIN_DIR) tuner: $(CC) $(NVCC_FLAGS) $(CUOPTS) -DTUNER $(SRC) -o $(EXE) -I$(INCLUDE) -L$(LIB) -lcudart - cp $(EXE) $(BIN_DIR) + mv $(EXE) $(BIN_DIR) clean: rm -f *.o; rm -f $(EXE) diff --git a/src/cuda/rodinia/2.0-ft/lud/lud_kernel.cu b/src/cuda/rodinia/2.0-ft/lud/lud_kernel.cu index ea1cbf1f8..8bd6b9fc7 100755 --- a/src/cuda/rodinia/2.0-ft/lud/lud_kernel.cu +++ b/src/cuda/rodinia/2.0-ft/lud/lud_kernel.cu @@ -22,14 +22,14 @@ lud_diagonal(float *m, int matrix_dim, int offset) for(j=0; j < i; j++) shadow[threadIdx.x][i] -= shadow[threadIdx.x][j]*shadow[j][i]; shadow[threadIdx.x][i] /= shadow[i][i]; + } + __syncthreads(); - __syncthreads(); - + if (threadIdx.x>i){ for(j=0; j < i+1; j++) shadow[i+1][threadIdx.x] -= shadow[i+1][j]*shadow[j][threadIdx.x]; - - __syncthreads(); } + __syncthreads(); } /*