-
Notifications
You must be signed in to change notification settings - Fork 4
Provide CUDA GPU implementation of GMGPolar #76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
mknaranja
wants to merge
19
commits into
main
Choose a base branch
from
cuda-gpu-refactor2024
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
a6f7de1
Initial empty branch for CUDA GPU implementation
03fffca
Residual and Smoother transferred to CUDA
1699fb6
Smoother finalized
bc3418e
Extrapolated Smoother done.
f5bc750
Fully working GPU implementation
dedb857
Some clean-up
05d84e2
Update extrapolated_residual.cu
julianlitz b1d036b
Update compute_exact_error.cu
julianlitz a873467
Removed Debug code
a92f1f2
iterations output in one line
3a661e6
padding through 16,17 and output
c44a0e0
batch job jedi
50a6bbd
batch jedi
f42ce82
shafranov geom
8d944fd
jedi batch
96f2d50
shafranov geom
d7d09cb
shafranov geom to activate
697b836
GPU race condition patch (#80)
julianlitz 221b921
output geom
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| { | ||
| "configurations": [ | ||
| { | ||
| "name": "Linux", | ||
| "includePath": [ | ||
| "${workspaceFolder}/**" | ||
| ], | ||
| "defines": [], | ||
| "compilerPath": "/usr/bin/clang", | ||
| "cStandard": "c17", | ||
| "cppStandard": "c++14", | ||
| "intelliSenseMode": "linux-clang-x64" | ||
| } | ||
| ], | ||
| "version": 4 | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. pls remove |
||
| "version": "0.2.0", | ||
| "configurations": [ | ||
| { | ||
| "name": "Debug test_smoother", | ||
| "type": "cppdbg", | ||
| "request": "launch", | ||
| "program": "${workspaceFolder}/build/tests/test_smoother", | ||
| "args": [], | ||
| "stopAtEntry": false, | ||
| "cwd": "${workspaceFolder}/build/tests", | ||
| "environment": [], | ||
| } | ||
| ] | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. pls remove |
||
| "files.associations": { | ||
| "cctype": "cpp", | ||
| "clocale": "cpp", | ||
| "cmath": "cpp", | ||
| "cstdarg": "cpp", | ||
| "cstddef": "cpp", | ||
| "cstdio": "cpp", | ||
| "cstdlib": "cpp", | ||
| "cstring": "cpp", | ||
| "ctime": "cpp", | ||
| "cwchar": "cpp", | ||
| "cwctype": "cpp", | ||
| "any": "cpp", | ||
| "array": "cpp", | ||
| "atomic": "cpp", | ||
| "hash_map": "cpp", | ||
| "*.tcc": "cpp", | ||
| "bitset": "cpp", | ||
| "chrono": "cpp", | ||
| "complex": "cpp", | ||
| "condition_variable": "cpp", | ||
| "cstdint": "cpp", | ||
| "deque": "cpp", | ||
| "forward_list": "cpp", | ||
| "list": "cpp", | ||
| "unordered_map": "cpp", | ||
| "unordered_set": "cpp", | ||
| "vector": "cpp", | ||
| "exception": "cpp", | ||
| "algorithm": "cpp", | ||
| "functional": "cpp", | ||
| "iterator": "cpp", | ||
| "map": "cpp", | ||
| "memory": "cpp", | ||
| "memory_resource": "cpp", | ||
| "optional": "cpp", | ||
| "ratio": "cpp", | ||
| "set": "cpp", | ||
| "string": "cpp", | ||
| "string_view": "cpp", | ||
| "system_error": "cpp", | ||
| "tuple": "cpp", | ||
| "type_traits": "cpp", | ||
| "utility": "cpp", | ||
| "fstream": "cpp", | ||
| "initializer_list": "cpp", | ||
| "iomanip": "cpp", | ||
| "iosfwd": "cpp", | ||
| "iostream": "cpp", | ||
| "istream": "cpp", | ||
| "limits": "cpp", | ||
| "mutex": "cpp", | ||
| "new": "cpp", | ||
| "ostream": "cpp", | ||
| "sstream": "cpp", | ||
| "stdexcept": "cpp", | ||
| "streambuf": "cpp", | ||
| "thread": "cpp", | ||
| "cinttypes": "cpp", | ||
| "typeindex": "cpp", | ||
| "typeinfo": "cpp", | ||
| "variant": "cpp", | ||
| "__bit_reference": "cpp", | ||
| "__config": "cpp", | ||
| "__debug": "cpp", | ||
| "__functional_base": "cpp", | ||
| "__hash_table": "cpp", | ||
| "__locale": "cpp", | ||
| "__mutex_base": "cpp", | ||
| "__nullptr": "cpp", | ||
| "__split_buffer": "cpp", | ||
| "__string": "cpp", | ||
| "__threading_support": "cpp", | ||
| "__tree": "cpp", | ||
| "__tuple": "cpp", | ||
| "ios": "cpp", | ||
| "locale": "cpp", | ||
| "queue": "cpp", | ||
| "codecvt": "cpp", | ||
| "filesystem": "cpp", | ||
| "stack": "cpp", | ||
| "random": "cpp", | ||
| "__functional_03": "cpp", | ||
| "numeric": "cpp", | ||
| "__memory": "cpp" | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,121 +1,112 @@ | ||
| cmake_minimum_required(VERSION 3.16.3) | ||
|
|
||
| project(GMGPolar VERSION 1.0.0) | ||
| cmake_minimum_required(VERSION 3.18) | ||
| project(GMGPolarGPU LANGUAGES CXX CUDA) | ||
|
|
||
| option(GMGPOLAR_BUILD_TESTS "Build GMGPolar unit tests." ON) | ||
| option(GMGPOLAR_USE_MUMPS "Use MUMPS to compute matrix factorizations." OFF) | ||
| option(GMGPOLAR_USE_LIKWID "Use LIKWID to measure code (regions)." OFF) | ||
|
|
||
|
|
||
| set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) | ||
| set(CMAKE_POSITION_INDEPENDENT_CODE ON) | ||
|
|
||
| set(CMAKE_CXX_FLAGS "") | ||
| set(CMAKE_LINKER_FLAGS "") | ||
|
|
||
| add_subdirectory(src) | ||
|
|
||
| # code coverage analysis | ||
| # Note: this only works under linux and with make | ||
| # Ninja creates different directory names which do not work together with this script | ||
| # as STREQUAL is case-sensitive https://github.com/TriBITSPub/TriBITS/issues/131, also allow DEBUG as accepted input | ||
| option(GMGPOLAR_TEST_COVERAGE "Enable GCov coverage analysis (adds a 'coverage' target)" OFF) | ||
|
|
||
| if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") | ||
| if(GMGPOLAR_TEST_COVERAGE) | ||
| message(STATUS "Coverage enabled") | ||
| include(CodeCoverage) | ||
| append_coverage_compiler_flags() | ||
| setup_target_for_coverage_lcov( | ||
| NAME coverage | ||
| EXECUTABLE tests/gmgpolar_tests | ||
| EXCLUDE "${CMAKE_SOURCE_DIR}/tests*" "${CMAKE_SOURCE_DIR}/src/test_cases*" "${CMAKE_BINARY_DIR}/*" "/usr*" | ||
| ) | ||
| endif() | ||
| endif() | ||
|
|
||
|
|
||
| add_library(GMGPolar ${SOURCES_SRC}) | ||
|
|
||
| add_executable(gmgpolar_simulation ./src/main.cpp) | ||
|
|
||
| configure_file(${CMAKE_SOURCE_DIR}/include/config_internal.h.in ${CMAKE_SOURCE_DIR}/include/config_internal.h) | ||
|
|
||
| target_include_directories(gmgpolar_simulation PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/test_cases ) | ||
| target_include_directories(GMGPolar PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/test_cases ) | ||
|
|
||
| if(GMGPOLAR_USE_LIKWID) | ||
|
|
||
| find_package(LIKWID REQUIRED) | ||
|
|
||
| target_include_directories(GMGPolar PUBLIC ${LIKWID_INCLUDE_DIRS}) | ||
| target_link_libraries(GMGPolar PUBLIC ${LIKWID_LIBRARIES}) | ||
| target_compile_definitions(GMGPolar PUBLIC "-DLIKWID_PERFMON") | ||
|
|
||
| endif() | ||
|
|
||
|
|
||
|
|
||
| if(GMGPOLAR_USE_MUMPS) | ||
|
|
||
| set(INC_DIRS | ||
| /home/kueh_mj/.spack/rev.23.05/install/linux-rocky8-zen2/gcc-10.4.0/mumps-5.4.1-fftqkl/include | ||
| /sw/rev/23.05/linux-rocky8-zen2/gcc-10.4.0/metis-5.1.0-akhgsf/include | ||
| ) | ||
|
|
||
| set(LIB_DIRS | ||
| /home/kueh_mj/.spack/rev.23.05/install/linux-rocky8-zen2/gcc-10.4.0/mumps-5.4.1-fftqkl/lib | ||
| /sw/rev/23.05/linux-rocky8-zen2/gcc-10.4.0/metis-5.1.0-akhgsf/lib | ||
| ) | ||
|
|
||
| include_directories( | ||
| ${INC_DIRS} | ||
| ) | ||
|
|
||
| target_link_directories( | ||
| GMGPolar | ||
| PUBLIC | ||
| ${LIB_DIRS} | ||
| ) | ||
|
|
||
| set(LIBS | ||
| mpiseq | ||
| dmumps | ||
| mumps_common | ||
| metis | ||
| ) | ||
|
|
||
| target_link_libraries( | ||
| GMGPolar | ||
| PUBLIC | ||
| ${LIBS} | ||
| ) | ||
| endif() | ||
|
|
||
|
|
||
| find_package(OpenMP) | ||
|
|
||
| #currently works on GNU compiler | ||
| if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7)) | ||
|
|
||
| string(APPEND CMAKE_CXX_FLAGS " -O2 -Wall -MMD -MP -Wwrite-strings") | ||
| string(APPEND CMAKE_LINKER_FLAGS " -O2 -Wall -MMD -MP -Wwrite-strings") | ||
|
|
||
| if(OPENMP_FOUND) | ||
| string(APPEND CMAKE_CXX_FLAGS " -fopenmp") | ||
| string(APPEND CMAKE_LINKER_FLAGS " -fopenmp") | ||
|
|
||
| else() | ||
| message(FATAL_ERROR "OpenMP needed") | ||
| endif() | ||
| set(CMAKE_CXX_STANDARD 20) | ||
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
| set(CMAKE_CUDA_STANDARD 17) | ||
| set(CMAKE_CUDA_STANDARD_REQUIRED ON) | ||
| set(CMAKE_CUDA_ARCHITECTURES 70) | ||
| enable_language(CUDA) | ||
|
|
||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") | ||
| set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -pedantic -Wno-unused") | ||
| set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -mtune=generic") | ||
| set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fopenmp -arch=sm_70 -Wno-deprecated-declarations") | ||
| set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -O3") | ||
| set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -O0") | ||
|
|
||
| set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) | ||
| set(CMAKE_CUDA_RESOLVE_DEVICE_SYMBOLS ON) | ||
|
|
||
| # Find CUDA and OpenMP packages | ||
| find_package(CUDA REQUIRED) | ||
| find_package(OpenMP REQUIRED) | ||
|
|
||
| # OpenMP for host code (C++): | ||
| if(OpenMP_CXX_FOUND) | ||
| message(STATUS "OpenMP enabled for C++") | ||
| set(OpenMP_CXX_LIB OpenMP::OpenMP_CXX) | ||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") | ||
| else() | ||
| message(FATAL_ERROR "Please use GNU compiler or change CMakeLists manually") | ||
| message(WARNING "OpenMP not found for C++") | ||
| endif() | ||
|
|
||
|
|
||
| target_link_libraries(gmgpolar_simulation PRIVATE GMGPolar) | ||
|
|
||
|
|
||
| include(thirdparty/CMakeLists.txt) | ||
| add_subdirectory(tests) | ||
| # Mumps: Sparse Matrix Solver | ||
| set(MUMPS_PREFIX_PATH "~/spack/opt/spack/linux-debian11-ivybridge/gcc-10.2.1/mumps-5.6.2-hm3vnybofcspkmjb6xdhttmdt3uetvo4") | ||
| # set(MUMPS_PREFIX_PATH "~/spack/opt/spack/linux-almalinux9-x86_64/gcc-11.3.1/mumps-5.5.1-lrzoqcjlexwh3d5slabppc7epa2cnezu") | ||
| include_directories(${MUMPS_PREFIX_PATH}/include) | ||
| link_directories(${MUMPS_PREFIX_PATH}/lib) | ||
|
|
||
| # Metis: Matrix reordering for Mumps | ||
| set(METIS_PREFIX_PATH "~/spack/opt/spack/linux-debian11-ivybridge/gcc-10.2.1/metis-5.1.0-wkmpso5x6ytk7u7t5z7a5h4r4xtm35lv") | ||
| # set(METIS_PREFIX_PATH "~/spack/opt/spack/linux-almalinux9-x86_64/gcc-11.3.1/metis-5.1.0-naocq5j6bxwrw5srcjse2q3aje2q736h") | ||
| include_directories(${METIS_PREFIX_PATH}/include) | ||
| link_directories(${METIS_PREFIX_PATH}/lib) | ||
|
|
||
| set(MUMPS_LIBRARIES | ||
| mumps_common | ||
| smumps | ||
| dmumps | ||
| mpiseq | ||
| metis | ||
| ) | ||
|
|
||
| # 1. Create a library target for the PolarGrid module | ||
| file(GLOB_RECURSE POLAR_GRID_SOURCES "src/PolarGrid/*.cpp" "src/PolarGrid/*.cu") | ||
| add_library(PolarGrid STATIC ${POLAR_GRID_SOURCES}) | ||
| target_include_directories(PolarGrid PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src/PolarGrid) | ||
|
|
||
| # 2. Create a library target for the InputFunctions module | ||
| file(GLOB_RECURSE INPUT_FUNCTIONS_SOURCES "src/InputFunctions/*.cpp" "src/InputFunctions/*.cu") | ||
| add_library(InputFunctions STATIC ${INPUT_FUNCTIONS_SOURCES}) | ||
| target_include_directories(InputFunctions PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src/InputFunctions) | ||
|
|
||
| file(GLOB_RECURSE GMG_POLAR_SOURCES "src/GMGPolar/*.cpp" "src/GMGPolar/*.cu" "src/GMGPolar/MultigridMethods/*.cpp" "src/GMGPolar/MultigridMethods/*.cu") | ||
| file(GLOB_RECURSE LEVEL_SOURCES "src/Level/*.cpp" "src/Level/*.cu") | ||
| file(GLOB_RECURSE INTERPOLATION_SOURCES "src/Interpolation/*.cpp" "src/Interpolation/*.cu") | ||
| file(GLOB_RECURSE DIRECT_SOLVER_SOURCES "src/DirectSolver/*.cpp") | ||
| file(GLOB_RECURSE STENCIL_SOURCES "src/Stencil/*.cpp") | ||
|
|
||
| file(GLOB_RECURSE RESIDUAL_TAKE_CPU_SOURCES "src/Residual/ResidualTakeCPU/*.cpp") | ||
| file(GLOB_RECURSE RESIDUAL_TAKE_GPU_SOURCES "src/Residual/ResidualTakeGPU/*.cpp" "src/Residual/ResidualTakeGPU/*.cu") | ||
|
|
||
| file(GLOB_RECURSE SMOOTHER_TAKE_CPU_SOURCES "src/Smoother/SmootherTakeCPU/*.cpp") | ||
| file(GLOB_RECURSE SMOOTHER_TAKE_GPU_SOURCES "src/Smoother/SmootherTakeGPU/*.cpp" "src/Smoother/SmootherTakeGPU/*.cu") | ||
|
|
||
| file(GLOB_RECURSE EXTRAPOLATED_SMOOTHER_TAKE_CPU_SOURCES "src/ExtrapolatedSmoother/ExtrapolatedSmootherTakeCPU/*.cpp") | ||
| file(GLOB_RECURSE EXTRAPOLATED_SMOOTHER_TAKE_GPU_SOURCES "src/ExtrapolatedSmoother/ExtrapolatedSmootherTakeGPU/*.cpp" "src/ExtrapolatedSmoother/ExtrapolatedSmootherTakeGPU/*.cu") | ||
|
|
||
| # 4. Create the GMGPolarLib library and link PolarGrid and InputFunctions | ||
| add_library(GMGPolarLib STATIC | ||
| ${GMG_POLAR_SOURCES} | ||
| ${LEVEL_SOURCES} | ||
| ${INTERPOLATION_SOURCES} | ||
|
|
||
| ${DIRECT_SOLVER_SOURCES} | ||
| ${STENCIL_SOURCES} | ||
|
|
||
| ${RESIDUAL_TAKE_CPU_SOURCES} | ||
| ${RESIDUAL_TAKE_GPU_SOURCES} | ||
|
|
||
| ${SMOOTHER_TAKE_CPU_SOURCES} | ||
| ${SMOOTHER_TAKE_GPU_SOURCES} | ||
|
|
||
| ${EXTRAPOLATED_SMOOTHER_TAKE_CPU_SOURCES} | ||
| ${EXTRAPOLATED_SMOOTHER_TAKE_GPU_SOURCES} | ||
| ) | ||
| # Link PolarGrid and InputFunctions to GMGPolarLib | ||
| target_link_libraries(GMGPolarLib PUBLIC PolarGrid InputFunctions ${MUMPS_LIBRARIES} ${OpenMP_CXX_LIB} cuda cudart cudadevrt cusparse cusolver) | ||
|
|
||
| set(MAIN_SOURCE "src/main.cpp") | ||
| add_executable(gmgpolar ${MAIN_SOURCE}) | ||
|
|
||
| target_link_libraries(gmgpolar PRIVATE GMGPolarLib) | ||
|
|
||
| if(GMGPOLAR_BUILD_TESTS) | ||
| enable_testing() | ||
| add_subdirectory(third-party) | ||
| add_subdirectory(tests) | ||
| endif() | ||
|
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pls remove