From 11cd9046080f7cb3e18c02e45533dec07a4ac0e2 Mon Sep 17 00:00:00 2001 From: GrahamZen <931252924@qq.com> Date: Thu, 18 Dec 2025 14:11:52 -0600 Subject: [PATCH 01/20] rename --- src/collision/broadphase.cu | 2 +- .../distance/{distance_type.cu => distance_type.cuh} | 4 +++- src/simulation/distance/distance_type.h | 4 ++-- src/simulation/distance/{edge_edge.cu => edge_edge.cuh} | 8 +++++--- src/simulation/distance/{line_line.cu => line_line.cuh} | 0 src/simulation/distance/{point_line.cu => point_line.cuh} | 2 ++ .../distance/{point_plane.cu => point_plane.cuh} | 2 ++ .../distance/{point_point.cu => point_point.cuh} | 2 ++ .../distance/{point_triangle.cu => point_triangle.cuh} | 8 +++++--- src/simulation/energy/barrier.cu | 2 +- src/simulation/solver/linear/pcgJacobi.cu | 4 +--- tests/test_distance.cpp | 2 +- 12 files changed, 25 insertions(+), 15 deletions(-) rename src/simulation/distance/{distance_type.cu => distance_type.cuh} (99%) rename src/simulation/distance/{edge_edge.cu => edge_edge.cuh} (98%) rename src/simulation/distance/{line_line.cu => line_line.cuh} (100%) rename src/simulation/distance/{point_line.cu => point_line.cuh} (99%) rename src/simulation/distance/{point_plane.cu => point_plane.cuh} (99%) rename src/simulation/distance/{point_point.cu => point_point.cuh} (99%) rename src/simulation/distance/{point_triangle.cu => point_triangle.cuh} (98%) diff --git a/src/collision/broadphase.cu b/src/collision/broadphase.cu index 2d64e67..72a8dbd 100644 --- a/src/collision/broadphase.cu +++ b/src/collision/broadphase.cu @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/simulation/distance/distance_type.cu b/src/simulation/distance/distance_type.cuh similarity index 99% rename from src/simulation/distance/distance_type.cu rename to src/simulation/distance/distance_type.cuh index c5168a6..04bf971 100644 --- a/src/simulation/distance/distance_type.cu +++ b/src/simulation/distance/distance_type.cuh @@ -4,6 
+4,7 @@ using namespace ipc; +#ifdef __CUDACC__ template __global__ void GetDistanceType(const glm::tvec3* Xs, Query* queries, int numQueries) { int qIdx = blockIdx.x * blockDim.x + threadIdx.x; @@ -37,6 +38,7 @@ __global__ void ComputeDistance(const glm::tvec3* Xs, Query* queries, in template __global__ void ComputeDistance(const glm::tvec3* Xs, Query* queries, int numQueries); template __global__ void ComputeDistance(const glm::tvec3* Xs, Query* queries, int numQueries); +#endif /// @brief Solve the least square problem: min ||A * x - b||^2 /// @note A = [t1 - t0, glm::cross(t1 - t0, normal)], b = p - t0 @@ -261,4 +263,4 @@ template __host__ __device__ DistanceType edge_edge_distance_type( const glm::tvec3& ea0, const glm::tvec3& ea1, const glm::tvec3& eb0, - const glm::tvec3& eb1); \ No newline at end of file + const glm::tvec3& eb1); diff --git a/src/simulation/distance/distance_type.h b/src/simulation/distance/distance_type.h index 5d09145..bafc6e7 100644 --- a/src/simulation/distance/distance_type.h +++ b/src/simulation/distance/distance_type.h @@ -1,7 +1,7 @@ #pragma once #include -#include -#include +#include +#include #include template diff --git a/src/simulation/distance/edge_edge.cu b/src/simulation/distance/edge_edge.cuh similarity index 98% rename from src/simulation/distance/edge_edge.cu rename to src/simulation/distance/edge_edge.cuh index 93d02d4..29726b8 100644 --- a/src/simulation/distance/edge_edge.cu +++ b/src/simulation/distance/edge_edge.cuh @@ -1,8 +1,10 @@ +#pragma once + #include "edge_edge.h" -#include -#include -#include +#include +#include +#include namespace ipc { diff --git a/src/simulation/distance/line_line.cu b/src/simulation/distance/line_line.cuh similarity index 100% rename from src/simulation/distance/line_line.cu rename to src/simulation/distance/line_line.cuh diff --git a/src/simulation/distance/point_line.cu b/src/simulation/distance/point_line.cuh similarity index 99% rename from src/simulation/distance/point_line.cu rename 
to src/simulation/distance/point_line.cuh index 22de769..9cea97d 100644 --- a/src/simulation/distance/point_line.cu +++ b/src/simulation/distance/point_line.cuh @@ -1,3 +1,5 @@ +#pragma once + #include "point_line.h" #include diff --git a/src/simulation/distance/point_plane.cu b/src/simulation/distance/point_plane.cuh similarity index 99% rename from src/simulation/distance/point_plane.cu rename to src/simulation/distance/point_plane.cuh index d6d2377..f757d91 100644 --- a/src/simulation/distance/point_plane.cu +++ b/src/simulation/distance/point_plane.cuh @@ -1,3 +1,5 @@ +#pragma once + #include "point_plane.h" #include diff --git a/src/simulation/distance/point_point.cu b/src/simulation/distance/point_point.cuh similarity index 99% rename from src/simulation/distance/point_point.cu rename to src/simulation/distance/point_point.cuh index 758ee9e..fc53af2 100644 --- a/src/simulation/distance/point_point.cu +++ b/src/simulation/distance/point_point.cuh @@ -1,3 +1,5 @@ +#pragma once + #include "point_point.h" #include #include diff --git a/src/simulation/distance/point_triangle.cu b/src/simulation/distance/point_triangle.cuh similarity index 98% rename from src/simulation/distance/point_triangle.cu rename to src/simulation/distance/point_triangle.cuh index 2d04d7c..825675a 100644 --- a/src/simulation/distance/point_triangle.cu +++ b/src/simulation/distance/point_triangle.cuh @@ -1,8 +1,10 @@ +#pragma once + #include "point_triangle.h" -#include -#include -#include +#include +#include +#include #include namespace ipc { diff --git a/src/simulation/energy/barrier.cu b/src/simulation/energy/barrier.cu index cec928f..49ea48e 100644 --- a/src/simulation/energy/barrier.cu +++ b/src/simulation/energy/barrier.cu @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/simulation/solver/linear/pcgJacobi.cu b/src/simulation/solver/linear/pcgJacobi.cu index ae93056..28b6a6e 100644 --- a/src/simulation/solver/linear/pcgJacobi.cu +++ 
b/src/simulation/solver/linear/pcgJacobi.cu @@ -18,7 +18,7 @@ __global__ void ExtractInverseDiagonalKernel(int N, const T* A, const int* rowPt } // Avoid division by zero - if (abs(diagVal) < 1e-12) diagVal = 1.0; + if (abs(diagVal) < 1e-9) diagVal = 1.0; invDiag[idx] = 1.0 / diagVal; } } @@ -96,10 +96,8 @@ void PCGJacobiSolver::Solve(int N, T* d_b, T* d_x, T* A, int nz, int* rowIdx, if (dvec_x) CHECK_CUSPARSE(cusparseDestroyDnVec(dvec_x)); if (dvec_b) CHECK_CUSPARSE(cusparseDestroyDnVec(dvec_b)); - if (dvec_r) CHECK_CUSPARSE(cusparseDestroyDnVec(dvec_r)); CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_x, N, d_x, dType)); CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_b, N, d_b, dType)); - CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_r, N, d_r, dType)); size_t bufferSize = 0; CHECK_CUSPARSE(cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, d_matA, diff --git a/tests/test_distance.cpp b/tests/test_distance.cpp index 029b6f0..245fe0d 100644 --- a/tests/test_distance.cpp +++ b/tests/test_distance.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include From 8e4d1649424043ffa554e5f135ae9d099f0dfd3b Mon Sep 17 00:00:00 2001 From: GrahamZen <931252924@qq.com> Date: Fri, 19 Dec 2025 01:11:14 -0600 Subject: [PATCH 02/20] fix linux compilation --- CMakeLists.txt | 31 +++++++++++++------- src/collision/aabb.h | 11 +++++-- src/collision/broadphase.cu | 20 +++++++++++-- src/collision/bvh.cu | 8 ++--- src/collision/ccd.cu | 7 +++-- src/collision/intersections.cu | 10 ++++++- src/collision/narrowphase.cu | 16 +++++----- src/context.cpp | 2 +- src/openglcontext/preview.cpp | 2 +- src/openglcontext/shaderprogram.cpp | 1 + src/simulation/collisionUtil.cu | 5 +--- src/simulation/distance/distance_type.cuh | 7 +++++ src/simulation/energy/barrier.cu | 5 ++-- src/simulation/energy/corotated.cu | 5 ++-- src/simulation/energy/implicitBarrier.cu | 4 +-- src/simulation/energy/inertia.cu | 4 +-- src/simulation/energy/neohookean08.cu | 5 ++-- 
src/simulation/simulationContext.cu | 2 ++ src/simulation/solver/femSolver.cu | 2 +- src/simulation/solver/linear/cg.h | 7 +++++ src/simulation/solver/linear/cholesky.cu | 7 +++-- src/simulation/solver/linear/cholesky.h | 8 +++++ src/simulation/solver/linear/choleskyImmed.h | 6 ++++ src/simulation/solver/linear/jacobi.h | 6 ++++ src/simulation/solver/linear/linear.h | 1 + src/simulation/solver/linear/pcgJacobi.h | 7 +++++ src/simulation/solver/solverUtil.cuh | 1 + src/simulation/utilities.cuh | 14 +++++++++ src/utilities.cpp | 3 +- tests/test_distance.cpp | 16 +++++----- tests/test_solveCubic.cpp | 12 ++++---- 31 files changed, 169 insertions(+), 66 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ee8d1e..da41093 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,14 +36,14 @@ find_package(CUDAToolkit REQUIRED) find_package(OpenGL REQUIRED) +set(EXTERNAL "${PROJECT_SOURCE_DIR}/external") + if(UNIX) find_package(glfw3 REQUIRED) find_package(GLEW REQUIRED) - set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY}) + set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY} ${OPENGL_glx_LIBRARY}) else() set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") - set(EXTERNAL "external") - set(GLFW_ROOT_DIR ${EXTERNAL}) set(GLFW_USE_STATIC_LIBS ON) find_package(GLFW REQUIRED) @@ -57,7 +57,8 @@ else() set(LIBRARIES ${GLEW_LIBRARY} ${GLFW_LIBRARY} ${OPENGL_LIBRARY}) endif() -set(GLM_ROOT_DIR "external") +set(GLM_ROOT_DIR "${PROJECT_SOURCE_DIR}/external") +set(GLM_INCLUDE_DIR "${EXTERNAL}/include" CACHE PATH "GLM include dir" FORCE) find_package(GLM REQUIRED) include_directories(${GLM_INCLUDE_DIRS}) @@ -106,6 +107,11 @@ include_directories( ${GLEW_INCLUDE_DIR} ${GLFW_INCLUDE_DIR}) +if(UNIX) + list(APPEND CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include") + list(REMOVE_DUPLICATES CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES) +endif() + ######################################## # Catch2 Tests ######################################## @@ -114,30 +120,35 
@@ enable_testing() add_subdirectory(tests) ######################################## add_executable(${CMAKE_PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES} ${IMGUI_SOURCES}) -target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${EXTERNAL}/ImGui" "${EXTERNAL}/svd3_cuda" ${eigen_SOURCE_DIR}) +target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${EXTERNAL}/ImGui" "${EXTERNAL}/svd3_cuda" "${EXTERNAL}/include") target_link_libraries(${CMAKE_PROJECT_NAME} ${LIBRARIES} CUDA::cudart CUDA::cusolver OpenMP::OpenMP_CXX spdlog::spdlog +Eigen3::Eigen #stream_compaction # TODO: uncomment if using your stream compaction ) set_target_properties(${CMAKE_PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON ) +set(CUDA_HOST_WARNING_SUPPRESSIONS "") +if(MSVC) + list(APPEND CUDA_HOST_WARNING_SUPPRESSIONS + -Xcompiler=/wd4819 + -Xcompiler=/wd4068 + -Xcompiler=/wd4661) +endif() + target_compile_options(${CMAKE_PROJECT_NAME} PRIVATE $<$: -Xptxas=-v --extended-lambda --expt-relaxed-constexpr -lineinfo - - -Xcompiler=/wd4819 - -Xcompiler=/wd4068 - -Xcompiler=/wd4661 - + ${CUDA_HOST_WARNING_SUPPRESSIONS} -Xcudafe=--display_error_number -Xcudafe=--diag_suppress=20012 -Xcudafe=--diag_suppress=20011 diff --git a/src/collision/aabb.h b/src/collision/aabb.h index af1afc0..c2364be 100644 --- a/src/collision/aabb.h +++ b/src/collision/aabb.h @@ -58,7 +58,12 @@ enum class QueryType { VF, EE }; - +struct Vec3d { + double x, y, z; + Vec3d operator-() const { + return Vec3d{-x, -y, -z}; + } +}; class Query { public: QueryType type = QueryType::UNKNOWN; @@ -69,5 +74,5 @@ class Query { indexType v3; double d; double toi = 0.f; - glm::dvec3 normal = glm::dvec3(0.f); -}; + Vec3d normal = Vec3d{0.0, 0.0, 0.0}; +}; \ No newline at end of file diff --git a/src/collision/broadphase.cu b/src/collision/broadphase.cu index 72a8dbd..33d5dbe 100644 --- a/src/collision/broadphase.cu +++ b/src/collision/broadphase.cu @@ -11,6 +11,20 @@ #include #include +namespace { 
+__device__ inline size_t atomicAddSizeT(size_t* address, size_t val) { + if constexpr (sizeof(size_t) == sizeof(unsigned long long)) { + return static_cast(atomicAdd( + reinterpret_cast(address), + static_cast(val))); + } else { + return static_cast(atomicAdd( + reinterpret_cast(address), + static_cast(val))); + } +} +} // namespace + //input the aabb box of a Tetrahedron //generate a 30-bit morton code template @@ -341,7 +355,7 @@ __global__ void traverseTree(int numTris, const BVHNode* nodes, const in // 1 faces * 3 verts + 3 edges * 3 edges if ((!ignoreSelfCollision || triFathers[myNode.TriangleIndex] != triFathers[leftChild.TriangleIndex]) && myNode.TriangleIndex != leftChild.TriangleIndex && !isAdjacentTriangle(tris[myNode.TriangleIndex * 3 + 0], tris[myNode.TriangleIndex * 3 + 1], tris[myNode.TriangleIndex * 3 + 2], tris[leftChild.TriangleIndex * 3 + 0], tris[leftChild.TriangleIndex * 3 + 1], tris[leftChild.TriangleIndex * 3 + 2])) { - int qIdx = atomicAdd(queryCount, 12); + size_t qIdx = atomicAddSizeT(queryCount, static_cast(12)); if (qIdx + 12 < maxNumQueries) { Query* qBegin = &queries[qIdx]; fillQuery(qBegin, myNode.TriangleIndex, leftChild.TriangleIndex, tris); @@ -365,7 +379,7 @@ __global__ void traverseTree(int numTris, const BVHNode* nodes, const in { if ((!ignoreSelfCollision || triFathers[myNode.TriangleIndex] != triFathers[rightChild.TriangleIndex]) && myNode.TriangleIndex != rightChild.TriangleIndex && !isAdjacentTriangle(tris[myNode.TriangleIndex * 3 + 0], tris[myNode.TriangleIndex * 3 + 1], tris[myNode.TriangleIndex * 3 + 2], tris[rightChild.TriangleIndex * 3 + 0], tris[rightChild.TriangleIndex * 3 + 1], tris[rightChild.TriangleIndex * 3 + 2])) { - int qIdx = atomicAdd(queryCount, 12); + size_t qIdx = atomicAddSizeT(queryCount, static_cast(12)); if (qIdx + 12 < maxNumQueries) { Query* qBegin = &queries[qIdx]; fillQuery(qBegin, myNode.TriangleIndex, rightChild.TriangleIndex, tris); @@ -401,7 +415,7 @@ bool 
CollisionDetection::DetectCollisionCandidates(const BVHNode overflowHappened = true; maxNumQueries *= 2; std::cerr << "Query buffer overflow, resizing to " << maxNumQueries << std::endl; - if (maxNumQueries > 1 << 31) { + if (maxNumQueries > (static_cast(1) << 31)) { std::cerr << "Number of queries exceeds 2^31, aborting" << std::endl; exit(1); return false; diff --git a/src/collision/bvh.cu b/src/collision/bvh.cu index 869853a..48113c5 100644 --- a/src/collision/bvh.cu +++ b/src/collision/bvh.cu @@ -24,7 +24,7 @@ __device__ void buildBBox(BVHNode& curr, const BVHNode& left, co } template -__global__ void buildBBoxesSerial(int leafCount, BVHNode* nodes, BVH::ReadyFlagType* ready) { +__global__ void buildBBoxesSerial(int leafCount, BVHNode* nodes, typename BVH::ReadyFlagType* ready) { int ind = blockIdx.x * blockDim.x + threadIdx.x; if (ind >= leafCount - 1)return; @@ -41,7 +41,7 @@ __global__ void buildBBoxesSerial(int leafCount, BVHNode* nodes, BVH -__global__ void buildBBoxesCG(int leafCount, BVHNode* nodes, BVH::ReadyFlagType* ready) { +__global__ void buildBBoxesCG(int leafCount, BVHNode* nodes, typename BVH::ReadyFlagType* ready) { int ind = blockIdx.x * blockDim.x + threadIdx.x; cg::grid_group grid = cg::this_grid(); @@ -62,7 +62,7 @@ __global__ void buildBBoxesCG(int leafCount, BVHNode* nodes, BVH } template -__global__ void buildBBoxesAtomic(int leafCount, BVHNode* nodes, BVH::ReadyFlagType* ready) { +__global__ void buildBBoxesAtomic(int leafCount, BVHNode* nodes, typename BVH::ReadyFlagType* ready) { int ind = blockIdx.x * blockDim.x + threadIdx.x; if (ind >= leafCount - 1) return; @@ -136,7 +136,7 @@ void BVH::BuildBBoxes(BuildType buildType) { } template -BVH::BVH(const int _threadsPerBlock) : +BVH::BVH(const int _threadsPerBlock) : threadsPerBlock(_threadsPerBlock) {} template diff --git a/src/collision/ccd.cu b/src/collision/ccd.cu index 6ea6231..1163576 100644 --- a/src/collision/ccd.cu +++ b/src/collision/ccd.cu @@ -210,9 +210,10 @@ 
SingleQueryDisplay& CollisionDetection::GetSQDisplay(int i, const glm::t cudaMemcpy(&v0Pos, pos + 1, sizeof(glm::vec3), cudaMemcpyDeviceToHost); cudaMemcpy(&v1Pos, pos + 2, sizeof(glm::vec3), cudaMemcpyDeviceToHost); - cudaMemcpy(&pos[4], &((v0Pos + v1Pos) / 2.f), sizeof(glm::vec3), cudaMemcpyHostToDevice); + glm::vec3 midPoint = (v0Pos + v1Pos) / 2.f; + cudaMemcpy(&pos[4], &midPoint, sizeof(glm::vec3), cudaMemcpyHostToDevice); // the third line point from the middle of v0 and v1 towards the normal direction - glm::vec3 normalPoint = (v0Pos + v1Pos) / 2.f + glm::vec3(q.normal) * 10.f; + glm::vec3 normalPoint = (v0Pos + v1Pos) / 2.f + glm::vec3(q.normal.x, q.normal.y, q.normal.z) * 10.f; cudaMemcpy(&pos[5], &normalPoint, sizeof(glm::vec3), cudaMemcpyHostToDevice); mSqDisplay.UnMapDevicePtr(&pos, nullptr, nullptr); } @@ -228,7 +229,7 @@ SingleQueryDisplay& CollisionDetection::GetSQDisplay(int i, const glm::t thrust::transform(dev_ptr_X + q.v3, dev_ptr_X + q.v3 + 1, dev_triPos + 2, HighPtoFloatP()); glm::vec3 v0Pos; cudaMemcpy(&v0Pos, vertPos, sizeof(glm::vec3), cudaMemcpyDeviceToHost); - glm::vec3 normalPoint = v0Pos + glm::vec3(q.normal) * 10.f; + glm::vec3 normalPoint = v0Pos + glm::vec3(q.normal.x, q.normal.y, q.normal.z) * 10.f; cudaMemcpy(&pos[0], &v0Pos, sizeof(glm::vec3), cudaMemcpyHostToDevice); cudaMemcpy(&pos[1], &normalPoint, sizeof(glm::vec3), cudaMemcpyHostToDevice); mSqDisplay.UnMapDevicePtr(&pos, &vertPos, &triPos); diff --git a/src/collision/intersections.cu b/src/collision/intersections.cu index 5cadcd3..8e76724 100644 --- a/src/collision/intersections.cu +++ b/src/collision/intersections.cu @@ -8,6 +8,7 @@ #include #include #include +#include /** * Handy-dandy hash function that provides seeds for random number generation. 
@@ -33,6 +34,12 @@ __host__ __device__ glm::tvec3 multiplyMV(glm::tmat4x4 m, glm::t return glm::tvec3(m * v); } +template +__host__ __device__ int solveQuadratic(T a, T b, T c, T* x); + +template +__host__ __device__ T newtonsMethod(T a, T b, T c, T d, T x0, int init_dir); + template __host__ __device__ int solveCubic(T a, T b, T c, T d, T* x) { T xc[2]; @@ -353,7 +360,8 @@ __host__ __device__ Intersection rayTriangleIntersection(Ray r, glm::vec3 v0, gl template indexType raySimCtxIntersection(Ray r, int numTris, const indexType* Tri, const glm::tvec3* X) { - Intersection intersection = thrust::transform_reduce(thrust::counting_iterator(0), + Intersection intersection = thrust::transform_reduce(thrust::device, + thrust::counting_iterator(0), thrust::counting_iterator(numTris), [Tri, X, r]__host__ __device__(indexType idx) { glm::tvec3 v0 = X[Tri[3 * idx]]; diff --git a/src/collision/narrowphase.cu b/src/collision/narrowphase.cu index 7b477f9..e089ed8 100644 --- a/src/collision/narrowphase.cu +++ b/src/collision/narrowphase.cu @@ -67,7 +67,7 @@ __global__ void detectCollisionNarrow(int numQueries, Query* queries, const glm: glm::tvec3 normal; Query& q = queries[index]; q.toi = ccdCollisionTest(q, Xs, XTildes, normal); - q.normal = normal; + q.normal = Vec3d{normal.x, normal.y, normal.z}; } } @@ -92,8 +92,8 @@ __global__ void storeTi(int numQueries, const Query* queries, Scalar* tI, glm::v tI[q.v1] = 0.5f; //tI[q.v2] = 0.5f; //tI[q.v3] = 0.5f; - nors[q.v1] = q.normal; - nors[q.v0] = q.normal; + nors[q.v1] = glm::vec3(q.normal.x, q.normal.y, q.normal.z); + nors[q.v0] = glm::vec3(q.normal.x, q.normal.y, q.normal.z); } } if (q.type == QueryType::VF) @@ -104,17 +104,17 @@ __global__ void storeTi(int numQueries, const Query* queries, Scalar* tI, glm::v tI[q.v1] = 0.5f; tI[q.v2] = 0.5f; tI[q.v3] = 0.5f; - nors[q.v0] = q.normal; - nors[q.v1] = -q.normal; - nors[q.v2] = -q.normal; - nors[q.v3] = -q.normal; + nors[q.v0] = glm::vec3(q.normal.x, q.normal.y, q.normal.z); + 
nors[q.v1] = -glm::vec3(q.normal.x, q.normal.y, q.normal.z); + nors[q.v2] = -glm::vec3(q.normal.x, q.normal.y, q.normal.z); + nors[q.v3] = -glm::vec3(q.normal.x, q.normal.y, q.normal.z); } } /* if (q.type == QueryType::VF) { tI[q.v0] = q.toi; - nors[q.v0] = q.normal; + nors[q.v0] = glm::vec3(q.normal.x, q.normal.y, q.normal.z); }*/ } } diff --git a/src/context.cpp b/src/context.cpp index 1b61b17..7fc3cd3 100644 --- a/src/context.cpp +++ b/src/context.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp index a3ca129..5f662e4 100644 --- a/src/openglcontext/preview.cpp +++ b/src/openglcontext/preview.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/openglcontext/shaderprogram.cpp b/src/openglcontext/shaderprogram.cpp index fbe9bc7..e5782e0 100644 --- a/src/openglcontext/shaderprogram.cpp +++ b/src/openglcontext/shaderprogram.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace fs = std::filesystem; diff --git a/src/simulation/collisionUtil.cu b/src/simulation/collisionUtil.cu index 614d5d2..4614e25 100644 --- a/src/simulation/collisionUtil.cu +++ b/src/simulation/collisionUtil.cu @@ -27,7 +27,6 @@ template __global__ void IPCCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, Scalar* tI, glm::vec3* normals, float muT, float muN, int numVerts) { int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx >= numVerts) return; - Scalar interval = glm::length(XTilde - X); if (tI[idx] < 1.0f) { @@ -51,7 +50,6 @@ template __global__ void CCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, Scalar* tI, glm::vec3* normals, float muT, float muN, int numVerts, Scalar dt) { int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx >= numVerts) return; - Scalar interval = glm::length(XTilde - X); if (tI[idx] < 1.0f) { @@ -74,5 +72,4 @@ __global__ void CCDKernel(glm::tvec3* X, 
glm::tvec3* XTilde, glm template __global__ void IPCCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, float* tI, glm::vec3* normals, float muT, float muN, int numVerts); template __global__ void IPCCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, double* tI, glm::vec3* normals, float muT, float muN, int numVerts); template __global__ void CCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, float* tI, glm::vec3* normals, float muT, float muN, int numVerts, float dt); -template __global__ void CCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, double* tI, glm::vec3* normals, float muT, float muN, int numVerts, double dt); - +template __global__ void CCDKernel(glm::tvec3* X, glm::tvec3* XTilde, glm::tvec3* V, double* tI, glm::vec3* normals, float muT, float muN, int numVerts, double dt); \ No newline at end of file diff --git a/src/simulation/distance/distance_type.cuh b/src/simulation/distance/distance_type.cuh index 04bf971..d85c324 100644 --- a/src/simulation/distance/distance_type.cuh +++ b/src/simulation/distance/distance_type.cuh @@ -4,6 +4,13 @@ using namespace ipc; +template +__host__ __device__ DistanceType edge_edge_parallel_distance_type( + const glm::tvec3& ea0, + const glm::tvec3& ea1, + const glm::tvec3& eb0, + const glm::tvec3& eb1); + #ifdef __CUDACC__ template __global__ void GetDistanceType(const glm::tvec3* Xs, Query* queries, int numQueries) { diff --git a/src/simulation/energy/barrier.cu b/src/simulation/energy/barrier.cu index 49ea48e..2c6bb99 100644 --- a/src/simulation/energy/barrier.cu +++ b/src/simulation/energy/barrier.cu @@ -6,6 +6,7 @@ #include #include + namespace Barrier { template __forceinline__ __host__ __device__ Scalar barrierSquareFunc(Scalar d_sqr, Scalar dhat, Scalar kappa) { @@ -207,7 +208,7 @@ void BarrierEnergy::Hessian(const SolverData& solverData, const if (numQueries == 0)return; int threadsPerBlock = 256; int numBlocks = (numQueries + threadsPerBlock - 1) / threadsPerBlock; - 
Barrier::hessianKern << > > (hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef); + Barrier::hessianKern << > > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef); } template @@ -217,7 +218,7 @@ void BarrierEnergy::GradientHessian(Scalar* grad, const SolverData> > (grad, hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef); + Barrier::gradHessianKern << > > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef); } template diff --git a/src/simulation/energy/corotated.cu b/src/simulation/energy/corotated.cu index 7b72962..debab70 100644 --- a/src/simulation/energy/corotated.cu +++ b/src/simulation/energy/corotated.cu @@ -5,6 +5,7 @@ #include #include + namespace Corotated { template __global__ void GradientKern(Scalar* grad, const glm::tvec3* X, const indexType* Tet, const glm::tmat3x3* DmInvs, @@ -231,7 +232,7 @@ template void CorotatedEnergy::Hessian(const SolverData& solverData, const SolverParams& solverParams, Scalar coef) const { int threadsPerBlock = 256; int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock; - Corotated::HessianKern << > > (hessianVal, hessianRowIdx, hessianColIdx, + Corotated::HessianKern << > > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef); } @@ -239,7 +240,7 @@ template void CorotatedEnergy::GradientHessian(Scalar* grad, const SolverData& solverData, const SolverParams& solverParams, Scalar coef) const { int threadsPerBlock = 256; int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock; - 
Corotated::GradHessianKern << > > (grad, hessianVal, hessianRowIdx, hessianColIdx, + Corotated::GradHessianKern << > > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef); } diff --git a/src/simulation/energy/implicitBarrier.cu b/src/simulation/energy/implicitBarrier.cu index f816c7e..f786c7d 100644 --- a/src/simulation/energy/implicitBarrier.cu +++ b/src/simulation/energy/implicitBarrier.cu @@ -327,7 +327,7 @@ void ImplicitBarrierEnergy::Hessian(const SolverData& solverData { int threadsPerBlock = 256; int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock; - ImplicitBarrier::hessianKern << > > (hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.numVerts, + ImplicitBarrier::hessianKern << > > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.numVerts, solverData.pFixedBodies->dev_planes, solverData.pFixedBodies->numPlanes, solverData.pFixedBodies->dev_cylinders, solverData.pFixedBodies->numCylinders, solverData.pFixedBodies->dev_spheres, solverData.pFixedBodies->numSpheres, solverParams.dhat, solverData.contact_area, coef); } @@ -336,7 +336,7 @@ void ImplicitBarrierEnergy::GradientHessian(Scalar* grad, const SolverDa { int threadsPerBlock = 256; int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock; - ImplicitBarrier::gradHessianKern << > > (grad, hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.numVerts, + ImplicitBarrier::gradHessianKern << > > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.numVerts, solverData.pFixedBodies->dev_planes, solverData.pFixedBodies->numPlanes, solverData.pFixedBodies->dev_cylinders, solverData.pFixedBodies->numCylinders, solverData.pFixedBodies->dev_spheres, solverData.pFixedBodies->numSpheres, solverParams.dhat, solverData.contact_area, 
coef); } diff --git a/src/simulation/energy/inertia.cu b/src/simulation/energy/inertia.cu index d7c4e7c..a11aec1 100644 --- a/src/simulation/energy/inertia.cu +++ b/src/simulation/energy/inertia.cu @@ -98,7 +98,7 @@ void InertiaEnergy::Hessian(const SolverData& solverData, const { int threadsPerBlock = 256; int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock; - Inertia::hessianKern << > > (solverData.mass, hessianVal, hessianRowIdx, hessianColIdx, solverData.numVerts); + Inertia::hessianKern << > > (solverData.mass, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.numVerts); } template @@ -106,7 +106,7 @@ void InertiaEnergy::GradientHessian(Scalar* grad, const SolverData> > (solverData.X, solverData.XTilde, solverData.mass, grad, hessianVal, hessianRowIdx, hessianColIdx, solverData.numVerts); + Inertia::gradHessianKern << > > (solverData.X, solverData.XTilde, solverData.mass, grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.numVerts); } template class InertiaEnergy; diff --git a/src/simulation/energy/neohookean08.cu b/src/simulation/energy/neohookean08.cu index 5584fed..39ee896 100644 --- a/src/simulation/energy/neohookean08.cu +++ b/src/simulation/energy/neohookean08.cu @@ -5,6 +5,7 @@ #include #include + namespace NeoHookean08 { template __global__ void GradientKern(Scalar* grad, const glm::tvec3* X, const indexType* Tet, const glm::tmat3x3* DmInvs, @@ -218,7 +219,7 @@ template void NeoHookean08Energy::Hessian(const SolverData& solverData, const SolverParams& solverParams, Scalar coef) const { int threadsPerBlock = 256; int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock; - NeoHookean08::HessianKern << > > (hessianVal, hessianRowIdx, hessianColIdx, + NeoHookean08::HessianKern << > > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef); 
} @@ -227,7 +228,7 @@ void NeoHookean08Energy::GradientHessian(Scalar* grad, const SolverData< { int threadsPerBlock = 256; int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock; - NeoHookean08::GradHessianKern << > > (grad, hessianVal, hessianRowIdx, hessianColIdx, + NeoHookean08::GradHessianKern << > > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef); } diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu index 8ab8e7d..ec4b85b 100644 --- a/src/simulation/simulationContext.cu +++ b/src/simulation/simulationContext.cu @@ -168,6 +168,8 @@ bool SimulationCUDAContext::RayIntersect(const Ray& ray, glm::vec3* pos, bool up hit_v = ms.select_v; } else { + if (impl.data.numTris <= 0 || impl.data.Tri == nullptr || impl.data.X == nullptr) + return false; hit_v = raySimCtxIntersection(ray, impl.data.numTris, impl.data.Tri, impl.data.X); if (updateV) ms.select_v = hit_v; } diff --git a/src/simulation/solver/femSolver.cu b/src/simulation/solver/femSolver.cu index 28097d2..0872d1d 100644 --- a/src/simulation/solver/femSolver.cu +++ b/src/simulation/solver/femSolver.cu @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/simulation/solver/linear/cg.h b/src/simulation/solver/linear/cg.h index d51b85a..dc4aca1 100644 --- a/src/simulation/solver/linear/cg.h +++ b/src/simulation/solver/linear/cg.h @@ -11,6 +11,13 @@ class CGSolver : public LinearSolver { virtual ~CGSolver() override; virtual void Solve(int N, T* d_b, T* d_x, T* d_A, int nz, int* d_rowIdx, int* d_colIdx, T* d_guess = nullptr) override; private: + using LinearSolver::dType; + using LinearSolver::d_A; + using LinearSolver::d_rowIdx; + using LinearSolver::d_colIdx; + using LinearSolver::d_rowPtrA; + using LinearSolver::capacity; + cublasHandle_t cubHandle = nullptr; cusparseHandle_t cusHandle = 
nullptr; csric02Info_t ic02info = nullptr; diff --git a/src/simulation/solver/linear/cholesky.cu b/src/simulation/solver/linear/cholesky.cu index c463848..aea4272 100644 --- a/src/simulation/solver/linear/cholesky.cu +++ b/src/simulation/solver/linear/cholesky.cu @@ -2,6 +2,7 @@ #include #include + template __global__ void FillMatrixA(int* AIdx, T* AVal, T* d_A, int n, int ASize) { int idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -207,5 +208,7 @@ void CholeskySpLinearSolver::Solve(int N, T* d_b, T* d_x, T* d_A, int nz, int permuteVectorInv << > > (dev_x_permuted, d_x, d_p, N); } -template CholeskySpLinearSolver; -template CholeskySpLinearSolver; \ No newline at end of file +template class CholeskySpLinearSolver; +template class CholeskySpLinearSolver; +template class CholeskyDnLinearSolver; +template class CholeskyDnLinearSolver; \ No newline at end of file diff --git a/src/simulation/solver/linear/cholesky.h b/src/simulation/solver/linear/cholesky.h index 790ed64..97fc2cf 100644 --- a/src/simulation/solver/linear/cholesky.h +++ b/src/simulation/solver/linear/cholesky.h @@ -14,6 +14,12 @@ class CholeskySpLinearSolver : public LinearSolver { virtual ~CholeskySpLinearSolver() override; virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override; private: + using LinearSolver::d_A; + using LinearSolver::d_rowIdx; + using LinearSolver::d_colIdx; + using LinearSolver::d_rowPtrA; + using LinearSolver::capacity; + void ComputeAMD(cusolverSpHandle_t handle, int rowsA, int nnzA, int* dev_csrRowPtrA, int* dev_csrColIndA, T* dev_csrValA); cusolverSpHandle_t cusolverHandle; cusparseMatDescr_t descrA; @@ -31,6 +37,8 @@ class CholeskyDnLinearSolver : public LinearSolver { virtual ~CholeskyDnLinearSolver() override; virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override; private: + using 
LinearSolver::dType; + cusolverDnParams_t params; int* d_info = nullptr; /* error info */ cusolverDnHandle_t cusolverHandle; diff --git a/src/simulation/solver/linear/choleskyImmed.h b/src/simulation/solver/linear/choleskyImmed.h index 36f4116..76d878a 100644 --- a/src/simulation/solver/linear/choleskyImmed.h +++ b/src/simulation/solver/linear/choleskyImmed.h @@ -11,6 +11,12 @@ class CholeskySpImmedSolver : public LinearSolver { virtual ~CholeskySpImmedSolver() override; virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override; private: + using LinearSolver::d_A; + using LinearSolver::d_rowIdx; + using LinearSolver::d_colIdx; + using LinearSolver::d_rowPtrA; + using LinearSolver::capacity; + cusparseHandle_t handle; cusolverSpHandle_t cusolverHandle; cusparseMatDescr_t descrA; diff --git a/src/simulation/solver/linear/jacobi.h b/src/simulation/solver/linear/jacobi.h index 9b2e65e..95556f4 100644 --- a/src/simulation/solver/linear/jacobi.h +++ b/src/simulation/solver/linear/jacobi.h @@ -11,6 +11,12 @@ class JacobiSolver : public LinearSolver { virtual ~JacobiSolver() override; virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override; private: + using LinearSolver::d_A; + using LinearSolver::d_rowIdx; + using LinearSolver::d_colIdx; + using LinearSolver::d_rowPtrA; + using LinearSolver::capacity; + int maxIter; cusparseHandle_t cusHandle = nullptr; T* x_prime = nullptr; diff --git a/src/simulation/solver/linear/linear.h b/src/simulation/solver/linear/linear.h index 7df8806..f98e271 100644 --- a/src/simulation/solver/linear/linear.h +++ b/src/simulation/solver/linear/linear.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include template struct CUDAType { diff --git a/src/simulation/solver/linear/pcgJacobi.h b/src/simulation/solver/linear/pcgJacobi.h index d8db3f8..c4f987e 
100644 --- a/src/simulation/solver/linear/pcgJacobi.h +++ b/src/simulation/solver/linear/pcgJacobi.h @@ -11,6 +11,13 @@ class PCGJacobiSolver : public LinearSolver { virtual ~PCGJacobiSolver() override; virtual void Solve(int N, T* d_b, T* d_x, T* d_A, int nz, int* d_rowIdx, int* d_colIdx, T* d_guess = nullptr) override; private: + using LinearSolver::dType; + using LinearSolver::d_A; + using LinearSolver::d_rowIdx; + using LinearSolver::d_colIdx; + using LinearSolver::d_rowPtrA; + using LinearSolver::capacity; + cublasHandle_t cubHandle = nullptr; cusparseHandle_t cusHandle = nullptr; diff --git a/src/simulation/solver/solverUtil.cuh b/src/simulation/solver/solverUtil.cuh index 6865695..da4f54e 100644 --- a/src/simulation/solver/solverUtil.cuh +++ b/src/simulation/solver/solverUtil.cuh @@ -3,6 +3,7 @@ #include #include + template float measureExecutionTime(const Func& func, bool print = false) { if (!print) { diff --git a/src/simulation/utilities.cuh b/src/simulation/utilities.cuh index 1b6f8fe..05dc978 100644 --- a/src/simulation/utilities.cuh +++ b/src/simulation/utilities.cuh @@ -26,6 +26,20 @@ void inspectGLM(const T* dev_ptr, int size, const char* str = "") { utilityCore::inspectHost(host_ptr.data(), size, str); } +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 +__device__ inline double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast(address); + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + + template void inspectSparseMatrix(T* dev_val, int* dev_rowIdx, int* dev_colIdx, int begin, int nnz, int size); void inspectMortonCodes(const int* dev_mortonCodes, int numTris); diff --git a/src/utilities.cpp b/src/utilities.cpp index 469781e..ab6bad4 100644 --- a/src/utilities.cpp +++ b/src/utilities.cpp 
@@ -15,6 +15,7 @@ #include #include #include +#include #include template @@ -214,7 +215,7 @@ void utilityCore::inspectHost(const Query* query, int size) { std::cout << "DistanceType::" << distanceTypeString[static_cast(query[i].dType)] << ","; std::cout << query[i].v0 << "," << query[i].v1 << "," << query[i].v2 << "," << query[i].v3 << "," << query[i].toi << "," << query[i].d << "," - << glm::to_string(query[i].normal) << "}," << std::endl; + << std::endl; } std::cout << "------------------------inspectHost--END------------------------------" << std::endl; } diff --git a/tests/test_distance.cpp b/tests/test_distance.cpp index 245fe0d..ed8341d 100644 --- a/tests/test_distance.cpp +++ b/tests/test_distance.cpp @@ -30,14 +30,14 @@ __forceinline__ __host__ __device__ Matrix12 barrierSquareFuncHess(Scala double dhat = 0.05; double kappa = 100; std::vector queries = { -Query{QueryType::EE,DistanceType::EA_EB,1,2,4,7,0.00187279,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::EE,DistanceType::EA_EB,4,7,1,2,0.00187279,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::EE,DistanceType::EA_EB,1,2,4,5,0.0021027,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::EE,DistanceType::EA_EB,4,5,1,2,0.0021027,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::VF,DistanceType::P_E0,4,1,2,3,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::VF,DistanceType::P_E1,4,0,1,2,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::EE,DistanceType::EA_EB0,1,2,4,6,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::EE,DistanceType::EA0_EB,4,6,1,2,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)}, +Query{QueryType::EE,DistanceType::EA_EB,1,2,4,7,0.00187279,1,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::EE,DistanceType::EA_EB,4,7,1,2,0.00187279,1,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::EE,DistanceType::EA_EB,1,2,4,5,0.0021027,1,Vec3d{0.000000, 0.000000, 
0.000000}}, +Query{QueryType::EE,DistanceType::EA_EB,4,5,1,2,0.0021027,1,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::VF,DistanceType::P_E0,4,1,2,3,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::VF,DistanceType::P_E1,4,0,1,2,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::EE,DistanceType::EA_EB0,1,2,4,6,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::EE,DistanceType::EA0_EB,4,6,1,2,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}}, }; std::vector points = { glm::dvec3(0.436417, 59.751547, 0.282352), diff --git a/tests/test_solveCubic.cpp b/tests/test_solveCubic.cpp index 7849d5d..a12acb7 100644 --- a/tests/test_solveCubic.cpp +++ b/tests/test_solveCubic.cpp @@ -126,12 +126,12 @@ TEST_CASE("Tet collision test", "[Tet]") { std::vector queries = { -Query{QueryType::VF,DistanceType::P_T0,0,4,5,9,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::VF,DistanceType::P_T0,1,4,5,9,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::VF,DistanceType::P_T0,2,4,5,9,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)}, -Query{QueryType::VF,DistanceType::EA1_EB0,3,4,7,10,0.994723,100,glm::dvec3(0.000010, 1.000000, 0.000010)}, -Query{QueryType::VF,DistanceType::P_T0,4,0,1,3,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)}, -}; +Query{QueryType::VF,DistanceType::P_T0,0,4,5,9,1,0,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::VF,DistanceType::P_T0,1,4,5,9,1,0,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::VF,DistanceType::P_T0,2,4,5,9,1,0,Vec3d{0.000000, 0.000000, 0.000000}}, +Query{QueryType::VF,DistanceType::EA1_EB0,3,4,7,10,0.994723,100,Vec3d{0.000010, 1.000000, 0.000010}}, +Query{QueryType::VF,DistanceType::P_T0,4,0,1,3,1,0,Vec3d{0.000000, 0.000000, 0.000000}}, + }; std::vector Xs{ glm::dvec3(2.001631, 67.999075, 0.533407), glm::dvec3(-6.001968, 61.071146, -3.459837), From 8574b9545af38bedd2ceac790d00232627248fc6 Mon Sep 17 00:00:00 2001 From: GrahamZen <931252924@qq.com> 
Date: Fri, 19 Dec 2025 01:38:23 -0600 Subject: [PATCH 03/20] fix invalid write --- src/simulation/dataLoader.cu | 12 +++++++++--- src/simulation/solver/linear/jacobi.cu | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/simulation/dataLoader.cu b/src/simulation/dataLoader.cu index 54ae39c..a147114 100644 --- a/src/simulation/dataLoader.cu +++ b/src/simulation/dataLoader.cu @@ -38,6 +38,8 @@ DataLoader::~DataLoader() = default; template std::pair, std::vector> DataLoader::loadEleFaceFile(const std::string& EleFilename, int startIndex, int& numTets, int& numTris, std::string faceFilename) { + numTets = 0; + numTris = 0; std::string line; std::ifstream file(EleFilename); @@ -342,9 +344,13 @@ void DataLoader::AllocData(std::vector& startIndices, SolverData::JacobiSolver(int N, int maxIter) : maxIter(maxIter) template JacobiSolver::~JacobiSolver() { - cudaFree(d_rowPtrA); cudaFree(x_prime); CHECK_CUSPARSE(cusparseDestroy(cusHandle)); } From 1876b68c09ec06fa5f5b6ba5ad2cff75ea971dab Mon Sep 17 00:00:00 2001 From: GrahamZen <931252924@qq.com> Date: Fri, 19 Dec 2025 01:46:10 -0600 Subject: [PATCH 04/20] extend hessian buffer --- src/simulation/energy/ip.cu | 36 ++++++++++++++++++++++++++++-------- src/simulation/energy/ip.h | 9 +++++++-- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/simulation/energy/ip.cu b/src/simulation/energy/ip.cu index 7f36c95..037c202 100644 --- a/src/simulation/energy/ip.cu +++ b/src/simulation/energy/ip.cu @@ -17,13 +17,15 @@ struct AbsOp { } }; -IPEnergy::IPEnergy(const SolverData& solverData) :inertia(solverData, nnz, solverData.numVerts, solverData.mass), +IPEnergy::IPEnergy(const SolverData& solverData) : inertia(solverData, nnz, solverData.numVerts, solverData.mass), elastic(new CorotatedEnergy(solverData, nnz)), implicitBarrier(solverData, nnz), barrier(solverData, nnz) { + hessianCapacity = nnz; cudaMalloc((void**)&gradient, sizeof(double) * solverData.numVerts * 3); - 
cudaMalloc((void**)&hessianVal, sizeof(double) * nnz); - cudaMalloc((void**)&hessianRowIdx, sizeof(int) * nnz); - cudaMalloc((void**)&hessianColIdx, sizeof(int) * nnz); + cudaMalloc((void**)&hessianVal, sizeof(double) * hessianCapacity); + cudaMalloc((void**)&hessianRowIdx, sizeof(int) * hessianCapacity); + cudaMalloc((void**)&hessianColIdx, sizeof(int) * hessianCapacity); + inertia.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); implicitBarrier.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); elastic->SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); @@ -36,6 +38,7 @@ IPEnergy::~IPEnergy() cudaFree(hessianVal); cudaFree(hessianRowIdx); cudaFree(hessianColIdx); + if (elastic) delete elastic; } double IPEnergy::Val(const glm::dvec3* Xs, const SolverData& solverData, const SolverParams& solverParams, double h2) const @@ -43,12 +46,29 @@ double IPEnergy::Val(const glm::dvec3* Xs, const SolverData& solverData, return inertia.Val(Xs, solverData, solverParams) + h2 * (gravity.Val(Xs, solverData, solverParams) + elastic->Val(Xs, solverData, solverParams) + implicitBarrier.Val(Xs, solverData, solverParams) + barrier.Val(Xs, solverData, solverParams)); } -void IPEnergy::GradientHessian(const SolverData& solverData, const SolverParams& solverParams, double h2) const +void IPEnergy::GradientHessian(const SolverData& solverData, const SolverParams& solverParams, double h2) { + int currentNNZ = NNZ(solverData); + if (currentNNZ > hessianCapacity) { + cudaFree(hessianVal); + cudaFree(hessianRowIdx); + cudaFree(hessianColIdx); + hessianCapacity = static_cast(currentNNZ * 1.5); + + cudaMalloc((void**)&hessianVal, sizeof(double) * hessianCapacity); + cudaMalloc((void**)&hessianRowIdx, sizeof(int) * hessianCapacity); + cudaMalloc((void**)&hessianColIdx, sizeof(int) * hessianCapacity); + + inertia.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); + implicitBarrier.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); + 
elastic->SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); + barrier.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx); + } + cudaMemset(gradient, 0, sizeof(double) * solverData.numVerts * 3); - cudaMemset(hessianVal, 0, sizeof(double) * NNZ(solverData)); - cudaMemset(hessianRowIdx, 0, sizeof(int) * NNZ(solverData)); - cudaMemset(hessianColIdx, 0, sizeof(int) * NNZ(solverData)); + cudaMemset(hessianVal, 0, sizeof(double) * currentNNZ); + cudaMemset(hessianRowIdx, 0, sizeof(int) * currentNNZ); + cudaMemset(hessianColIdx, 0, sizeof(int) * currentNNZ); inertia.GradientHessian(gradient, solverData, solverParams, 1); gravity.Gradient(gradient, solverData, solverParams, h2); elastic->GradientHessian(gradient, solverData, solverParams, h2); diff --git a/src/simulation/energy/ip.h b/src/simulation/energy/ip.h index 187c31f..d68c128 100644 --- a/src/simulation/energy/ip.h +++ b/src/simulation/energy/ip.h @@ -10,18 +10,23 @@ class IPEnergy { public: IPEnergy(const SolverData& solverData); ~IPEnergy(); + IPEnergy(const IPEnergy&) = delete; + IPEnergy& operator=(const IPEnergy&) = delete; + double Val(const glm::dvec3* Xs, const SolverData& solverData, const SolverParams& solverParams, double h2) const; - void GradientHessian(const SolverData& solverData, const SolverParams& solverParams, double h2) const; + void GradientHessian(const SolverData& solverData, const SolverParams& solverParams, double h2); void UpdateKappa(SolverData& solverData, const SolverParams& solverParams, double h2) const; double InitStepSize(SolverData& solverData, const SolverParams& solverParams, double* p, glm::tvec3* XTmp) const; int NNZ(const SolverData& solverData) const; double* gradient = nullptr; - // collision queries should be updated if dirty double* hessianVal = nullptr; int* hessianRowIdx = nullptr; int* hessianColIdx = nullptr; + private: int nnz = 0; + int hessianCapacity = 0; + InertiaEnergy inertia; GravityEnergy gravity; ImplicitBarrierEnergy implicitBarrier; From 
43c443178f1bc2af5f002aee471109611945dbfa Mon Sep 17 00:00:00 2001 From: GrahamZen Date: Fri, 19 Dec 2025 12:40:37 -0600 Subject: [PATCH 05/20] use unique_ptr in context.h --- src/context.cpp | 45 ++++++++++++++-------------- src/context.h | 25 ++++++++-------- src/main.cpp | 4 +-- src/openglcontext/preview.cpp | 4 +-- src/simulation/simulationContext.cpp | 2 +- src/simulation/simulationContext.cu | 4 +-- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/context.cpp b/src/context.cpp index 7fc3cd3..2569740 100644 --- a/src/context.cpp +++ b/src/context.cpp @@ -84,10 +84,20 @@ Ray Camera::RayPick(glm::ivec2 pixel) return Ray{ position, glm::normalize(rayWorldXYZ - position) }; } -Context::Context(const std::string& _filename) :shaderType(ShaderType::PHONG), filename(_filename), mpCamera(new Camera(_filename)), mpProgLambert(new SurfaceShader()), -mpProgPhong(new SurfaceShader()), mpProgHighLight(new SurfaceShader()), mpProgFlat(new SurfaceShader()), mpProgSkybox(new SurfaceShader()), -width(mpCamera->resolution.x), height(mpCamera->resolution.y), ogLookAt(mpCamera->lookAt), guiData(new GuiDataContainer()), -mpSelectSPhere(new Sphere(utilityCore::modelMatrix(glm::vec3(0), glm::vec3(0), glm::vec3(5, 5, 5)), 5, 10)) +Context::Context(const std::string& _filename) + : shaderType(ShaderType::PHONG), + filename(_filename), + mpCamera(std::make_unique(_filename)), + mpProgLambert(std::make_unique()), + mpProgPhong(std::make_unique()), + mpProgHighLight(std::make_unique()), + mpProgFlat(std::make_unique()), + mpProgSkybox(std::make_unique()), + width(mpCamera->resolution.x), + height(mpCamera->resolution.y), + ogLookAt(mpCamera->lookAt), + guiData(std::make_unique()), + mpSelectSPhere(std::make_unique(utilityCore::modelMatrix(glm::vec3(0), glm::vec3(0), glm::vec3(5, 5, 5)), 5, 10)) { glm::vec3 view = mpCamera->view; glm::vec3 up = mpCamera->up; @@ -105,18 +115,7 @@ mpSelectSPhere(new Sphere(utilityCore::modelMatrix(glm::vec3(0), glm::vec3(0), g zoom = 
glm::length(mpCamera->position - ogLookAt); } -Context::~Context() -{ - delete mpProgHighLight; - delete mpProgLambert; - delete mpProgPhong; - delete mpProgFlat; - delete mpProgSkybox; - delete mcrpSimContext; - delete guiData; - delete mpCamera; - delete mpEnvMapCube; -} +Context::~Context() = default; int Context::GetMaxCGThreads() { @@ -187,7 +186,7 @@ void Context::LoadShaders(const std::string& vertShaderFilename, const std::stri mpProgSkybox->create("../src/shaders/envMap.vert.glsl", "../src/shaders/envMap.frag.glsl"); mpProgSkybox->setViewProjMatrix(mpCamera->getView(), mpCamera->getProj()); mpProgSkybox->addUniform("u_EnvironmentMap"); - mpEnvMapCube = new Mesh(); + mpEnvMapCube = std::make_unique(); mpEnvMapCube->createCube(); } } @@ -376,20 +375,20 @@ SimulationCUDAContext* Context::LoadSimContext() { if (contextJson.contains("fixedBodies")) { fixBodies = ReadFixedBodies(contextJson["fixedBodies"], fixedBodyDefs); } - mpSimContexts.push_back(new SimulationCUDAContext(this, baseName, contextJson, softBodyDefs, fixBodies, threadsPerBlock, threadsPerBlockBVH, maxThreads, numIterations)); + mpSimContexts.push_back(std::make_unique(this, baseName, contextJson, softBodyDefs, fixBodies, threadsPerBlock, threadsPerBlockBVH, maxThreads, numIterations)); DOFs.push_back(mpSimContexts.back()->GetVertCnt() * 3); Eles.push_back(mpSimContexts.back()->GetTetCnt()); if (logEnabled) spdlog::info("{} #dof: {}, #ele: {}", "[" + baseName + "]", DOFs.back(), Eles.back()); } - mcrpSimContext = mpSimContexts[0]; + mcrpSimContext = mpSimContexts[0].get(); } return mcrpSimContext; } void Context::LoadEnvCubemap(const std::string& filename) { { - envMap = new TextureCubemap(); + envMap = std::make_unique(); envMap->create(filename.c_str(), false); } } @@ -442,10 +441,10 @@ void Context::Draw() { switch (shaderType) { case Context::ShaderType::LAMBERT: - mcrpSimContext->Draw(mpProgHighLight, mpProgLambert, mpProgFlat, guiData->HighLightObjId); + 
mcrpSimContext->Draw(mpProgHighLight.get(), mpProgLambert.get(), mpProgFlat.get(), guiData->HighLightObjId); break; case Context::ShaderType::PHONG: - mcrpSimContext->Draw(mpProgHighLight, mpProgPhong, mpProgFlat, guiData->HighLightObjId); + mcrpSimContext->Draw(mpProgHighLight.get(), mpProgPhong.get(), mpProgFlat.get(), guiData->HighLightObjId); break; default: break; @@ -488,7 +487,7 @@ void Context::Update() { PollEvents(); if (panelModified) { if (guiData->currSimContextId != -1) { - mcrpSimContext = mpSimContexts[guiData->currSimContextId]; + mcrpSimContext = mpSimContexts[guiData->currSimContextId].get(); guiData->solverParams = mcrpSimContext->GetSolverParamsUI(); } mcrpSimContext->SetGlobalSolver(guiData->solverType); diff --git a/src/context.h b/src/context.h index 7837a61..477743c 100644 --- a/src/context.h +++ b/src/context.h @@ -3,6 +3,7 @@ #include #include #include +#include class SoftBody; class Camera; @@ -102,7 +103,7 @@ class Context int GetIteration() const { return iteration; } const std::vector& GetDOFs() const { return DOFs; } const std::vector& GetEles() const { return Eles; } - Camera* mpCamera = nullptr; + std::unique_ptr mpCamera; const int width = 1024; const int height = 1024; bool panelModified = false; @@ -110,9 +111,9 @@ class Context int bvhBuildType = 1; float zoom, theta, phi; glm::vec3 cameraPosition; - GuiDataContainer* guiData; + std::unique_ptr guiData; SimulationCUDAContext* mcrpSimContext = nullptr; - std::vector mpSimContexts; + std::vector> mpSimContexts; MouseState mouseState; private: @@ -122,18 +123,18 @@ class Context std::string filename = "context.json"; SimulationCUDAContext* LoadSimContext(); glm::vec3 ogLookAt; // for recentering the camera - SurfaceShader* mpProgHighLight = nullptr; - SurfaceShader* mpProgLambert = nullptr; - SurfaceShader* mpProgPhong = nullptr; - SurfaceShader* mpProgFlat = nullptr; - SurfaceShader* mpProgSkybox = nullptr; - Mesh* mpEnvMapCube = nullptr; + std::unique_ptr mpProgHighLight; + 
std::unique_ptr mpProgLambert; + std::unique_ptr mpProgPhong; + std::unique_ptr mpProgFlat; + std::unique_ptr mpProgSkybox; + std::unique_ptr mpEnvMapCube; size_t iteration = 0; bool pause = false; bool logEnabled = false; std::vector DOFs; std::vector Eles; - TextureCubemap* envMap = nullptr; - Sphere* mpSelectSPhere = nullptr; + std::unique_ptr envMap; + std::unique_ptr mpSelectSPhere; glm::vec3 spherePos; -}; \ No newline at end of file +}; diff --git a/src/main.cpp b/src/main.cpp index 04c1ed4..c4b4084 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -41,7 +41,7 @@ int main(int argc, char** argv) { context->LoadShaders(); context->LoadFlatShaders(); // Initialize ImGui Data - InitImguiData(context->guiData); + InitImguiData(context->guiData.get()); context->InitDataContainer(); GLenum err; while ((err = glGetError()) != GL_NO_ERROR) { @@ -148,4 +148,4 @@ void framebuffer_size_callback(GLFWwindow* window, int width, int height) { context->mpCamera->resolution.y = height; context->mpCamera->resolution.x = width; context->mpCamera->computeCameraParams(); -} \ No newline at end of file +} diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp index 5f662e4..668f0cf 100644 --- a/src/openglcontext/preview.cpp +++ b/src/openglcontext/preview.cpp @@ -85,7 +85,7 @@ void InitImguiData(GuiDataContainer* guiData) void RenderHierarchy(bool& contextChanged) { ImGui::Begin("Scene Hierarchy", nullptr); for (size_t i = 0; i < context->mpSimContexts.size(); i++) { - auto simCtx = context->mpSimContexts[i]; + auto* simCtx = context->mpSimContexts[i].get(); if (ImGui::TreeNode(simCtx->GetName().c_str())) { ImGui::SameLine(); if (ImGui::Button("Activate")) { @@ -102,7 +102,7 @@ void RenderHierarchy(bool& contextChanged) { if (ImGui::Button("Highlight")) { imguiData->HighLightObjId = uniqueId; } - ImGui::Text("#DBC: %d", softBody->GetAttributes().numDBC); + ImGui::Text("#DBC: %zu", softBody->GetAttributes().numDBC); ImGui::Text("#Triangle: %d", 
softBody->GetNumTris()); imguiData->softBodyAttr.mu = ImGui::DragFloat("mu", &softBody->GetAttributes().mu, 100.f, 0.0f, 100000.0f, "%.2f"); imguiData->softBodyAttr.lambda = ImGui::DragFloat("lambda", &softBody->GetAttributes().lambda, 100.f, 0.0f, 100000.0f, "%.2f"); diff --git a/src/simulation/simulationContext.cpp b/src/simulation/simulationContext.cpp index 449f7e4..dcffe48 100644 --- a/src/simulation/simulationContext.cpp +++ b/src/simulation/simulationContext.cpp @@ -52,7 +52,7 @@ static void CopyParamsToUI(const SolverParams& p, SolverParamsUI& ui) { SimulationCUDAContext::SimulationCUDAContext(Context* ctx, const std::string& _name, nlohmann::json& json, const std::map& softBodyDefs, std::vector& fixedBodies, int threadsPerBlock, int threadsPerBlockBVH, int maxThreads, int numIterations) - : threadsPerBlock_(threadsPerBlock), contextGuiData(ctx->guiData), name(_name) { + : threadsPerBlock_(threadsPerBlock), contextGuiData(ctx->guiData.get()), name(_name) { std::string prec = "double"; if (json.contains("precision")) { prec = json["precision"].get(); diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu index ec4b85b..bc81c29 100644 --- a/src/simulation/simulationContext.cu +++ b/src/simulation/simulationContext.cu @@ -40,7 +40,7 @@ void SimulationCUDAContext::Impl::Init(Context* ctx, nlohmann::json& jso maxThreads = _maxThreads; numIterations = _numIterations; - auto guiData = ctx->guiData; + auto guiData = ctx->guiData.get(); DataLoader dataLoader(threadsPerBlock); std::vector namesSoftBodies; data.pCollisionDetection = new CollisionDetection{ ctx, _threadsPerBlockBVH, 1 << 16 }; @@ -242,4 +242,4 @@ void SimulationCUDAContext::PrepareRenderData() { } template struct SimulationCUDAContext::Impl; -template struct SimulationCUDAContext::Impl; \ No newline at end of file +template struct SimulationCUDAContext::Impl; From 391ad157830c697451d7d4f3f98765af61ff69f8 Mon Sep 17 00:00:00 2001 From: GrahamZen Date: Fri, 19 Dec 2025 
12:46:32 -0600 Subject: [PATCH 06/20] unique_ptr in solver --- src/simulation/energy/ip.cu | 10 ++++++---- src/simulation/energy/ip.h | 5 +++-- src/simulation/solver/projective/pdSolver.cu | 5 +---- src/simulation/solver/projective/pdSolver.h | 5 +++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/simulation/energy/ip.cu b/src/simulation/energy/ip.cu index 037c202..f97458c 100644 --- a/src/simulation/energy/ip.cu +++ b/src/simulation/energy/ip.cu @@ -17,8 +17,11 @@ struct AbsOp { } }; -IPEnergy::IPEnergy(const SolverData& solverData) : inertia(solverData, nnz, solverData.numVerts, solverData.mass), -elastic(new CorotatedEnergy(solverData, nnz)), implicitBarrier(solverData, nnz), barrier(solverData, nnz) +IPEnergy::IPEnergy(const SolverData& solverData) + : inertia(solverData, nnz, solverData.numVerts, solverData.mass), + elastic(std::make_unique>(solverData, nnz)), + implicitBarrier(solverData, nnz), + barrier(solverData, nnz) { hessianCapacity = nnz; cudaMalloc((void**)&gradient, sizeof(double) * solverData.numVerts * 3); @@ -38,7 +41,6 @@ IPEnergy::~IPEnergy() cudaFree(hessianVal); cudaFree(hessianRowIdx); cudaFree(hessianColIdx); - if (elastic) delete elastic; } double IPEnergy::Val(const glm::dvec3* Xs, const SolverData& solverData, const SolverParams& solverParams, double h2) const @@ -123,4 +125,4 @@ double IPEnergy::InitStepSize(SolverData& solverData, const SolverParams int IPEnergy::NNZ(const SolverData& solverData) const { return inertia.NNZ(solverData) + implicitBarrier.NNZ(solverData) + elastic->NNZ(solverData) + barrier.NNZ(solverData); -} \ No newline at end of file +} diff --git a/src/simulation/energy/ip.h b/src/simulation/energy/ip.h index d68c128..477f6ab 100644 --- a/src/simulation/energy/ip.h +++ b/src/simulation/energy/ip.h @@ -5,6 +5,7 @@ #include #include #include +#include class IPEnergy { public: @@ -30,6 +31,6 @@ class IPEnergy { InertiaEnergy inertia; GravityEnergy gravity; ImplicitBarrierEnergy implicitBarrier; - 
ElasticEnergy* elastic = nullptr; + std::unique_ptr> elastic; BarrierEnergy barrier; -}; \ No newline at end of file +}; diff --git a/src/simulation/solver/projective/pdSolver.cu b/src/simulation/solver/projective/pdSolver.cu index db77ef2..f0357af 100644 --- a/src/simulation/solver/projective/pdSolver.cu +++ b/src/simulation/solver/projective/pdSolver.cu @@ -27,9 +27,6 @@ PdSolver::PdSolver(int threadsPerBlock, const SolverData& solverData) : F } PdSolver::~PdSolver() { - if (ls) { - free(ls); - } cudaFree(sn); cudaFree(sn_old); cudaFree(b); @@ -117,7 +114,7 @@ void PdSolver::SolverPrepare(SolverData& solverData, const SolverParams(threadsPerBlock, ARowIdx, AColIdx, AVal, ASize, nnz); + ls = std::make_unique>(threadsPerBlock, ARowIdx, AColIdx, AVal, ASize, nnz); } catch (const std::exception& e) { diff --git a/src/simulation/solver/projective/pdSolver.h b/src/simulation/solver/projective/pdSolver.h index c46cbb7..2f196c0 100644 --- a/src/simulation/solver/projective/pdSolver.h +++ b/src/simulation/solver/projective/pdSolver.h @@ -4,6 +4,7 @@ #include #include #include +#include template class LinearSolver; @@ -23,8 +24,8 @@ class PdSolver : public FEMSolver { virtual void SolverPrepare(SolverData& solverData, const SolverParams& solverParams) override; virtual bool SolverStep(SolverData& solverData, const SolverParams& solverParams) override; private: - LinearSolver* ls = nullptr; - LinearSolver* jacobiSolver = nullptr; + std::unique_ptr> ls; + std::unique_ptr> jacobiSolver; SolverType solverType; const float positional_weight = 1e6; From ae5b0cb5aab28059b411202989cc3f5d30ce6790 Mon Sep 17 00:00:00 2001 From: GrahamZen Date: Fri, 19 Dec 2025 23:14:19 -0600 Subject: [PATCH 07/20] fix interop free --- src/main.cpp | 6 +++++- src/openglcontext/drawable.cpp | 1 + src/openglcontext/drawable.h | 2 +- src/openglcontext/mesh.cpp | 10 ++++++++-- src/openglcontext/preview.cpp | 4 +++- src/openglcontext/preview.h | 2 +- src/openglcontext/queryDisplay.cpp | 10 ++++++++-- 
src/openglcontext/singleQueryDisplay.cpp | 15 +++++++++++--- src/openglcontext/wireframe.cpp | 5 ++++- src/simulation/simulationContext.cu | 13 ++++++++++++ src/simulation/solver/projective/pdSolver.cu | 21 +++++++++++++++----- src/simulation/solver/projective/pdSolver.h | 16 +++++++-------- 12 files changed, 80 insertions(+), 25 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index c4b4084..fdfbead 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -49,9 +49,13 @@ int main(int argc, char** argv) { } // GLFW main loop mainLoop(); + if (context) { + delete context; + context = nullptr; + } + cleanupOpenGL(); cudaDeviceReset(); - delete context; return 0; } diff --git a/src/openglcontext/drawable.cpp b/src/openglcontext/drawable.cpp index 65b6e2f..eefab2a 100644 --- a/src/openglcontext/drawable.cpp +++ b/src/openglcontext/drawable.cpp @@ -5,6 +5,7 @@ Drawable::Drawable() idxBound(false), posBound(false), norBound(false), uvBound(false) {} +Drawable::~Drawable() = default; void Drawable::destroy() { diff --git a/src/openglcontext/drawable.h b/src/openglcontext/drawable.h index 19d2b17..59eab03 100644 --- a/src/openglcontext/drawable.h +++ b/src/openglcontext/drawable.h @@ -21,7 +21,7 @@ class Drawable public: Drawable(); - + virtual ~Drawable(); virtual void create() = 0; // To be implemented by subclasses. Populates the VBOs of the Drawable. void destroy(); // Frees the VBOs of the Drawable. 
diff --git a/src/openglcontext/mesh.cpp b/src/openglcontext/mesh.cpp index cfa4a74..a796c3e 100644 --- a/src/openglcontext/mesh.cpp +++ b/src/openglcontext/mesh.cpp @@ -12,8 +12,14 @@ Mesh::Mesh() Mesh::~Mesh() { // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(bufPos); - cudaGLUnregisterBufferObject(bufNor); + if (cuda_bufPos_resource) { + cudaGraphicsUnregisterResource(cuda_bufPos_resource); + cuda_bufPos_resource = nullptr; + } + if (cuda_bufNor_resource) { + cudaGraphicsUnregisterResource(cuda_bufNor_resource); + cuda_bufNor_resource = nullptr; + } } void Mesh::createTetrahedron() diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp index 668f0cf..58aa756 100644 --- a/src/openglcontext/preview.cpp +++ b/src/openglcontext/preview.cpp @@ -337,11 +337,13 @@ void mainLoop() { glfwSwapBuffers(window); } +} +void cleanupOpenGL() { ImGui_ImplOpenGL3_Shutdown(); ImGui_ImplGlfw_Shutdown(); ImGui::DestroyContext(); glfwDestroyWindow(window); glfwTerminate(); -} +} \ No newline at end of file diff --git a/src/openglcontext/preview.h b/src/openglcontext/preview.h index 4c569aa..61081e5 100644 --- a/src/openglcontext/preview.h +++ b/src/openglcontext/preview.h @@ -5,6 +5,6 @@ class GuiDataContainer; std::string currentTimeString(); bool initOpenGL(); void mainLoop(); - +void cleanupOpenGL(); bool MouseOverImGuiWindow(); void InitImguiData(GuiDataContainer* guiData); \ No newline at end of file diff --git a/src/openglcontext/queryDisplay.cpp b/src/openglcontext/queryDisplay.cpp index 9bc0b3e..c63b027 100644 --- a/src/openglcontext/queryDisplay.cpp +++ b/src/openglcontext/queryDisplay.cpp @@ -10,8 +10,14 @@ QueryDisplay::QueryDisplay() QueryDisplay::~QueryDisplay() { // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(bufPos); - cudaGLUnregisterBufferObject(bufCol); + if (cuda_bufPos_resource) { + cudaGraphicsUnregisterResource(cuda_bufPos_resource); + cuda_bufPos_resource = nullptr; + } + if 
(cuda_bufCol_resource) { + cudaGraphicsUnregisterResource(cuda_bufCol_resource); + cuda_bufCol_resource = nullptr; + } } GLenum QueryDisplay::drawMode() diff --git a/src/openglcontext/singleQueryDisplay.cpp b/src/openglcontext/singleQueryDisplay.cpp index 42445bb..b3840ed 100644 --- a/src/openglcontext/singleQueryDisplay.cpp +++ b/src/openglcontext/singleQueryDisplay.cpp @@ -12,9 +12,18 @@ SingleQueryDisplay::SingleQueryDisplay() SingleQueryDisplay::~SingleQueryDisplay() { // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(bufPos); - cudaGLUnregisterBufferObject(bufVertPos); - cudaGLUnregisterBufferObject(bufTriPos); + if (cuda_bufPos_resource) { + cudaGraphicsUnregisterResource(cuda_bufPos_resource); + cuda_bufPos_resource = nullptr; + } + if (cuda_bufVertPos_resource) { + cudaGraphicsUnregisterResource(cuda_bufVertPos_resource); + cuda_bufVertPos_resource = nullptr; + } + if (cuda_bufTriPos_resource) { + cudaGraphicsUnregisterResource(cuda_bufTriPos_resource); + cuda_bufTriPos_resource = nullptr; + } } GLenum SingleQueryDisplay::drawMode() diff --git a/src/openglcontext/wireframe.cpp b/src/openglcontext/wireframe.cpp index 4d458e1..bdd705b 100644 --- a/src/openglcontext/wireframe.cpp +++ b/src/openglcontext/wireframe.cpp @@ -8,7 +8,10 @@ Wireframe::Wireframe() Wireframe::~Wireframe() { // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(bufPos); + if (cuda_bufPos_resource) { + cudaGraphicsUnregisterResource(cuda_bufPos_resource); + cuda_bufPos_resource = nullptr; + } } GLenum Wireframe::drawMode() diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu index bc81c29..b44ee0a 100644 --- a/src/simulation/simulationContext.cu +++ b/src/simulation/simulationContext.cu @@ -125,6 +125,7 @@ void SimulationCUDAContext::Impl::Init(Context* ctx, nlohmann::json& jso template SimulationCUDAContext::Impl::~Impl() { + cudaFree(data.X); cudaFree(data.Tet); cudaFree(data.V); @@ -132,6 +133,13 @@ 
SimulationCUDAContext::Impl::~Impl() cudaFree(data.X0); cudaFree(data.XTilde); cudaFree(data.ExtForce); + cudaFree(data.OffsetX); + cudaFree(data.moreDBC); + cudaFree(data.DBCX); + cudaFree(data.Tri); + cudaFree(data.DBCIdx); + cudaFree(data.contact_area); + cudaFree(data.DBC); cudaFree(data.mass); cudaFree(data.mu); @@ -145,6 +153,11 @@ SimulationCUDAContext::Impl::~Impl() delete softbody; } delete data.pCollisionDetection; + + if (data.pFixedBodies) { + delete data.pFixedBodies; + data.pFixedBodies = nullptr; + } } void SimulationCUDAContext::UpdateSoftBodyAttr(int index, SoftBodyAttr* pSoftBodyAttr) diff --git a/src/simulation/solver/projective/pdSolver.cu b/src/simulation/solver/projective/pdSolver.cu index f0357af..6f19152 100644 --- a/src/simulation/solver/projective/pdSolver.cu +++ b/src/simulation/solver/projective/pdSolver.cu @@ -27,15 +27,26 @@ PdSolver::PdSolver(int threadsPerBlock, const SolverData& solverData) : F } PdSolver::~PdSolver() { - cudaFree(sn); - cudaFree(sn_old); - cudaFree(b); - cudaFree(massDt_2s); - free(bHost); +if (sn) cudaFree(sn); + if (sn_old) cudaFree(sn_old); + if (b) cudaFree(b); + if (massDt_2s) cudaFree(massDt_2s); + if (bHost) free(bHost); + if (next_x) cudaFree(next_x); + if (prev_x) cudaFree(prev_x); + if (matrix_diag) cudaFree(matrix_diag); } void PdSolver::SolverPrepare(SolverData& solverData, const SolverParams& solverParams) { + if (sn) cudaFree(sn); + if (sn_old) cudaFree(sn_old); + if (next_x) cudaFree(next_x); + if (prev_x) cudaFree(prev_x); + if (b) cudaFree(b); + if (massDt_2s) cudaFree(massDt_2s); + if (matrix_diag) cudaFree(matrix_diag); + if (bHost) free(bHost); int vertBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock; int tetBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock; float dt = solverParams.dt; diff --git a/src/simulation/solver/projective/pdSolver.h b/src/simulation/solver/projective/pdSolver.h index 2f196c0..a911371 100644 --- 
a/src/simulation/solver/projective/pdSolver.h +++ b/src/simulation/solver/projective/pdSolver.h @@ -29,16 +29,16 @@ class PdSolver : public FEMSolver { SolverType solverType; const float positional_weight = 1e6; - float* massDt_2s; - float* sn; - float* sn_old; - float* b; - float* bHost; - float* matrix_diag; + float* massDt_2s = nullptr; + float* sn = nullptr; + float* sn_old = nullptr; + float* b = nullptr; + float* bHost = nullptr; + float* matrix_diag = nullptr; Eigen::SimplicialCholesky> cholesky_decomposition_; //Jacobi float omega; - float* next_x; - float* prev_x; + float* next_x = nullptr; + float* prev_x = nullptr; }; From 1576eb4ea14952c585535e4bba4f2d59fd7867dd Mon Sep 17 00:00:00 2001 From: GrahamZen Date: Sat, 20 Dec 2025 00:02:18 -0600 Subject: [PATCH 08/20] free pointers --- src/simulation/simulationContext.cu | 4 +++ src/simulation/solver/IPC/ipc.cu | 33 ++++++++++++++++------ src/simulation/solver/IPC/ipc.h | 2 +- src/simulation/solver/linear/cholesky.cu | 19 +------------ src/simulation/solver/linear/cholesky.h | 36 ++++++++++++++++++------ 5 files changed, 58 insertions(+), 36 deletions(-) diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu index b44ee0a..301de23 100644 --- a/src/simulation/simulationContext.cu +++ b/src/simulation/simulationContext.cu @@ -139,6 +139,10 @@ SimulationCUDAContext::Impl::~Impl() cudaFree(data.Tri); cudaFree(data.DBCIdx); cudaFree(data.contact_area); + if(data.V0) + cudaFree(data.V0); + if (data.DmInv) + cudaFree(data.DmInv); cudaFree(data.DBC); cudaFree(data.mass); diff --git a/src/simulation/solver/IPC/ipc.cu b/src/simulation/solver/IPC/ipc.cu index f534fb2..927487c 100644 --- a/src/simulation/solver/IPC/ipc.cu +++ b/src/simulation/solver/IPC/ipc.cu @@ -139,7 +139,6 @@ void IPCSolver::SolverPrepare(SolverData& solverData, const SolverParams } } - bool IPCSolver::SolverStep(SolverData& solverData, const SolverParams& solverParams) { SolverPrepare(solverData, solverParams); @@ 
-148,6 +147,9 @@ bool IPCSolver::SolverStep(SolverData& solverData, const SolverParams& solverData, const SolverParamsUpdateQueries(solverData.numVerts, solverData.numTris, solverData.Tri, solverData.X, solverData.dev_TriFathers, solverParams.dhat); energy.UpdateKappa(solverData, const_cast&>(solverParams), h2); E_last = energy.Val(solverData.X, solverData, solverParams, h2); + if (isnan(E_last) || isinf(E_last)) { std::cout << "FATAL: Energy is NaN/Inf before Line Search!" << std::endl; - return false; + stepSuccess = false; + return; + } + if (!SearchDirection(solverData, solverParams, h2)) { + stepSuccess = false; + return; } - if (!SearchDirection(solverData, solverParams, h2)) - return false; solverData.pCollisionDetection->UpdateDirection(p); solverData.pCollisionDetection->UpdateX(solverData.X); }, perf); + if (!stepSuccess) return false; + int maxIter = solverParams.maxIterations; int iter = 0; while (!EndCondition(h, solverParams.tol)) { if (++iter > maxIter) { return false; } + stepSuccess = true; performanceData[1].second += measureExecutionTime([&]() { IPC::computeXMinusAP << > > (xTmp, solverData.X, p, 1, solverData.numVerts); @@ -180,7 +189,8 @@ bool IPCSolver::SolverStep(SolverData& solverData, const SolverParams E_last) alpha /= 2; @@ -193,21 +203,28 @@ bool IPCSolver::SolverStep(SolverData& solverData, const SolverParamsUpdateQueries(solverData.numVerts, solverData.numTris, solverData.Tri, solverData.X, solverData.dev_TriFathers, solverParams.dhat); }, perf); + stepSuccess = true; performanceData[3].second += measureExecutionTime([&]() { E_last = energy.Val(solverData.X, solverData, solverParams, h2); if (isnan(E_last) || isinf(E_last)) { std::cout << "FATAL: Energy is NaN/Inf after Line Search!" 
<< std::endl; - return false; + stepSuccess = false; + return; } - if (!SearchDirection(solverData, solverParams, h2)) - return false; + if (!SearchDirection(solverData, solverParams, h2)) { + stepSuccess = false; + return; + } }, perf); + + if (!stepSuccess) return false; } IPC::updateVel << > > (solverData.X, x_n, solverData.V, 1.0 / h, solverData.numVerts); return true; diff --git a/src/simulation/solver/IPC/ipc.h b/src/simulation/solver/IPC/ipc.h index 8deb115..e49a2d0 100644 --- a/src/simulation/solver/IPC/ipc.h +++ b/src/simulation/solver/IPC/ipc.h @@ -35,7 +35,7 @@ class IPCSolver : public FEMSolver { glm::dvec3* xTmp = nullptr; glm::dvec3* x_n = nullptr; IPEnergy energy; - std::array>, 3> linearSolver = { nullptr, nullptr, nullptr }; + std::array>, 4> linearSolver = { nullptr, nullptr, nullptr, nullptr }; LinearSolver* currLinearSolver = nullptr; SolverType solverType = SolverType::CuSolverCholesky; }; \ No newline at end of file diff --git a/src/simulation/solver/linear/cholesky.cu b/src/simulation/solver/linear/cholesky.cu index aea4272..7dd4c58 100644 --- a/src/simulation/solver/linear/cholesky.cu +++ b/src/simulation/solver/linear/cholesky.cu @@ -22,14 +22,6 @@ __global__ void initAMatrix(int* idx, int* row, int* col, int rowLen, int totalN } } -template -CholeskyDnLinearSolver::~CholeskyDnLinearSolver() -{ - cudaFree(d_info); - cudaFree(d_predecomposedA); - cudaFree(d_work); -} - template CholeskyDnLinearSolver::CholeskyDnLinearSolver(int threadsPerBlock, int* AIdx, T* AVal, int ASize, int len) { cudaMalloc(&d_predecomposedA, sizeof(T) * ASize * ASize); @@ -76,16 +68,6 @@ CholeskyDnLinearSolver::CholeskyDnLinearSolver(int threadsPerBlock, int* AIdx free(h_work); } -template -CholeskySpLinearSolver::~CholeskySpLinearSolver() -{ - cusolverSpDestroyCsrcholInfo(d_info); - cusparseDestroyMatDescr(descrA); - cusolverSpDestroy(cusolverHandle); - cudaFree(buffer_gpu); - cudaFree(dev_x_permuted); - cudaFree(dev_b_permuted); -} template void 
CholeskySpLinearSolver::ComputeAMD(cusolverSpHandle_t handle, int rowsA, int nnzA, int* dev_csrRowPtrA, int* dev_csrColIndA, T* dev_csrValA) { @@ -156,6 +138,7 @@ CholeskySpLinearSolver::CholeskySpLinearSolver(int threadsPerBlock, int* rowI cusparseHandle_t handle; cusparseCreate(&handle); cusparseXcoo2csr(handle, d_rowIdx, nnz, ASize, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO); + cusparseDestroy(handle); cusolverSpCreate(&cusolverHandle); cusparseCreateMatDescr(&descrA); diff --git a/src/simulation/solver/linear/cholesky.h b/src/simulation/solver/linear/cholesky.h index 97fc2cf..e6f1cd1 100644 --- a/src/simulation/solver/linear/cholesky.h +++ b/src/simulation/solver/linear/cholesky.h @@ -11,7 +11,18 @@ template class CholeskySpLinearSolver : public LinearSolver { public: CholeskySpLinearSolver(int threadsPerBlock, int* rowIdx, int* colIdx, T* val, int ASize, int len); - virtual ~CholeskySpLinearSolver() override; + virtual ~CholeskySpLinearSolver() override + { + if (d_info) { cusolverSpDestroyCsrcholInfo(d_info); d_info = nullptr; } + if (descrA) { cusparseDestroyMatDescr(descrA); descrA = nullptr; } + if (cusolverHandle) { cusolverSpDestroy(cusolverHandle); cusolverHandle = nullptr; } + + if (d_p) { cudaFree(d_p); d_p = nullptr; } + if (buffer_gpu) { cudaFree(buffer_gpu); buffer_gpu = nullptr; } + if (dev_x_permuted) { cudaFree(dev_x_permuted); dev_x_permuted = nullptr; } + if (dev_b_permuted) { cudaFree(dev_b_permuted); dev_b_permuted = nullptr; } + } + virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override; private: using LinearSolver::d_A; @@ -21,27 +32,34 @@ class CholeskySpLinearSolver : public LinearSolver { using LinearSolver::capacity; void ComputeAMD(cusolverSpHandle_t handle, int rowsA, int nnzA, int* dev_csrRowPtrA, int* dev_csrColIndA, T* dev_csrValA); - cusolverSpHandle_t cusolverHandle; - cusparseMatDescr_t descrA; - csrcholInfo_t d_info; + 
cusolverSpHandle_t cusolverHandle = nullptr; + cusparseMatDescr_t descrA = nullptr; + csrcholInfo_t d_info = nullptr; void* buffer_gpu = nullptr; int* d_p = nullptr; - T* dev_b_permuted = nullptr, * dev_x_permuted = nullptr; - int n; + T* dev_b_permuted = nullptr; + T* dev_x_permuted = nullptr; + int n = 0; }; template class CholeskyDnLinearSolver : public LinearSolver { public: CholeskyDnLinearSolver(int threadsPerBlock, int* AIdx, T* AVal, int ASize, int len); - virtual ~CholeskyDnLinearSolver() override; + virtual ~CholeskyDnLinearSolver() override + { + if (d_info) { cudaFree(d_info); d_info = nullptr; } + if (d_predecomposedA) { cudaFree(d_predecomposedA); d_predecomposedA = nullptr; } + if (d_work) { cudaFree(d_work); d_work = nullptr; } + if (cusolverHandle) { cusolverDnDestroy(cusolverHandle); cusolverHandle = nullptr; } + } virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override; private: using LinearSolver::dType; cusolverDnParams_t params; int* d_info = nullptr; /* error info */ - cusolverDnHandle_t cusolverHandle; + cusolverDnHandle_t cusolverHandle = nullptr; void* d_work = nullptr; /* device workspace */ - T* d_predecomposedA; + T* d_predecomposedA = nullptr; }; \ No newline at end of file From e261d816d9310616c086b8bf66eb23b72b795373 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 00:27:46 -0600 Subject: [PATCH 09/20] remove fps cap --- src/openglcontext/preview.cpp | 2 ++ src/simulation/simulationContext.cu | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp index 58aa756..53fb723 100644 --- a/src/openglcontext/preview.cpp +++ b/src/openglcontext/preview.cpp @@ -49,6 +49,8 @@ bool initOpenGL() { return false; } glfwMakeContextCurrent(window); + // Disable vsync so we can cap the frame rate manually (otherwise it sticks to monitor refresh e.g. 
60Hz) + glfwSwapInterval(0); glfwSetKeyCallback(window, keyCallback); glfwSetCursorPosCallback(window, mousePositionCallback); glfwSetMouseButtonCallback(window, mouseButtonCallback); diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu index 301de23..0c7948f 100644 --- a/src/simulation/simulationContext.cu +++ b/src/simulation/simulationContext.cu @@ -139,10 +139,8 @@ SimulationCUDAContext::Impl::~Impl() cudaFree(data.Tri); cudaFree(data.DBCIdx); cudaFree(data.contact_area); - if(data.V0) - cudaFree(data.V0); - if (data.DmInv) - cudaFree(data.DmInv); + cudaFree(data.V0); + cudaFree(data.DmInv); cudaFree(data.DBC); cudaFree(data.mass); From 70f811f0834ec2b567e2e0e5ddbe745313927b45 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 01:19:14 -0600 Subject: [PATCH 10/20] update readme fix barrier hessian construction --- README.md | 88 ++++++++++++++++++------ src/simulation/energy/implicitBarrier.cu | 26 +++---- 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 5bc57f4..4cd4eb1 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,17 @@ Hanting Xu ## Click [here](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/tree/CIS5650-Final) for documentation (CIS5650 Final Project version) -## Requirements +## Overview -- CUDA >= 12.0 (cublas, cusolver) -- CMake >= 3.18 +This project is a CUDA-accelerated soft body simulation framework originally developed as a final project for **CIS 5650: GPU Programming and Architecture** at UPenn. -## Description +The goal of this project is to explore GPU-based physics simulation by building a **lightweight, extensible simulation framework** with minimal external dependencies. The system is designed to support rapid experimentation with different: -This project is originally a final project for CIS5650 at UPenn. The goal of this toy project is to provide a CUDA-accelerated physical simulation framework with minimal dependencies.
The framework is designed to be easily extensible, allowing new simulation algorithms, physical models, linear solvers, and collision detection methods to be added with minimal effort. The currently implemented features are listed below. +* physical models, +* numerical solvers, +* GPU-accelerated linear algebra pipelines. + +--- ## Features @@ -25,6 +28,8 @@ This project is originally a final project for CIS5650 at UPenn. The goal of thi * [x] Jacobi Solver (Naive) * [x] Cholesky Decomposition * [x] Preconditioned Conjugate Gradient + * [x] Incomplete Cholesky Preconditioner + * [x] Jacobi Preconditioner * FEM * [x] Projective Dynamics @@ -41,39 +46,78 @@ This project is originally a final project for CIS5650 at UPenn. The goal of thi * [x] Neo-Hookean * Collision Detection - * [x] Real-Time Bvh - * [x] Ccd - * [ ] Robust Collision Handling + * [x] Real-Time BVH Construction + * [x] Continuous Collision Detection (CCD) ## Dependencies -* [CUDA](https://developer.nvidia.com/cuda-downloads) -* [CMake](https://cmake.org/download/) +### System Requirements + +* **Operating System** + + * Windows + * Linux +* **CUDA Toolkit** ≥ 12.0 + (cublas, cusolver required) +* **CMake** ≥ 3.18 +* **OpenGL** + +### Third-Party Libraries -Below are included in the project: +The following libraries are included directly in the project: * OpenGL * ImGui -* spdlog +* GLFW * Eigen -* glfw -* catch2 +* spdlog +* Catch2 + +External tools: + +* [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) +* [CMake](https://cmake.org/download/) + +--- + +## Configuration + +### Environment Configuration + +The full runtime configuration is specified in `context.json`. This file defines simulation contexts, solver settings, and physical parameters. + +--- + +### Scene Configuration + +The framework supports multiple **simulation contexts**, each representing an independent scene. 
A context may contain: + +* one or more soft bodies, +* rigid bodies. + +Each context can be configured independently with physical parameters such as time step size, gravity, and damping coefficients. Contexts can be switched **at runtime**. + +--- -## Note on Configuration +### Solver Configuration -The complete environment configuration is specified in context.json. +Solver behavior is controlled on a per-context basis. -### Scene +* **Single-precision (`float`)** -The framework supports configuration of predefined soft bodies, rigid bodies, and camera parameters. Multiple contexts (scenes) can be loaded simultaneously, where each context may contain different combinations of soft and rigid objects, as well as distinct camera settings. + * Uses the **Projective Dynamics (PD)** solver +* **Double-precision (`double`)** -Each context can be configured independently with physical parameters such as time step size, gravity, damping coefficients, and friction coefficients, and supports real-time switching between contexts. + * Uses the **Incremental Potential Contact (IPC)** solver -### Solver +Only parameters relevant to the active solver are applied. -The behavior of the solver can be adjusted by modifying parameters in each context. Currently, solvers supporting two floating-point precisions are available. When defining a context, setting the precision parameter to float uses the projective dynamics solver, while setting it to double uses the IPC solver. Only the parameters relevant to the active solver take effect. +#### Notes on Solver Usage -The PD solver supports interactive object dragging within the scene. The IPC solver is significantly slower and consumes more GPU memory; therefore, it is not recommended for scenes involving objects with a large number of degrees of freedom. Different solvers expose different global solver and linear solver options in the ImGui combo box, which can be switched in real time.
However, since solvers consume a substantial amount of GPU memory, frequent switching may lead to performance degradation. It is recommended to select the desired solver before starting the simulation and avoid switching after the simulation has begun. +* The PD solver supports **interactive object dragging**. +* IPC is **not recommended** for scenes with a large number of degrees of freedom; for large vertex counts, careful parameter tuning is required, otherwise the simulation may fail to converge and pause. +* For large-scale systems, **Cholesky-based solvers can become prohibitively slow**; **PCG with a Jacobi preconditioner** is recommended instead. +* Linear solvers can be switched via ImGui **before simulation starts**. ## Screenshots diff --git a/src/simulation/energy/implicitBarrier.cu b/src/simulation/energy/implicitBarrier.cu index f786c7d..c2b0a21 100644 --- a/src/simulation/energy/implicitBarrier.cu +++ b/src/simulation/energy/implicitBarrier.cu @@ -16,6 +16,13 @@ namespace ImplicitBarrier { if (idx >= numVerts) return; glm::tvec3 x = X[idx]; + for (int k = 0; k < 3; k++) { + for (int l = 0; l < 3; l++) { + int index = idx * 9 + k * 3 + l; + hessianRowIdx[index] = idx * 3 + k; + hessianColIdx[index] = idx * 3 + l; + } + } for (int j = 0; j < numPlanes; j++) { const Plane& plane = planes[j]; glm::tvec3 floorPos = glm::tvec3(plane.m_model[3]); @@ -33,8 +40,6 @@ namespace ImplicitBarrier { int colIdx = idx * 3 + l; int index = idx * 9 + k * 3 + l; hessianVal[index] += hess[k][l]; - hessianRowIdx[index] = rowIdx; - hessianColIdx[index] = colIdx; } } } @@ -60,8 +65,6 @@ namespace ImplicitBarrier { int colIdx = idx * 3 + l; int index = idx * 9 + k * 3 + l; hessianVal[index] += hess[k][l]; - hessianRowIdx[index] = rowIdx; - hessianColIdx[index] = colIdx; } } } @@ -85,8 +88,6 @@ namespace ImplicitBarrier { int colIdx = idx * 3 + l; int index = idx * 9 + k * 3 + l; hessianVal[index] += hess[k][l]; - hessianRowIdx[index] = rowIdx; - hessianColIdx[index] = 
colIdx; } } } @@ -153,6 +154,13 @@ namespace ImplicitBarrier { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= numVerts) return; + for (int k = 0; k < 3; k++) { + for (int l = 0; l < 3; l++) { + int index = idx * 9 + k * 3 + l; + hessianRowIdx[index] = idx * 3 + k; + hessianColIdx[index] = idx * 3 + l; + } + } const glm::tvec3 x = X[idx]; for (int j = 0; j < numPlanes; j++) { const Plane& plane = planes[j]; @@ -175,8 +183,6 @@ namespace ImplicitBarrier { int colIdx = idx * 3 + l; int index = idx * 9 + k * 3 + l; hessianVal[index] += hess[k][l]; - hessianRowIdx[index] = rowIdx; - hessianColIdx[index] = colIdx; } } } @@ -206,8 +212,6 @@ namespace ImplicitBarrier { int colIdx = idx * 3 + l; int index = idx * 9 + k * 3 + l; hessianVal[index] += hess[k][l]; - hessianRowIdx[index] = rowIdx; - hessianColIdx[index] = colIdx; } } } @@ -235,8 +239,6 @@ namespace ImplicitBarrier { int colIdx = idx * 3 + l; int index = idx * 9 + k * 3 + l; hessianVal[index] += hess[k][l]; - hessianRowIdx[index] = rowIdx; - hessianColIdx[index] = colIdx; } } } From 6efc9b78a4e3fa03d56ee86669019be115bbf239 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 01:26:55 -0600 Subject: [PATCH 11/20] Refactor CMake workflow for multi-platform builds --- .github/workflows/cmake-multi-platform.yml | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/workflows/cmake-multi-platform.yml diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml new file mode 100644 index 0000000..f3d7c18 --- /dev/null +++ b/.github/workflows/cmake-multi-platform.yml @@ -0,0 +1,57 @@ +name: CMake on multiple platforms + +on: + pull_request: + branches: + - "linux" + - "main" + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + build_type: Release + c_compiler: gcc + cpp_compiler: g++ + - os: windows-latest + build_type: Release + c_compiler: cl + 
cpp_compiler: cl + + steps: + - uses: actions/checkout@v4 + + # 安装 CUDA Toolkit(包含 cuSolver) + - name: Install CUDA Toolkit + uses: Jimver/cuda-toolkit@v0.2.16 + with: + # 选一个你项目支持的版本;也可以固定到你本地用的版本 + cuda: "12.4.1" + + - name: Set reusable strings + id: strings + shell: bash + run: | + echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + + - name: Configure CMake + shell: bash + run: > + cmake -B "${{ steps.strings.outputs.build-output-dir }}" + -S "${{ github.workspace }}" + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} + -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} + + - name: Build + shell: bash + run: cmake --build "${{ steps.strings.outputs.build-output-dir }}" --config ${{ matrix.build_type }} + + - name: Test + shell: bash + working-directory: ${{ steps.strings.outputs.build-output-dir }} + run: ctest --build-config ${{ matrix.build_type }} --output-on-failure From d1d44ef106e63ff2cd988c5a0efd5638a071089a Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 12:56:30 -0600 Subject: [PATCH 12/20] apt install cuda --- .github/workflows/cmake-multi-platform.yml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index f3d7c18..2b7fc5b 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -25,18 +25,27 @@ jobs: steps: - uses: actions/checkout@v4 - # 安装 CUDA Toolkit(包含 cuSolver) - - name: Install CUDA Toolkit - uses: Jimver/cuda-toolkit@v0.2.16 - with: - # 选一个你项目支持的版本;也可以固定到你本地用的版本 - cuda: "12.4.1" + - name: Install CUDA Toolkit (Linux) + if: runner.os == 'Linux' + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y wget gnupg + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get 
update + sudo apt-get install -y cuda-toolkit-12-4 - name: Set reusable strings id: strings shell: bash run: | echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + - name: Set CUDA env + if: runner.os == 'Linux' + run: | + echo "/usr/local/cuda/bin" >> $GITHUB_PATH + echo "CUDA_HOME=/usr/local/cuda" >> $GITHUB_ENV - name: Configure CMake shell: bash From 69620dc1558f510f40ce05fb21bd91010ccf24e1 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 13:04:43 -0600 Subject: [PATCH 13/20] linux only test --- .github/workflows/cmake-multi-platform.yml | 47 +++++----------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 2b7fc5b..eb13394 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -1,32 +1,19 @@ -name: CMake on multiple platforms +name: CUDA CMake Build (Linux) on: pull_request: branches: - - "linux" - - "main" + - linux + - main jobs: build: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - - os: ubuntu-latest - build_type: Release - c_compiler: gcc - cpp_compiler: g++ - - os: windows-latest - build_type: Release - c_compiler: cl - cpp_compiler: cl + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - - name: Install CUDA Toolkit (Linux) - if: runner.os == 'Linux' + - name: Install CUDA Toolkit shell: bash run: | sudo apt-get update @@ -36,31 +23,15 @@ jobs: sudo apt-get update sudo apt-get install -y cuda-toolkit-12-4 - - name: Set reusable strings - id: strings - shell: bash - run: | - echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" - name: Set CUDA env - if: runner.os == 'Linux' run: | echo "/usr/local/cuda/bin" >> $GITHUB_PATH echo "CUDA_HOME=/usr/local/cuda" >> $GITHUB_ENV - name: Configure CMake - shell: bash - run: > - cmake -B "${{ steps.strings.outputs.build-output-dir }}" - -S "${{ 
github.workspace }}" - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} - -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} + run: | + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release - name: Build - shell: bash - run: cmake --build "${{ steps.strings.outputs.build-output-dir }}" --config ${{ matrix.build_type }} - - - name: Test - shell: bash - working-directory: ${{ steps.strings.outputs.build-output-dir }} - run: ctest --build-config ${{ matrix.build_type }} --output-on-failure + run: | + cmake --build build --config Release From a724a391e4c5089266884ee853c8ae92dc2971e5 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 13:14:55 -0600 Subject: [PATCH 14/20] Install OpenGL deps --- .github/workflows/cmake-multi-platform.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index eb13394..513b847 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -13,6 +13,18 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install OpenGL deps + run: | + sudo apt-get update + sudo apt-get install -y \ + libgl1-mesa-dev \ + libglu1-mesa-dev \ + libx11-dev \ + libxrandr-dev \ + libxinerama-dev \ + libxcursor-dev \ + libxi-dev + - name: Install CUDA Toolkit shell: bash run: | From 1ff4e139a92d68856838c3b2a3e2699f372a5200 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 13:36:38 -0600 Subject: [PATCH 15/20] update opengl setup --- CMakeLists.txt | 8 +++++- tests/CMakeLists.txt | 58 ++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index da41093..56bc764 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,9 +39,15 @@ find_package(OpenGL REQUIRED) set(EXTERNAL "${PROJECT_SOURCE_DIR}/external") if(UNIX) + find_package(OpenGL REQUIRED) find_package(glfw3 REQUIRED) 
find_package(GLEW REQUIRED) - set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY} ${OPENGL_glx_LIBRARY}) + + set(LIBRARIES + glfw + ${GLEW_LIBRARIES} + OpenGL::GL + ) else() set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") set(GLFW_ROOT_DIR ${EXTERNAL}) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 38005eb..8702385 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,51 +1,45 @@ cmake_minimum_required(VERSION 3.18) +project(tests LANGUAGES CXX CUDA) -# Enable C++11 for host code set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED True) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) set(CMAKE_CUDA_ARCHITECTURES 86) -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") - -# Set a default build type if none was specified -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - SET(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) - # Set the possible values of build type for cmake-gui - SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") -endif() - find_package(CUDAToolkit REQUIRED) include(catch2) include(finite_diff) -######################################## -# Catch2 Tests -######################################## -file(GLOB_RECURSE TEST_SOURCES "./*.cpp") -file(GLOB_RECURSE TEST_DISTANCE_SOURCES "${CMAKE_SOURCE_DIR}/src/simulation/distance/*.cu") +file(GLOB_RECURSE TEST_SOURCES "*.cpp") +file(GLOB_RECURSE TEST_DISTANCE_SOURCES + "${CMAKE_SOURCE_DIR}/src/simulation/distance/*.cu" +) + add_executable(tests -${TEST_SOURCES} -${TEST_DISTANCE_SOURCES} -${CMAKE_SOURCE_DIR}/src/collision/intersections.cu -${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/cg.cu -${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/jacobi.cu -${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/linear.cu + ${TEST_SOURCES} + ${TEST_DISTANCE_SOURCES} + ${CMAKE_SOURCE_DIR}/src/collision/intersections.cu + ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/cg.cu + 
${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/jacobi.cu + ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/linear.cu ) -target_include_directories(tests PUBLIC ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear) -target_link_libraries(tests PUBLIC -Eigen3::Eigen -CUDA::cudart -Catch2::Catch2 -CUDA::cusolver -finitediff::finitediff + +target_link_libraries(tests + Eigen3::Eigen + CUDA::cudart + CUDA::cusolver + Catch2::Catch2 + finitediff::finitediff ) + set_target_properties(tests PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON ) + target_compile_options(tests PRIVATE - $<$:--extended-lambda -lineinfo --expt-relaxed-constexpr -Xcompiler "/wd 4819 /wd 4068 /wd 4661" -Xcudafe "--display_error_number --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=20014"> + $<$: + --extended-lambda + --expt-relaxed-constexpr + -lineinfo + > ) -######################################## From 956422742a34260430e9ce5f66e95d8bd2802d22 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 13:37:05 -0600 Subject: [PATCH 16/20] install opengl --- .github/workflows/cmake-multi-platform.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 513b847..7e8d987 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -35,6 +35,20 @@ jobs: sudo apt-get update sudo apt-get install -y cuda-toolkit-12-4 + - name: Install OpenGL & GLFW deps + run: | + sudo apt-get update + sudo apt-get install -y \ + libglfw3-dev \ + libglew-dev \ + libglm-dev \ + libx11-dev \ + libxrandr-dev \ + libxinerama-dev \ + libxcursor-dev \ + libxi-dev + + - name: Set CUDA env run: | echo "/usr/local/cuda/bin" >> $GITHUB_PATH From 6001fe79dcac7cddc6ea530df48648092092a837 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 13:47:49 -0600 Subject: [PATCH 17/20] set CMAKE_CUDA_ARCHITECTURES --- CMakeLists.txt | 8 
+++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 56bc764..0c165a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,13 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/recipes/") list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/find/") -set(CMAKE_CUDA_ARCHITECTURES native) +if(CMAKE_CUDA_COMPILER) + set(CMAKE_CUDA_ARCHITECTURES native) +endif() + +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR CMAKE_CUDA_ARCHITECTURES STREQUAL "native") + set(CMAKE_CUDA_ARCHITECTURES 70;75;80;86;90) +endif() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -g") From 80671c249729ca3d34b213b2502b4ab718305c52 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 14:41:45 -0600 Subject: [PATCH 18/20] add badge --- ...lti-platform.yml => cuda-cmake-build-linux.yml} | 14 ++++++++++---- README.md | 3 ++- 2 files changed, 12 insertions(+), 5 deletions(-) rename .github/workflows/{cmake-multi-platform.yml => cuda-cmake-build-linux.yml} (79%) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cuda-cmake-build-linux.yml similarity index 79% rename from .github/workflows/cmake-multi-platform.yml rename to .github/workflows/cuda-cmake-build-linux.yml index 7e8d987..7b08fe2 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cuda-cmake-build-linux.yml @@ -9,6 +9,9 @@ on: jobs: build: runs-on: ubuntu-22.04 + strategy: + matrix: + build_type: [Debug, Release] steps: - uses: actions/checkout@v4 @@ -26,7 +29,6 @@ jobs: libxi-dev - name: Install CUDA Toolkit - shell: bash run: | sudo apt-get update sudo apt-get install -y wget gnupg @@ -48,7 +50,6 @@ jobs: libxcursor-dev \ libxi-dev - - name: Set CUDA env run: | echo "/usr/local/cuda/bin" >> $GITHUB_PATH @@ -56,8 +57,13 @@ jobs: - name: Configure CMake run: | - cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release + cmake -S . -B build-${{ matrix.build_type }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - name: Build run: | - cmake --build build --config Release + cmake --build build-${{ matrix.build_type }} --config ${{ matrix.build_type }} + + - name: Run tests + run: | + cd build-${{ matrix.build_type }} + ctest --output-on-failure diff --git a/README.md b/README.md index 4cd4eb1..9a93e88 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ CUDA-Accelerated Soft Body Simulation -================ + +![CUDA CMake Build (Linux)](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/actions/workflows/cuda-cmake-build-linux.yml/badge.svg) **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Final Project** From 884e0de2b783d2c7c625a1a415bfdf618359ca61 Mon Sep 17 00:00:00 2001 From: Gehan Zheng Date: Sat, 20 Dec 2025 14:42:42 -0600 Subject: [PATCH 19/20] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9a93e88..f25e8e7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -CUDA-Accelerated Soft Body Simulation +# CUDA-Accelerated Soft Body Simulation ![CUDA CMake Build (Linux)](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/actions/workflows/cuda-cmake-build-linux.yml/badge.svg) From 5e5a9996fc32ccda708e5197299726b079bdd310 Mon Sep 17 00:00:00 2001 From: GrahamZen Date: Sat, 20 Dec 2025 16:45:54 -0600 Subject: [PATCH 20/20] fix CMAKE_CUDA_ARCHITECTURES --- CMakeLists.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c165a6..549e622 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,11 +14,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/recipes/") list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/find/") if(CMAKE_CUDA_COMPILER) - set(CMAKE_CUDA_ARCHITECTURES native) -endif() - -if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR 
CMAKE_CUDA_ARCHITECTURES STREQUAL "native") - set(CMAKE_CUDA_ARCHITECTURES 70;75;80;86;90) + if(DEFINED ENV{CI}) + message(STATUS "CI environment detected. Setting CUDA architecture to 75.") + set(CMAKE_CUDA_ARCHITECTURES 75) + else() + message(STATUS "Local environment detected. Setting CUDA architecture to native.") + set(CMAKE_CUDA_ARCHITECTURES native) + endif() endif() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")