From 11cd9046080f7cb3e18c02e45533dec07a4ac0e2 Mon Sep 17 00:00:00 2001
From: GrahamZen <931252924@qq.com>
Date: Thu, 18 Dec 2025 14:11:52 -0600
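Subject: [PATCH 01/20] rename distance .cu sources to .cuh headers

Also raises the near-zero diagonal threshold in ExtractInverseDiagonalKernel
from 1e-12 to 1e-9 and stops re-creating dvec_r in PCGJacobiSolver::Solve.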

---
 src/collision/broadphase.cu                               | 2 +-
 .../distance/{distance_type.cu => distance_type.cuh}      | 4 +++-
 src/simulation/distance/distance_type.h                   | 4 ++--
 src/simulation/distance/{edge_edge.cu => edge_edge.cuh}   | 8 +++++---
 src/simulation/distance/{line_line.cu => line_line.cuh}   | 0
 src/simulation/distance/{point_line.cu => point_line.cuh} | 2 ++
 .../distance/{point_plane.cu => point_plane.cuh}          | 2 ++
 .../distance/{point_point.cu => point_point.cuh}          | 2 ++
 .../distance/{point_triangle.cu => point_triangle.cuh}    | 8 +++++---
 src/simulation/energy/barrier.cu                          | 2 +-
 src/simulation/solver/linear/pcgJacobi.cu                 | 4 +---
 tests/test_distance.cpp                                   | 2 +-
 12 files changed, 25 insertions(+), 15 deletions(-)
 rename src/simulation/distance/{distance_type.cu => distance_type.cuh} (99%)
 rename src/simulation/distance/{edge_edge.cu => edge_edge.cuh} (98%)
 rename src/simulation/distance/{line_line.cu => line_line.cuh} (100%)
 rename src/simulation/distance/{point_line.cu => point_line.cuh} (99%)
 rename src/simulation/distance/{point_plane.cu => point_plane.cuh} (99%)
 rename src/simulation/distance/{point_point.cu => point_point.cuh} (99%)
 rename src/simulation/distance/{point_triangle.cu => point_triangle.cuh} (98%)
diff --git a/src/collision/broadphase.cu b/src/collision/broadphase.cu
index 2d64e67..72a8dbd 100644
--- a/src/collision/broadphase.cu
+++ b/src/collision/broadphase.cu
@@ -4,7 +4,7 @@
 #include <collision/bvh.cuh>
 #include <collision/bvh.h>
 #include <simulation/simulationContext.h>
-#include <distance/distance_type.h>
+#include <distance/distance_type.cuh>
 #include <collision/intersections.h>
 #include <thrust/sort.h>
 #include <thrust/unique.h>
diff --git a/src/simulation/distance/distance_type.cu b/src/simulation/distance/distance_type.cuh
similarity index 99%
rename from src/simulation/distance/distance_type.cu
rename to src/simulation/distance/distance_type.cuh
index c5168a6..04bf971 100644
--- a/src/simulation/distance/distance_type.cu
+++ b/src/simulation/distance/distance_type.cuh
@@ -4,6 +4,7 @@
 
 using namespace ipc;
 
+#ifdef __CUDACC__
 template <typename Scalar>
 __global__ void GetDistanceType(const glm::tvec3<Scalar>* Xs, Query* queries, int numQueries) {
     int qIdx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -37,6 +38,7 @@ __global__ void ComputeDistance(const glm::tvec3<Scalar>* Xs, Query* queries, in
 
 template __global__ void ComputeDistance<float>(const glm::tvec3<float>* Xs, Query* queries, int numQueries);
 template __global__ void ComputeDistance<double>(const glm::tvec3<double>* Xs, Query* queries, int numQueries);
+#endif
 
 /// @brief Solve the least square problem: min ||A * x - b||^2
 /// @note A = [t1 - t0, glm::cross(t1 - t0, normal)], b = p - t0
@@ -261,4 +263,4 @@ template __host__ __device__ DistanceType edge_edge_distance_type<double>(
     const glm::tvec3<double>& ea0,
     const glm::tvec3<double>& ea1,
     const glm::tvec3<double>& eb0,
-    const glm::tvec3<double>& eb1);
\ No newline at end of file
+    const glm::tvec3<double>& eb1);
diff --git a/src/simulation/distance/distance_type.h b/src/simulation/distance/distance_type.h
index 5d09145..bafc6e7 100644
--- a/src/simulation/distance/distance_type.h
+++ b/src/simulation/distance/distance_type.h
@@ -1,7 +1,7 @@
 ﻿#pragma once
 #include <aabb.h>
-#include <distance/point_triangle.h>
-#include <distance/edge_edge.h>
+#include <distance/point_triangle.cuh>
+#include <distance/edge_edge.cuh>
 #include <cuda_runtime.h>
 
 template <typename Scalar>
diff --git a/src/simulation/distance/edge_edge.cu b/src/simulation/distance/edge_edge.cuh
similarity index 98%
rename from src/simulation/distance/edge_edge.cu
rename to src/simulation/distance/edge_edge.cuh
index 93d02d4..29726b8 100644
--- a/src/simulation/distance/edge_edge.cu
+++ b/src/simulation/distance/edge_edge.cuh
@@ -1,8 +1,10 @@
+#pragma once
+
 #include "edge_edge.h"
 
-#include <distance/point_point.h>
-#include <distance/point_line.h>
-#include <distance/line_line.h>
+#include <distance/point_point.cuh>
+#include <distance/point_line.cuh>
+#include <distance/line_line.cuh>
 
 namespace ipc {
 
diff --git a/src/simulation/distance/line_line.cu b/src/simulation/distance/line_line.cuh
similarity index 100%
rename from src/simulation/distance/line_line.cu
rename to src/simulation/distance/line_line.cuh
diff --git a/src/simulation/distance/point_line.cu b/src/simulation/distance/point_line.cuh
similarity index 99%
rename from src/simulation/distance/point_line.cu
rename to src/simulation/distance/point_line.cuh
index 22de769..9cea97d 100644
--- a/src/simulation/distance/point_line.cu
+++ b/src/simulation/distance/point_line.cuh
@@ -1,3 +1,5 @@
+#pragma once
+
 #include "point_line.h"
 #include <glm/gtx/norm.hpp> 
 
diff --git a/src/simulation/distance/point_plane.cu b/src/simulation/distance/point_plane.cuh
similarity index 99%
rename from src/simulation/distance/point_plane.cu
rename to src/simulation/distance/point_plane.cuh
index d6d2377..f757d91 100644
--- a/src/simulation/distance/point_plane.cu
+++ b/src/simulation/distance/point_plane.cuh
@@ -1,3 +1,5 @@
+#pragma once
+
 #include "point_plane.h"
 #include <cuda_runtime.h>
 
diff --git a/src/simulation/distance/point_point.cu b/src/simulation/distance/point_point.cuh
similarity index 99%
rename from src/simulation/distance/point_point.cu
rename to src/simulation/distance/point_point.cuh
index 758ee9e..fc53af2 100644
--- a/src/simulation/distance/point_point.cu
+++ b/src/simulation/distance/point_point.cuh
@@ -1,3 +1,5 @@
+#pragma once
+
 #include "point_point.h"
 #include <glm/gtx/norm.hpp> 
 #include <cuda_runtime.h>
diff --git a/src/simulation/distance/point_triangle.cu b/src/simulation/distance/point_triangle.cuh
similarity index 98%
rename from src/simulation/distance/point_triangle.cu
rename to src/simulation/distance/point_triangle.cuh
index 2d04d7c..825675a 100644
--- a/src/simulation/distance/point_triangle.cu
+++ b/src/simulation/distance/point_triangle.cuh
@@ -1,8 +1,10 @@
+#pragma once
+
 #include "point_triangle.h"
 
-#include <distance/point_line.h>
-#include <distance/point_plane.h>
-#include <distance/point_point.h>
+#include <distance/point_line.cuh>
+#include <distance/point_plane.cuh>
+#include <distance/point_point.cuh>
 #include <cuda_runtime.h>
 
 namespace ipc {
diff --git a/src/simulation/energy/barrier.cu b/src/simulation/energy/barrier.cu
index cec928f..49ea48e 100644
--- a/src/simulation/energy/barrier.cu
+++ b/src/simulation/energy/barrier.cu
@@ -2,7 +2,7 @@
 #include <collision/bvh.h>
 #include <solverUtil.cuh>
 #include <matrix.h>
-#include <distance/distance_type.h>
+#include <distance/distance_type.cuh>
 #include <thrust/transform_reduce.h>
 #include <thrust/iterator/counting_iterator.h>
 
diff --git a/src/simulation/solver/linear/pcgJacobi.cu b/src/simulation/solver/linear/pcgJacobi.cu
index ae93056..28b6a6e 100644
--- a/src/simulation/solver/linear/pcgJacobi.cu
+++ b/src/simulation/solver/linear/pcgJacobi.cu
@@ -18,7 +18,7 @@ __global__ void ExtractInverseDiagonalKernel(int N, const T* A, const int* rowPt
         }
 
         // Avoid division by zero
-        if (abs(diagVal) < 1e-12) diagVal = 1.0;
+        if (abs(diagVal) < 1e-9) diagVal = 1.0;
         invDiag[idx] = 1.0 / diagVal;
     }
 }
@@ -96,10 +96,8 @@ void PCGJacobiSolver<T>::Solve(int N, T* d_b, T* d_x, T* A, int nz, int* rowIdx,
 
     if (dvec_x) CHECK_CUSPARSE(cusparseDestroyDnVec(dvec_x));
     if (dvec_b) CHECK_CUSPARSE(cusparseDestroyDnVec(dvec_b));
-    if (dvec_r) CHECK_CUSPARSE(cusparseDestroyDnVec(dvec_r));
     CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_x, N, d_x, dType));
     CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_b, N, d_b, dType));
-    CHECK_CUSPARSE(cusparseCreateDnVec(&dvec_r, N, d_r, dType));
 
     size_t bufferSize = 0;
     CHECK_CUSPARSE(cusparseSpMV_bufferSize(cusHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, d_matA,
diff --git a/tests/test_distance.cpp b/tests/test_distance.cpp
index 029b6f0..245fe0d 100644
--- a/tests/test_distance.cpp
+++ b/tests/test_distance.cpp
@@ -2,7 +2,7 @@
 #include <catch2/catch_all.hpp>
 #include <catch2/catch_approx.hpp>
 #include <linear/cg.h>
-#include <distance/distance_type.h>
+#include <distance/distance_type.cuh>
 #include <matrix.h>
 #include <finitediff.hpp>
 

From 8e4d1649424043ffa554e5f135ae9d099f0dfd3b Mon Sep 17 00:00:00 2001
From: GrahamZen <931252924@qq.com>
Date: Fri, 19 Dec 2025 01:11:14 -0600
Subject: [PATCH 02/20] fix linux compilation

---
 CMakeLists.txt                               | 31 +++++++++++++-------
 src/collision/aabb.h                         | 11 +++++--
 src/collision/broadphase.cu                  | 20 +++++++++++--
 src/collision/bvh.cu                         |  8 ++---
 src/collision/ccd.cu                         |  7 +++--
 src/collision/intersections.cu               | 10 ++++++-
 src/collision/narrowphase.cu                 | 16 +++++-----
 src/context.cpp                              |  2 +-
 src/openglcontext/preview.cpp                |  2 +-
 src/openglcontext/shaderprogram.cpp          |  1 +
 src/simulation/collisionUtil.cu              |  5 +---
 src/simulation/distance/distance_type.cuh    |  7 +++++
 src/simulation/energy/barrier.cu             |  5 ++--
 src/simulation/energy/corotated.cu           |  5 ++--
 src/simulation/energy/implicitBarrier.cu     |  4 +--
 src/simulation/energy/inertia.cu             |  4 +--
 src/simulation/energy/neohookean08.cu        |  5 ++--
 src/simulation/simulationContext.cu          |  2 ++
 src/simulation/solver/femSolver.cu           |  2 +-
 src/simulation/solver/linear/cg.h            |  7 +++++
 src/simulation/solver/linear/cholesky.cu     |  7 +++--
 src/simulation/solver/linear/cholesky.h      |  8 +++++
 src/simulation/solver/linear/choleskyImmed.h |  6 ++++
 src/simulation/solver/linear/jacobi.h        |  6 ++++
 src/simulation/solver/linear/linear.h        |  1 +
 src/simulation/solver/linear/pcgJacobi.h     |  7 +++++
 src/simulation/solver/solverUtil.cuh         |  1 +
 src/simulation/utilities.cuh                 | 14 +++++++++
 src/utilities.cpp                            |  3 +-
 tests/test_distance.cpp                      | 16 +++++-----
 tests/test_solveCubic.cpp                    | 12 ++++----
 31 files changed, 169 insertions(+), 66 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8ee8d1e..da41093 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,14 +36,14 @@ find_package(CUDAToolkit REQUIRED)
 
 find_package(OpenGL REQUIRED)
 
+set(EXTERNAL "${PROJECT_SOURCE_DIR}/external")
+
 if(UNIX)
     find_package(glfw3 REQUIRED)
     find_package(GLEW REQUIRED)
-    set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY})
+    set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY} ${OPENGL_glx_LIBRARY})
 else()
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
-    set(EXTERNAL "external")
-
     set(GLFW_ROOT_DIR ${EXTERNAL})
     set(GLFW_USE_STATIC_LIBS ON)
     find_package(GLFW REQUIRED)
@@ -57,7 +57,8 @@ else()
     set(LIBRARIES ${GLEW_LIBRARY} ${GLFW_LIBRARY} ${OPENGL_LIBRARY})
 endif()
 
-set(GLM_ROOT_DIR "external")
+set(GLM_ROOT_DIR "${PROJECT_SOURCE_DIR}/external")
+set(GLM_INCLUDE_DIR "${EXTERNAL}/include" CACHE PATH "GLM include dir" FORCE)
 find_package(GLM REQUIRED)
 include_directories(${GLM_INCLUDE_DIRS})
 
@@ -106,6 +107,11 @@ include_directories(
     ${GLEW_INCLUDE_DIR} 
     ${GLFW_INCLUDE_DIR})
 
+if(UNIX)
+    list(APPEND CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include")
+    list(REMOVE_DUPLICATES CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES)
+endif()
+
 ########################################
 # Catch2 Tests
 ########################################
@@ -114,30 +120,35 @@ enable_testing()
 add_subdirectory(tests)
 ########################################
 add_executable(${CMAKE_PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES} ${IMGUI_SOURCES})
-target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${EXTERNAL}/ImGui" "${EXTERNAL}/svd3_cuda" ${eigen_SOURCE_DIR})
+target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${EXTERNAL}/ImGui" "${EXTERNAL}/svd3_cuda" "${EXTERNAL}/include")
 target_link_libraries(${CMAKE_PROJECT_NAME}
 ${LIBRARIES}
 CUDA::cudart
 CUDA::cusolver
 OpenMP::OpenMP_CXX
 spdlog::spdlog
+Eigen3::Eigen
 #stream_compaction  # TODO: uncomment if using your stream compaction
 )
 set_target_properties(${CMAKE_PROJECT_NAME} PROPERTIES
     CUDA_SEPARABLE_COMPILATION ON
     CUDA_RESOLVE_DEVICE_SYMBOLS ON
 )
+set(CUDA_HOST_WARNING_SUPPRESSIONS "")
+if(MSVC)
+  list(APPEND CUDA_HOST_WARNING_SUPPRESSIONS
+    -Xcompiler=/wd4819
+    -Xcompiler=/wd4068
+    -Xcompiler=/wd4661)
+endif()
+
 target_compile_options(${CMAKE_PROJECT_NAME} PRIVATE
   $<$<COMPILE_LANGUAGE:CUDA>:
     -Xptxas=-v
     --extended-lambda
     --expt-relaxed-constexpr
     -lineinfo
-
-    -Xcompiler=/wd4819
-    -Xcompiler=/wd4068
-    -Xcompiler=/wd4661
-
+    ${CUDA_HOST_WARNING_SUPPRESSIONS}
     -Xcudafe=--display_error_number
     -Xcudafe=--diag_suppress=20012
     -Xcudafe=--diag_suppress=20011
diff --git a/src/collision/aabb.h b/src/collision/aabb.h
index af1afc0..c2364be 100644
--- a/src/collision/aabb.h
+++ b/src/collision/aabb.h
@@ -58,7 +58,12 @@ enum class QueryType {
     VF,
     EE
 };
-
+// Plain aggregate of three doubles; Query keeps its `normal` member in this form.
+struct Vec3d {
+    double x, y, z;
+    // Unary minus: yields a new Vec3d with each component negated.
+    Vec3d operator-() const { return Vec3d{-x, -y, -z}; }
+};
 class Query {
 public:
     QueryType type = QueryType::UNKNOWN;
@@ -69,5 +74,5 @@ class Query {
     indexType v3;
     double d;
     double toi = 0.f;
-    glm::dvec3 normal = glm::dvec3(0.f);
-};
+    Vec3d normal = Vec3d{0.0, 0.0, 0.0};
+};
\ No newline at end of file
diff --git a/src/collision/broadphase.cu b/src/collision/broadphase.cu
index 72a8dbd..33d5dbe 100644
--- a/src/collision/broadphase.cu
+++ b/src/collision/broadphase.cu
@@ -11,6 +11,20 @@
 #include <thrust/remove.h>
 #include <thrust/device_vector.h>
 
+namespace {
+// Device-side atomic add on a size_t counter. Forwards to the atomicAdd overload whose
+// operand width matches size_t and returns that call's result converted back to size_t.
+__device__ inline size_t atomicAddSizeT(size_t* address, size_t val) {
+    if constexpr (sizeof(size_t) != sizeof(unsigned long long)) {
+        auto* narrow = reinterpret_cast<unsigned int*>(address);
+        return static_cast<size_t>(atomicAdd(narrow, static_cast<unsigned int>(val)));
+    } else {
+        auto* wide = reinterpret_cast<unsigned long long*>(address);
+        return static_cast<size_t>(atomicAdd(wide, static_cast<unsigned long long>(val)));
+    }
+}
+} // namespace
+
 //input the aabb box of a Tetrahedron
 //generate a 30-bit morton code
 template<typename Scalar>
@@ -341,7 +355,7 @@ __global__ void traverseTree(int numTris, const BVHNode<Scalar>* nodes, const in
                 // 1 faces * 3 verts + 3 edges * 3 edges
                 if ((!ignoreSelfCollision || triFathers[myNode.TriangleIndex] != triFathers[leftChild.TriangleIndex]) && myNode.TriangleIndex != leftChild.TriangleIndex && !isAdjacentTriangle(tris[myNode.TriangleIndex * 3 + 0], tris[myNode.TriangleIndex * 3 + 1], tris[myNode.TriangleIndex * 3 + 2],
                     tris[leftChild.TriangleIndex * 3 + 0], tris[leftChild.TriangleIndex * 3 + 1], tris[leftChild.TriangleIndex * 3 + 2])) {
-                    int qIdx = atomicAdd(queryCount, 12);
+                    size_t qIdx = atomicAddSizeT(queryCount, static_cast<size_t>(12));
                     if (qIdx + 12 < maxNumQueries) {
                         Query* qBegin = &queries[qIdx];
                         fillQuery(qBegin, myNode.TriangleIndex, leftChild.TriangleIndex, tris);
@@ -365,7 +379,7 @@ __global__ void traverseTree(int numTris, const BVHNode<Scalar>* nodes, const in
             {
                 if ((!ignoreSelfCollision || triFathers[myNode.TriangleIndex] != triFathers[rightChild.TriangleIndex]) && myNode.TriangleIndex != rightChild.TriangleIndex && !isAdjacentTriangle(tris[myNode.TriangleIndex * 3 + 0], tris[myNode.TriangleIndex * 3 + 1], tris[myNode.TriangleIndex * 3 + 2],
                     tris[rightChild.TriangleIndex * 3 + 0], tris[rightChild.TriangleIndex * 3 + 1], tris[rightChild.TriangleIndex * 3 + 2])) {
-                    int qIdx = atomicAdd(queryCount, 12);
+                    size_t qIdx = atomicAddSizeT(queryCount, static_cast<size_t>(12));
                     if (qIdx + 12 < maxNumQueries) {
                         Query* qBegin = &queries[qIdx];
                         fillQuery(qBegin, myNode.TriangleIndex, rightChild.TriangleIndex, tris);
@@ -401,7 +415,7 @@ bool CollisionDetection<Scalar>::DetectCollisionCandidates(const BVHNode<Scalar>
             overflowHappened = true;
             maxNumQueries *= 2;
             std::cerr << "Query buffer overflow, resizing to " << maxNumQueries << std::endl;
-            if (maxNumQueries > 1 << 31) {
+            if (maxNumQueries > (static_cast<size_t>(1) << 31)) {
                 std::cerr << "Number of queries exceeds 2^31, aborting" << std::endl;
                 exit(1);
                 return false;
diff --git a/src/collision/bvh.cu b/src/collision/bvh.cu
index 869853a..48113c5 100644
--- a/src/collision/bvh.cu
+++ b/src/collision/bvh.cu
@@ -24,7 +24,7 @@ __device__ void buildBBox(BVHNode<Scalar>& curr, const BVHNode<Scalar>& left, co
 }
 
 template<typename Scalar>
-__global__ void buildBBoxesSerial(int leafCount, BVHNode<Scalar>* nodes, BVH<Scalar>::ReadyFlagType* ready) {
+__global__ void buildBBoxesSerial(int leafCount, BVHNode<Scalar>* nodes, typename BVH<Scalar>::ReadyFlagType* ready) {
     int ind = blockIdx.x * blockDim.x + threadIdx.x;
 
     if (ind >= leafCount - 1)return;
@@ -41,7 +41,7 @@ __global__ void buildBBoxesSerial(int leafCount, BVHNode<Scalar>* nodes, BVH<Sca
 namespace cg = cooperative_groups;
 
 template<typename Scalar>
-__global__ void buildBBoxesCG(int leafCount, BVHNode<Scalar>* nodes, BVH<Scalar>::ReadyFlagType* ready) {
+__global__ void buildBBoxesCG(int leafCount, BVHNode<Scalar>* nodes, typename BVH<Scalar>::ReadyFlagType* ready) {
     int ind = blockIdx.x * blockDim.x + threadIdx.x;
     cg::grid_group grid = cg::this_grid();
 
@@ -62,7 +62,7 @@ __global__ void buildBBoxesCG(int leafCount, BVHNode<Scalar>* nodes, BVH<Scalar>
 }
 
 template<typename Scalar>
-__global__ void buildBBoxesAtomic(int leafCount, BVHNode<Scalar>* nodes, BVH<Scalar>::ReadyFlagType* ready) {
+__global__ void buildBBoxesAtomic(int leafCount, BVHNode<Scalar>* nodes, typename BVH<Scalar>::ReadyFlagType* ready) {
     int ind = blockIdx.x * blockDim.x + threadIdx.x;
 
     if (ind >= leafCount - 1) return;
@@ -136,7 +136,7 @@ void BVH<Scalar>::BuildBBoxes(BuildType buildType) {
 }
 
 template<typename Scalar>
-BVH<Scalar>::BVH<Scalar>(const int _threadsPerBlock) :
+BVH<Scalar>::BVH(const int _threadsPerBlock) :
     threadsPerBlock(_threadsPerBlock) {}
 
 template<typename Scalar>
diff --git a/src/collision/ccd.cu b/src/collision/ccd.cu
index 6ea6231..1163576 100644
--- a/src/collision/ccd.cu
+++ b/src/collision/ccd.cu
@@ -210,9 +210,10 @@ SingleQueryDisplay& CollisionDetection<Scalar>::GetSQDisplay(int i, const glm::t
         cudaMemcpy(&v0Pos, pos + 1, sizeof(glm::vec3), cudaMemcpyDeviceToHost);
         cudaMemcpy(&v1Pos, pos + 2, sizeof(glm::vec3), cudaMemcpyDeviceToHost);
 
-        cudaMemcpy(&pos[4], &((v0Pos + v1Pos) / 2.f), sizeof(glm::vec3), cudaMemcpyHostToDevice);
+        glm::vec3 midPoint = (v0Pos + v1Pos) / 2.f;
+        cudaMemcpy(&pos[4], &midPoint, sizeof(glm::vec3), cudaMemcpyHostToDevice);
         // the third line point from the middle of v0 and v1 towards the normal direction
-        glm::vec3 normalPoint = (v0Pos + v1Pos) / 2.f + glm::vec3(q.normal) * 10.f;
+        glm::vec3 normalPoint = (v0Pos + v1Pos) / 2.f + glm::vec3(q.normal.x, q.normal.y, q.normal.z) * 10.f;
         cudaMemcpy(&pos[5], &normalPoint, sizeof(glm::vec3), cudaMemcpyHostToDevice);
         mSqDisplay.UnMapDevicePtr(&pos, nullptr, nullptr);
     }
@@ -228,7 +229,7 @@ SingleQueryDisplay& CollisionDetection<Scalar>::GetSQDisplay(int i, const glm::t
         thrust::transform(dev_ptr_X + q.v3, dev_ptr_X + q.v3 + 1, dev_triPos + 2, HighPtoFloatP<Scalar>());
         glm::vec3 v0Pos;
         cudaMemcpy(&v0Pos, vertPos, sizeof(glm::vec3), cudaMemcpyDeviceToHost);
-        glm::vec3 normalPoint = v0Pos + glm::vec3(q.normal) * 10.f;
+        glm::vec3 normalPoint = v0Pos + glm::vec3(q.normal.x, q.normal.y, q.normal.z) * 10.f;
         cudaMemcpy(&pos[0], &v0Pos, sizeof(glm::vec3), cudaMemcpyHostToDevice);
         cudaMemcpy(&pos[1], &normalPoint, sizeof(glm::vec3), cudaMemcpyHostToDevice);
         mSqDisplay.UnMapDevicePtr(&pos, &vertPos, &triPos);
diff --git a/src/collision/intersections.cu b/src/collision/intersections.cu
index 5cadcd3..8e76724 100644
--- a/src/collision/intersections.cu
+++ b/src/collision/intersections.cu
@@ -8,6 +8,7 @@
 #include <glm/gtx/intersect.hpp>
 #include <thrust/transform_reduce.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/execution_policy.h>
 
 /**
  * Handy-dandy hash function that provides seeds for random number generation.
@@ -33,6 +34,12 @@ __host__ __device__ glm::tvec3<Scalar> multiplyMV(glm::tmat4x4<Scalar> m, glm::t
     return glm::tvec3<Scalar>(m * v);
 }
 
+template<typename T>
+__host__ __device__ int solveQuadratic(T a, T b, T c, T* x);
+
+template<typename T>
+__host__ __device__ T newtonsMethod(T a, T b, T c, T d, T x0, int init_dir);
+
 template<typename T>
 __host__ __device__ int solveCubic(T a, T b, T c, T d, T* x) {
     T xc[2];
@@ -353,7 +360,8 @@ __host__ __device__ Intersection rayTriangleIntersection(Ray r, glm::vec3 v0, gl
 
 template<typename Scalar>
 indexType raySimCtxIntersection(Ray r, int numTris, const indexType* Tri, const glm::tvec3<Scalar>* X) {
-    Intersection intersection = thrust::transform_reduce(thrust::counting_iterator<indexType>(0),
+    Intersection intersection = thrust::transform_reduce(thrust::device, 
+        thrust::counting_iterator<indexType>(0),
         thrust::counting_iterator<indexType>(numTris),
         [Tri, X, r]__host__ __device__(indexType idx) {
         glm::tvec3<Scalar> v0 = X[Tri[3 * idx]];
diff --git a/src/collision/narrowphase.cu b/src/collision/narrowphase.cu
index 7b477f9..e089ed8 100644
--- a/src/collision/narrowphase.cu
+++ b/src/collision/narrowphase.cu
@@ -67,7 +67,7 @@ __global__ void detectCollisionNarrow(int numQueries, Query* queries, const glm:
         glm::tvec3<Scalar> normal;
         Query& q = queries[index];
         q.toi = ccdCollisionTest(q, Xs, XTildes, normal);
-        q.normal = normal;
+        q.normal = Vec3d{normal.x, normal.y, normal.z};
     }
 }
 
@@ -92,8 +92,8 @@ __global__ void storeTi(int numQueries, const Query* queries, Scalar* tI, glm::v
                 tI[q.v1] = 0.5f;
                 //tI[q.v2] = 0.5f;
                 //tI[q.v3] = 0.5f;
-                nors[q.v1] = q.normal;
-                nors[q.v0] = q.normal;
+                nors[q.v1] = glm::vec3(q.normal.x, q.normal.y, q.normal.z);
+                nors[q.v0] = glm::vec3(q.normal.x, q.normal.y, q.normal.z);
             }
         }
         if (q.type == QueryType::VF)
@@ -104,17 +104,17 @@ __global__ void storeTi(int numQueries, const Query* queries, Scalar* tI, glm::v
                 tI[q.v1] = 0.5f;
                 tI[q.v2] = 0.5f;
                 tI[q.v3] = 0.5f;
-                nors[q.v0] = q.normal;
-                nors[q.v1] = -q.normal;
-                nors[q.v2] = -q.normal;
-                nors[q.v3] = -q.normal;
+                nors[q.v0] = glm::vec3(q.normal.x, q.normal.y, q.normal.z);
+                nors[q.v1] = -glm::vec3(q.normal.x, q.normal.y, q.normal.z);
+                nors[q.v2] = -glm::vec3(q.normal.x, q.normal.y, q.normal.z);
+                nors[q.v3] = -glm::vec3(q.normal.x, q.normal.y, q.normal.z);
             }
         }
         /*
         if (q.type == QueryType::VF)
         {
             tI[q.v0] = q.toi;
-            nors[q.v0] = q.normal;
+            nors[q.v0] = glm::vec3(q.normal.x, q.normal.y, q.normal.z);
         }*/
     }
 }
diff --git a/src/context.cpp b/src/context.cpp
index 1b61b17..7fc3cd3 100644
--- a/src/context.cpp
+++ b/src/context.cpp
@@ -1,7 +1,7 @@
 #include <sceneStructs.h>
 #include <surfaceshader.h>
 #include <context.h>
-#include <Mesh.h>
+#include <mesh.h>
 #include <collision/aabb.h>
 #include <simulation/simulationContext.h>
 #include <utilities.h>
diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp
index a3ca129..5f662e4 100644
--- a/src/openglcontext/preview.cpp
+++ b/src/openglcontext/preview.cpp
@@ -3,7 +3,7 @@
 #include <collision/aabb.h>
 #include <simulationContext.h>
 #include <context.h>
-#include <softbody.h>
+#include <softBody.h>
 #include <preview.h>
 #include <utilities.h>
 #include <imgui.h>
diff --git a/src/openglcontext/shaderprogram.cpp b/src/openglcontext/shaderprogram.cpp
index fbe9bc7..e5782e0 100644
--- a/src/openglcontext/shaderprogram.cpp
+++ b/src/openglcontext/shaderprogram.cpp
@@ -3,6 +3,7 @@
 #include <sstream>
 #include <filesystem> 
 #include <iostream>
+#include <cstring>
 
 namespace fs = std::filesystem;
 
diff --git a/src/simulation/collisionUtil.cu b/src/simulation/collisionUtil.cu
index 614d5d2..4614e25 100644
--- a/src/simulation/collisionUtil.cu
+++ b/src/simulation/collisionUtil.cu
@@ -27,7 +27,6 @@ template <typename Scalar>
 __global__ void IPCCDKernel(glm::tvec3<Scalar>* X, glm::tvec3<Scalar>* XTilde, glm::tvec3<Scalar>* V, Scalar* tI, glm::vec3* normals, float muT, float muN, int numVerts) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     if (idx >= numVerts) return;
-    Scalar interval = glm::length(XTilde - X);
 
     if (tI[idx] < 1.0f)
     {
@@ -51,7 +50,6 @@ template <typename Scalar>
 __global__ void CCDKernel(glm::tvec3<Scalar>* X, glm::tvec3<Scalar>* XTilde, glm::tvec3<Scalar>* V, Scalar* tI, glm::vec3* normals, float muT, float muN, int numVerts, Scalar dt) {
     int idx = threadIdx.x + blockIdx.x * blockDim.x;
     if (idx >= numVerts) return;
-    Scalar interval = glm::length(XTilde - X);
 
     if (tI[idx] < 1.0f)
     {
@@ -74,5 +72,4 @@ __global__ void CCDKernel(glm::tvec3<Scalar>* X, glm::tvec3<Scalar>* XTilde, glm
 template __global__ void IPCCDKernel<float>(glm::tvec3<float>* X, glm::tvec3<float>* XTilde, glm::tvec3<float>* V, float* tI, glm::vec3* normals, float muT, float muN, int numVerts);
 template __global__ void IPCCDKernel<double>(glm::tvec3<double>* X, glm::tvec3<double>* XTilde, glm::tvec3<double>* V, double* tI, glm::vec3* normals, float muT, float muN, int numVerts);
 template __global__ void CCDKernel<float>(glm::tvec3<float>* X, glm::tvec3<float>* XTilde, glm::tvec3<float>* V, float* tI, glm::vec3* normals, float muT, float muN, int numVerts, float dt);
-template __global__ void CCDKernel<double>(glm::tvec3<double>* X, glm::tvec3<double>* XTilde, glm::tvec3<double>* V, double* tI, glm::vec3* normals, float muT, float muN, int numVerts, double dt);
-
+template __global__ void CCDKernel<double>(glm::tvec3<double>* X, glm::tvec3<double>* XTilde, glm::tvec3<double>* V, double* tI, glm::vec3* normals, float muT, float muN, int numVerts, double dt);
\ No newline at end of file
diff --git a/src/simulation/distance/distance_type.cuh b/src/simulation/distance/distance_type.cuh
index 04bf971..d85c324 100644
--- a/src/simulation/distance/distance_type.cuh
+++ b/src/simulation/distance/distance_type.cuh
@@ -4,6 +4,13 @@
 
 using namespace ipc;
 
+template<typename Scalar>
+__host__ __device__ DistanceType edge_edge_parallel_distance_type(
+    const glm::tvec3<Scalar>& ea0,
+    const glm::tvec3<Scalar>& ea1,
+    const glm::tvec3<Scalar>& eb0,
+    const glm::tvec3<Scalar>& eb1);
+
 #ifdef __CUDACC__
 template <typename Scalar>
 __global__ void GetDistanceType(const glm::tvec3<Scalar>* Xs, Query* queries, int numQueries) {
diff --git a/src/simulation/energy/barrier.cu b/src/simulation/energy/barrier.cu
index 49ea48e..2c6bb99 100644
--- a/src/simulation/energy/barrier.cu
+++ b/src/simulation/energy/barrier.cu
@@ -6,6 +6,7 @@
 #include <thrust/transform_reduce.h>
 #include <thrust/iterator/counting_iterator.h>
 
+
 namespace Barrier {
     template <typename Scalar>
     __forceinline__ __host__ __device__ Scalar barrierSquareFunc(Scalar d_sqr, Scalar dhat, Scalar kappa) {
@@ -207,7 +208,7 @@ void BarrierEnergy<Scalar>::Hessian(const SolverData<Scalar>& solverData, const
     if (numQueries == 0)return;
     int threadsPerBlock = 256;
     int numBlocks = (numQueries + threadsPerBlock - 1) / threadsPerBlock;
-    Barrier::hessianKern << <numBlocks, threadsPerBlock >> > (hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef);
+    Barrier::hessianKern << <numBlocks, threadsPerBlock >> > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef);
 }
 
 template <typename Scalar>
@@ -217,7 +218,7 @@ void BarrierEnergy<Scalar>::GradientHessian(Scalar* grad, const SolverData<Scala
     if (numQueries == 0)return;
     int threadsPerBlock = 256;
     int numBlocks = (numQueries + threadsPerBlock - 1) / threadsPerBlock;
-    Barrier::gradHessianKern << <numBlocks, threadsPerBlock >> > (grad, hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef);
+    Barrier::gradHessianKern << <numBlocks, threadsPerBlock >> > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.queries(), numQueries, solverParams.dhat, solverData.kappa, coef);
 }
 
 template<typename Scalar>
diff --git a/src/simulation/energy/corotated.cu b/src/simulation/energy/corotated.cu
index 7b72962..debab70 100644
--- a/src/simulation/energy/corotated.cu
+++ b/src/simulation/energy/corotated.cu
@@ -5,6 +5,7 @@
 #include <thrust/transform_reduce.h>
 #include <svd.cuh>
 
+
 namespace Corotated {
     template <typename Scalar>
     __global__ void GradientKern(Scalar* grad, const glm::tvec3<Scalar>* X, const indexType* Tet, const glm::tmat3x3<Scalar>* DmInvs,
@@ -231,7 +232,7 @@ template <typename Scalar>
 void CorotatedEnergy<Scalar>::Hessian(const SolverData<Scalar>& solverData, const SolverParams<Scalar>& solverParams, Scalar coef) const {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock;
-    Corotated::HessianKern << <numBlocks, threadsPerBlock >> > (hessianVal, hessianRowIdx, hessianColIdx,
+    Corotated::HessianKern << <numBlocks, threadsPerBlock >> > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx,
         solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef);
 }
 
@@ -239,7 +240,7 @@ template <typename Scalar>
 void CorotatedEnergy<Scalar>::GradientHessian(Scalar* grad, const SolverData<Scalar>& solverData, const SolverParams<Scalar>& solverParams, Scalar coef) const {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock;
-    Corotated::GradHessianKern << <numBlocks, threadsPerBlock >> > (grad, hessianVal, hessianRowIdx, hessianColIdx,
+    Corotated::GradHessianKern << <numBlocks, threadsPerBlock >> > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx,
         solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef);
 }
 
diff --git a/src/simulation/energy/implicitBarrier.cu b/src/simulation/energy/implicitBarrier.cu
index f816c7e..f786c7d 100644
--- a/src/simulation/energy/implicitBarrier.cu
+++ b/src/simulation/energy/implicitBarrier.cu
@@ -327,7 +327,7 @@ void ImplicitBarrierEnergy<Scalar>::Hessian(const SolverData<Scalar>& solverData
 {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock;
-    ImplicitBarrier::hessianKern << <numBlocks, threadsPerBlock >> > (hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.numVerts,
+    ImplicitBarrier::hessianKern << <numBlocks, threadsPerBlock >> > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.numVerts,
         solverData.pFixedBodies->dev_planes, solverData.pFixedBodies->numPlanes, solverData.pFixedBodies->dev_cylinders, solverData.pFixedBodies->numCylinders, solverData.pFixedBodies->dev_spheres, solverData.pFixedBodies->numSpheres, solverParams.dhat, solverData.contact_area, coef);
 }
 
@@ -336,7 +336,7 @@ void ImplicitBarrierEnergy<Scalar>::GradientHessian(Scalar* grad, const SolverDa
 {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock;
-    ImplicitBarrier::gradHessianKern << <numBlocks, threadsPerBlock >> > (grad, hessianVal, hessianRowIdx, hessianColIdx, solverData.X, solverData.numVerts,
+    ImplicitBarrier::gradHessianKern << <numBlocks, threadsPerBlock >> > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.X, solverData.numVerts,
         solverData.pFixedBodies->dev_planes, solverData.pFixedBodies->numPlanes, solverData.pFixedBodies->dev_cylinders, solverData.pFixedBodies->numCylinders, solverData.pFixedBodies->dev_spheres, solverData.pFixedBodies->numSpheres, solverParams.dhat, solverData.contact_area, coef);
 }
 
diff --git a/src/simulation/energy/inertia.cu b/src/simulation/energy/inertia.cu
index d7c4e7c..a11aec1 100644
--- a/src/simulation/energy/inertia.cu
+++ b/src/simulation/energy/inertia.cu
@@ -98,7 +98,7 @@ void InertiaEnergy<Scalar>::Hessian(const SolverData<Scalar>& solverData, const
 {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock;
-    Inertia::hessianKern << <numBlocks, threadsPerBlock >> > (solverData.mass, hessianVal, hessianRowIdx, hessianColIdx, solverData.numVerts);
+    Inertia::hessianKern << <numBlocks, threadsPerBlock >> > (solverData.mass, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.numVerts);
 }
 
 template <typename Scalar>
@@ -106,7 +106,7 @@ void InertiaEnergy<Scalar>::GradientHessian(Scalar* grad, const SolverData<Scala
 {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock;
-    Inertia::gradHessianKern << <numBlocks, threadsPerBlock >> > (solverData.X, solverData.XTilde, solverData.mass, grad, hessianVal, hessianRowIdx, hessianColIdx, solverData.numVerts);
+    Inertia::gradHessianKern << <numBlocks, threadsPerBlock >> > (solverData.X, solverData.XTilde, solverData.mass, grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx, solverData.numVerts);
 }
 
 template class InertiaEnergy<float>;
diff --git a/src/simulation/energy/neohookean08.cu b/src/simulation/energy/neohookean08.cu
index 5584fed..39ee896 100644
--- a/src/simulation/energy/neohookean08.cu
+++ b/src/simulation/energy/neohookean08.cu
@@ -5,6 +5,7 @@
 #include <thrust/transform_reduce.h>
 #include <svd.cuh>
 
+
 namespace NeoHookean08 {
     template <typename Scalar>
     __global__ void GradientKern(Scalar* grad, const glm::tvec3<Scalar>* X, const indexType* Tet, const glm::tmat3x3<Scalar>* DmInvs,
@@ -218,7 +219,7 @@ template <typename Scalar>
 void NeoHookean08Energy<Scalar>::Hessian(const SolverData<Scalar>& solverData, const SolverParams<Scalar>& solverParams, Scalar coef) const {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock;
-    NeoHookean08::HessianKern << <numBlocks, threadsPerBlock >> > (hessianVal, hessianRowIdx, hessianColIdx,
+    NeoHookean08::HessianKern << <numBlocks, threadsPerBlock >> > (this->hessianVal, this->hessianRowIdx, this->hessianColIdx,
         solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef);
 }
 
@@ -227,7 +228,7 @@ void NeoHookean08Energy<Scalar>::GradientHessian(Scalar* grad, const SolverData<
 {
     int threadsPerBlock = 256;
     int numBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock;
-    NeoHookean08::GradHessianKern << <numBlocks, threadsPerBlock >> > (grad, hessianVal, hessianRowIdx, hessianColIdx,
+    NeoHookean08::GradHessianKern << <numBlocks, threadsPerBlock >> > (grad, this->hessianVal, this->hessianRowIdx, this->hessianColIdx,
         solverData.X, solverData.Tet, solverData.DmInv, solverData.V0, solverData.mu, solverData.lambda, solverData.numTets, coef);
 }
 
diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu
index 8ab8e7d..ec4b85b 100644
--- a/src/simulation/simulationContext.cu
+++ b/src/simulation/simulationContext.cu
@@ -168,6 +168,8 @@ bool SimulationCUDAContext::RayIntersect(const Ray& ray, glm::vec3* pos, bool up
             hit_v = ms.select_v;
         }
         else {
+            if (impl.data.numTris <= 0 || impl.data.Tri == nullptr || impl.data.X == nullptr)
+                return false; // no triangles, or a null Tri/X buffer: nothing to intersect the ray with
             hit_v = raySimCtxIntersection(ray, impl.data.numTris, impl.data.Tri, impl.data.X);
             if (updateV) ms.select_v = hit_v;
         }
diff --git a/src/simulation/solver/femSolver.cu b/src/simulation/solver/femSolver.cu
index 28097d2..0872d1d 100644
--- a/src/simulation/solver/femSolver.cu
+++ b/src/simulation/solver/femSolver.cu
@@ -1,4 +1,4 @@
-#include <simulation/solver/femsolver.h>
+#include <simulation/solver/femSolver.h>
 #include <thrust/transform.h>
 #include <thrust/device_vector.h>
 #include <solver/solverUtil.cuh>
diff --git a/src/simulation/solver/linear/cg.h b/src/simulation/solver/linear/cg.h
index d51b85a..dc4aca1 100644
--- a/src/simulation/solver/linear/cg.h
+++ b/src/simulation/solver/linear/cg.h
@@ -11,6 +11,13 @@ class CGSolver : public LinearSolver<T> {
     virtual ~CGSolver() override;
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A, int nz, int* d_rowIdx, int* d_colIdx, T* d_guess = nullptr) override;
 private:
+    using LinearSolver<T>::dType;
+    using LinearSolver<T>::d_A;
+    using LinearSolver<T>::d_rowIdx;
+    using LinearSolver<T>::d_colIdx;
+    using LinearSolver<T>::d_rowPtrA;
+    using LinearSolver<T>::capacity;
+
     cublasHandle_t cubHandle = nullptr;
     cusparseHandle_t cusHandle = nullptr;
     csric02Info_t ic02info = nullptr;
diff --git a/src/simulation/solver/linear/cholesky.cu b/src/simulation/solver/linear/cholesky.cu
index c463848..aea4272 100644
--- a/src/simulation/solver/linear/cholesky.cu
+++ b/src/simulation/solver/linear/cholesky.cu
@@ -2,6 +2,7 @@
 #include <linear/cuUtils.cuh>
 #include <stdexcept>
 
+
 template<typename T>
 __global__ void FillMatrixA(int* AIdx, T* AVal, T* d_A, int n, int ASize) {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -207,5 +208,7 @@ void CholeskySpLinearSolver<T>::Solve(int N, T* d_b, T* d_x, T* d_A, int nz, int
     permuteVectorInv << <blocks, threadsPerBlock >> > (dev_x_permuted, d_x, d_p, N);
 }
 
-template CholeskySpLinearSolver<double>;
-template CholeskySpLinearSolver<float>;
\ No newline at end of file
+template class CholeskySpLinearSolver<double>;
+template class CholeskySpLinearSolver<float>;
+template class CholeskyDnLinearSolver<double>;
+template class CholeskyDnLinearSolver<float>;
\ No newline at end of file
diff --git a/src/simulation/solver/linear/cholesky.h b/src/simulation/solver/linear/cholesky.h
index 790ed64..97fc2cf 100644
--- a/src/simulation/solver/linear/cholesky.h
+++ b/src/simulation/solver/linear/cholesky.h
@@ -14,6 +14,12 @@ class CholeskySpLinearSolver : public LinearSolver<T> {
     virtual ~CholeskySpLinearSolver() override;
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override;
 private:
+    using LinearSolver<T>::d_A;
+    using LinearSolver<T>::d_rowIdx;
+    using LinearSolver<T>::d_colIdx;
+    using LinearSolver<T>::d_rowPtrA;
+    using LinearSolver<T>::capacity;
+
     void ComputeAMD(cusolverSpHandle_t handle, int rowsA, int nnzA, int* dev_csrRowPtrA, int* dev_csrColIndA, T* dev_csrValA);
     cusolverSpHandle_t cusolverHandle;
     cusparseMatDescr_t descrA;
@@ -31,6 +37,8 @@ class CholeskyDnLinearSolver : public LinearSolver<T> {
     virtual ~CholeskyDnLinearSolver() override;
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override;
 private:
+    using LinearSolver<T>::dType;
+
     cusolverDnParams_t params;
     int* d_info = nullptr;    /* error info */
     cusolverDnHandle_t cusolverHandle;
diff --git a/src/simulation/solver/linear/choleskyImmed.h b/src/simulation/solver/linear/choleskyImmed.h
index 36f4116..76d878a 100644
--- a/src/simulation/solver/linear/choleskyImmed.h
+++ b/src/simulation/solver/linear/choleskyImmed.h
@@ -11,6 +11,12 @@ class CholeskySpImmedSolver : public LinearSolver<T> {
     virtual ~CholeskySpImmedSolver() override;
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override;
 private:
+    using LinearSolver<T>::d_A;
+    using LinearSolver<T>::d_rowIdx;
+    using LinearSolver<T>::d_colIdx;
+    using LinearSolver<T>::d_rowPtrA;
+    using LinearSolver<T>::capacity;
+
     cusparseHandle_t handle;
     cusolverSpHandle_t cusolverHandle;
     cusparseMatDescr_t descrA;
diff --git a/src/simulation/solver/linear/jacobi.h b/src/simulation/solver/linear/jacobi.h
index 9b2e65e..95556f4 100644
--- a/src/simulation/solver/linear/jacobi.h
+++ b/src/simulation/solver/linear/jacobi.h
@@ -11,6 +11,12 @@ class JacobiSolver : public LinearSolver<T> {
     virtual ~JacobiSolver() override;
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override;
 private:
+    using LinearSolver<T>::d_A;
+    using LinearSolver<T>::d_rowIdx;
+    using LinearSolver<T>::d_colIdx;
+    using LinearSolver<T>::d_rowPtrA;
+    using LinearSolver<T>::capacity;
+
     int maxIter;
     cusparseHandle_t cusHandle = nullptr;
     T* x_prime = nullptr;
diff --git a/src/simulation/solver/linear/linear.h b/src/simulation/solver/linear/linear.h
index 7df8806..f98e271 100644
--- a/src/simulation/solver/linear/linear.h
+++ b/src/simulation/solver/linear/linear.h
@@ -1,6 +1,7 @@
 #pragma once
 #include <def.h>
 #include <library_types.h>
+#include <cuda_runtime_api.h>
 
 template <typename T>
 struct CUDAType {
diff --git a/src/simulation/solver/linear/pcgJacobi.h b/src/simulation/solver/linear/pcgJacobi.h
index d8db3f8..c4f987e 100644
--- a/src/simulation/solver/linear/pcgJacobi.h
+++ b/src/simulation/solver/linear/pcgJacobi.h
@@ -11,6 +11,13 @@ class PCGJacobiSolver : public LinearSolver<T> {
     virtual ~PCGJacobiSolver() override;
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A, int nz, int* d_rowIdx, int* d_colIdx, T* d_guess = nullptr) override;
 private:
+    using LinearSolver<T>::dType;
+    using LinearSolver<T>::d_A;
+    using LinearSolver<T>::d_rowIdx;
+    using LinearSolver<T>::d_colIdx;
+    using LinearSolver<T>::d_rowPtrA;
+    using LinearSolver<T>::capacity;
+
     cublasHandle_t cubHandle = nullptr;
     cusparseHandle_t cusHandle = nullptr;
 
diff --git a/src/simulation/solver/solverUtil.cuh b/src/simulation/solver/solverUtil.cuh
index 6865695..da4f54e 100644
--- a/src/simulation/solver/solverUtil.cuh
+++ b/src/simulation/solver/solverUtil.cuh
@@ -3,6 +3,7 @@
 #include <glm/glm.hpp>
 #include <cuda_runtime.h>
 
+
 template<typename Func>
 float measureExecutionTime(const Func& func, bool print = false) {
     if (!print) {
diff --git a/src/simulation/utilities.cuh b/src/simulation/utilities.cuh
index 1b6f8fe..05dc978 100644
--- a/src/simulation/utilities.cuh
+++ b/src/simulation/utilities.cuh
@@ -26,6 +26,20 @@ void inspectGLM(const T* dev_ptr, int size, const char* str = "") {
     utilityCore::inspectHost(host_ptr.data(), size, str);
 }
 
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+__device__ inline double atomicAdd(double* address, double val) {
+    unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
+    unsigned long long int old = *address_as_ull, assumed;
+    do {
+        assumed = old;
+        old = atomicCAS(address_as_ull, assumed,
+            __double_as_longlong(val + __longlong_as_double(assumed)));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+}
+#endif
+
+
 template <typename T>
 void inspectSparseMatrix(T* dev_val, int* dev_rowIdx, int* dev_colIdx, int begin, int nnz, int size);
 void inspectMortonCodes(const int* dev_mortonCodes, int numTris);
diff --git a/src/utilities.cpp b/src/utilities.cpp
index 469781e..ab6bad4 100644
--- a/src/utilities.cpp
+++ b/src/utilities.cpp
@@ -15,6 +15,7 @@
 #include <fstream>
 #include <filesystem>
 #include <bitset>
+#include <iterator>
 #include <type_traits>
 
 template<typename T, typename = void>
@@ -214,7 +215,7 @@ void utilityCore::inspectHost(const Query* query, int size) {
 
         std::cout << "DistanceType::" << distanceTypeString[static_cast<int>(query[i].dType)] << ",";
         std::cout << query[i].v0 << "," << query[i].v1 << "," << query[i].v2 << "," << query[i].v3 << "," << query[i].toi << "," << query[i].d << ","
-            << glm::to_string(query[i].normal) << "}," << std::endl;
+            << "}," << std::endl; // normal is no longer printed, but the "}," terminator of each entry is kept
     }
     std::cout << "------------------------inspectHost--END------------------------------" << std::endl;
 }
diff --git a/tests/test_distance.cpp b/tests/test_distance.cpp
index 245fe0d..ed8341d 100644
--- a/tests/test_distance.cpp
+++ b/tests/test_distance.cpp
@@ -30,14 +30,14 @@ __forceinline__ __host__ __device__ Matrix12<Scalar> barrierSquareFuncHess(Scala
 double dhat = 0.05;
 double kappa = 100;
 std::vector<Query> queries = {
-Query{QueryType::EE,DistanceType::EA_EB,1,2,4,7,0.00187279,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::EE,DistanceType::EA_EB,4,7,1,2,0.00187279,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::EE,DistanceType::EA_EB,1,2,4,5,0.0021027,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::EE,DistanceType::EA_EB,4,5,1,2,0.0021027,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::VF,DistanceType::P_E0,4,1,2,3,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::VF,DistanceType::P_E1,4,0,1,2,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::EE,DistanceType::EA_EB0,1,2,4,6,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::EE,DistanceType::EA0_EB,4,6,1,2,0.00265943,1,glm::dvec3(0.000000, 0.000000, 0.000000)},
+Query{QueryType::EE,DistanceType::EA_EB,1,2,4,7,0.00187279,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::EE,DistanceType::EA_EB,4,7,1,2,0.00187279,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::EE,DistanceType::EA_EB,1,2,4,5,0.0021027,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::EE,DistanceType::EA_EB,4,5,1,2,0.0021027,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::VF,DistanceType::P_E0,4,1,2,3,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::VF,DistanceType::P_E1,4,0,1,2,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::EE,DistanceType::EA_EB0,1,2,4,6,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::EE,DistanceType::EA0_EB,4,6,1,2,0.00265943,1,Vec3d{0.000000, 0.000000, 0.000000}},
 };
 std::vector<glm::dvec3> points = {
 glm::dvec3(0.436417, 59.751547, 0.282352),
diff --git a/tests/test_solveCubic.cpp b/tests/test_solveCubic.cpp
index 7849d5d..a12acb7 100644
--- a/tests/test_solveCubic.cpp
+++ b/tests/test_solveCubic.cpp
@@ -126,12 +126,12 @@
 TEST_CASE("Tet collision test", "[Tet]")
 {
     std::vector<Query> queries = {
-Query{QueryType::VF,DistanceType::P_T0,0,4,5,9,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::VF,DistanceType::P_T0,1,4,5,9,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::VF,DistanceType::P_T0,2,4,5,9,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)},
-Query{QueryType::VF,DistanceType::EA1_EB0,3,4,7,10,0.994723,100,glm::dvec3(0.000010, 1.000000, 0.000010)},
-Query{QueryType::VF,DistanceType::P_T0,4,0,1,3,1,0,glm::dvec3(0.000000, 0.000000, 0.000000)},
-};
+Query{QueryType::VF,DistanceType::P_T0,0,4,5,9,1,0,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::VF,DistanceType::P_T0,1,4,5,9,1,0,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::VF,DistanceType::P_T0,2,4,5,9,1,0,Vec3d{0.000000, 0.000000, 0.000000}},
+Query{QueryType::VF,DistanceType::EA1_EB0,3,4,7,10,0.994723,100,Vec3d{0.000010, 1.000000, 0.000010}},
+Query{QueryType::VF,DistanceType::P_T0,4,0,1,3,1,0,Vec3d{0.000000, 0.000000, 0.000000}},
+    };
     std::vector<glm::dvec3> Xs{
 glm::dvec3(2.001631, 67.999075, 0.533407),
 glm::dvec3(-6.001968, 61.071146, -3.459837),

From 8574b9545af38bedd2ceac790d00232627248fc6 Mon Sep 17 00:00:00 2001
From: GrahamZen <931252924@qq.com>
Date: Fri, 19 Dec 2025 01:38:23 -0600
Subject: [PATCH 03/20] fix invalid write

---
 src/simulation/dataLoader.cu           | 12 +++++++++---
 src/simulation/solver/linear/jacobi.cu |  1 -
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/simulation/dataLoader.cu b/src/simulation/dataLoader.cu
index 54ae39c..a147114 100644
--- a/src/simulation/dataLoader.cu
+++ b/src/simulation/dataLoader.cu
@@ -38,6 +38,8 @@ DataLoader<Scalar>::~DataLoader() = default;
 template<typename Scalar>
 std::pair<std::vector<indexType>, std::vector<indexType>> DataLoader<Scalar>::loadEleFaceFile(const std::string& EleFilename, int startIndex, int& numTets, int& numTris, std::string faceFilename)
 {
+    numTets = 0;
+    numTris = 0;
     std::string line;
     std::ifstream file(EleFilename);
 
@@ -342,9 +344,13 @@ void DataLoader<Scalar>::AllocData(std::vector<int>& startIndices, SolverData<Sc
             return x + vertOffset;
         });
         if (softBodyData.Tri) {
-            thrust::for_each(thrust::device_pointer_cast(softBodyData.Tri), thrust::device_pointer_cast(softBodyData.Tri) + softBodyData.numTris * 3, [vertOffset] __device__(indexType & x) {
-                x += vertOffset;
-            });
+            auto first = thrust::device_pointer_cast(softBodyData.Tri);
+            auto last = first + softBodyData.numTris * 3;
+            thrust::transform(first, last, first,
+                [vertOffset] __device__(indexType x) {
+                return x + vertOffset;
+            }
+            );
             cudaMemcpy(solverData.Tri + triOffset, softBodyData.Tri, sizeof(indexType) * softBodyData.numTris * 3, cudaMemcpyDeviceToDevice);
         }
         thrust::fill(thrust::device_pointer_cast(solverData.dev_TriFathers) + triOffset / 3, thrust::device_pointer_cast(solverData.dev_TriFathers) + triOffset / 3 + softBodyData.numTris, i);
diff --git a/src/simulation/solver/linear/jacobi.cu b/src/simulation/solver/linear/jacobi.cu
index 0ec16fe..aa9004d 100644
--- a/src/simulation/solver/linear/jacobi.cu
+++ b/src/simulation/solver/linear/jacobi.cu
@@ -16,7 +16,6 @@ inline JacobiSolver<T>::JacobiSolver(int N, int maxIter) : maxIter(maxIter)
 template<typename T>
 JacobiSolver<T>::~JacobiSolver()
 {
-    cudaFree(d_rowPtrA);
     cudaFree(x_prime);
     CHECK_CUSPARSE(cusparseDestroy(cusHandle));
 }

From 1876b68c09ec06fa5f5b6ba5ad2cff75ea971dab Mon Sep 17 00:00:00 2001
From: GrahamZen <931252924@qq.com>
Date: Fri, 19 Dec 2025 01:46:10 -0600
Subject: [PATCH 04/20] extend hessian buffer

---
 src/simulation/energy/ip.cu | 36 ++++++++++++++++++++++++++++--------
 src/simulation/energy/ip.h  |  9 +++++++--
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/simulation/energy/ip.cu b/src/simulation/energy/ip.cu
index 7f36c95..037c202 100644
--- a/src/simulation/energy/ip.cu
+++ b/src/simulation/energy/ip.cu
@@ -17,13 +17,15 @@ struct AbsOp {
     }
 };
 
-IPEnergy::IPEnergy(const SolverData<double>& solverData) :inertia(solverData, nnz, solverData.numVerts, solverData.mass),
+IPEnergy::IPEnergy(const SolverData<double>& solverData) : inertia(solverData, nnz, solverData.numVerts, solverData.mass),
 elastic(new CorotatedEnergy<double>(solverData, nnz)), implicitBarrier(solverData, nnz), barrier(solverData, nnz)
 {
+    hessianCapacity = nnz;
     cudaMalloc((void**)&gradient, sizeof(double) * solverData.numVerts * 3);
-    cudaMalloc((void**)&hessianVal, sizeof(double) * nnz);
-    cudaMalloc((void**)&hessianRowIdx, sizeof(int) * nnz);
-    cudaMalloc((void**)&hessianColIdx, sizeof(int) * nnz);
+    cudaMalloc((void**)&hessianVal, sizeof(double) * hessianCapacity);
+    cudaMalloc((void**)&hessianRowIdx, sizeof(int) * hessianCapacity);
+    cudaMalloc((void**)&hessianColIdx, sizeof(int) * hessianCapacity);
+
     inertia.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
     implicitBarrier.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
     elastic->SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
@@ -36,6 +38,7 @@ IPEnergy::~IPEnergy()
     cudaFree(hessianVal);
     cudaFree(hessianRowIdx);
     cudaFree(hessianColIdx);
+    delete elastic; // deleting a null pointer is a no-op, so no guard is needed
 }
 
 double IPEnergy::Val(const glm::dvec3* Xs, const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2) const
@@ -43,12 +46,29 @@ double IPEnergy::Val(const glm::dvec3* Xs, const SolverData<double>& solverData,
     return inertia.Val(Xs, solverData, solverParams) + h2 * (gravity.Val(Xs, solverData, solverParams) + elastic->Val(Xs, solverData, solverParams) + implicitBarrier.Val(Xs, solverData, solverParams) + barrier.Val(Xs, solverData, solverParams));
 }
 
-void IPEnergy::GradientHessian(const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2) const
+void IPEnergy::GradientHessian(const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2)
 {
+    int currentNNZ = NNZ(solverData);
+    if (currentNNZ > hessianCapacity) {
+        cudaFree(hessianVal);
+        cudaFree(hessianRowIdx);
+        cudaFree(hessianColIdx);
+        hessianCapacity = static_cast<int>(currentNNZ * 1.5);
+
+        cudaMalloc((void**)&hessianVal, sizeof(double) * hessianCapacity);
+        cudaMalloc((void**)&hessianRowIdx, sizeof(int) * hessianCapacity);
+        cudaMalloc((void**)&hessianColIdx, sizeof(int) * hessianCapacity);
+
+        inertia.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
+        implicitBarrier.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
+        elastic->SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
+        barrier.SetHessianPtr(hessianVal, hessianRowIdx, hessianColIdx);
+    }
+
     cudaMemset(gradient, 0, sizeof(double) * solverData.numVerts * 3);
-    cudaMemset(hessianVal, 0, sizeof(double) * NNZ(solverData));
-    cudaMemset(hessianRowIdx, 0, sizeof(int) * NNZ(solverData));
-    cudaMemset(hessianColIdx, 0, sizeof(int) * NNZ(solverData));
+    cudaMemset(hessianVal, 0, sizeof(double) * currentNNZ);
+    cudaMemset(hessianRowIdx, 0, sizeof(int) * currentNNZ);
+    cudaMemset(hessianColIdx, 0, sizeof(int) * currentNNZ);
     inertia.GradientHessian(gradient, solverData, solverParams, 1);
     gravity.Gradient(gradient, solverData, solverParams, h2);
     elastic->GradientHessian(gradient, solverData, solverParams, h2);
diff --git a/src/simulation/energy/ip.h b/src/simulation/energy/ip.h
index 187c31f..d68c128 100644
--- a/src/simulation/energy/ip.h
+++ b/src/simulation/energy/ip.h
@@ -10,18 +10,23 @@ class IPEnergy {
 public:
     IPEnergy(const SolverData<double>& solverData);
     ~IPEnergy();
+    IPEnergy(const IPEnergy&) = delete;
+    IPEnergy& operator=(const IPEnergy&) = delete;
+
     double Val(const glm::dvec3* Xs, const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2) const;
-    void GradientHessian(const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2) const;
+    void GradientHessian(const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2);
     void UpdateKappa(SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2) const;
     double InitStepSize(SolverData<double>& solverData, const SolverParams<double>& solverParams, double* p, glm::tvec3<double>* XTmp) const;
     int NNZ(const SolverData<double>& solverData) const;
     double* gradient = nullptr;
-    // collision queries should be updated if dirty
     double* hessianVal = nullptr;
     int* hessianRowIdx = nullptr;
     int* hessianColIdx = nullptr;
+
 private:
     int nnz = 0;
+    int hessianCapacity = 0;
+
     InertiaEnergy<double> inertia;
     GravityEnergy<double> gravity;
     ImplicitBarrierEnergy<double> implicitBarrier;

From 43c443178f1bc2af5f002aee471109611945dbfa Mon Sep 17 00:00:00 2001
From: GrahamZen <gehan.zheng@vanderbilt.edu>
Date: Fri, 19 Dec 2025 12:40:37 -0600
Subject: [PATCH 05/20] use unique_ptr in context.h

---
 src/context.cpp                      | 45 ++++++++++++++--------------
 src/context.h                        | 25 ++++++++--------
 src/main.cpp                         |  4 +--
 src/openglcontext/preview.cpp        |  4 +--
 src/simulation/simulationContext.cpp |  2 +-
 src/simulation/simulationContext.cu  |  4 +--
 6 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/src/context.cpp b/src/context.cpp
index 7fc3cd3..2569740 100644
--- a/src/context.cpp
+++ b/src/context.cpp
@@ -84,10 +84,20 @@ Ray Camera::RayPick(glm::ivec2 pixel)
     return Ray{ position, glm::normalize(rayWorldXYZ - position) };
 }
 
-Context::Context(const std::string& _filename) :shaderType(ShaderType::PHONG), filename(_filename), mpCamera(new Camera(_filename)), mpProgLambert(new SurfaceShader()),
-mpProgPhong(new SurfaceShader()), mpProgHighLight(new SurfaceShader()), mpProgFlat(new SurfaceShader()), mpProgSkybox(new SurfaceShader()),
-width(mpCamera->resolution.x), height(mpCamera->resolution.y), ogLookAt(mpCamera->lookAt), guiData(new GuiDataContainer()),
-mpSelectSPhere(new Sphere(utilityCore::modelMatrix(glm::vec3(0), glm::vec3(0), glm::vec3(5, 5, 5)), 5, 10))
+Context::Context(const std::string& _filename)
+    : shaderType(ShaderType::PHONG),
+    filename(_filename),
+    mpCamera(std::make_unique<Camera>(_filename)),
+    mpProgLambert(std::make_unique<SurfaceShader>()),
+    mpProgPhong(std::make_unique<SurfaceShader>()),
+    mpProgHighLight(std::make_unique<SurfaceShader>()),
+    mpProgFlat(std::make_unique<SurfaceShader>()),
+    mpProgSkybox(std::make_unique<SurfaceShader>()),
+    width(mpCamera->resolution.x),
+    height(mpCamera->resolution.y),
+    ogLookAt(mpCamera->lookAt),
+    guiData(std::make_unique<GuiDataContainer>()),
+    mpSelectSPhere(std::make_unique<Sphere>(utilityCore::modelMatrix(glm::vec3(0), glm::vec3(0), glm::vec3(5, 5, 5)), 5, 10))
 {
     glm::vec3 view = mpCamera->view;
     glm::vec3 up = mpCamera->up;
@@ -105,18 +115,7 @@ mpSelectSPhere(new Sphere(utilityCore::modelMatrix(glm::vec3(0), glm::vec3(0), g
     zoom = glm::length(mpCamera->position - ogLookAt);
 }
 
-Context::~Context()
-{
-    delete mpProgHighLight;
-    delete mpProgLambert;
-    delete mpProgPhong;
-    delete mpProgFlat;
-    delete mpProgSkybox;
-    delete mcrpSimContext;
-    delete guiData;
-    delete mpCamera;
-    delete mpEnvMapCube;
-}
+Context::~Context() = default;
 
 int Context::GetMaxCGThreads()
 {
@@ -187,7 +186,7 @@ void Context::LoadShaders(const std::string& vertShaderFilename, const std::stri
             mpProgSkybox->create("../src/shaders/envMap.vert.glsl", "../src/shaders/envMap.frag.glsl");
             mpProgSkybox->setViewProjMatrix(mpCamera->getView(), mpCamera->getProj());
             mpProgSkybox->addUniform("u_EnvironmentMap");
-            mpEnvMapCube = new Mesh();
+            mpEnvMapCube = std::make_unique<Mesh>();
             mpEnvMapCube->createCube();
         }
     }
@@ -376,20 +375,20 @@ SimulationCUDAContext* Context::LoadSimContext() {
             if (contextJson.contains("fixedBodies")) {
                 fixBodies = ReadFixedBodies(contextJson["fixedBodies"], fixedBodyDefs);
             }
-            mpSimContexts.push_back(new SimulationCUDAContext(this, baseName, contextJson, softBodyDefs, fixBodies, threadsPerBlock, threadsPerBlockBVH, maxThreads, numIterations));
+            mpSimContexts.push_back(std::make_unique<SimulationCUDAContext>(this, baseName, contextJson, softBodyDefs, fixBodies, threadsPerBlock, threadsPerBlockBVH, maxThreads, numIterations));
             DOFs.push_back(mpSimContexts.back()->GetVertCnt() * 3);
             Eles.push_back(mpSimContexts.back()->GetTetCnt());
             if (logEnabled)
                 spdlog::info("{} #dof: {}, #ele: {}", "[" + baseName + "]", DOFs.back(), Eles.back());
         }
-        mcrpSimContext = mpSimContexts[0];
+        mcrpSimContext = mpSimContexts[0].get();
     }
     return mcrpSimContext;
 }
 
 void Context::LoadEnvCubemap(const std::string& filename) {
     {
-        envMap = new TextureCubemap();
+        envMap = std::make_unique<TextureCubemap>();
         envMap->create(filename.c_str(), false);
     }
 }
@@ -442,10 +441,10 @@ void Context::Draw() {
     switch (shaderType)
     {
     case Context::ShaderType::LAMBERT:
-        mcrpSimContext->Draw(mpProgHighLight, mpProgLambert, mpProgFlat, guiData->HighLightObjId);
+        mcrpSimContext->Draw(mpProgHighLight.get(), mpProgLambert.get(), mpProgFlat.get(), guiData->HighLightObjId);
         break;
     case Context::ShaderType::PHONG:
-        mcrpSimContext->Draw(mpProgHighLight, mpProgPhong, mpProgFlat, guiData->HighLightObjId);
+        mcrpSimContext->Draw(mpProgHighLight.get(), mpProgPhong.get(), mpProgFlat.get(), guiData->HighLightObjId);
         break;
     default:
         break;
@@ -488,7 +487,7 @@ void Context::Update() {
     PollEvents();
     if (panelModified) {
         if (guiData->currSimContextId != -1) {
-            mcrpSimContext = mpSimContexts[guiData->currSimContextId];
+            mcrpSimContext = mpSimContexts[guiData->currSimContextId].get();
             guiData->solverParams = mcrpSimContext->GetSolverParamsUI();
         }
         mcrpSimContext->SetGlobalSolver(guiData->solverType);
diff --git a/src/context.h b/src/context.h
index 7837a61..477743c 100644
--- a/src/context.h
+++ b/src/context.h
@@ -3,6 +3,7 @@
 #include <vector>
 #include <string>
 #include <optional>
+#include <memory>
 
 class SoftBody;
 class Camera;
@@ -102,7 +103,7 @@ class Context
     int GetIteration() const { return iteration; }
     const std::vector<int>& GetDOFs() const { return DOFs; }
     const std::vector<int>& GetEles() const { return Eles; }
-    Camera* mpCamera = nullptr;
+    std::unique_ptr<Camera> mpCamera;
     const int width = 1024;
     const int height = 1024;
     bool panelModified = false;
@@ -110,9 +111,9 @@ class Context
     int bvhBuildType = 1;
     float zoom, theta, phi;
     glm::vec3 cameraPosition;
-    GuiDataContainer* guiData;
+    std::unique_ptr<GuiDataContainer> guiData;
     SimulationCUDAContext* mcrpSimContext = nullptr;
-    std::vector<SimulationCUDAContext*> mpSimContexts;
+    std::vector<std::unique_ptr<SimulationCUDAContext>> mpSimContexts;
     MouseState mouseState;
 
 private:
@@ -122,18 +123,18 @@ class Context
     std::string filename = "context.json";
     SimulationCUDAContext* LoadSimContext();
     glm::vec3 ogLookAt; // for recentering the camera
-    SurfaceShader* mpProgHighLight = nullptr;
-    SurfaceShader* mpProgLambert = nullptr;
-    SurfaceShader* mpProgPhong = nullptr;
-    SurfaceShader* mpProgFlat = nullptr;
-    SurfaceShader* mpProgSkybox = nullptr;
-    Mesh* mpEnvMapCube = nullptr;
+    std::unique_ptr<SurfaceShader> mpProgHighLight;
+    std::unique_ptr<SurfaceShader> mpProgLambert;
+    std::unique_ptr<SurfaceShader> mpProgPhong;
+    std::unique_ptr<SurfaceShader> mpProgFlat;
+    std::unique_ptr<SurfaceShader> mpProgSkybox;
+    std::unique_ptr<Mesh> mpEnvMapCube;
     size_t iteration = 0;
     bool pause = false;
     bool logEnabled = false;
     std::vector<int> DOFs;
     std::vector<int> Eles;
-    TextureCubemap* envMap = nullptr;
-    Sphere* mpSelectSPhere = nullptr;
+    std::unique_ptr<TextureCubemap> envMap;
+    std::unique_ptr<Sphere> mpSelectSPhere;
     glm::vec3 spherePos;
-};
\ No newline at end of file
+};
diff --git a/src/main.cpp b/src/main.cpp
index 04c1ed4..c4b4084 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -41,7 +41,7 @@ int main(int argc, char** argv) {
     context->LoadShaders();
     context->LoadFlatShaders();
     // Initialize ImGui Data
-    InitImguiData(context->guiData);
+    InitImguiData(context->guiData.get());
     context->InitDataContainer();
     GLenum err;
     while ((err = glGetError()) != GL_NO_ERROR) {
@@ -148,4 +148,4 @@ void framebuffer_size_callback(GLFWwindow* window, int width, int height) {
     context->mpCamera->resolution.y = height;
     context->mpCamera->resolution.x = width;
     context->mpCamera->computeCameraParams();
-}
\ No newline at end of file
+}
diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp
index 5f662e4..668f0cf 100644
--- a/src/openglcontext/preview.cpp
+++ b/src/openglcontext/preview.cpp
@@ -85,7 +85,7 @@ void InitImguiData(GuiDataContainer* guiData)
 void RenderHierarchy(bool& contextChanged) {
     ImGui::Begin("Scene Hierarchy", nullptr);
     for (size_t i = 0; i < context->mpSimContexts.size(); i++) {
-        auto simCtx = context->mpSimContexts[i];
+        auto* simCtx = context->mpSimContexts[i].get();
         if (ImGui::TreeNode(simCtx->GetName().c_str())) {
             ImGui::SameLine();
             if (ImGui::Button("Activate")) {
@@ -102,7 +102,7 @@ void RenderHierarchy(bool& contextChanged) {
                         if (ImGui::Button("Highlight")) {
                             imguiData->HighLightObjId = uniqueId;
                         }
-                        ImGui::Text("#DBC: %d", softBody->GetAttributes().numDBC);
+                        ImGui::Text("#DBC: %zu", softBody->GetAttributes().numDBC);
                         ImGui::Text("#Triangle: %d", softBody->GetNumTris());
                         imguiData->softBodyAttr.mu = ImGui::DragFloat("mu", &softBody->GetAttributes().mu, 100.f, 0.0f, 100000.0f, "%.2f");
                         imguiData->softBodyAttr.lambda = ImGui::DragFloat("lambda", &softBody->GetAttributes().lambda, 100.f, 0.0f, 100000.0f, "%.2f");
diff --git a/src/simulation/simulationContext.cpp b/src/simulation/simulationContext.cpp
index 449f7e4..dcffe48 100644
--- a/src/simulation/simulationContext.cpp
+++ b/src/simulation/simulationContext.cpp
@@ -52,7 +52,7 @@ static void CopyParamsToUI(const SolverParams<Scalar>& p, SolverParamsUI& ui) {
 
 SimulationCUDAContext::SimulationCUDAContext(Context* ctx, const std::string& _name, nlohmann::json& json,
     const std::map<std::string, nlohmann::json>& softBodyDefs, std::vector<FixedBody*>& fixedBodies, int threadsPerBlock, int threadsPerBlockBVH, int maxThreads, int numIterations)
-    : threadsPerBlock_(threadsPerBlock), contextGuiData(ctx->guiData), name(_name) {
+    : threadsPerBlock_(threadsPerBlock), contextGuiData(ctx->guiData.get()), name(_name) {
     std::string prec = "double";
     if (json.contains("precision")) {
         prec = json["precision"].get<std::string>();
diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu
index ec4b85b..bc81c29 100644
--- a/src/simulation/simulationContext.cu
+++ b/src/simulation/simulationContext.cu
@@ -40,7 +40,7 @@ void SimulationCUDAContext::Impl<Scalar>::Init(Context* ctx, nlohmann::json& jso
     maxThreads = _maxThreads;
     numIterations = _numIterations;
 
-    auto guiData = ctx->guiData;
+    auto guiData = ctx->guiData.get();
     DataLoader<Scalar> dataLoader(threadsPerBlock);
     std::vector<const char*> namesSoftBodies;
     data.pCollisionDetection = new CollisionDetection<Scalar>{ ctx, _threadsPerBlockBVH, 1 << 16 };
@@ -242,4 +242,4 @@ void SimulationCUDAContext::PrepareRenderData() {
 }
 
 template struct SimulationCUDAContext::Impl<float>;
-template struct SimulationCUDAContext::Impl<double>;
\ No newline at end of file
+template struct SimulationCUDAContext::Impl<double>;

From 391ad157830c697451d7d4f3f98765af61ff69f8 Mon Sep 17 00:00:00 2001
From: GrahamZen <gehan.zheng@vanderbilt.edu>
Date: Fri, 19 Dec 2025 12:46:32 -0600
Subject: [PATCH 06/20] unique_ptr in solver

---
 src/simulation/energy/ip.cu                  | 10 ++++++----
 src/simulation/energy/ip.h                   |  5 +++--
 src/simulation/solver/projective/pdSolver.cu |  5 +----
 src/simulation/solver/projective/pdSolver.h  |  5 +++--
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/simulation/energy/ip.cu b/src/simulation/energy/ip.cu
index 037c202..f97458c 100644
--- a/src/simulation/energy/ip.cu
+++ b/src/simulation/energy/ip.cu
@@ -17,8 +17,11 @@ struct AbsOp {
     }
 };
 
-IPEnergy::IPEnergy(const SolverData<double>& solverData) : inertia(solverData, nnz, solverData.numVerts, solverData.mass),
-elastic(new CorotatedEnergy<double>(solverData, nnz)), implicitBarrier(solverData, nnz), barrier(solverData, nnz)
+IPEnergy::IPEnergy(const SolverData<double>& solverData)
+    : inertia(solverData, nnz, solverData.numVerts, solverData.mass),
+    elastic(std::make_unique<CorotatedEnergy<double>>(solverData, nnz)),
+    implicitBarrier(solverData, nnz),
+    barrier(solverData, nnz)
 {
     hessianCapacity = nnz;
     cudaMalloc((void**)&gradient, sizeof(double) * solverData.numVerts * 3);
@@ -38,7 +41,6 @@ IPEnergy::~IPEnergy()
     cudaFree(hessianVal);
     cudaFree(hessianRowIdx);
     cudaFree(hessianColIdx);
-    if (elastic) delete elastic;
 }
 
 double IPEnergy::Val(const glm::dvec3* Xs, const SolverData<double>& solverData, const SolverParams<double>& solverParams, double h2) const
@@ -123,4 +125,4 @@ double IPEnergy::InitStepSize(SolverData<double>& solverData, const SolverParams
 int IPEnergy::NNZ(const SolverData<double>& solverData) const
 {
     return inertia.NNZ(solverData) + implicitBarrier.NNZ(solverData) + elastic->NNZ(solverData) + barrier.NNZ(solverData);
-}
\ No newline at end of file
+}
diff --git a/src/simulation/energy/ip.h b/src/simulation/energy/ip.h
index d68c128..477f6ab 100644
--- a/src/simulation/energy/ip.h
+++ b/src/simulation/energy/ip.h
@@ -5,6 +5,7 @@
 #include <energy/gravity.h>
 #include <energy/elasticity.h>
 #include <energy/implicitBarrier.h>
+#include <memory>
 
 class IPEnergy {
 public:
@@ -30,6 +31,6 @@ class IPEnergy {
     InertiaEnergy<double> inertia;
     GravityEnergy<double> gravity;
     ImplicitBarrierEnergy<double> implicitBarrier;
-    ElasticEnergy<double>* elastic = nullptr;
+    std::unique_ptr<ElasticEnergy<double>> elastic;
     BarrierEnergy<double> barrier;
-};
\ No newline at end of file
+};
diff --git a/src/simulation/solver/projective/pdSolver.cu b/src/simulation/solver/projective/pdSolver.cu
index db77ef2..f0357af 100644
--- a/src/simulation/solver/projective/pdSolver.cu
+++ b/src/simulation/solver/projective/pdSolver.cu
@@ -27,9 +27,6 @@ PdSolver::PdSolver(int threadsPerBlock, const SolverData<float>& solverData) : F
 }
 
 PdSolver::~PdSolver() {
-    if (ls) {
-        free(ls);
-    }
     cudaFree(sn);
     cudaFree(sn_old);
     cudaFree(b);
@@ -117,7 +114,7 @@ void PdSolver::SolverPrepare(SolverData<float>& solverData, const SolverParams<f
         cudaMemcpy(AColIdx, AColIdxHost.data(), sizeof(int) * nnz, cudaMemcpyHostToDevice);
         cudaMemcpy(AVal, tmpValHost.data(), sizeof(float) * nnz, cudaMemcpyHostToDevice);
 
-        ls = new CholeskySpLinearSolver<float>(threadsPerBlock, ARowIdx, AColIdx, AVal, ASize, nnz);
+        ls = std::make_unique<CholeskySpLinearSolver<float>>(threadsPerBlock, ARowIdx, AColIdx, AVal, ASize, nnz);
     }
     catch (const std::exception& e)
     {
diff --git a/src/simulation/solver/projective/pdSolver.h b/src/simulation/solver/projective/pdSolver.h
index c46cbb7..2f196c0 100644
--- a/src/simulation/solver/projective/pdSolver.h
+++ b/src/simulation/solver/projective/pdSolver.h
@@ -4,6 +4,7 @@
 #include <Eigen/SparseCore>
 #include <simulation/solver/femSolver.h>
 #include <Eigen/Dense>
+#include <memory>
 
 template<typename T>
 class LinearSolver;
@@ -23,8 +24,8 @@ class PdSolver : public FEMSolver<float> {
     virtual void SolverPrepare(SolverData<float>& solverData, const SolverParams<float>& solverParams) override;
     virtual bool SolverStep(SolverData<float>& solverData, const SolverParams<float>& solverParams) override;
 private:
-    LinearSolver<float>* ls = nullptr;
-    LinearSolver<float>* jacobiSolver = nullptr;
+    std::unique_ptr<LinearSolver<float>> ls;
+    std::unique_ptr<LinearSolver<float>> jacobiSolver;
     SolverType solverType;
 
     const float positional_weight = 1e6;

From ae5b0cb5aab28059b411202989cc3f5d30ce6790 Mon Sep 17 00:00:00 2001
From: GrahamZen <gehan.zheng@vanderbilt.edu>
Date: Fri, 19 Dec 2025 23:14:19 -0600
Subject: [PATCH 07/20] fix interop free

---
 src/main.cpp                                 |  6 +++++-
 src/openglcontext/drawable.cpp               |  1 +
 src/openglcontext/drawable.h                 |  2 +-
 src/openglcontext/mesh.cpp                   | 10 ++++++++--
 src/openglcontext/preview.cpp                |  4 +++-
 src/openglcontext/preview.h                  |  2 +-
 src/openglcontext/queryDisplay.cpp           | 10 ++++++++--
 src/openglcontext/singleQueryDisplay.cpp     | 15 +++++++++++---
 src/openglcontext/wireframe.cpp              |  5 ++++-
 src/simulation/simulationContext.cu          | 13 ++++++++++++
 src/simulation/solver/projective/pdSolver.cu | 21 +++++++++++++++-----
 src/simulation/solver/projective/pdSolver.h  | 16 +++++++--------
 12 files changed, 80 insertions(+), 25 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index c4b4084..fdfbead 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -49,9 +49,13 @@ int main(int argc, char** argv) {
     }
     // GLFW main loop
     mainLoop();
+    if (context) {
+        delete context;
+        context = nullptr;
+    }
+    cleanupOpenGL();
     cudaDeviceReset();
 
-    delete context;
     return 0;
 }
 
diff --git a/src/openglcontext/drawable.cpp b/src/openglcontext/drawable.cpp
index 65b6e2f..eefab2a 100644
--- a/src/openglcontext/drawable.cpp
+++ b/src/openglcontext/drawable.cpp
@@ -5,6 +5,7 @@ Drawable::Drawable()
     idxBound(false), posBound(false), norBound(false), uvBound(false)
 {}
 
+Drawable::~Drawable() = default;
 
 void Drawable::destroy()
 {
diff --git a/src/openglcontext/drawable.h b/src/openglcontext/drawable.h
index 19d2b17..59eab03 100644
--- a/src/openglcontext/drawable.h
+++ b/src/openglcontext/drawable.h
@@ -21,7 +21,7 @@ class Drawable
 
 public:
     Drawable();
-
+    virtual ~Drawable();
     virtual void create() = 0; // To be implemented by subclasses. Populates the VBOs of the Drawable.
     void destroy(); // Frees the VBOs of the Drawable.
 
diff --git a/src/openglcontext/mesh.cpp b/src/openglcontext/mesh.cpp
index cfa4a74..a796c3e 100644
--- a/src/openglcontext/mesh.cpp
+++ b/src/openglcontext/mesh.cpp
@@ -12,8 +12,14 @@ Mesh::Mesh()
 Mesh::~Mesh()
 {
     // unregister this buffer object with CUDA
-    cudaGLUnregisterBufferObject(bufPos);
-    cudaGLUnregisterBufferObject(bufNor);
+    if (cuda_bufPos_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufPos_resource);
+        cuda_bufPos_resource = nullptr;
+    }
+    if (cuda_bufNor_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufNor_resource);
+        cuda_bufNor_resource = nullptr;
+    }
 }
 
 void Mesh::createTetrahedron()
diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp
index 668f0cf..58aa756 100644
--- a/src/openglcontext/preview.cpp
+++ b/src/openglcontext/preview.cpp
@@ -337,11 +337,13 @@ void mainLoop() {
 
         glfwSwapBuffers(window);
     }
+}
 
+void cleanupOpenGL() {
     ImGui_ImplOpenGL3_Shutdown();
     ImGui_ImplGlfw_Shutdown();
     ImGui::DestroyContext();
 
     glfwDestroyWindow(window);
     glfwTerminate();
-}
+}
\ No newline at end of file
diff --git a/src/openglcontext/preview.h b/src/openglcontext/preview.h
index 4c569aa..61081e5 100644
--- a/src/openglcontext/preview.h
+++ b/src/openglcontext/preview.h
@@ -5,6 +5,6 @@ class GuiDataContainer;
 std::string currentTimeString();
 bool initOpenGL();
 void mainLoop();
-
+void cleanupOpenGL();
 bool MouseOverImGuiWindow();
 void InitImguiData(GuiDataContainer* guiData);
\ No newline at end of file
diff --git a/src/openglcontext/queryDisplay.cpp b/src/openglcontext/queryDisplay.cpp
index 9bc0b3e..c63b027 100644
--- a/src/openglcontext/queryDisplay.cpp
+++ b/src/openglcontext/queryDisplay.cpp
@@ -10,8 +10,14 @@ QueryDisplay::QueryDisplay()
 QueryDisplay::~QueryDisplay()
 {
     // unregister this buffer object with CUDA
-    cudaGLUnregisterBufferObject(bufPos);
-    cudaGLUnregisterBufferObject(bufCol);
+    if (cuda_bufPos_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufPos_resource);
+        cuda_bufPos_resource = nullptr;
+    }
+    if (cuda_bufCol_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufCol_resource);
+        cuda_bufCol_resource = nullptr;
+    }
 }
 
 GLenum QueryDisplay::drawMode()
diff --git a/src/openglcontext/singleQueryDisplay.cpp b/src/openglcontext/singleQueryDisplay.cpp
index 42445bb..b3840ed 100644
--- a/src/openglcontext/singleQueryDisplay.cpp
+++ b/src/openglcontext/singleQueryDisplay.cpp
@@ -12,9 +12,18 @@ SingleQueryDisplay::SingleQueryDisplay()
 SingleQueryDisplay::~SingleQueryDisplay()
 {
     // unregister this buffer object with CUDA
-    cudaGLUnregisterBufferObject(bufPos);
-    cudaGLUnregisterBufferObject(bufVertPos);
-    cudaGLUnregisterBufferObject(bufTriPos);
+    if (cuda_bufPos_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufPos_resource);
+        cuda_bufPos_resource = nullptr;
+    }
+    if (cuda_bufVertPos_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufVertPos_resource);
+        cuda_bufVertPos_resource = nullptr;
+    }
+    if (cuda_bufTriPos_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufTriPos_resource);
+        cuda_bufTriPos_resource = nullptr;
+    }
 }
 
 GLenum SingleQueryDisplay::drawMode()
diff --git a/src/openglcontext/wireframe.cpp b/src/openglcontext/wireframe.cpp
index 4d458e1..bdd705b 100644
--- a/src/openglcontext/wireframe.cpp
+++ b/src/openglcontext/wireframe.cpp
@@ -8,7 +8,10 @@ Wireframe::Wireframe()
 Wireframe::~Wireframe()
 {
     // unregister this buffer object with CUDA
-    cudaGLUnregisterBufferObject(bufPos);
+    if (cuda_bufPos_resource) {
+        cudaGraphicsUnregisterResource(cuda_bufPos_resource);
+        cuda_bufPos_resource = nullptr;
+    }
 }
 
 GLenum Wireframe::drawMode()
diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu
index bc81c29..b44ee0a 100644
--- a/src/simulation/simulationContext.cu
+++ b/src/simulation/simulationContext.cu
@@ -125,6 +125,7 @@ void SimulationCUDAContext::Impl<Scalar>::Init(Context* ctx, nlohmann::json& jso
 template<class Scalar>
 SimulationCUDAContext::Impl<Scalar>::~Impl()
 {
+
     cudaFree(data.X);
     cudaFree(data.Tet);
     cudaFree(data.V);
@@ -132,6 +133,13 @@ SimulationCUDAContext::Impl<Scalar>::~Impl()
     cudaFree(data.X0);
     cudaFree(data.XTilde);
     cudaFree(data.ExtForce);
+    cudaFree(data.OffsetX);     
+    cudaFree(data.moreDBC);     
+    cudaFree(data.DBCX);        
+    cudaFree(data.Tri);         
+    cudaFree(data.DBCIdx);      
+    cudaFree(data.contact_area);
+
     cudaFree(data.DBC);
     cudaFree(data.mass);
     cudaFree(data.mu);
@@ -145,6 +153,11 @@ SimulationCUDAContext::Impl<Scalar>::~Impl()
         delete softbody;
     }
     delete data.pCollisionDetection;
+
+    if (data.pFixedBodies) {
+        delete data.pFixedBodies; 
+        data.pFixedBodies = nullptr;
+    }
 }
 
 void SimulationCUDAContext::UpdateSoftBodyAttr(int index, SoftBodyAttr* pSoftBodyAttr)
diff --git a/src/simulation/solver/projective/pdSolver.cu b/src/simulation/solver/projective/pdSolver.cu
index f0357af..6f19152 100644
--- a/src/simulation/solver/projective/pdSolver.cu
+++ b/src/simulation/solver/projective/pdSolver.cu
@@ -27,15 +27,26 @@ PdSolver::PdSolver(int threadsPerBlock, const SolverData<float>& solverData) : F
 }
 
 PdSolver::~PdSolver() {
-    cudaFree(sn);
-    cudaFree(sn_old);
-    cudaFree(b);
-    cudaFree(massDt_2s);
-    free(bHost);
+if (sn) cudaFree(sn);
+    if (sn_old) cudaFree(sn_old);
+    if (b) cudaFree(b);
+    if (massDt_2s) cudaFree(massDt_2s);
+    if (bHost) free(bHost);
+    if (next_x) cudaFree(next_x);
+    if (prev_x) cudaFree(prev_x);
+    if (matrix_diag) cudaFree(matrix_diag);
 }
 
 void PdSolver::SolverPrepare(SolverData<float>& solverData, const SolverParams<float>& solverParams)
 {
+    if (sn) cudaFree(sn);
+    if (sn_old) cudaFree(sn_old);
+    if (next_x) cudaFree(next_x);
+    if (prev_x) cudaFree(prev_x);
+    if (b) cudaFree(b);
+    if (massDt_2s) cudaFree(massDt_2s);
+    if (matrix_diag) cudaFree(matrix_diag);
+    if (bHost) free(bHost);
     int vertBlocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock;
     int tetBlocks = (solverData.numTets + threadsPerBlock - 1) / threadsPerBlock;
     float dt = solverParams.dt;
diff --git a/src/simulation/solver/projective/pdSolver.h b/src/simulation/solver/projective/pdSolver.h
index 2f196c0..a911371 100644
--- a/src/simulation/solver/projective/pdSolver.h
+++ b/src/simulation/solver/projective/pdSolver.h
@@ -29,16 +29,16 @@ class PdSolver : public FEMSolver<float> {
     SolverType solverType;
 
     const float positional_weight = 1e6;
-    float* massDt_2s;
-    float* sn;
-    float* sn_old;
-    float* b;
-    float* bHost;
-    float* matrix_diag;
+    float* massDt_2s = nullptr;
+    float* sn = nullptr;
+    float* sn_old = nullptr;
+    float* b = nullptr;
+    float* bHost = nullptr;
+    float* matrix_diag = nullptr;
     Eigen::SimplicialCholesky<Eigen::SparseMatrix<float>> cholesky_decomposition_;
 
     //Jacobi
     float omega;
-    float* next_x;
-    float* prev_x;
+    float* next_x = nullptr;
+    float* prev_x = nullptr;
 };

From 1576eb4ea14952c585535e4bba4f2d59fd7867dd Mon Sep 17 00:00:00 2001
From: GrahamZen <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 00:02:18 -0600
Subject: [PATCH 08/20] free pointers

---
 src/simulation/simulationContext.cu      |  4 +++
 src/simulation/solver/IPC/ipc.cu         | 33 ++++++++++++++++------
 src/simulation/solver/IPC/ipc.h          |  2 +-
 src/simulation/solver/linear/cholesky.cu | 19 +------------
 src/simulation/solver/linear/cholesky.h  | 36 ++++++++++++++++++------
 5 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu
index b44ee0a..301de23 100644
--- a/src/simulation/simulationContext.cu
+++ b/src/simulation/simulationContext.cu
@@ -139,6 +139,10 @@ SimulationCUDAContext::Impl<Scalar>::~Impl()
     cudaFree(data.Tri);         
     cudaFree(data.DBCIdx);      
     cudaFree(data.contact_area);
+    if(data.V0)
+        cudaFree(data.V0);
+    if (data.DmInv)
+        cudaFree(data.DmInv);
 
     cudaFree(data.DBC);
     cudaFree(data.mass);
diff --git a/src/simulation/solver/IPC/ipc.cu b/src/simulation/solver/IPC/ipc.cu
index f534fb2..927487c 100644
--- a/src/simulation/solver/IPC/ipc.cu
+++ b/src/simulation/solver/IPC/ipc.cu
@@ -139,7 +139,6 @@ void IPCSolver::SolverPrepare(SolverData<double>& solverData, const SolverParams
     }
 }
 
-
 bool IPCSolver::SolverStep(SolverData<double>& solverData, const SolverParams<double>& solverParams)
 {
     SolverPrepare(solverData, solverParams);
@@ -148,6 +147,9 @@ bool IPCSolver::SolverStep(SolverData<double>& solverData, const SolverParams<do
     double h2 = h * h;
     int blocks = (solverData.numVerts + threadsPerBlock - 1) / threadsPerBlock;
     double E_last = 0;
+
+    bool stepSuccess = true;
+
     performanceData[0].second +=
         measureExecutionTime([&]() {
         cudaMemcpy(x_n, solverData.X, sizeof(glm::dvec3) * solverData.numVerts, cudaMemcpyDeviceToDevice);
@@ -155,21 +157,28 @@ bool IPCSolver::SolverStep(SolverData<double>& solverData, const SolverParams<do
         solverData.pCollisionDetection->UpdateQueries(solverData.numVerts, solverData.numTris, solverData.Tri, solverData.X, solverData.dev_TriFathers, solverParams.dhat);
         energy.UpdateKappa(solverData, const_cast<SolverParams<double>&>(solverParams), h2);
         E_last = energy.Val(solverData.X, solverData, solverParams, h2);
+
         if (isnan(E_last) || isinf(E_last)) {
             std::cout << "FATAL: Energy is NaN/Inf before Line Search!" << std::endl;
-            return false;
+            stepSuccess = false;
+            return;
+        }
+        if (!SearchDirection(solverData, solverParams, h2)) {
+            stepSuccess = false;
+            return;
         }
-        if (!SearchDirection(solverData, solverParams, h2))
-            return false;
         solverData.pCollisionDetection->UpdateDirection(p);
         solverData.pCollisionDetection->UpdateX(solverData.X);
             }, perf);
+    if (!stepSuccess) return false;
+
     int maxIter = solverParams.maxIterations;
     int iter = 0;
     while (!EndCondition(h, solverParams.tol)) {
         if (++iter > maxIter) {
             return false;
         }
+        stepSuccess = true;
         performanceData[1].second +=
             measureExecutionTime([&]() {
             IPC::computeXMinusAP << <blocks, threadsPerBlock >> > (xTmp, solverData.X, p, 1, solverData.numVerts);
@@ -180,7 +189,8 @@ bool IPCSolver::SolverStep(SolverData<double>& solverData, const SolverParams<do
                 double E = energy.Val(xTmp, solverData, solverParams, h2);
                 if (isnan(E) || isinf(E)) {
                     std::cout << "FATAL: Energy is NaN/Inf during Line Search! Alpha: " << alpha << std::endl;
-                    return false;
+                    stepSuccess = false;
+                    return;
                 }
                 if (E > E_last)
                     alpha /= 2;
@@ -193,21 +203,28 @@ bool IPCSolver::SolverStep(SolverData<double>& solverData, const SolverParams<do
             }
             cudaMemcpy(solverData.X, xTmp, sizeof(glm::dvec3) * solverData.numVerts, cudaMemcpyDeviceToDevice);
                 }, perf);
+        if (!stepSuccess) return false;
         performanceData[2].second +=
             measureExecutionTime([&]() {
             solverData.pCollisionDetection->UpdateQueries(solverData.numVerts, solverData.numTris, solverData.Tri, solverData.X, solverData.dev_TriFathers, solverParams.dhat);
                 }, perf);
+        stepSuccess = true;
         performanceData[3].second +=
             measureExecutionTime([&]() {
             E_last = energy.Val(solverData.X, solverData, solverParams, h2);
             if (isnan(E_last) || isinf(E_last)) {
                 std::cout << "FATAL: Energy is NaN/Inf after Line Search!" << std::endl;
-                return false;
+                stepSuccess = false;
+                return;
             }
 
-            if (!SearchDirection(solverData, solverParams, h2))
-                return false;
+            if (!SearchDirection(solverData, solverParams, h2)) {
+                stepSuccess = false;
+                return;
+            }
                 }, perf);
+
+        if (!stepSuccess) return false;
     }
     IPC::updateVel << <blocks, threadsPerBlock >> > (solverData.X, x_n, solverData.V, 1.0 / h, solverData.numVerts);
     return true;
diff --git a/src/simulation/solver/IPC/ipc.h b/src/simulation/solver/IPC/ipc.h
index 8deb115..e49a2d0 100644
--- a/src/simulation/solver/IPC/ipc.h
+++ b/src/simulation/solver/IPC/ipc.h
@@ -35,7 +35,7 @@ class IPCSolver : public FEMSolver<double> {
     glm::dvec3* xTmp = nullptr;
     glm::dvec3* x_n = nullptr;
     IPEnergy energy;
-    std::array<std::unique_ptr<LinearSolver<double>>, 3> linearSolver = { nullptr, nullptr, nullptr };
+    std::array<std::unique_ptr<LinearSolver<double>>, 4> linearSolver = { nullptr, nullptr, nullptr, nullptr };
     LinearSolver<double>* currLinearSolver = nullptr;
     SolverType solverType = SolverType::CuSolverCholesky;
 };
\ No newline at end of file
diff --git a/src/simulation/solver/linear/cholesky.cu b/src/simulation/solver/linear/cholesky.cu
index aea4272..7dd4c58 100644
--- a/src/simulation/solver/linear/cholesky.cu
+++ b/src/simulation/solver/linear/cholesky.cu
@@ -22,14 +22,6 @@ __global__ void initAMatrix(int* idx, int* row, int* col, int rowLen, int totalN
     }
 }
 
-template<typename T>
-CholeskyDnLinearSolver<T>::~CholeskyDnLinearSolver()
-{
-    cudaFree(d_info);
-    cudaFree(d_predecomposedA);
-    cudaFree(d_work);
-}
-
 template<typename T>
 CholeskyDnLinearSolver<T>::CholeskyDnLinearSolver(int threadsPerBlock, int* AIdx, T* AVal, int ASize, int len) {
     cudaMalloc(&d_predecomposedA, sizeof(T) * ASize * ASize);
@@ -76,16 +68,6 @@ CholeskyDnLinearSolver<T>::CholeskyDnLinearSolver(int threadsPerBlock, int* AIdx
     free(h_work);
 }
 
-template<typename T>
-CholeskySpLinearSolver<T>::~CholeskySpLinearSolver()
-{
-    cusolverSpDestroyCsrcholInfo(d_info);
-    cusparseDestroyMatDescr(descrA);
-    cusolverSpDestroy(cusolverHandle);
-    cudaFree(buffer_gpu);
-    cudaFree(dev_x_permuted);
-    cudaFree(dev_b_permuted);
-}
 
 template<typename T>
 void CholeskySpLinearSolver<T>::ComputeAMD(cusolverSpHandle_t handle, int rowsA, int nnzA, int* dev_csrRowPtrA, int* dev_csrColIndA, T* dev_csrValA) {
@@ -156,6 +138,7 @@ CholeskySpLinearSolver<T>::CholeskySpLinearSolver(int threadsPerBlock, int* rowI
     cusparseHandle_t handle;
     cusparseCreate(&handle);
     cusparseXcoo2csr(handle, d_rowIdx, nnz, ASize, d_rowPtrA, CUSPARSE_INDEX_BASE_ZERO);
+    cusparseDestroy(handle);
 
     cusolverSpCreate(&cusolverHandle);
     cusparseCreateMatDescr(&descrA);
diff --git a/src/simulation/solver/linear/cholesky.h b/src/simulation/solver/linear/cholesky.h
index 97fc2cf..e6f1cd1 100644
--- a/src/simulation/solver/linear/cholesky.h
+++ b/src/simulation/solver/linear/cholesky.h
@@ -11,7 +11,18 @@ template<typename T>
 class CholeskySpLinearSolver : public LinearSolver<T> {
 public:
     CholeskySpLinearSolver(int threadsPerBlock, int* rowIdx, int* colIdx, T* val, int ASize, int len);
-    virtual ~CholeskySpLinearSolver() override;
+    virtual ~CholeskySpLinearSolver() override
+    {
+        if (d_info) { cusolverSpDestroyCsrcholInfo(d_info); d_info = nullptr; }
+        if (descrA) { cusparseDestroyMatDescr(descrA); descrA = nullptr; }
+        if (cusolverHandle) { cusolverSpDestroy(cusolverHandle); cusolverHandle = nullptr; }
+
+        if (d_p) { cudaFree(d_p); d_p = nullptr; }
+        if (buffer_gpu) { cudaFree(buffer_gpu); buffer_gpu = nullptr; }
+        if (dev_x_permuted) { cudaFree(dev_x_permuted); dev_x_permuted = nullptr; }
+        if (dev_b_permuted) { cudaFree(dev_b_permuted); dev_b_permuted = nullptr; }
+    }
+
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override;
 private:
     using LinearSolver<T>::d_A;
@@ -21,27 +32,34 @@ class CholeskySpLinearSolver : public LinearSolver<T> {
     using LinearSolver<T>::capacity;
 
     void ComputeAMD(cusolverSpHandle_t handle, int rowsA, int nnzA, int* dev_csrRowPtrA, int* dev_csrColIndA, T* dev_csrValA);
-    cusolverSpHandle_t cusolverHandle;
-    cusparseMatDescr_t descrA;
-    csrcholInfo_t d_info;
+    cusolverSpHandle_t cusolverHandle = nullptr;
+    cusparseMatDescr_t descrA = nullptr;
+    csrcholInfo_t d_info = nullptr;
     void* buffer_gpu = nullptr;
     int* d_p = nullptr;
-    T* dev_b_permuted = nullptr, * dev_x_permuted = nullptr;
-    int n;
+    T* dev_b_permuted = nullptr;
+    T* dev_x_permuted = nullptr;
+    int n = 0;
 };
 
 template<typename T>
 class CholeskyDnLinearSolver : public LinearSolver<T> {
 public:
     CholeskyDnLinearSolver(int threadsPerBlock, int* AIdx, T* AVal, int ASize, int len);
-    virtual ~CholeskyDnLinearSolver() override;
+    virtual ~CholeskyDnLinearSolver() override
+    {
+        if (d_info) { cudaFree(d_info); d_info = nullptr; }
+        if (d_predecomposedA) { cudaFree(d_predecomposedA); d_predecomposedA = nullptr; }
+        if (d_work) { cudaFree(d_work); d_work = nullptr; }
+        if (cusolverHandle) { cusolverDnDestroy(cusolverHandle); cusolverHandle = nullptr; }
+    }
     virtual void Solve(int N, T* d_b, T* d_x, T* d_A = nullptr, int nz = 0, int* d_rowIdx = nullptr, int* d_colIdx = nullptr, T* d_guess = nullptr) override;
 private:
     using LinearSolver<T>::dType;
 
     cusolverDnParams_t params;
     int* d_info = nullptr;    /* error info */
-    cusolverDnHandle_t cusolverHandle;
+    cusolverDnHandle_t cusolverHandle = nullptr;
     void* d_work = nullptr;              /* device workspace */
-    T* d_predecomposedA;
+    T* d_predecomposedA = nullptr;
 };
\ No newline at end of file

From e261d816d9310616c086b8bf66eb23b72b795373 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 00:27:46 -0600
Subject: [PATCH 09/20] remove fps cap

---
 src/openglcontext/preview.cpp       | 2 ++
 src/simulation/simulationContext.cu | 6 ++----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/openglcontext/preview.cpp b/src/openglcontext/preview.cpp
index 58aa756..53fb723 100644
--- a/src/openglcontext/preview.cpp
+++ b/src/openglcontext/preview.cpp
@@ -49,6 +49,8 @@ bool initOpenGL() {
         return false;
     }
     glfwMakeContextCurrent(window);
+    // Disable vsync (swap interval 0) so the frame rate is no longer locked to the monitor refresh rate (e.g. 60 Hz)
+    glfwSwapInterval(0);
     glfwSetKeyCallback(window, keyCallback);
     glfwSetCursorPosCallback(window, mousePositionCallback);
     glfwSetMouseButtonCallback(window, mouseButtonCallback);
diff --git a/src/simulation/simulationContext.cu b/src/simulation/simulationContext.cu
index 301de23..0c7948f 100644
--- a/src/simulation/simulationContext.cu
+++ b/src/simulation/simulationContext.cu
@@ -139,10 +139,8 @@ SimulationCUDAContext::Impl<Scalar>::~Impl()
     cudaFree(data.Tri);         
     cudaFree(data.DBCIdx);      
     cudaFree(data.contact_area);
-    if(data.V0)
-        cudaFree(data.V0);
-    if (data.DmInv)
-        cudaFree(data.DmInv);
+    cudaFree(data.V0);
+    cudaFree(data.DmInv);
 
     cudaFree(data.DBC);
     cudaFree(data.mass);

From 70f811f0834ec2b567e2e0e5ddbe745313927b45 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 01:19:14 -0600
Subject: [PATCH 10/20] update readme fix barrier hessian construction

---
 README.md                                | 88 ++++++++++++++++++------
 src/simulation/energy/implicitBarrier.cu | 26 +++----
 2 files changed, 80 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index 5bc57f4..4cd4eb1 100644
--- a/README.md
+++ b/README.md
@@ -8,14 +8,17 @@ Hanting Xu
 
 ## Click [here](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/tree/CIS5650-Final) for documentation (CIS5650 Final Project version)
 
-## Requirements
+## Overview
 
-- CUDA >= 12.0 (cublas, cusolver)
-- CMake >= 3.18
+This project is a CUDA-accelerated soft body simulation framework originally developed as a final project for **CIS 5650: GPU Programming and Architecture** at UPenn.
 
-## Description
+The goal of this project is to explore GPU-based physics simulation by building a **lightweight, extensible simulation framework** with minimal external dependencies. The system is designed to support rapid experimentation with different:
 
-This project is originally a final project for CIS5650 at UPenn. The goal of this toy project is to provide a CUDA-accelerated physical simulation framework with minimal dependencies. The framework is designed to be easily extensible, allowing new simulation algorithms, physical models, linear solvers, and collision detection methods to be added with minimal effort. The currently implemented features are listed below.
+* physical models,
+* numerical solvers,
+* GPU-accelerated linear algebra pipelines.
+
+---
 
 ## Features
 
@@ -25,6 +28,8 @@ This project is originally a final project for CIS5650 at UPenn. The goal of thi
     * [x] Jacobi Solver (Naive)
     * [x] Cholesky Decomposition
     * [x] Preconditioned Conjugate Gradient
+      * [x] Incomplete Cholesky Preconditioner
+      * [x] Jacobi Preconditioner
 
 * FEM
     * [x] Projective Dynamics
@@ -41,39 +46,78 @@ This project is originally a final project for CIS5650 at UPenn. The goal of thi
          * [x] Neo-Hookean
 
 * Collision Detection
-    * [x] Real-Time Bvh
-    * [x] Ccd
-    * [ ] Robust Collision Handling
+    * [x] Real-Time BVH Construction
+    * [x] Continuous Collision Detection (CCD)
 
 ## Dependencies
 
-* [CUDA](https://developer.nvidia.com/cuda-downloads)
-* [CMake](https://cmake.org/download/)
+### System Requirements
+
+* **Operating System**
+
+  * Windows
+  * Linux
+* **CUDA Toolkit** ≥ 12.0
+  (cuBLAS and cuSOLVER required)
+* **CMake** ≥ 3.18
+* **OpenGL**
+
+### Third-Party Libraries
 
-Below are included in the project:
+The following libraries are included directly in the project:
 
 * OpenGL
 * ImGui
-* spdlog
+* GLFW
 * Eigen
-* glfw
-* catch2
+* spdlog
+* Catch2
+
+External tools:
+
+* [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)
+* [CMake](https://cmake.org/download/)
+
+---
+
+## Configuration
+
+### Environment Configuration
+
+The full runtime configuration is specified in `context.json`. This file defines simulation contexts, solver settings, and physical parameters.
+
+---
+
+### Scene Configuration
+
+The framework supports multiple **simulation contexts**, each representing an independent scene. A context may contain:
+
+* one or more soft bodies,
+* rigid bodies.
+
+Each context can be configured independently with physical parameters such as time step size, gravity, and damping coefficients. Contexts can be switched **at runtime**.
+
+---
 
-## Note on Configuration
+### Solver Configuration
 
-The complete environment configuration is specified in context.json.
+Solver behavior is controlled on a per-context basis.
 
-### Scene
+* **Single-precision (`float`)**
 
-The framework supports configuration of predefined soft bodies, rigid bodies, and camera parameters. Multiple contexts (scenes) can be loaded simultaneously, where each context may contain different combinations of soft and rigid objects, as well as distinct camera settings.
+  * Uses the **Projective Dynamics (PD)** solver
+* **Double-precision (`double`)**
 
-Each context can be configured independently with physical parameters such as time step size, gravity, damping coefficients, and friction coefficients, and supports real-time switching between contexts.
+  * Uses the **Incremental Potential Contact (IPC)** solver
 
-### Solver
+Only parameters relevant to the active solver are applied.
 
-The behavior of the solver can be adjusted by modifying parameters in each context. Currently, solvers supporting two floating-point precisions are available. When defining a context, setting the precision parameter to float uses the projective dynamics solver, while setting it to double uses the IPC solver. Only the parameters relevant to the active solver take effect.
+#### Notes on Solver Usage
 
-The PD solver supports interactive object dragging within the scene. The IPC solver is significantly slower and consumes more GPU memory; therefore, it is not recommended for scenes involving objects with a large number of degrees of freedom. Different solvers expose different global solver and linear solver options in the ImGui combo box, which can be switched in real time. However, since solvers consume a substantial amount of GPU memory, frequent switching may lead to performance degradation. It is recommended to select the desired solver before starting the simulation and avoid switching after the simulation has begun.
+* The PD solver supports **interactive object dragging**.
+* IPC is **not recommended** for scenes with a large number of degrees of freedom; for large vertex counts, careful parameter tuning is required, otherwise the simulation may fail to converge and pause.
+* For large-scale systems, **Cholesky-based solvers can become prohibitively slow**; **PCG with a Jacobi preconditioner** is recommended instead.
+* Linear solvers can be switched via ImGui **before simulation starts**.
 
 ## Screenshots
 
diff --git a/src/simulation/energy/implicitBarrier.cu b/src/simulation/energy/implicitBarrier.cu
index f786c7d..c2b0a21 100644
--- a/src/simulation/energy/implicitBarrier.cu
+++ b/src/simulation/energy/implicitBarrier.cu
@@ -16,6 +16,13 @@ namespace ImplicitBarrier {
         if (idx >= numVerts)
             return;
         glm::tvec3<Scalar> x = X[idx];
+        for (int k = 0; k < 3; k++) {
+            for (int l = 0; l < 3; l++) {
+                int index = idx * 9 + k * 3 + l;
+                hessianRowIdx[index] = idx * 3 + k;
+                hessianColIdx[index] = idx * 3 + l;
+            }
+        }
         for (int j = 0; j < numPlanes; j++) {
             const Plane& plane = planes[j];
             glm::tvec3<Scalar> floorPos = glm::tvec3<Scalar>(plane.m_model[3]);
@@ -33,8 +40,6 @@ namespace ImplicitBarrier {
                         int colIdx = idx * 3 + l;
                         int index = idx * 9 + k * 3 + l;
                         hessianVal[index] += hess[k][l];
-                        hessianRowIdx[index] = rowIdx;
-                        hessianColIdx[index] = colIdx;
                     }
                 }
             }
@@ -60,8 +65,6 @@ namespace ImplicitBarrier {
                         int colIdx = idx * 3 + l;
                         int index = idx * 9 + k * 3 + l;
                         hessianVal[index] += hess[k][l];
-                        hessianRowIdx[index] = rowIdx;
-                        hessianColIdx[index] = colIdx;
                     }
                 }
             }
@@ -85,8 +88,6 @@ namespace ImplicitBarrier {
                         int colIdx = idx * 3 + l;
                         int index = idx * 9 + k * 3 + l;
                         hessianVal[index] += hess[k][l];
-                        hessianRowIdx[index] = rowIdx;
-                        hessianColIdx[index] = colIdx;
                     }
                 }
             }
@@ -153,6 +154,13 @@ namespace ImplicitBarrier {
         int idx = blockIdx.x * blockDim.x + threadIdx.x;
         if (idx >= numVerts)
             return;
+        for (int k = 0; k < 3; k++) {
+            for (int l = 0; l < 3; l++) {
+                int index = idx * 9 + k * 3 + l;
+                hessianRowIdx[index] = idx * 3 + k;
+                hessianColIdx[index] = idx * 3 + l;
+            }
+        }
         const glm::tvec3<Scalar> x = X[idx];
         for (int j = 0; j < numPlanes; j++) {
             const Plane& plane = planes[j];
@@ -175,8 +183,6 @@ namespace ImplicitBarrier {
                         int colIdx = idx * 3 + l;
                         int index = idx * 9 + k * 3 + l;
                         hessianVal[index] += hess[k][l];
-                        hessianRowIdx[index] = rowIdx;
-                        hessianColIdx[index] = colIdx;
                     }
                 }
             }
@@ -206,8 +212,6 @@ namespace ImplicitBarrier {
                         int colIdx = idx * 3 + l;
                         int index = idx * 9 + k * 3 + l;
                         hessianVal[index] += hess[k][l];
-                        hessianRowIdx[index] = rowIdx;
-                        hessianColIdx[index] = colIdx;
                     }
                 }
             }
@@ -235,8 +239,6 @@ namespace ImplicitBarrier {
                         int colIdx = idx * 3 + l;
                         int index = idx * 9 + k * 3 + l;
                         hessianVal[index] += hess[k][l];
-                        hessianRowIdx[index] = rowIdx;
-                        hessianColIdx[index] = colIdx;
                     }
                 }
             }

From 6efc9b78a4e3fa03d56ee86669019be115bbf239 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 01:26:55 -0600
Subject: [PATCH 11/20] Refactor CMake workflow for multi-platform builds

---
 .github/workflows/cmake-multi-platform.yml | 57 ++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 .github/workflows/cmake-multi-platform.yml

diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml
new file mode 100644
index 0000000..f3d7c18
--- /dev/null
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -0,0 +1,57 @@
+name: CMake on multiple platforms
+
+on:
+  pull_request:
+    branches:
+      - "linux"
+      - "main"
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-latest
+            build_type: Release
+            c_compiler: gcc
+            cpp_compiler: g++
+          - os: windows-latest
+            build_type: Release
+            c_compiler: cl
+            cpp_compiler: cl
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # 安装 CUDA Toolkit（包含 cuSolver）
+      - name: Install CUDA Toolkit
+        uses: Jimver/cuda-toolkit@v0.2.16
+        with:
+          # 选一个你项目支持的版本；也可以固定到你本地用的版本
+          cuda: "12.4.1"
+
+      - name: Set reusable strings
+        id: strings
+        shell: bash
+        run: |
+          echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+
+      - name: Configure CMake
+        shell: bash
+        run: >
+          cmake -B "${{ steps.strings.outputs.build-output-dir }}"
+          -S "${{ github.workspace }}"
+          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
+          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
+
+      - name: Build
+        shell: bash
+        run: cmake --build "${{ steps.strings.outputs.build-output-dir }}" --config ${{ matrix.build_type }}
+
+      - name: Test
+        shell: bash
+        working-directory: ${{ steps.strings.outputs.build-output-dir }}
+        run: ctest --build-config ${{ matrix.build_type }} --output-on-failure

From d1d44ef106e63ff2cd988c5a0efd5638a071089a Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 12:56:30 -0600
Subject: [PATCH 12/20] apt install cuda

---
 .github/workflows/cmake-multi-platform.yml | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml
index f3d7c18..2b7fc5b 100644
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -25,18 +25,27 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      # 安装 CUDA Toolkit（包含 cuSolver）
-      - name: Install CUDA Toolkit
-        uses: Jimver/cuda-toolkit@v0.2.16
-        with:
-          # 选一个你项目支持的版本；也可以固定到你本地用的版本
-          cuda: "12.4.1"
+      - name: Install CUDA Toolkit (Linux)
+        if: runner.os == 'Linux'
+        shell: bash
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y wget gnupg
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-toolkit-12-4
 
       - name: Set reusable strings
         id: strings
         shell: bash
         run: |
           echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+      - name: Set CUDA env
+        if: runner.os == 'Linux'
+        run: |
+          echo "/usr/local/cuda/bin" >> $GITHUB_PATH
+          echo "CUDA_HOME=/usr/local/cuda" >> $GITHUB_ENV
 
       - name: Configure CMake
         shell: bash

From 69620dc1558f510f40ce05fb21bd91010ccf24e1 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 13:04:43 -0600
Subject: [PATCH 13/20] linux only test

---
 .github/workflows/cmake-multi-platform.yml | 47 +++++-----------------
 1 file changed, 9 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml
index 2b7fc5b..eb13394 100644
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -1,32 +1,19 @@
-name: CMake on multiple platforms
+name: CUDA CMake Build (Linux)
 
 on:
   pull_request:
     branches:
-      - "linux"
-      - "main"
+      - linux
+      - main
 
 jobs:
   build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - os: ubuntu-latest
-            build_type: Release
-            c_compiler: gcc
-            cpp_compiler: g++
-          - os: windows-latest
-            build_type: Release
-            c_compiler: cl
-            cpp_compiler: cl
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v4
 
-      - name: Install CUDA Toolkit (Linux)
-        if: runner.os == 'Linux'
+      - name: Install CUDA Toolkit
         shell: bash
         run: |
           sudo apt-get update
@@ -36,31 +23,15 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-toolkit-12-4
 
-      - name: Set reusable strings
-        id: strings
-        shell: bash
-        run: |
-          echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
       - name: Set CUDA env
-        if: runner.os == 'Linux'
         run: |
           echo "/usr/local/cuda/bin" >> $GITHUB_PATH
           echo "CUDA_HOME=/usr/local/cuda" >> $GITHUB_ENV
 
       - name: Configure CMake
-        shell: bash
-        run: >
-          cmake -B "${{ steps.strings.outputs.build-output-dir }}"
-          -S "${{ github.workspace }}"
-          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
-          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
+        run: |
+          cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
 
       - name: Build
-        shell: bash
-        run: cmake --build "${{ steps.strings.outputs.build-output-dir }}" --config ${{ matrix.build_type }}
-
-      - name: Test
-        shell: bash
-        working-directory: ${{ steps.strings.outputs.build-output-dir }}
-        run: ctest --build-config ${{ matrix.build_type }} --output-on-failure
+        run: |
+          cmake --build build --config Release

From a724a391e4c5089266884ee853c8ae92dc2971e5 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 13:14:55 -0600
Subject: [PATCH 14/20] Install OpenGL deps

---
 .github/workflows/cmake-multi-platform.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml
index eb13394..513b847 100644
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -13,6 +13,18 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: Install OpenGL deps
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            libgl1-mesa-dev \
+            libglu1-mesa-dev \
+            libx11-dev \
+            libxrandr-dev \
+            libxinerama-dev \
+            libxcursor-dev \
+            libxi-dev
+
       - name: Install CUDA Toolkit
         shell: bash
         run: |

From 1ff4e139a92d68856838c3b2a3e2699f372a5200 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 13:36:38 -0600
Subject: [PATCH 15/20] update opengl setup

---
 CMakeLists.txt       |  8 +++++-
 tests/CMakeLists.txt | 58 ++++++++++++++++++++------------------------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index da41093..56bc764 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,9 +39,15 @@ find_package(OpenGL REQUIRED)
 set(EXTERNAL "${PROJECT_SOURCE_DIR}/external")
 
 if(UNIX)
+    find_package(OpenGL REQUIRED)
     find_package(glfw3 REQUIRED)
     find_package(GLEW REQUIRED)
-    set(LIBRARIES glfw ${GLEW_LIBRARIES} ${OPENGL_gl_LIBRARY} ${OPENGL_glx_LIBRARY})
+
+    set(LIBRARIES
+        glfw
+        ${GLEW_LIBRARIES}
+        OpenGL::GL
+    )
 else()
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
     set(GLFW_ROOT_DIR ${EXTERNAL})
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 38005eb..8702385 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,51 +1,45 @@
 cmake_minimum_required(VERSION 3.18)
+project(tests LANGUAGES CXX CUDA)
 
-# Enable C++11 for host code
 set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_STANDARD_REQUIRED True)
-set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 set(CMAKE_CUDA_ARCHITECTURES 86)
 
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
-
-# Set a default build type if none was specified
-if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
-    SET(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
-    # Set the possible values of build type for cmake-gui
-    SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
-endif()
-
 find_package(CUDAToolkit REQUIRED)
 
 include(catch2)
 include(finite_diff)
 
-########################################
-# Catch2 Tests
-########################################
-file(GLOB_RECURSE TEST_SOURCES "./*.cpp")
-file(GLOB_RECURSE TEST_DISTANCE_SOURCES "${CMAKE_SOURCE_DIR}/src/simulation/distance/*.cu")
+file(GLOB_RECURSE TEST_SOURCES "*.cpp")
+file(GLOB_RECURSE TEST_DISTANCE_SOURCES
+    "${CMAKE_SOURCE_DIR}/src/simulation/distance/*.cu"
+)
+
 add_executable(tests
-${TEST_SOURCES}
-${TEST_DISTANCE_SOURCES}
-${CMAKE_SOURCE_DIR}/src/collision/intersections.cu
-${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/cg.cu
-${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/jacobi.cu
-${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/linear.cu
+    ${TEST_SOURCES}
+    ${TEST_DISTANCE_SOURCES}
+    ${CMAKE_SOURCE_DIR}/src/collision/intersections.cu
+    ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/cg.cu
+    ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/jacobi.cu
+    ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear/linear.cu
 )
-target_include_directories(tests PUBLIC ${CMAKE_SOURCE_DIR}/src/simulation/solver/linear)
-target_link_libraries(tests PUBLIC
-Eigen3::Eigen
-CUDA::cudart
-Catch2::Catch2
-CUDA::cusolver
-finitediff::finitediff
+
+target_link_libraries(tests
+    Eigen3::Eigen
+    CUDA::cudart
+    CUDA::cusolver
+    Catch2::Catch2
+    finitediff::finitediff
 )
+
 set_target_properties(tests PROPERTIES
     CUDA_SEPARABLE_COMPILATION ON
     CUDA_RESOLVE_DEVICE_SYMBOLS ON
 )
+
 target_compile_options(tests PRIVATE
-    $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda -lineinfo --expt-relaxed-constexpr -Xcompiler "/wd 4819 /wd 4068 /wd 4661" -Xcudafe "--display_error_number --diag_suppress=20012 --diag_suppress=20011 --diag_suppress=20014">
+  $<$<COMPILE_LANGUAGE:CUDA>:
+    --extended-lambda
+    --expt-relaxed-constexpr
+    -lineinfo
+  >
 )
-########################################

From 956422742a34260430e9ce5f66e95d8bd2802d22 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 13:37:05 -0600
Subject: [PATCH 16/20] install opengl

---
 .github/workflows/cmake-multi-platform.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml
index 513b847..7e8d987 100644
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -35,6 +35,20 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-toolkit-12-4
 
+      - name: Install OpenGL & GLFW deps
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            libglfw3-dev \
+            libglew-dev \
+            libglm-dev \
+            libx11-dev \
+            libxrandr-dev \
+            libxinerama-dev \
+            libxcursor-dev \
+            libxi-dev
+
+
       - name: Set CUDA env
         run: |
           echo "/usr/local/cuda/bin" >> $GITHUB_PATH

From 6001fe79dcac7cddc6ea530df48648092092a837 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 13:47:49 -0600
Subject: [PATCH 17/20] set CMAKE_CUDA_ARCHITECTURES

---
 CMakeLists.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56bc764..0c165a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,13 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/recipes/")
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/find/")
 
-set(CMAKE_CUDA_ARCHITECTURES native)
+if(CMAKE_CUDA_COMPILER)
+  set(CMAKE_CUDA_ARCHITECTURES native)
+endif()
+
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
+  set(CMAKE_CUDA_ARCHITECTURES 70;75;80;86;90)
+endif()
 
 set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
 set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -g")

From 80671c249729ca3d34b213b2502b4ab718305c52 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 14:41:45 -0600
Subject: [PATCH 18/20] add badge

---
 ...lti-platform.yml => cuda-cmake-build-linux.yml} | 14 ++++++++++----
 README.md                                          |  3 ++-
 2 files changed, 12 insertions(+), 5 deletions(-)
 rename .github/workflows/{cmake-multi-platform.yml => cuda-cmake-build-linux.yml} (79%)

diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cuda-cmake-build-linux.yml
similarity index 79%
rename from .github/workflows/cmake-multi-platform.yml
rename to .github/workflows/cuda-cmake-build-linux.yml
index 7e8d987..7b08fe2 100644
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cuda-cmake-build-linux.yml
@@ -9,6 +9,9 @@ on:
 jobs:
   build:
     runs-on: ubuntu-22.04
+    strategy:
+      matrix:
+        build_type: [Debug, Release]
 
     steps:
       - uses: actions/checkout@v4
@@ -26,7 +29,6 @@ jobs:
             libxi-dev
 
       - name: Install CUDA Toolkit
-        shell: bash
         run: |
           sudo apt-get update
           sudo apt-get install -y wget gnupg
@@ -48,7 +50,6 @@ jobs:
             libxcursor-dev \
             libxi-dev
 
-
       - name: Set CUDA env
         run: |
           echo "/usr/local/cuda/bin" >> $GITHUB_PATH
@@ -56,8 +57,13 @@ jobs:
 
       - name: Configure CMake
         run: |
-          cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
+          cmake -S . -B build-${{ matrix.build_type }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
 
       - name: Build
         run: |
-          cmake --build build --config Release
+          cmake --build build-${{ matrix.build_type }} --config ${{ matrix.build_type }}
+
+      - name: Run tests
+        run: |
+          cd build-${{ matrix.build_type }}
+          ctest --output-on-failure
diff --git a/README.md b/README.md
index 4cd4eb1..9a93e88 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 CUDA-Accelerated Soft Body Simulation
-================
+
+![CUDA CMake Build (Linux)](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/actions/workflows/cuda-cmake-build-linux.yml/badge.svg)
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Final Project**
 

From 884e0de2b783d2c7c625a1a415bfdf618359ca61 Mon Sep 17 00:00:00 2001
From: Gehan Zheng <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 14:42:42 -0600
Subject: [PATCH 19/20] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9a93e88..f25e8e7 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-CUDA-Accelerated Soft Body Simulation
+# CUDA-Accelerated Soft Body Simulation
 
 ![CUDA CMake Build (Linux)](https://github.com/GrahamZen/Soft-Body-Simulation-CUDA/actions/workflows/cuda-cmake-build-linux.yml/badge.svg)
 

From 5e5a9996fc32ccda708e5197299726b079bdd310 Mon Sep 17 00:00:00 2001
From: GrahamZen <gehan.zheng@vanderbilt.edu>
Date: Sat, 20 Dec 2025 16:45:54 -0600
Subject: [PATCH 20/20] fix CMAKE_CUDA_ARCHITECTURES

---
 CMakeLists.txt | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c165a6..549e622 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,11 +14,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/recipes/")
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/find/")
 
 if(CMAKE_CUDA_COMPILER)
-  set(CMAKE_CUDA_ARCHITECTURES native)
-endif()
-
-if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
-  set(CMAKE_CUDA_ARCHITECTURES 70;75;80;86;90)
+    if(DEFINED ENV{CI})
+        message(STATUS "CI environment detected. Setting CUDA architecture to 75.")
+        set(CMAKE_CUDA_ARCHITECTURES 75)
+    else()
+        message(STATUS "Local environment detected. Setting CUDA architecture to native.")
+        set(CMAKE_CUDA_ARCHITECTURES native)
+    endif()
 endif()
 
 set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")