diff --git a/cuBQL/builder/cuda/sah_builder.h b/cuBQL/builder/cuda/sah_builder.h index ba68e22..dafb440 100644 --- a/cuBQL/builder/cuda/sah_builder.h +++ b/cuBQL/builder/cuda/sah_builder.h @@ -28,8 +28,14 @@ namespace cuBQL { struct { AtomicBox centBounds; uint32_t offset; +#if 1 + int8_t dim; + int8_t bin; + int32_t atomicSideDeciderBits; +#else int8_t dim; int8_t bin; +#endif } openNode; struct { uint32_t offset; @@ -54,7 +60,8 @@ namespace cuBQL { inline __device__ void evaluateSAH(int &splitDim, int &splitBin, - const SAHBins &sah) + const SAHBins &sah, + int maxAllowedLeafSize) { float bestCost = CUBQL_INF; @@ -69,7 +76,7 @@ namespace cuBQL { rAreas[b] = surfaceArea(box); } const float leafCost = rAreas[0] * rCount; - if (leafCost < bestCost) { + if (leafCost < bestCost && rCount <= maxAllowedLeafSize) { bestCost = leafCost; splitDim = -1; } @@ -241,12 +248,17 @@ namespace cuBQL { int splitDim = -1; int splitBin; if (in.count > buildConfig.makeLeafThreshold) { - evaluateSAH(splitDim,splitBin,sah); + evaluateSAH(splitDim,splitBin,sah,buildConfig.maxAllowedLeafSize); } - if (splitDim < 0) { + bool makeLeaf + = (splitDim < 0) + && (in.count <= buildConfig.maxAllowedLeafSize); + + if (makeLeaf) { nodeState = DONE_NODE; auto &done = nodes[nodeID].doneNode; done.count = in.count; + // set this to max-value, so the prims can later do atomicMin // with their position ion the leaf list; this value is // greater than any prim position. @@ -295,22 +307,26 @@ namespace cuBQL { auto open = nodes[me.nodeID].openNode; const box_t primBox = primBoxes[me.primID]; + int side = -1; + if (open.dim == -1) { + // // use top bit to atomically 'sort' prims left vs right + // side = atomicAdd(&nodes[me.nodeID].openNode.atomicSideDeciderBits,1u<<31)>>31; + side = atomicAdd(&nodes[me.nodeID].openNode.atomicSideDeciderBits,1u)&1; + } else { - const int d = open.dim; - T lo = open.centBounds.get_lower(d); - T hi = open.centBounds.get_upper(d); + const int d = open.dim; + T lo = open.centBounds.get_lower(d); + T hi = open.centBounds.get_upper(d); - T prim_d = T(0.5)*(primBox.get_lower(d)+primBox.get_upper(d)); - T rel - = (prim_d - lo) - / (hi - lo + 1e-20f); - int prim_bin = int(rel*(int)SAHBins>::numBins); - prim_bin = max(0,min((int)SAHBins>::numBins-1,prim_bin)); + T prim_d = T(0.5)*(primBox.get_lower(d)+primBox.get_upper(d)); + T rel + = (prim_d - lo) + / (hi - lo + 1e-20f); + int prim_bin = int(rel*(int)SAHBins>::numBins); + prim_bin = max(0,min((int)SAHBins>::numBins-1,prim_bin)); - int side = (prim_bin >= open.bin); - // printf("updateprim %i node %i state %i dim %i bin %i -> prim bin %i -> side %i\n", - // primID,me.nodeID,ns,open.dim,open.bin, - // prim_bin,side); + side = (prim_bin >= open.bin); + } int newNodeID = open.offset+side; auto &myBranch = nodes[newNodeID].openBranch; atomicAdd(&myBranch.count,1); diff --git a/cuBQL/bvh.h b/cuBQL/bvh.h index effb41a..aeda611 100644 --- a/cuBQL/bvh.h +++ b/cuBQL/bvh.h @@ -140,6 +140,7 @@ namespace cuBQL { // easy short-hand - though cubql also supports other types of bvhs, // scalars, etc, this will likely be the most commonly used one. using bvh3f = BinaryBVH; + using bvh3d = BinaryBVH; #ifdef __CUDACC__ typedef BinaryBVH bvh_float2; diff --git a/cuBQL/math/Ray.h b/cuBQL/math/Ray.h index ae77c86..b5379d8 100644 --- a/cuBQL/math/Ray.h +++ b/cuBQL/math/Ray.h @@ -16,6 +16,8 @@ namespace cuBQL { struct ray_t { using vec3 = vec_t; + inline ray_t() = default; + inline ray_t(const ray_t &) = default; __cubql_both ray_t(vec3 org, vec3 dir, T tMin, T tMax); __cubql_both ray_t(vec3 org, vec3 dir); vec3 origin; diff --git a/cuBQL/math/affine.h b/cuBQL/math/affine.h index 1a24ca5..7655bef 100644 --- a/cuBQL/math/affine.h +++ b/cuBQL/math/affine.h @@ -164,6 +164,7 @@ namespace cuBQL { typename AffineSpaceT::vector_t xfmPoint(const AffineSpaceT& m, const typename AffineSpaceT::vector_t &p) { + using vector_t = typename AffineSpaceT::vector_t; return madd(vector_t(p.x),m.l.vx, madd(vector_t(p.y),m.l.vy, madd(vector_t(p.z),m.l.vz, @@ -185,11 +186,11 @@ namespace cuBQL { /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// - template inline + template inline __cubql_both bool operator ==( const AffineSpaceT& a, const AffineSpaceT& b ) { return a.l == b.l && a.p == b.p; } - template inline + template inline __cubql_both bool operator !=( const AffineSpaceT& a, const AffineSpaceT& b ) { return a.l != b.l || a.p != b.p; } diff --git a/cuBQL/math/linear.h b/cuBQL/math/linear.h index 9628990..5e7c57f 100644 --- a/cuBQL/math/linear.h +++ b/cuBQL/math/linear.h @@ -132,8 +132,13 @@ namespace cuBQL { /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// - template inline __cubql_both bool operator ==( const LinearSpace2& a, const LinearSpace2& b ) { return a.vx == b.vx && a.vy == b.vy; } - template inline __cubql_both bool operator !=( const LinearSpace2& a, const LinearSpace2& b ) { return a.vx != b.vx || a.vy != b.vy; } + template inline __cubql_both + bool operator ==( const LinearSpace2& a, const LinearSpace2& b ) + { return a.vx == b.vx && a.vy == b.vy; } + + template inline __cubql_both + bool operator !=( const LinearSpace2& a, const LinearSpace2& b ) + { return a.vx != b.vx || a.vy != b.vy; } //////////////////////////////////////////////////////////////////////////////// /// Output Operators @@ -307,8 +312,13 @@ namespace cuBQL { /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// - template inline bool operator ==( const LinearSpace3& a, const LinearSpace3& b ) { return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz; } - template inline bool operator !=( const LinearSpace3& a, const LinearSpace3& b ) { return a.vx != b.vx || a.vy != b.vy || a.vz != b.vz; } + template inline __cubql_both + bool operator ==( const LinearSpace3& a, const LinearSpace3& b ) + { return a.vx == b.vx && a.vy == b.vy && a.vz == b.vz; } + + template inline __cubql_both + bool operator !=( const LinearSpace3& a, const LinearSpace3& b ) + { return a.vx != b.vx || a.vy != b.vy || a.vz != b.vz; } //////////////////////////////////////////////////////////////////////////////// /// Output Operators diff --git a/cuBQL/math/vec.h b/cuBQL/math/vec.h index 474a110..10ffdb7 100644 --- a/cuBQL/math/vec.h +++ b/cuBQL/math/vec.h @@ -169,7 +169,9 @@ namespace cuBQL { using vec_t_data::y; using vec_t_data::z; - inline __cubql_both vec_t() {} + inline vec_t() = default; + inline vec_t(const vec_t &) = default; + // inline __cubql_both vec_t() {} inline __cubql_both vec_t(const T &t) { x = y = z = t; } inline __cubql_both vec_t(T x, T y, T z) { this->x = x; this->y = y; this->z = z; } diff --git a/cuBQL/queries/triangleData/Triangle.h b/cuBQL/queries/triangleData/Triangle.h index c8fb104..5f07f66 100644 --- a/cuBQL/queries/triangleData/Triangle.h +++ b/cuBQL/queries/triangleData/Triangle.h @@ -34,7 +34,12 @@ namespace cuBQL { { using vec3 = vec_t; using box3 = box_t; - + + inline triangle_t() = default; + inline triangle_t(const triangle_t &) = default; + inline __cubql_both triangle_t(vec3 a, vec3 b, vec3 c) + : a(a), b(b), c(c) + {} inline __cubql_both box3 bounds() const; inline __cubql_both vec3 sample(float u, float v) const; inline __cubql_both vec3 normal() const; diff --git a/cuBQL/traversal/rayQueries.h b/cuBQL/traversal/rayQueries.h index 9ee0067..f09bdb8 100644 --- a/cuBQL/traversal/rayQueries.h +++ b/cuBQL/traversal/rayQueries.h @@ -168,6 +168,21 @@ namespace cuBQL { return tin <= tout; } + template + inline __cubql_both + void rayBoxTest(T &tin, T &tout, + ray_t ray, box_t box) + { + using vec3 = vec_t; + vec3 inv = rcp(ray.direction); + vec3 lo = (box.lower - ray.origin) * inv; + vec3 hi = (box.upper - ray.origin) * inv; + vec3 nr = min(lo,hi); + vec3 fr = max(lo,hi); + tin = max(ray.tMin,reduce_max(nr)); + tout = min(ray.tMax,reduce_min(fr)); + } + template inline __cubql_both bool rayIntersectsBox(float &ret_t0, @@ -462,7 +477,7 @@ namespace cuBQL { node_t *tlasSavedNodePtr = 0; uint32_t *tlasSavedPrimIDs = 0; - vec3f saved_dir, saved_org; + vec_t saved_dir, saved_org; if (ray.direction.x == (T)0) ray.direction.x = T(1e-20); if (ray.direction.y == (T)0) ray.direction.y = T(1e-20); @@ -512,6 +527,9 @@ namespace cuBQL { enterBlas(transformed_ray,blas,instID); ray.origin = transformed_ray.origin; ray.direction = transformed_ray.direction; + if (ray.direction.x == (T)0) ray.direction.x = T(1e-20); + if (ray.direction.y == (T)0) ray.direction.y = T(1e-20); + if (ray.direction.z == (T)0) ray.direction.z = T(1e-20); rcp_dir = rcp(ray.direction); bvh.nodes = blas.nodes; bvh.primIDs = blas.primIDs; @@ -531,10 +549,13 @@ namespace cuBQL { bool o0 = rayIntersectsBox(node_t0,ray,rcp_dir,n0.bounds); bool o1 = rayIntersectsBox(node_t1,ray,rcp_dir,n1.bounds); - if (dbg) + if (dbg) { + // dout << " node L " << n0.bounds << "\n"; + // dout << " node R " << n1.bounds << "\n"; printf("children L hit %i dist %f R hit %i dist %f\n", int(o0),node_t0, int(o1),node_t1); + } if (o0) { if (o1) { if (stackPtr-traversalStack >= STACK_DEPTH) { @@ -620,15 +641,7 @@ namespace cuBQL { for (int i=0;i