20 #include <nori/timer.h>
22 #include <Eigen/Geometry>
38 static const int BIN_COUNT = 16;
39 Bins() { memset(counts, 0,
sizeof(uint32_t) * BIN_COUNT); }
40 uint32_t counts[BIN_COUNT];
58 uint32_t *start, *end, *temp;
97 BVHBuildTask(
BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp)
98 : bvh(bvh), node_idx(node_idx), start(start), end(end), temp(temp) { }
101 uint32_t size = (uint32_t) (end-start);
112 float min = node.bbox.
min[axis], max = node.bbox.
max[axis],
113 inv_bin_size = Bins::BIN_COUNT / (max-min);
116 Bins bins = tbb::parallel_reduce(
117 tbb::blocked_range<uint32_t>(0u, size,
GRAIN_SIZE),
120 [&](
const tbb::blocked_range<uint32_t> &range,
Bins result) {
121 for (uint32_t i = range.begin(); i != range.end(); ++i) {
122 uint32_t f = start[i];
123 float centroid = bvh.getCentroid(f)[axis];
125 int index = std::min(std::max(
126 (int) ((centroid - min) * inv_bin_size), 0),
127 (Bins::BIN_COUNT - 1));
129 result.counts[index]++;
130 result.bbox[index].expandBy(bvh.getBoundingBox(f));
135 [](
const Bins &b1,
const Bins &b2) {
137 for (
int i=0; i < Bins::BIN_COUNT; ++i) {
138 result.counts[i] = b1.counts[i] + b2.counts[i];
147 bbox_left[0] = bins.bbox[0];
148 for (
int i=1; i<Bins::BIN_COUNT; ++i) {
149 bins.counts[i] += bins.counts[i-1];
153 BoundingBox3f bbox_right = bins.bbox[Bins::BIN_COUNT-1], best_bbox_right;
154 int64_t best_index = -1;
158 for (
int i=Bins::BIN_COUNT - 2; i >= 0; --i) {
159 uint32_t prims_left = bins.counts[i], prims_right = (uint32_t) (end - start) - bins.counts[i];
161 tri_factor * (prims_left * bbox_left[i].getSurfaceArea() +
163 if (sah_cost < best_cost) {
164 best_cost = sah_cost;
166 best_bbox_right = bbox_right;
171 if (best_index == -1) {
178 uint32_t left_count = bins.counts[best_index];
179 int node_idx_left = node_idx+1;
180 int node_idx_right = node_idx+2*left_count;
182 bvh.m_nodes[node_idx_left ].bbox = bbox_left[best_index];
183 bvh.m_nodes[node_idx_right].bbox = best_bbox_right;
184 node.inner.rightChild = node_idx_right;
185 node.inner.axis = axis;
188 std::atomic<uint32_t> offset_left(0),
189 offset_right(bins.counts[best_index]);
192 tbb::blocked_range<uint32_t>(0u, size,
GRAIN_SIZE),
193 [&](
const tbb::blocked_range<uint32_t> &range) {
194 uint32_t count_left = 0, count_right = 0;
195 for (uint32_t i = range.begin(); i != range.end(); ++i) {
196 uint32_t f = start[i];
197 float centroid = bvh.getCentroid(f)[axis];
198 int index = (int) ((centroid - min) * inv_bin_size);
199 (index <= best_index ? count_left : count_right)++;
201 uint32_t idx_l = offset_left.fetch_add(count_left);
202 uint32_t idx_r = offset_right.fetch_add(count_right);
203 for (uint32_t i = range.begin(); i != range.end(); ++i) {
204 uint32_t f = start[i];
205 float centroid = bvh.getCentroid(f)[axis];
206 int index = (int) ((centroid - min) * inv_bin_size);
207 if (index <= best_index)
214 memcpy(start, temp, size *
sizeof(uint32_t));
215 assert(offset_left == left_count && offset_right == size);
218 tbb::task& c = *
new (allocate_continuation()) tbb::empty_task;
224 end, temp + left_count);
228 recycle_as_child_of(c);
229 node_idx = node_idx_left;
230 end = start + left_count;
236 static void execute_serially(
BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp) {
238 uint32_t size = (uint32_t) (end - start);
240 int64_t best_index = -1, best_axis = -1;
241 float *left_areas = (
float *) temp;
244 for (
int axis=0; axis<3; ++axis) {
246 std::sort(start, end, [&](uint32_t f1, uint32_t f2) {
247 return bvh.getCentroid(f1)[axis] < bvh.getCentroid(f2)[axis];
251 for (uint32_t i = 0; i<size; ++i) {
252 uint32_t f = *(start + i);
253 bbox.
expandBy(bvh.getBoundingBox(f));
254 left_areas[i] = (float) bbox.getSurfaceArea();
263 for (uint32_t i = size-1; i>=1; --i) {
264 uint32_t f = *(start + i);
265 bbox.expandBy(bvh.getBoundingBox(f));
267 float left_area = left_areas[i-1];
268 float right_area = bbox.getSurfaceArea();
269 uint32_t prims_left = i;
270 uint32_t prims_right = size-i;
273 tri_factor * (prims_left * left_area +
274 prims_right * right_area);
276 if (sah_cost < best_cost) {
277 best_cost = sah_cost;
284 if (best_index == -1) {
287 node.leaf.start = (uint32_t) (start - bvh.m_indices.data());
288 node.leaf.size = size;
292 std::sort(start, end, [&](uint32_t f1, uint32_t f2) {
293 return bvh.getCentroid(f1)[best_axis] < bvh.getCentroid(f2)[best_axis];
296 uint32_t left_count = (uint32_t) best_index;
297 uint32_t node_idx_left = node_idx + 1;
298 uint32_t node_idx_right = node_idx + 2 * left_count;
299 node.inner.rightChild = node_idx_right;
300 node.inner.axis = best_axis;
304 execute_serially(bvh, node_idx_right, start+left_count, end, temp + left_count);
309 m_shapes.push_back(shape);
311 m_bbox.
expandBy(shape->getBoundingBox());
315 for (
auto shape : m_shapes)
318 m_shapeOffset.clear();
319 m_shapeOffset.push_back(0u);
323 m_nodes.shrink_to_fit();
324 m_shapes.shrink_to_fit();
325 m_shapeOffset.shrink_to_fit();
326 m_indices.shrink_to_fit();
333 cout <<
"Constructing a SAH BVH (" << m_shapes.size()
334 << (m_shapes.size() == 1 ?
" shape, " :
" shapes, ")
335 << size <<
" primitives) .. ";
340 m_nodes.resize(2*size);
341 memset(m_nodes.data(), 0,
sizeof(
BVHNode) * m_nodes.size());
342 m_nodes[0].bbox = m_bbox;
343 m_indices.resize(size);
346 throw NoriException(
"BVH Node is not packed! Investigate compiler settings.");
348 for (uint32_t i = 0; i < size; ++i)
351 uint32_t *indices = m_indices.data(), *temp =
new uint32_t[size];
353 BVHBuildTask(*
this, 0u, indices, indices + size , temp);
354 tbb::task::spawn_root_and_wait(task);
356 std::pair<float, uint32_t> stats =
statistics();
360 std::vector<BVHNode> compactified(stats.second);
361 std::vector<uint32_t> skipped_accum(m_nodes.size());
363 for (int64_t i = stats.second-1, j = m_nodes.size(), skipped = 0; i >= 0; --i) {
364 while (m_nodes[--j].isUnused())
366 BVHNode &new_node = compactified[i];
367 new_node = m_nodes[j];
368 skipped_accum[j] = (uint32_t) skipped;
370 if (new_node.isInner()) {
371 new_node.inner.rightChild = (uint32_t)
372 (i + new_node.inner.rightChild - j -
373 (skipped - skipped_accum[new_node.inner.rightChild]));
377 << memString(
sizeof(
BVHNode) * m_nodes.size() +
sizeof(uint32_t)*m_indices.size())
378 <<
", SAH cost = " << stats.first
381 m_nodes = std::move(compactified);
385 const BVHNode &node = m_nodes[node_idx];
389 std::pair<float, uint32_t> stats_left =
statistics(node_idx + 1u);
390 std::pair<float, uint32_t> stats_right =
statistics(node.inner.rightChild);
391 float saLeft = m_nodes[node_idx + 1u].bbox.getSurfaceArea();
392 float saRight = m_nodes[node.inner.rightChild].bbox.getSurfaceArea();
396 (saLeft * stats_left.first + saRight * stats_right.first) / saCur;
397 return std::make_pair(
399 stats_left.second + stats_right.second + 1u
405 uint32_t node_idx = 0, stack_idx = 0, stack[64];
407 its.
t = std::numeric_limits<float>::infinity();
411 if (ray.
mint == Epsilon)
412 ray.
mint = std::max(ray.
mint, ray.
mint * ray.
o.array().abs().maxCoeff());
414 if (m_nodes.empty() || ray.
maxt < ray.
mint)
417 bool foundIntersection =
false;
421 const BVHNode &node = m_nodes[node_idx];
426 node_idx = stack[--stack_idx];
430 if (node.isInner()) {
431 stack[stack_idx++] = node.inner.rightChild;
433 assert(stack_idx<64);
435 for (uint32_t i = node.start(), end = node.end(); i < end; ++i) {
436 uint32_t idx = m_indices[i];
440 if (shape->rayIntersect(idx, ray, u, v, t)) {
443 foundIntersection =
true;
444 ray.
maxt = its.
t = t;
452 node_idx = stack[--stack_idx];
457 if (foundIntersection) {
461 return foundIntersection;
Build task for parallel BVH construction.
BVHBuildTask(BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp)
@ GRAIN_SIZE
Process triangles in batches of 1K for the purpose of parallelization.
@ INTERSECTION_COST
Heuristic cost value for intersection operations.
@ TRAVERSAL_COST
Heuristic cost value for traversal operations.
@ SERIAL_THRESHOLD
Switch to a serial build when less than 32 triangles are left.
static void execute_serially(BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp)
Single-threaded build function.
Bounding Volume Hierarchy for fast ray intersection queries.
uint32_t findShape(uint32_t &idx) const
Compute the shape and primitive indices corresponding to a primitive index used by the underlying gen...
bool rayIntersect(const Ray3f &ray, Intersection &its, bool shadowRay=false) const
Intersect a ray against all shapes registered with the BVH.
std::pair< float, uint32_t > statistics(uint32_t index=0) const
Compute internal tree statistics.
uint32_t getPrimitiveCount() const
Return the total number of internally represented primitives.
void build()
Build the BVH.
void clear()
Release all resources.
void addShape(Shape *shape)
Register a shape for inclusion in the BVH.
Simple exception class, which stores a human-readable error description.
Superclass of all shapes.
virtual void setHitInformation(uint32_t index, const Ray3f &ray, Intersection &its) const =0
Set the intersection information: hit point, shading frame, UVs, etc.
virtual uint32_t getPrimitiveCount() const
Return the total number of primitives in this shape.
Simple timer with millisecond precision.
std::string elapsedString(bool precise=false) const
Like elapsed(), but return a human-readable string.
Intersection data structure.
const Shape * mesh
Pointer to the associated shape.
float t
Unoccluded distance along the ray.
Point2f uv
UV coordinates, if any.
static TBoundingBox merge(const TBoundingBox &bbox1, const TBoundingBox &bbox2)
Merge two bounding boxes.
int getLargestAxis() const
Return the index of the largest axis.
void expandBy(const PointType &p)
Expand the bounding box to contain another point.
void reset()
Mark the bounding box as invalid.
PointType max
Component-wise maximum.
bool rayIntersect(const Ray3f &ray) const
Check if a ray intersects a bounding box.
PointType min
Component-wise minimum.
float getSurfaceArea() const
Calculate the n-1 dimensional volume of the boundary.
Scalar mint
Minimum position on the ray segment.
Scalar maxt
Maximum position on the ray segment.