Nori  23
bvh.cpp
1 /*
2  This file is part of Nori, a simple educational ray tracer
3 
4  Copyright (c) 2015 by Wenzel Jakob, Romain Prévost
5 
6  Nori is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License Version 3
8  as published by the Free Software Foundation.
9 
10  Nori is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include <nori/bvh.h>
20 #include <nori/timer.h>
21 #include <tbb/tbb.h>
22 #include <Eigen/Geometry>
23 #include <atomic>
24 
25 /*
26  * =======================================================================
27  * WARNING WARNING WARNING WARNING WARNING WARNING
28  * =======================================================================
29  * Remember to put on SAFETY GOGGLES before looking at this file. You
30  * are most certainly not expected to read or understand any of it.
31  * =======================================================================
32  */
33 
34 NORI_NAMESPACE_BEGIN
35 
36 /* Bin data structure for counting triangles and computing their bounding box */
37 struct Bins {
38  static const int BIN_COUNT = 16;
39  Bins() { memset(counts, 0, sizeof(uint32_t) * BIN_COUNT); }
40  uint32_t counts[BIN_COUNT];
41  BoundingBox3f bbox[BIN_COUNT];
42 };
43 
54 class BVHBuildTask : public tbb::task {
55 private:
56  BVH &bvh;
57  uint32_t node_idx;
58  uint32_t *start, *end, *temp;
59 
60 public:
62  enum {
65 
67  GRAIN_SIZE = 1000,
68 
71 
74  };
75 
76 public:
97  BVHBuildTask(BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp)
98  : bvh(bvh), node_idx(node_idx), start(start), end(end), temp(temp) { }
99 
100  task *execute() {
101  uint32_t size = (uint32_t) (end-start);
102  BVH::BVHNode &node = bvh.m_nodes[node_idx];
103 
104  /* Switch to a serial build when less than SERIAL_THRESHOLD triangles are left */
105  if (size < SERIAL_THRESHOLD) {
106  execute_serially(bvh, node_idx, start, end, temp);
107  return nullptr;
108  }
109 
110  /* Always split along the largest axis */
111  int axis = node.bbox.getLargestAxis();
112  float min = node.bbox.min[axis], max = node.bbox.max[axis],
113  inv_bin_size = Bins::BIN_COUNT / (max-min);
114 
115  /* Accumulate all triangles into bins */
116  Bins bins = tbb::parallel_reduce(
117  tbb::blocked_range<uint32_t>(0u, size, GRAIN_SIZE),
118  Bins(),
119  /* MAP: Bin a number of triangles and return the resulting 'Bins' data structure */
120  [&](const tbb::blocked_range<uint32_t> &range, Bins result) {
121  for (uint32_t i = range.begin(); i != range.end(); ++i) {
122  uint32_t f = start[i];
123  float centroid = bvh.getCentroid(f)[axis];
124 
125  int index = std::min(std::max(
126  (int) ((centroid - min) * inv_bin_size), 0),
127  (Bins::BIN_COUNT - 1));
128 
129  result.counts[index]++;
130  result.bbox[index].expandBy(bvh.getBoundingBox(f));
131  }
132  return result;
133  },
134  /* REDUCE: Combine two 'Bins' data structures */
135  [](const Bins &b1, const Bins &b2) {
136  Bins result;
137  for (int i=0; i < Bins::BIN_COUNT; ++i) {
138  result.counts[i] = b1.counts[i] + b2.counts[i];
139  result.bbox[i] = BoundingBox3f::merge(b1.bbox[i], b2.bbox[i]);
140  }
141  return result;
142  }
143  );
144 
145  /* Choose the best split plane based on the binned data */
146  BoundingBox3f bbox_left[Bins::BIN_COUNT];
147  bbox_left[0] = bins.bbox[0];
148  for (int i=1; i<Bins::BIN_COUNT; ++i) {
149  bins.counts[i] += bins.counts[i-1];
150  bbox_left[i] = BoundingBox3f::merge(bbox_left[i-1], bins.bbox[i]);
151  }
152 
153  BoundingBox3f bbox_right = bins.bbox[Bins::BIN_COUNT-1], best_bbox_right;
154  int64_t best_index = -1;
155  float best_cost = (float) INTERSECTION_COST * size;
156  float tri_factor = (float) INTERSECTION_COST / node.bbox.getSurfaceArea();
157 
158  for (int i=Bins::BIN_COUNT - 2; i >= 0; --i) {
159  uint32_t prims_left = bins.counts[i], prims_right = (uint32_t) (end - start) - bins.counts[i];
160  float sah_cost = 2.0f * TRAVERSAL_COST +
161  tri_factor * (prims_left * bbox_left[i].getSurfaceArea() +
162  prims_right * bbox_right.getSurfaceArea());
163  if (sah_cost < best_cost) {
164  best_cost = sah_cost;
165  best_index = i;
166  best_bbox_right = bbox_right;
167  }
168  bbox_right = BoundingBox3f::merge(bbox_right, bins.bbox[i]);
169  }
170 
171  if (best_index == -1) {
172  /* Could not find a good split plane -- retry with
173  more careful serial code just to be sure.. */
174  execute_serially(bvh, node_idx, start, end, temp);
175  return nullptr;
176  }
177 
178  uint32_t left_count = bins.counts[best_index];
179  int node_idx_left = node_idx+1;
180  int node_idx_right = node_idx+2*left_count;
181 
182  bvh.m_nodes[node_idx_left ].bbox = bbox_left[best_index];
183  bvh.m_nodes[node_idx_right].bbox = best_bbox_right;
184  node.inner.rightChild = node_idx_right;
185  node.inner.axis = axis;
186  node.inner.flag = 0;
187 
188  std::atomic<uint32_t> offset_left(0),
189  offset_right(bins.counts[best_index]);
190 
191  tbb::parallel_for(
192  tbb::blocked_range<uint32_t>(0u, size, GRAIN_SIZE),
193  [&](const tbb::blocked_range<uint32_t> &range) {
194  uint32_t count_left = 0, count_right = 0;
195  for (uint32_t i = range.begin(); i != range.end(); ++i) {
196  uint32_t f = start[i];
197  float centroid = bvh.getCentroid(f)[axis];
198  int index = (int) ((centroid - min) * inv_bin_size);
199  (index <= best_index ? count_left : count_right)++;
200  }
201  uint32_t idx_l = offset_left.fetch_add(count_left);
202  uint32_t idx_r = offset_right.fetch_add(count_right);
203  for (uint32_t i = range.begin(); i != range.end(); ++i) {
204  uint32_t f = start[i];
205  float centroid = bvh.getCentroid(f)[axis];
206  int index = (int) ((centroid - min) * inv_bin_size);
207  if (index <= best_index)
208  temp[idx_l++] = f;
209  else
210  temp[idx_r++] = f;
211  }
212  }
213  );
214  memcpy(start, temp, size * sizeof(uint32_t));
215  assert(offset_left == left_count && offset_right == size);
216 
217  /* Create an empty parent task */
218  tbb::task& c = *new (allocate_continuation()) tbb::empty_task;
219  c.set_ref_count(2);
220 
221  /* Post right subtree to scheduler */
222  BVHBuildTask &b = *new (c.allocate_child())
223  BVHBuildTask(bvh, node_idx_right, start + left_count,
224  end, temp + left_count);
225  spawn(b);
226 
227  /* Directly start working on left subtree */
228  recycle_as_child_of(c);
229  node_idx = node_idx_left;
230  end = start + left_count;
231 
232  return this;
233  }
234 
236  static void execute_serially(BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp) {
237  BVH::BVHNode &node = bvh.m_nodes[node_idx];
238  uint32_t size = (uint32_t) (end - start);
239  float best_cost = (float) INTERSECTION_COST * size;
240  int64_t best_index = -1, best_axis = -1;
241  float *left_areas = (float *) temp;
242 
243  /* Try splitting along every axis */
244  for (int axis=0; axis<3; ++axis) {
245  /* Sort all triangles based on their centroid positions projected on the axis */
246  std::sort(start, end, [&](uint32_t f1, uint32_t f2) {
247  return bvh.getCentroid(f1)[axis] < bvh.getCentroid(f2)[axis];
248  });
249 
250  BoundingBox3f bbox;
251  for (uint32_t i = 0; i<size; ++i) {
252  uint32_t f = *(start + i);
253  bbox.expandBy(bvh.getBoundingBox(f));
254  left_areas[i] = (float) bbox.getSurfaceArea();
255  }
256  if (axis == 0)
257  node.bbox = bbox;
258 
259  bbox.reset();
260 
261  /* Choose the best split plane */
262  float tri_factor = INTERSECTION_COST / node.bbox.getSurfaceArea();
263  for (uint32_t i = size-1; i>=1; --i) {
264  uint32_t f = *(start + i);
265  bbox.expandBy(bvh.getBoundingBox(f));
266 
267  float left_area = left_areas[i-1];
268  float right_area = bbox.getSurfaceArea();
269  uint32_t prims_left = i;
270  uint32_t prims_right = size-i;
271 
272  float sah_cost = 2.0f * TRAVERSAL_COST +
273  tri_factor * (prims_left * left_area +
274  prims_right * right_area);
275 
276  if (sah_cost < best_cost) {
277  best_cost = sah_cost;
278  best_index = i;
279  best_axis = axis;
280  }
281  }
282  }
283 
284  if (best_index == -1) {
285  /* Splitting does not reduce the cost, make a leaf */
286  node.leaf.flag = 1;
287  node.leaf.start = (uint32_t) (start - bvh.m_indices.data());
288  node.leaf.size = size;
289  return;
290  }
291 
292  std::sort(start, end, [&](uint32_t f1, uint32_t f2) {
293  return bvh.getCentroid(f1)[best_axis] < bvh.getCentroid(f2)[best_axis];
294  });
295 
296  uint32_t left_count = (uint32_t) best_index;
297  uint32_t node_idx_left = node_idx + 1;
298  uint32_t node_idx_right = node_idx + 2 * left_count;
299  node.inner.rightChild = node_idx_right;
300  node.inner.axis = best_axis;
301  node.inner.flag = 0;
302 
303  execute_serially(bvh, node_idx_left, start, start + left_count, temp);
304  execute_serially(bvh, node_idx_right, start+left_count, end, temp + left_count);
305  }
306 };
307 
308 void BVH::addShape(Shape *shape) {
309  m_shapes.push_back(shape);
310  m_shapeOffset.push_back(m_shapeOffset.back() + shape->getPrimitiveCount());
311  m_bbox.expandBy(shape->getBoundingBox());
312 }
313 
314 void BVH::clear() {
315  for (auto shape : m_shapes)
316  delete shape;
317  m_shapes.clear();
318  m_shapeOffset.clear();
319  m_shapeOffset.push_back(0u);
320  m_nodes.clear();
321  m_indices.clear();
322  m_bbox.reset();
323  m_nodes.shrink_to_fit();
324  m_shapes.shrink_to_fit();
325  m_shapeOffset.shrink_to_fit();
326  m_indices.shrink_to_fit();
327 }
328 
329 void BVH::build() {
330  uint32_t size = getPrimitiveCount();
331  if (size == 0)
332  return;
333  cout << "Constructing a SAH BVH (" << m_shapes.size()
334  << (m_shapes.size() == 1 ? " shape, " : " shapes, ")
335  << size << " primitives) .. ";
336  cout.flush();
337  Timer timer;
338 
339  /* Conservative estimate for the total number of nodes */
340  m_nodes.resize(2*size);
341  memset(m_nodes.data(), 0, sizeof(BVHNode) * m_nodes.size());
342  m_nodes[0].bbox = m_bbox;
343  m_indices.resize(size);
344 
345  if (sizeof(BVHNode) != 32)
346  throw NoriException("BVH Node is not packed! Investigate compiler settings.");
347 
348  for (uint32_t i = 0; i < size; ++i)
349  m_indices[i] = i;
350 
351  uint32_t *indices = m_indices.data(), *temp = new uint32_t[size];
352  BVHBuildTask& task = *new(tbb::task::allocate_root())
353  BVHBuildTask(*this, 0u, indices, indices + size , temp);
354  tbb::task::spawn_root_and_wait(task);
355  delete[] temp;
356  std::pair<float, uint32_t> stats = statistics();
357 
358  /* The node array was allocated conservatively and now contains
359  many unused entries -- do a compactification pass. */
360  std::vector<BVHNode> compactified(stats.second);
361  std::vector<uint32_t> skipped_accum(m_nodes.size());
362 
363  for (int64_t i = stats.second-1, j = m_nodes.size(), skipped = 0; i >= 0; --i) {
364  while (m_nodes[--j].isUnused())
365  skipped++;
366  BVHNode &new_node = compactified[i];
367  new_node = m_nodes[j];
368  skipped_accum[j] = (uint32_t) skipped;
369 
370  if (new_node.isInner()) {
371  new_node.inner.rightChild = (uint32_t)
372  (i + new_node.inner.rightChild - j -
373  (skipped - skipped_accum[new_node.inner.rightChild]));
374  }
375  }
376  cout << "done (took " << timer.elapsedString() << " and "
377  << memString(sizeof(BVHNode) * m_nodes.size() + sizeof(uint32_t)*m_indices.size())
378  << ", SAH cost = " << stats.first
379  << ")." << endl;
380 
381  m_nodes = std::move(compactified);
382 }
383 
384 std::pair<float, uint32_t> BVH::statistics(uint32_t node_idx) const {
385  const BVHNode &node = m_nodes[node_idx];
386  if (node.isLeaf()) {
387  return std::make_pair((float) BVHBuildTask::INTERSECTION_COST * node.leaf.size, 1u);
388  } else {
389  std::pair<float, uint32_t> stats_left = statistics(node_idx + 1u);
390  std::pair<float, uint32_t> stats_right = statistics(node.inner.rightChild);
391  float saLeft = m_nodes[node_idx + 1u].bbox.getSurfaceArea();
392  float saRight = m_nodes[node.inner.rightChild].bbox.getSurfaceArea();
393  float saCur = node.bbox.getSurfaceArea();
394  float sahCost =
396  (saLeft * stats_left.first + saRight * stats_right.first) / saCur;
397  return std::make_pair(
398  sahCost,
399  stats_left.second + stats_right.second + 1u
400  );
401  }
402 }
403 
404 bool BVH::rayIntersect(const Ray3f &_ray, Intersection &its, bool shadowRay) const {
405  uint32_t node_idx = 0, stack_idx = 0, stack[64];
406 
407  its.t = std::numeric_limits<float>::infinity();
408 
409  /* Use an adaptive ray epsilon */
410  Ray3f ray(_ray);
411  if (ray.mint == Epsilon)
412  ray.mint = std::max(ray.mint, ray.mint * ray.o.array().abs().maxCoeff());
413 
414  if (m_nodes.empty() || ray.maxt < ray.mint)
415  return false;
416 
417  bool foundIntersection = false;
418  uint32_t f = 0;
419 
420  while (true) {
421  const BVHNode &node = m_nodes[node_idx];
422 
423  if (!node.bbox.rayIntersect(ray)) {
424  if (stack_idx == 0)
425  break;
426  node_idx = stack[--stack_idx];
427  continue;
428  }
429 
430  if (node.isInner()) {
431  stack[stack_idx++] = node.inner.rightChild;
432  node_idx++;
433  assert(stack_idx<64);
434  } else {
435  for (uint32_t i = node.start(), end = node.end(); i < end; ++i) {
436  uint32_t idx = m_indices[i];
437  const Shape *shape = m_shapes[findShape(idx)];
438 
439  float u, v, t;
440  if (shape->rayIntersect(idx, ray, u, v, t)) {
441  if (shadowRay)
442  return true;
443  foundIntersection = true;
444  ray.maxt = its.t = t;
445  its.uv = Point2f(u, v);
446  its.mesh = shape;
447  f = idx;
448  }
449  }
450  if (stack_idx == 0)
451  break;
452  node_idx = stack[--stack_idx];
453  continue;
454  }
455  }
456 
457  if (foundIntersection) {
458  its.mesh->setHitInformation(f,ray,its);
459  }
460 
461  return foundIntersection;
462 }
463 
464 NORI_NAMESPACE_END
Build task for parallel BVH construction.
Definition: bvh.cpp:54
BVHBuildTask(BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp)
Definition: bvh.cpp:97
@ GRAIN_SIZE
Process triangles in batches of 1K for the purpose of parallelization.
Definition: bvh.cpp:67
@ INTERSECTION_COST
Heuristic cost value for intersection operations.
Definition: bvh.cpp:73
@ TRAVERSAL_COST
Heuristic cost value for traversal operations.
Definition: bvh.cpp:70
@ SERIAL_THRESHOLD
Switch to a serial build when less than 32 triangles are left.
Definition: bvh.cpp:64
static void execute_serially(BVH &bvh, uint32_t node_idx, uint32_t *start, uint32_t *end, uint32_t *temp)
Single-threaded build function.
Definition: bvh.cpp:236
Bounding Volume Hierarchy for fast ray intersection queries.
Definition: bvh.h:43
uint32_t findShape(uint32_t &idx) const
Compute the shape and primitive indices corresponding to a primitive index used by the underlying gen...
Definition: bvh.h:105
bool rayIntersect(const Ray3f &ray, Intersection &its, bool shadowRay=false) const
Intersect a ray against all shapes registered with the BVH.
Definition: bvh.cpp:404
std::pair< float, uint32_t > statistics(uint32_t index=0) const
Compute internal tree statistics.
Definition: bvh.cpp:384
uint32_t getPrimitiveCount() const
Return the total number of internally represented primitives.
Definition: bvh.h:87
void build()
Build the BVH.
Definition: bvh.cpp:329
void clear()
Release all resources.
Definition: bvh.cpp:314
void addShape(Shape *shape)
Register a shape for inclusion in the BVH.
Definition: bvh.cpp:308
Simple exception class, which stores a human-readable error description.
Definition: common.h:148
Superclass of all shapes.
Definition: shape.h:97
virtual void setHitInformation(uint32_t index, const Ray3f &ray, Intersection &its) const =0
Set the intersection information: hit point, shading frame, UVs, etc.
virtual uint32_t getPrimitiveCount() const
Return the total number of primitives in this shape.
Definition: shape.h:124
Simple timer with millisecond precision.
Definition: timer.h:32
std::string elapsedString(bool precise=false) const
Like elapsed(), but return a human-readable string.
Definition: timer.h:48
Definition: bvh.cpp:37
Intersection data structure.
Definition: shape.h:37
const Shape * mesh
Pointer to the associated shape.
Definition: shape.h:49
float t
Unoccluded distance along the ray.
Definition: shape.h:41
Point2f uv
UV coordinates, if any.
Definition: shape.h:43
static TBoundingBox merge(const TBoundingBox &bbox1, const TBoundingBox &bbox2)
Merge two bounding boxes.
Definition: bbox.h:300
int getLargestAxis() const
Return the index of the largest axis.
Definition: bbox.h:308
void expandBy(const PointType &p)
Expand the bounding box to contain another point.
Definition: bbox.h:288
void reset()
Mark the bounding box as invalid.
Definition: bbox.h:282
PointType max
Component-wise maximum.
Definition: bbox.h:396
bool rayIntersect(const Ray3f &ray) const
Check if a ray intersects a bounding box.
Definition: bbox.h:336
PointType min
Component-wise minimum.
Definition: bbox.h:395
float getSurfaceArea() const
Calculate the n-1 dimensional volume of the boundary.
Definition: bbox.h:87
Scalar mint
Minimum position on the ray segment.
Definition: ray.h:46
Scalar maxt
Maximum position on the ray segment.
Definition: ray.h:47
PointType o
Ray origin.
Definition: ray.h:43