// Fills m_aabbMins / m_aabbMaxs with the min/max coordinates of the AABBs
// obtained from `node`, one axis at a time (di = 0, 1, 2) and BATCH_SIZE
// AABBs per step of the inner loop.
// NOTE(review): this excerpt omits lines (braces and whatever selects between
// the 4-wide and 8-wide variants are not visible). The four store variants
// below are presumably mutually exclusive alternatives -- confirm against the
// full file.
137 for(std::size_t di = 0; di < 3; ++di)
139 for(std::size_t ci = 0; ci < N; ci +=
BATCH_SIZE)
// Fetch up to eight consecutive AABBs starting at index ci. Indices at or
// beyond N fall back to emptyAABB; aabb0 needs no such check because the loop
// condition already guarantees ci < N.
143 const auto& aabb0 = node.getAABB(ci);
144 const auto& aabb1 = ci + 1 < N ? node.getAABB(ci + 1) : emptyAABB;
145 const auto& aabb2 = ci + 2 < N ? node.getAABB(ci + 2) : emptyAABB;
146 const auto& aabb3 = ci + 3 < N ? node.getAABB(ci + 3) : emptyAABB;
147 const auto& aabb4 = ci + 4 < N ? node.getAABB(ci + 4) : emptyAABB;
148 const auto& aabb5 = ci + 5 < N ? node.getAABB(ci + 5) : emptyAABB;
149 const auto& aabb6 = ci + 6 < N ? node.getAABB(ci + 6) : emptyAABB;
150 const auto& aabb7 = ci + 7 < N ? node.getAABB(ci + 7) : emptyAABB;
// 4-lane gather variant: component di of the min (then max) vertices of
// aabb0..aabb3 is packed into one 128-bit register and stored at batch index
// ci / 4. _mm_setr_ps places its first argument in the lowest lane, so lane
// order matches AABB order.
155 m_aabbMins[di][ci / 4] = _mm_setr_ps(
156 aabb0.getMinVertex()[di],
157 aabb1.getMinVertex()[di],
158 aabb2.getMinVertex()[di],
159 aabb3.getMinVertex()[di]);
160 m_aabbMaxs[di][ci / 4] = _mm_setr_ps(
161 aabb0.getMaxVertex()[di],
162 aabb1.getMaxVertex()[di],
163 aabb2.getMaxVertex()[di],
164 aabb3.getMaxVertex()[di]);
// 8-lane gather variant: same packing for aabb0..aabb7 into one 256-bit
// register, stored at batch index ci / 8.
170 m_aabbMins[di][ci / 8] = _mm256_setr_ps(
171 aabb0.getMinVertex()[di],
172 aabb1.getMinVertex()[di],
173 aabb2.getMinVertex()[di],
174 aabb3.getMinVertex()[di],
175 aabb4.getMinVertex()[di],
176 aabb5.getMinVertex()[di],
177 aabb6.getMinVertex()[di],
178 aabb7.getMinVertex()[di]);
179 m_aabbMaxs[di][ci / 8] = _mm256_setr_ps(
180 aabb0.getMaxVertex()[di],
181 aabb1.getMaxVertex()[di],
182 aabb2.getMaxVertex()[di],
183 aabb3.getMaxVertex()[di],
184 aabb4.getMaxVertex()[di],
185 aabb5.getMaxVertex()[di],
186 aabb6.getMaxVertex()[di],
187 aabb7.getMaxVertex()[di]);
// 4-lane direct-load variant: reads four consecutive floats starting at
// element ci of the per-axis min/max arrays exposed by `node`. _mm_load_ps
// requires a 16-byte-aligned address, which is presumably what the assertion
// on SOA_VIEW_ALIGNMENT guards (it also relies on ci being a multiple of 4).
// NOTE(review): unlike the gather variants, nothing visible here substitutes
// emptyAABB for lanes at or beyond N -- presumably the per-axis storage is
// padded to a whole batch; confirm.
196 PH_ASSERT_GE(node.SOA_VIEW_ALIGNMENT, 16);
197 m_aabbMins[di][ci / 4] = _mm_load_ps(&(node.getMinVerticesOnAxis(di)[ci]));
198 m_aabbMaxs[di][ci / 4] = _mm_load_ps(&(node.getMaxVerticesOnAxis(di)[ci]));
// 8-lane direct-load variant: same as above with eight floats per load;
// _mm256_load_ps requires a 32-byte-aligned address, hence the stricter
// assertion.
204 PH_ASSERT_GE(node.SOA_VIEW_ALIGNMENT, 32);
205 m_aabbMins[di][ci / 8] = _mm256_load_ps(&(node.getMinVerticesOnAxis(di)[ci]));
206 m_aabbMaxs[di][ci / 8] = _mm256_load_ps(&(node.getMaxVerticesOnAxis(di)[ci]));
// For every axis di and every batch bi, evaluates
//   (aabbMin - segmentOrigin) * rcpSegmentDir  and
//   (aabbMax - segmentOrigin) * rcpSegmentDir
// per lane, then narrows the running interval: m_aabbMinTs[bi] is raised to
// the larger entry value and m_aabbMaxTs[bi] is lowered to the smaller exit
// value. This has the shape of a slab-style segment/AABB interval test.
// NOTE(review): the declarations of t1 / t2 and the selection between the
// 128-bit and 256-bit variants fall on lines missing from this excerpt. The
// two products in each variant are presumably the initializers of t1 and t2,
// m_rcpSegmentDirs presumably holds reciprocal direction components, and B is
// presumably the batch count -- confirm against the full file.
236 for(std::size_t di = 0; di < 3; ++di)
238 for(std::size_t bi = 0; bi < B; ++bi)
// 128-bit (4-lane) variant.
244 _mm_mul_ps(_mm_sub_ps(m_aabbMins[di][bi], m_segmentOrigins[di]), m_rcpSegmentDirs[di]);
246 _mm_mul_ps(_mm_sub_ps(m_aabbMaxs[di][bi], m_segmentOrigins[di]), m_rcpSegmentDirs[di]);
// Order the two plane crossings per lane so minT <= maxT regardless of the
// sign of the direction component.
248 const __m128 minT = _mm_min_ps(t1, t2);
249 const __m128 maxT = _mm_max_ps(t1, t2);
// Operand order matters: the SSE min/max instructions return the second
// operand when either input is NaN, so a NaN in minT / maxT leaves the
// previously accumulated bound unchanged.
252 m_aabbMinTs[bi] = _mm_max_ps(minT, m_aabbMinTs[bi]);
255 m_aabbMaxTs[bi] = _mm_min_ps(maxT, m_aabbMaxTs[bi]);
// 256-bit (8-lane) variant of the same computation.
262 _mm256_mul_ps(_mm256_sub_ps(m_aabbMins[di][bi], m_segmentOrigins[di]), m_rcpSegmentDirs[di]);
264 _mm256_mul_ps(_mm256_sub_ps(m_aabbMaxs[di][bi], m_segmentOrigins[di]), m_rcpSegmentDirs[di]);
266 const __m256 minT = _mm256_min_ps(t1, t2);
267 const __m256 maxT = _mm256_max_ps(t1, t2);
// Same second-operand-on-NaN behavior as the 128-bit variant.
270 m_aabbMinTs[bi] = _mm256_max_ps(minT, m_aabbMinTs[bi]);
273 m_aabbMaxTs[bi] = _mm256_min_ps(maxT, m_aabbMaxTs[bi]);