unroll hit loop

Zentrik · Sep 16, 2023 · 9cda50b
1 parent 2779da7
commit 9cda50b
Show file tree

Hide file tree

Showing 5 changed files with 16 additions and 25 deletions.
diff --git a/c++/colour.hpp b/c++/colour.hpp
@@ -4,7 +4,7 @@
 #include <iostream>
 
 void write_colour(std::ofstream& out, colour pixel_colour, int samples_per_pixel) {
-    float scale = 1.f / samples_per_pixel;
+    float scale = 1.f / (float)samples_per_pixel;
 
     float r = powf32(pixel_colour.x * scale, .5);
     float g = powf32(pixel_colour.y * scale, .5);

diff --git a/c++/hittable_list.hpp b/c++/hittable_list.hpp
@@ -72,6 +72,7 @@ class HittableList
         Vec8f tMinVec(t_min);
         Vec8ui curId(0, 1, 2, 3, 4, 5, 6, 7);
 
+        #pragma unroll 4
         for (int i = 0; i < (int)radius.size(); i++)
         {
             // load data for n spheres

diff --git a/c++/main.cpp b/c++/main.cpp
@@ -1,4 +1,4 @@
-// clang++-15 -std=c++20 c++/main.cpp -o c++/main -Wall -Ofast -ffast-math -fdenormal-fp-math=positive-zero -march=native -flto=full -ltbb
+// clang++-15 -std=c++20 c++/main.cpp -o c++/main -Wall -Wextra -Ofast -ffast-math -fdenormal-fp-math=positive-zero -march=native -flto=full -ltbb // -Wdouble-promotion -Wimplicit-int-float-conversion
 
 #include "settings.hpp"
 
@@ -11,7 +11,7 @@
 
 colour world_colour(ray r) {
     vec3 unit_direction = r.direction;
-    float t = 0.5 * (unit_direction.y + 1);
+    float t = 0.5f * (unit_direction.y + 1);
     return (1 - t) * colour(1, 1, 1) + t * colour(0.5, 0.7, 1);
 }
 
@@ -48,15 +48,15 @@ HittableList random_scene() {
     for (int a = -11; a < 11; a++) {
         for (int b = -11; b < 11; b++) {
             float choose_mat = random_float32(rng);
-            point3 center(a + 0.9 * random_float32(rng), 0.2, b + 0.9 * random_float32(rng));
+            point3 center(a + 0.9f * random_float32(rng), 0.2, b + 0.9f * random_float32(rng));
 
-            if (length(center - point3(4, 0.2, 0)) > 0.9) {
-                if (choose_mat < 0.8) {
+            if (length(center - point3(4, 0.2, 0)) > 0.9f) {
+                if (choose_mat < 0.8f) {
                     // diffuse
                     colour albedo = colour::random(rng) * colour::random(rng);
                     world.add(Sphere(center, 0.2, Material::Lambertian(albedo)));
                 }
-                else if (choose_mat < 0.95) {
+                else if (choose_mat < 0.95f) {
                     // metal
                     colour albedo = colour::random(rng) / 2 + vec3(.5);
                     float fuzz = random_float32(rng) / 2;
@@ -152,13 +152,13 @@ int main() {
     std::for_each(std::execution::par, jIterator.begin(), jIterator.end(),
         [&](int j)
         {
-            std::cout << "\rScanlines remaining: " << j << " " << std::flush;;
+            // std::cout << "\rScanlines remaining: " << j << " " << std::flush;;
             for (int i = 0; i < image_width; ++i) {
                 colour& pixel_colour = pixel[j][i];
 
                 for (int s = 0; s < samples_per_pixel; ++s) {
-                    float u = float(i + random_float32(rng)) / (image_width - 1);
-                    float v = float(j + random_float32(rng)) / (image_height - 1);
+                    float u = ((float)i + random_float32(rng)) / (image_width - 1);
+                    float v = ((float)j + random_float32(rng)) / (image_height - 1);
 
                     ray r = cam.get_ray(u, v, rng);
 

diff --git a/c++/material.hpp b/c++/material.hpp
@@ -45,14 +45,9 @@ vec3 dielectric(ray r_in, vec3 normal, float ior, RNG& rng) {
     float sinTheta = sqrt(1 - cosTheta*cosTheta);
     bool into = cosTheta > 0;
 
-    float ior_ratio;
-    if (into) {
-        ior_ratio = air_ior / ior;
-    } else {
-        ior_ratio = ior / air_ior;
-        normal *= -1;
-        cosTheta *= -1;
-    }
+    float ior_ratio = into ? air_ior / ior : ior / air_ior;
+    normal *= into ? 1 : -1;
+    cosTheta *= into ? 1 : -1;
 
     bool cannot_refract = (ior_ratio * sinTheta) > 1;
 

diff --git a/c++/vec3.hpp b/c++/vec3.hpp
@@ -3,17 +3,13 @@
 #include <cmath>
 #include <iostream>
 
-#ifndef _MSC_VER
-#include <boost/stacktrace.hpp>
-#endif
-
 class RNG;
 float random_float32(RNG& rng);
 float random_float32_minustoplus(RNG& rng);
 
 using std::sqrt;
 
-class vec3{
+class vec3 {
 public:
     union {
         float v[3];
@@ -78,8 +74,7 @@ class vec3{
     }
 
     inline bool approx_zero() const {
-        const float absolute_tolerance = 1e-8;
-        return (fabs((*this)[0]) < absolute_tolerance) && (fabs((*this)[1]) < absolute_tolerance) && (fabs((*this)[2]) < absolute_tolerance);
+        return (abs(x) + abs(y) + abs(z)) < 1e-2f;
     }
 };