Kevin Beason's Homepage > Software > smallpt

smallpt: Global Illumination in 99 lines of C++

Kevin Beason / kevin.beason [at] gmail.com

Cornell box image

smallpt is a global illumination renderer. It is 99 lines of C++, is open source, and renders the above scene using unbiased Monte Carlo path tracing (click for full size).

Features

  • Global illumination via unbiased Monte Carlo path tracing
  • 99 lines of 72-column (or less) open source C++ code
  • Multi-threading using OpenMP
  • Soft shadows from diffuse luminaire
  • Specular, Diffuse, and Glass BRDFs
  • Antialiasing via super-sampling with importance-sampled tent distribution, and 2x2 subpixels
  • Ray-sphere intersection
  • Modified Cornell box scene description
  • Cosine importance sampling of the hemisphere for diffuse reflection
  • Russian roulette for path termination
  • Russian roulette and splitting for selecting reflection and/or refraction for glass BRDF
  • With minor changes compiles to a 4 KB binary (less than 4096 bytes)

    Added 11/11/2010:

  • Modifications including explicit light sampling and non-branching ray tree.
  • Ports to CUDA and BSGP featuring interactive display and scene editing.

    Added 3/12/2012:

  • Presentation slides explaining each line, by David Cline

Source

Here is the complete source code for the entire renderer, including scene description:

  1. #include <math.h> // smallpt, a Path Tracer by Kevin Beason, 2008
  2. #include <stdlib.h> // Make : g++ -O3 -fopenmp smallpt.cpp -o smallpt
  3. #include <stdio.h> // Remove "-fopenmp" for g++ version < 4.2
  4. struct Vec { // Usage: time ./smallpt 5000 && xv image.ppm
  5. double x, y, z; // position, also color (r,g,b)
  6. Vec(double x_=0, double y_=0, double z_=0){ x=x_; y=y_; z=z_; }
  7. Vec operator+(const Vec &b) const { return Vec(x+b.x,y+b.y,z+b.z); }
  8. Vec operator-(const Vec &b) const { return Vec(x-b.x,y-b.y,z-b.z); }
  9. Vec operator*(double b) const { return Vec(x*b,y*b,z*b); }
  10. Vec mult(const Vec &b) const { return Vec(x*b.x,y*b.y,z*b.z); }
  11. Vec& norm(){ return *this = *this * (1/sqrt(x*x+y*y+z*z)); }
  12. double dot(const Vec &b) const { return x*b.x+y*b.y+z*b.z; } // cross:
  13. Vec operator%(Vec&b){return Vec(y*b.z-z*b.y,z*b.x-x*b.z,x*b.y-y*b.x);}
  14. };
  15. struct Ray { Vec o, d; Ray(Vec o_, Vec d_) : o(o_), d(d_) {} };
  16. enum Refl_t { DIFF, SPEC, REFR }; // material types, used in radiance()
  17. struct Sphere {
  18. double rad; // radius
  19. Vec p, e, c; // position, emission, color
  20. Refl_t refl; // reflection type (DIFFuse, SPECular, REFRactive)
  21. Sphere(double rad_, Vec p_, Vec e_, Vec c_, Refl_t refl_):
  22. rad(rad_), p(p_), e(e_), c(c_), refl(refl_) {}
  23. double intersect(const Ray &r) const { // returns distance, 0 if nohit
  24. Vec op = p-r.o; // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0
  25. double t, eps=1e-4, b=op.dot(r.d), det=b*b-op.dot(op)+rad*rad;
  26. if (det<0) return 0; else det=sqrt(det);
  27. return (t=b-det)>eps ? t : ((t=b+det)>eps ? t : 0);
  28. }
  29. };
  30. Sphere spheres[] = {//Scene: radius, position, emission, color, material
  31. Sphere(1e5, Vec( 1e5+1,40.8,81.6), Vec(),Vec(.75,.25,.25),DIFF),//Left
  32. Sphere(1e5, Vec(-1e5+99,40.8,81.6),Vec(),Vec(.25,.25,.75),DIFF),//Rght
  33. Sphere(1e5, Vec(50,40.8, 1e5), Vec(),Vec(.75,.75,.75),DIFF),//Back
  34. Sphere(1e5, Vec(50,40.8,-1e5+170), Vec(),Vec(), DIFF),//Frnt
  35. Sphere(1e5, Vec(50, 1e5, 81.6), Vec(),Vec(.75,.75,.75),DIFF),//Botm
  36. Sphere(1e5, Vec(50,-1e5+81.6,81.6),Vec(),Vec(.75,.75,.75),DIFF),//Top
  37. Sphere(16.5,Vec(27,16.5,47), Vec(),Vec(1,1,1)*.999, SPEC),//Mirr
  38. Sphere(16.5,Vec(73,16.5,78), Vec(),Vec(1,1,1)*.999, REFR),//Glas
  39. Sphere(600, Vec(50,681.6-.27,81.6),Vec(12,12,12), Vec(), DIFF) //Lite
  40. };
  41. inline double clamp(double x){ return x<0 ? 0 : x>1 ? 1 : x; }
  42. inline int toInt(double x){ return int(pow(clamp(x),1/2.2)*255+.5); }
  43. inline bool intersect(const Ray &r, double &t, int &id){
  44. double n=sizeof(spheres)/sizeof(Sphere), d, inf=t=1e20;
  45. for(int i=int(n);i--;) if((d=spheres[i].intersect(r))&&d<t){t=d;id=i;}
  46. return t<inf;
  47. }
  48. Vec radiance(const Ray &r, int depth, unsigned short *Xi){
  49. double t; // distance to intersection
  50. int id=0; // id of intersected object
  51. if (!intersect(r, t, id)) return Vec(); // if miss, return black
  52. const Sphere &obj = spheres[id]; // the hit object
  53. Vec x=r.o+r.d*t, n=(x-obj.p).norm(), nl=n.dot(r.d)<0?n:n*-1, f=obj.c;
  54. double p = f.x>f.y && f.x>f.z ? f.x : f.y>f.z ? f.y : f.z; // max refl
  55. if (++depth>5) if (erand48(Xi)<p) f=f*(1/p); else return obj.e; //R.R.
  56. if (obj.refl == DIFF){ // Ideal DIFFUSE reflection
  57. double r1=2*M_PI*erand48(Xi), r2=erand48(Xi), r2s=sqrt(r2);
  58. Vec w=nl, u=((fabs(w.x)>.1?Vec(0,1):Vec(1))%w).norm(), v=w%u;
  59. Vec d = (u*cos(r1)*r2s + v*sin(r1)*r2s + w*sqrt(1-r2)).norm();
  60. return obj.e + f.mult(radiance(Ray(x,d),depth,Xi));
  61. } else if (obj.refl == SPEC) // Ideal SPECULAR reflection
  62. return obj.e + f.mult(radiance(Ray(x,r.d-n*2*n.dot(r.d)),depth,Xi));
  63. Ray reflRay(x, r.d-n*2*n.dot(r.d)); // Ideal dielectric REFRACTION
  64. bool into = n.dot(nl)>0; // Ray from outside going in?
  65. double nc=1, nt=1.5, nnt=into?nc/nt:nt/nc, ddn=r.d.dot(nl), cos2t;
  66. if ((cos2t=1-nnt*nnt*(1-ddn*ddn))<0) // Total internal reflection
  67. return obj.e + f.mult(radiance(reflRay,depth,Xi));
  68. Vec tdir = (r.d*nnt - n*((into?1:-1)*(ddn*nnt+sqrt(cos2t)))).norm();
  69. double a=nt-nc, b=nt+nc, R0=a*a/(b*b), c = 1-(into?-ddn:tdir.dot(n));
  70. double Re=R0+(1-R0)*c*c*c*c*c,Tr=1-Re,P=.25+.5*Re,RP=Re/P,TP=Tr/(1-P);
  71. return obj.e + f.mult(depth>2 ? (erand48(Xi)<P ? // Russian roulette
  72. radiance(reflRay,depth,Xi)*RP:radiance(Ray(x,tdir),depth,Xi)*TP) :
  73. radiance(reflRay,depth,Xi)*Re+radiance(Ray(x,tdir),depth,Xi)*Tr);
  74. }
  75. int main(int argc, char *argv[]){
  76. int w=1024, h=768, samps = argc==2 ? atoi(argv[1])/4 : 1; // # samples
  77. Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir
  78. Vec cx=Vec(w*.5135/h), cy=(cx%cam.d).norm()*.5135, r, *c=new Vec[w*h];
  79. #pragma omp parallel for schedule(dynamic, 1) private(r) // OpenMP
  80. for (int y=0; y<h; y++){ // Loop over image rows
  81. fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1));
  82. for (unsigned short x=0, Xi[3]={0,0,y*y*y}; x<w; x++) // Loop cols
  83. for (int sy=0, i=(h-y-1)*w+x; sy<2; sy++) // 2x2 subpixel rows
  84. for (int sx=0; sx<2; sx++, r=Vec()){ // 2x2 subpixel cols
  85. for (int s=0; s<samps; s++){
  86. double r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
  87. double r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
  88. Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
  89. cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
  90. r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
  91. } // Camera rays are pushed ^^^^^ forward to start in interior
  92. c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25;
  93. }
  94. }
  95. FILE *f = fopen("image.ppm", "w"); // Write image to PPM file.
  96. fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  97. for (int i=0; i<w*h; i++)
  98. fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
  99. }

Other formats

  • smallpt.cpp (plain text)
  • smallpt4k.cpp (4 KB executable version)
  • smallpt.tar.gz (982 KB) Full distribution. Includes the above source files, Makefile, README, LICENSE, and resulting rendered image (losslessly converted to PNG).

Usage

g++ -O3 -fopenmp smallpt.cpp -o smallpt 
time ./smallpt 5000
display image.ppm

The argument to smallpt ("5000") is the number of samples per pixel, and must be greater than 4. If you don't have ImageMagick's display command available, you can use other viewers such as gthumb, gwenview, xv, gimp, etc.

GCC 4.2 or newer is required for multi-threading, however older versions of gcc will still work without threading. Remove the "-fopenmp" flag to disable threading support.

smallpt compiles with GCC 4.2 down to a 16 KB executable and produces a 1024x768 resolution image. Rendering using 5000 paths per pixel takes 2.1 hours on an Intel Core 2 Quad machine.

A slightly modified version is provided which compiles to a 4 KB (4049 bytes) compressed executable. Do "make smallpt4k" to build. See smallpt4k.cpp and the Makefile in the full distribution for more information. Note sstrip and p7zip are required.

If self assembly is used, the binary is only 2.7 KB (2666 bytes). Do "make smallptSA" to build.

Details


8 spp
13 sec
40 spp
63 sec
200 spp
5 min
1000 spp
25 min
5000 spp
124 min
25000 spp
10.3 hrs
Timings and resulting images for different numbers of samples per pixel (spp) on a 2.4 GHz Intel Core 2 Quad CPU using 4 threads.

The box scene is constructed out of nine very large spheres which overlap. The camera is very close to their surface so they appear flat, yet each wall is actually the side of a sphere. The light is another sphere poking through the ceiling which is why it's round instead of the normal square. The black area visible in the mirror ball is the front of the box colored black to appear like empty space.

The image is computed by solving the rendering equation using numerical integration. The specific algorithm is Monte Carlo path tracing with Russian roulette for path termination. I highly recommend Shirley's and Jensen's execellent books (linked below) for explanations of these ideas. Due to size constraints and simplicity, explicit light sampling is not used, nor any ray intersection acceleration data structure.

Parallelism is achieved using an OpenMP #pragma to dynamically allocate rows of the image to different threads, with a thread for each processor or core. The variable Xi is used to store the state of the random number generator erand48(), and is seeded using an arbitrary function of the row number to decorrelate (at least visually) the sequences from row-to-row. In this way the sequences are deterministic and consistent from run to run, and independent of which thread is executing and in what order the rows are executed.

Anti-aliasing is done using supersampling, which removes all the jaggies except around the light. These are handled by using 2x2 subpixels which are clamped and then averaged.

Instead of fclose()ing the file, I exploit the C++ standard which calls return(0) implicitly, which in turn calls exit(0), which flushes and closes open files.

More Scenes

These images were all generated by smallpt by replacing the Cornell box scene definition with varying combinations of 4 to 22 spheres.


License

The source is released under the MIT license, which is open and compatible with GPL. See LICENSE.txt in the distribution for more information.

Modifications

  • explicit.cpp Huge speedup, especially for small lights. Adds explicit light sampling with 23 additional lines of code and a small function signature change. Produces this image in 10 seconds on a Intel Core i7 920 quad-core CPU using 16 samples per pixel.
  • forward.cpp Revision of radiance() function that removes all recursion and uses only a simple loop and no path branching. That is, the ray tree is always one ray wide.
  • Single precision float support by Christopher Kulla. Mostly fixes crazy artifacts that appear when single precision floats are used, by avoiding self intersection. There is still a light leak at at the top of the scene though.

Ports

More Information

Last update: 11/5/2014
Initial version: 4/29/2007

Comments

comments powered by Disqus
Kevin Beason / kevin.beason [at] gmail.com Home