Mandelbrot using GPU and two CPU variants

Calculate the Mandelbrot set, display it in a Tkinter window. You can choose to use the GPU (ElementwiseKernel) for the calculation or two numpy CPU solutions. Timing results are shown in the code.

ian@ianozsvald.com

License of this example:

GPL

Date:

July 2010

PyCUDA version:

0.94

   1 # Mandelbrot calculate using GPU, Serial numpy and faster numpy
   2 # Use to show the speed difference between CPU and GPU calculations
   3 # ian@ianozsvald.com July 2010
   4 
   5 # Based on vegaseat's TKinter/numpy example code from 2006
   6 # http://www.daniweb.com/code/snippet216851.html#
   7 # with minor changes to move to numpy from the obsolete Numeric
   8 
   9 import sys
  10 import numpy as nm
  11 
  12 import Tkinter as tk
  13 import Image          # PIL
  14 import ImageTk        # PIL
  15 
  16 import pycuda.driver as drv
  17 import pycuda.tools
  18 import pycuda.autoinit
  19 import numpy
  20 from pycuda.compiler import SourceModule
  21 import pycuda.gpuarray as gpuarray
  22 
  23 # You can choose a calculation routine below (calculate_z), uncomment
  24 # one of the three lines to test the three variations
  25 # Speed notes are listed in the same place
  26 
  27 # set width and height of window, more pixels take longer to calculate
  28 w = 1000
  29 h = 1000
  30 
  31 from pycuda.elementwise import ElementwiseKernel
  32 complex_gpu = ElementwiseKernel(
  33         "pycuda::complex<float> *z, pycuda::complex<float> *q, int *iteration, int maxiter",
  34             "for (int n=0; n < maxiter; n++) {z[i] = (z[i]*z[i])+q[i]; if (abs(z[i]) > 2.0f) {iteration[i]=n; z[i] = pycuda::complex<float>(); q[i] = pycuda::complex<float>();};}",
  35         "complex5",
  36         preamble="#include <pycuda-complex.hpp>",)
  37 
  38 def calculate_z_gpu(q, maxiter, z):
  39     output = nm.resize(nm.array(0,), q.shape)
  40     q_gpu = gpuarray.to_gpu(q.astype(nm.complex64))
  41     z_gpu = gpuarray.to_gpu(z.astype(nm.complex64))
  42     iterations_gpu = gpuarray.to_gpu(output)
  43     # the for loop and complex calculations are all done on the GPU
  44     # we bring the iterations_gpu array back to determine pixel colours later
  45     complex_gpu(z_gpu, q_gpu, iterations_gpu, maxiter)
  46 
  47     iterations = iterations_gpu.get()
  48     return iterations
  49 
  50 
  51 def calculate_z_numpy_gpu(q, maxiter, z):
  52     """Calculate z using numpy on the GPU via gpuarray"""
  53     outputg = gpuarray.to_gpu(nm.resize(nm.array(0,), q.shape))
  54     zg = gpuarray.to_gpu(z.astype(nm.complex64))
  55     qg = gpuarray.to_gpu(q.astype(nm.complex64))
  56     # 2.0 as an array
  57     twosg = gpuarray.to_gpu(nm.array([2.0]*zg.size).astype(numpy.float32))
  58     # 0+0j as an array
  59     cmplx0sg = gpuarray.to_gpu(nm.array([0+0j]*zg.size).astype(nm.complex64))
  60     # for abs_zg > twosg result
  61     comparison_result = gpuarray.to_gpu(nm.array([False]*zg.size).astype(nm.bool))
  62     # we'll add 1 to iterg after each iteration
  63     iterg = gpuarray.to_gpu(nm.array([0]*zg.size).astype(nm.int32))
  64 
  65     for iter in range(maxiter):
  66         zg = zg*zg + qg
  67 
  68         # abs returns a complex (rather than a float) from the complex
  69         # input where the real component is the absolute value (which
  70         # looks like a bug) so I take the .real after abs()
  71         abs_zg = abs(zg).real
  72 
  73         comparison_result = abs_zg > twosg
  74         qg = gpuarray.if_positive(comparison_result, cmplx0sg, qg)
  75         zg = gpuarray.if_positive(comparison_result, cmplx0sg, zg)
  76         outputg = gpuarray.if_positive(comparison_result, iterg, outputg)
  77         iterg = iterg + 1
  78     output = outputg.get()
  79     return output
  80 
  81 
  82 def calculate_z_numpy(q, maxiter, z):
  83     # calculate z using numpy, this is the original
  84     # routine from vegaseat's URL
  85     # NOTE this routine was faster using a default of double-precision complex nbrs
  86     # rather than the current single precision
  87     output = nm.resize(nm.array(0,), q.shape)
  88     for iter in range(maxiter):
  89         z = z*z + q
  90         done = nm.greater(abs(z), 2.0)
  91         q = nm.where(done,0+0j, q)
  92         z = nm.where(done,0+0j, z)
  93         output = nm.where(done, iter, output)
  94     return output
  95 
  96 def calculate_z_serial(q, maxiter, z):
  97     # calculate z using pure python with numpy arrays
  98     # this routine unrolls calculate_z_numpy as an intermediate
  99     # step to the creation of calculate_z_gpu
 100     # it runs slower than calculate_z_numpy
 101     output = nm.resize(nm.array(0,), q.shape)
 102     for i in range(len(q)):
 103         if i % 100 == 0:
 104             # print out some progress info since it is so slow...
 105             print "%0.2f%% complete" % (1.0/len(q) * i * 100)
 106         for iter in range(maxiter):
 107             z[i] = z[i]*z[i] + q[i]
 108             if abs(z[i]) > 2.0:
 109                 q[i] = 0+0j
 110                 z[i] = 0+0j
 111                 output[i] = iter
 112     return output
 113 
 114 
 115 show_instructions = False
 116 if len(sys.argv) == 1:
 117     show_instructions = True
 118 
 119 if len(sys.argv) > 1:
 120     if sys.argv[1] not in ['gpu', 'gpuarray', 'numpy', 'python']:
 121         show_instructions = True
 122 
 123 if show_instructions:
 124     print "Usage: python mandelbrot.py [gpu|gpuarray|numpy|python]"
 125     print "Where:"
 126     print " gpu is a pure CUDA solution on the GPU"
 127     print " gpuarray uses a numpy-like CUDA wrapper in Python on the GPU"
 128     print " numpy is a pure Numpy (C-based) solution on the CPU"
 129     print " python is a pure Python solution on the CPU with numpy arrays"
 130     sys.exit(0)
 131 
 132 routine = {'gpuarray':calculate_z_numpy_gpu,
 133             'gpu':calculate_z_gpu,
 134             'numpy':calculate_z_numpy,
 135             'python':calculate_z_serial}
 136 
 137 calculate_z = routine[sys.argv[1]]
 138 ##if sys.argv[1] == 'python':
 139 #    import psyco
 140 #    psyco.full()
 141 
 142 # Using a WinXP Intel Core2 Duo 2.66GHz CPU (1 CPU used)
 143 # with a 9800GT GPU I get the following timings (smaller is better).
 144 # With 200x200 problem with max iterations set at 300:
 145 # calculate_z_gpu: 0.03s
 146 # calculate_z_serial: 8.7s
 147 # calculate_z_numpy: 0.3s
 148 #
 149 # Using WinXP Intel 2.9GHz CPU (1 CPU used)
 150 # with a GTX 480 GPU I get the following using 1000x1000 plot with 1000 max iterations:
 151 # gpu: 0.07s
 152 # gpuarray: 3.4s
 153 # numpy: 43.4s
 154 # python (serial): 1605.6s
 155 
 156 class Mandelbrot(object):
 157     def __init__(self):
 158         # create window
 159         self.root = tk.Tk()
 160         self.root.title("Mandelbrot Set")
 161         self.create_image()
 162         self.create_label()
 163         # start event loop
 164         self.root.mainloop()
 165 
 166 
 167     def draw(self, x1, x2, y1, y2, maxiter=300):
 168         # draw the Mandelbrot set, from numpy example
 169         xx = nm.arange(x1, x2, (x2-x1)/w*2)
 170         yy = nm.arange(y2, y1, (y1-y2)/h*2) * 1j
 171         # force yy, q and z to use 32 bit floats rather than
 172         # the default 64 doubles for nm.complex for consistency with CUDA
 173         yy = yy.astype(nm.complex64)
 174         q = nm.ravel(xx+yy[:, nm.newaxis]).astype(nm.complex64)
 175         z = nm.zeros(q.shape, nm.complex64)
 176 
 177         start_main = drv.Event()
 178         end_main  = drv.Event()
 179         start_main.record()
 180         output = calculate_z(q, maxiter, z)
 181 
 182         end_main.record()
 183         end_main.synchronize()
 184         secs = start_main.time_till(end_main)*1e-3
 185         print "Main took", secs
 186 
 187         output = (output + (256*output) + (256**2)*output) * 8
 188         # convert output to a string
 189         self.mandel = output.tostring()
 190 
 191     def create_image(self):
 192         """"
 193         create the image from the draw() string
 194         """
 195         self.im = Image.new("RGB", (w/2, h/2))
 196         # you can experiment with these x and y ranges
 197         self.draw(-2.13, 0.77, -1.3, 1.3, 1000)
 198         self.im.fromstring(self.mandel, "raw", "RGBX", 0, -1)
 199 
 200     def create_label(self):
 201         # put the image on a label widget
 202         self.image = ImageTk.PhotoImage(self.im)
 203         self.label = tk.Label(self.root, image=self.image)
 204         self.label.pack()
 205 
 206 # test the class
 207 if __name__ == '__main__':
 208     test = Mandelbrot()
 209 

PyCuda/Examples/Mandelbrot (last edited 2010-07-28 15:32:41 by AndreasKloeckner)