1 #! /usr/bin/env python
   2 # DO NOT USE THIS AS A BENCHMARK. See
   3 # http://documen.tician.de/pycuda/array.html#pycuda.curandom.rand
   4 
   5 import pycuda.autoinit
   6 import pycuda.driver as drv
   7 import pycuda.curandom as curandom
   8 import numpy
   9 import numpy.linalg as la
  10 from pytools import Table
  11 
  12 
  13 
  14 
  15 def main():
  16     import pycuda.gpuarray as gpuarray
  17 
  18     sizes = []
  19     times = []
  20     flops = []
  21     flopsCPU = []
  22     timesCPU = []
  23     
  24     for power in range(10, 25): # 24
  25         size = 1<<power
  26         print size
  27         sizes.append(size)
  28         a = gpuarray.zeros((size,), dtype=numpy.float32)
  29 
  30         if power > 20:
  31             count = 100
  32         else:
  33             count = 1000
  34 
  35         #start timer
  36         start = drv.Event()
  37         end = drv.Event()
  38         start.record()
  39 
  40         #cuda operation which fills the array with random numbers
  41         for i in range(count):
  42             curandom.rand((size, ))
  43             
  44         #stop timer
  45         end.record()
  46         end.synchronize()
  47         
  48         #calculate used time
  49         secs = start.time_till(end)*1e-3
  50 
  51         times.append(secs/count)
  52         flops.append(size)
  53 
  54         #cpu operations which fills teh array with random data
  55         a = numpy.array((size,), dtype=numpy.float32)
  56 
  57         #start timer
  58         start = drv.Event()
  59         end = drv.Event()
  60         start.record()
  61 
  62         #cpu operation which fills the array with random data        
  63         for i in range(count):
  64             numpy.random.rand(size).astype(numpy.float32)
  65 
  66         #stop timer
  67         end.record()
  68         end.synchronize()
  69         
  70         #calculate used time
  71         secs = start.time_till(end)*1e-3
  72 
  73         #add results to variable
  74         timesCPU.append(secs/count)
  75         flopsCPU.append(size)
  76             
  77             
  78     #calculate pseudo flops
  79     flops = [f/t for f, t in zip(flops,times)]
  80     flopsCPU = [f/t for f, t in zip(flopsCPU,timesCPU)]
  81 
  82     #print the data out
  83     tbl = Table()
  84     tbl.add_row(("Size", "Time GPU", "Size/Time GPU", "Time CPU","Size/Time CPU","GPU vs CPU speedup"))
  85     for s, t, f,tCpu,fCpu in zip(sizes, times, flops,timesCPU,flopsCPU):
  86         tbl.add_row((s,t,f,tCpu,fCpu,f/fCpu))
  87     print tbl
  88  
  89 
# Run the comparison only when this file is executed as a script, so that
# importing the module does not start the timing loops.
if __name__ == "__main__":
    main()