1
2 import pycuda.autoinit
3 import pycuda.driver as drv
4 import pycuda.curandom as curandom
5 import numpy
6 import numpy.linalg as la
7 from pytools import Table
8
9
10
11
def _time_gpu(size, count):
    """Return the mean seconds per ``curandom.rand((size,))`` call.

    The ``count`` calls are bracketed by two CUDA events so the elapsed
    time is measured on the device, after all queued work has finished.
    """
    start = drv.Event()
    end = drv.Event()

    start.record()
    for _ in range(count):
        curandom.rand((size,))
    end.record()
    # Block until the device has reached the end event; only then is the
    # elapsed time between the two events defined.
    end.synchronize()

    # time_till() is scaled by 1e-3 to get seconds (milliseconds in).
    return start.time_till(end) * 1e-3 / count


def _time_cpu(size, count):
    """Return the mean seconds per NumPy float32 random array of ``size``.

    A host wall-clock timer is used here: CUDA events are timestamped
    when the device processes the record command, so they are not a
    reliable clock for work that runs purely on the CPU.
    """
    # Local import keeps the block self-contained; default_timer exists on
    # both Python 2 and Python 3.
    from timeit import default_timer

    begin = default_timer()
    for _ in range(count):
        numpy.random.rand(size).astype(numpy.float32)
    return (default_timer() - begin) / count


def main():
    """Benchmark GPU vs. CPU uniform random number generation.

    For every power-of-two size from 2**10 up to 2**24, time the creation
    of ``size`` float32 random numbers with ``pycuda.curandom.rand`` and
    with ``numpy.random.rand``, then print a table with the mean time per
    call, the throughput (elements per second) and the GPU/CPU speedup.
    """
    tbl = Table()
    tbl.add_row(("Size", "Time GPU", "Size/Time GPU",
                 "Time CPU", "Size/Time CPU", "GPU vs CPU speedup"))

    for power in range(10, 25):
        size = 1 << power
        print(size)

        # Fewer repetitions for the large sizes keep the total run time
        # reasonable.
        count = 100 if power > 20 else 1000

        gpu_time = _time_gpu(size, count)
        cpu_time = _time_cpu(size, count)

        # Throughput in generated elements per second.
        gpu_rate = size / gpu_time
        cpu_rate = size / cpu_time

        tbl.add_row((size, gpu_time, gpu_rate,
                     cpu_time, cpu_rate, gpu_rate / cpu_rate))

    print(tbl)
85
86
# Run the benchmark only when this file is executed as a script, so that
# importing the module does not start the benchmark run.
if __name__ == "__main__":
    main()
89