1
2
3
4
5 import pycuda.autoinit
6 import pycuda.driver as drv
7 import pycuda.curandom as curandom
8 import numpy
9 import numpy.linalg as la
10 from pytools import Table
11
12
13
14
def _mean_event_seconds(count, work):
    """Return the mean number of seconds one call of *work* takes.

    *work* is a zero-argument callable.  It is invoked *count* times
    between a pair of CUDA events, and the elapsed time between the two
    events is divided by *count*.
    """
    start = drv.Event()
    end = drv.Event()

    start.record()
    for _ in range(count):
        work()
    end.record()
    # Wait for the end event to complete before querying the elapsed time.
    end.synchronize()

    # The original code treats time_till() as milliseconds (scales by 1e-3
    # to get seconds); that convention is kept here.
    return start.time_till(end) * 1e-3 / count


def main():
    """Benchmark random-number generation on the GPU against the CPU.

    For every power-of-two size from 2**10 up to 2**24 elements, time
    ``curandom.rand`` and ``numpy.random.rand(...).astype(float32)``, then
    print a table with the mean time per call, the elements generated per
    second, and the ratio of the GPU rate to the CPU rate.
    """
    results = []

    for power in range(10, 25):
        size = 1 << power
        print(size)

        # Use fewer repetitions for the largest arrays.
        count = 100 if power > 20 else 1000

        gpu_time = _mean_event_seconds(
            count, lambda: curandom.rand((size,)))
        # The CPU case is timed with the same CUDA-event mechanism, so both
        # columns are measured the same way.
        cpu_time = _mean_event_seconds(
            count, lambda: numpy.random.rand(size).astype(numpy.float32))

        results.append((size, gpu_time, cpu_time))

    tbl = Table()
    tbl.add_row(("Size", "Time GPU", "Size/Time GPU", "Time CPU","Size/Time CPU","GPU vs CPU speedup"))
    for size, gpu_time, cpu_time in results:
        # Throughput in elements per second for each device.
        gpu_rate = size / gpu_time
        cpu_rate = size / cpu_time
        tbl.add_row((size, gpu_time, gpu_rate, cpu_time, cpu_rate,
                     gpu_rate / cpu_rate))
    print(tbl)
88
89
# Script entry point: run the benchmark when this file is executed directly,
# but not when it is imported as a module.
if __name__ == "__main__":
    main()