"""Minimal PyCUDA example: multiply two float32 vectors element-wise on the GPU."""

import pycuda.driver as drv
import pycuda.tools
import pycuda.autoinit  # imported for its side effect: sets up the CUDA context
import numpy
import numpy.linalg as la
from pycuda.compiler import SourceModule

# Number of elements, and therefore the number of threads in the single block
# launched below. The kernel indexes only by threadIdx.x and has no bounds
# check, so the block size must match the array length exactly.
N = 400

# CUDA C source, compiled when the script runs.
mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
    const int i = threadIdx.x;
    dest[i] = a[i] * b[i];
}
""")

multiply_them = mod.get_function("multiply_them")

# The kernel takes float pointers, so the host arrays must be float32.
a = numpy.random.randn(N).astype(numpy.float32)
b = numpy.random.randn(N).astype(numpy.float32)

dest = numpy.zeros_like(a)

# drv.In / drv.Out wrap host arrays so PyCUDA handles the host<->device
# transfers around the launch (inputs copied in, `dest` copied back out).
multiply_them(
    drv.Out(dest), drv.In(a), drv.In(b),
    block=(N, 1, 1))

# Difference between the GPU result and the NumPy reference; expected to be
# all zeros when the kernel is correct. The parenthesised form works on both
# Python 2 and Python 3.
print(dest - a * b)