#!python
import pycuda.driver as drv
import pycuda.tools
import pycuda.autoinit
import numpy
import numpy.linalg as la
from pycuda.compiler import SourceModule
# Compute the dot product of two small integer vectors on the GPU.
#
# Each CUDA thread multiplies one pair of elements and atomically adds the
# product into a single int accumulator that the host reads back afterwards.
mod = SourceModule("""
__global__ void dot(int *result, const int *a, const int *b)
{
    const int i = threadIdx.x;
    // All threads accumulate into the same address, so the update must be
    // atomic; a plain "*result += ..." would be a data race.
    atomicAdd(result, a[i] * b[i]);
}
""")
multiply_them = mod.get_function("dot")

# The kernel reads 32-bit ints, so force int32 rather than relying on numpy's
# platform-dependent default integer width.
a = numpy.random.randint(1, 20, 5).astype(numpy.int32)
b = numpy.random.randint(1, 20, 5).astype(numpy.int32)

# The accumulator must be a buffer object (not a Python int) and must start at
# zero on the device, hence InOut (copy to device, then back) instead of Out.
result_buf = numpy.zeros(1, dtype=numpy.int32)

multiply_them(
    drv.InOut(result_buf), drv.In(a), drv.In(b),
    # One thread per element, in a single block.
    block=(int(a.size), 1, 1))

result = int(result_buf[0])
print(result)