1
2
3 import pycuda.driver as cuda
4 import pycuda.autoinit
5 import numpy
6 from pycuda.compiler import SourceModule
7
class DoubleOpStruct:
    """Host-side handle for one ``DoubleOperation`` record stored on the device.

    Constructing an instance uploads a NumPy array to the GPU and writes a
    small record describing it (element count plus device pointer) at a
    caller-supplied device address.  The record layout mirrors the
    ``DoubleOperation`` struct declared in the kernel source of this file.
    """

    # Bytes per record: 8 for the 32-bit element count together with its
    # padding, followed by one pointer-sized field.
    mem_size = 8 + numpy.intp(0).nbytes

    def __init__(self, array, struct_arr_ptr):
        """Upload *array* and fill in the record located at *struct_arr_ptr*.

        Args:
            array: NumPy array to copy to the device; its ``shape`` and
                ``dtype`` are remembered so it can be read back later.
            struct_arr_ptr: Device address (anything ``int()`` accepts) of
                the ``mem_size``-byte record to populate.
        """
        self.data = cuda.to_device(array)
        self.shape, self.dtype = array.shape, array.dtype

        record_addr = int(struct_arr_ptr)
        # Offset 0: element count as a 32-bit int.  The 4 bytes after it are
        # padding and are left untouched.
        cuda.memcpy_htod(record_addr, numpy.int32(array.size))
        # Offset 8: device address of the payload.  Addresses are unsigned,
        # so pack with uintp; a signed intp raises for values at or above the
        # signed midpoint of the pointer range.
        cuda.memcpy_htod(record_addr + 8, numpy.uintp(int(self.data)))

    def __str__(self):
        """Return the string form of the array as currently stored on the device."""
        return str(cuda.from_device(self.data, self.shape, self.dtype))
18
# Reserve device memory for two consecutive DoubleOperation records and
# compute the address of the second one.
struct_arr = cuda.mem_alloc(2 * DoubleOpStruct.mem_size)
do2_ptr = int(struct_arr) + DoubleOpStruct.mem_size

# Each constructor call uploads its array and fills in one record.
array1 = DoubleOpStruct(numpy.array([1, 2, 3], dtype=numpy.float32), struct_arr)
array2 = DoubleOpStruct(numpy.array([0, 4], dtype=numpy.float32), do2_ptr)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("original arrays")
print(array1)
print(array2)

# Kernel: block number blockIdx.x selects record a[blockIdx.x]; each thread
# starts at element threadIdx.x and advances by blockDim.x, doubling every
# element it visits.
mod = SourceModule("""
    struct DoubleOperation {
        int datalen, __padding; // so 64-bit ptrs can be aligned
        float *ptr;
    };


    __global__ void double_array(DoubleOperation *a)
    {
        a = a + blockIdx.x;
        for (int idx = threadIdx.x; idx < a->datalen; idx += blockDim.x)
        {
            float *a_ptr = a->ptr;
            a_ptr[idx] *= 2;
        }
    }
    """)
func = mod.get_function("double_array")

# Two blocks -> both records are processed.
func(struct_arr, block=(32, 1, 1), grid=(2, 1))

print("doubled arrays")
print(array1)
print(array2)

# One block starting at the second record -> only array2 is processed.
# The raw address is passed as an unsigned pointer-sized integer.
func(numpy.uintp(do2_ptr), block=(32, 1, 1), grid=(1, 1))
print("doubled second only")
print(array1)
print(array2)

# Prepared invocation: the argument format is declared once ("P" = pointer),
# and the block shape is supplied per call as prepared_call(grid, block, *args).
# The older form, prepare(..., block=...) with prepared_call(grid, *args),
# was deprecated in PyCUDA 2011.1.
block_shape = (32, 1, 1)
func.prepare("P")
func.prepared_call((2, 1), block_shape, struct_arr)

print("doubled again")
print(array1)
print(array2)

func.prepared_call((1, 1), block_shape, do2_ptr)

print("doubled second only again")
print(array1)
print(array2)
70