1 # prepared invocations and structures -----------------------------------------
   2 
   3 import pycuda.driver as cuda
   4 import pycuda.autoinit
   5 import numpy
   6 from pycuda.compiler import SourceModule
   7 
   8 class DoubleOpStruct:
   9     mem_size = 8 + numpy.intp(0).nbytes
  10     def __init__(self, array, struct_arr_ptr):
  11         self.data = cuda.to_device(array)
  12         self.shape, self.dtype = array.shape, array.dtype
  13         cuda.memcpy_htod(int(struct_arr_ptr), numpy.getbuffer(numpy.int32(array.size)))
  14         cuda.memcpy_htod(int(struct_arr_ptr) + 8, numpy.getbuffer(numpy.intp(int(self.data))))
  15 
  16     def __str__(self):
  17         return str(cuda.from_device(self.data, self.shape, self.dtype))
  18 
  19 #pointer to both datasets
  20 struct_arr = cuda.mem_alloc(2 * DoubleOpStruct.mem_size)
  21 
  22 #pointer to the second dataset
  23 do2_ptr = int(struct_arr) + DoubleOpStruct.mem_size
  24 
  25 array1 = DoubleOpStruct(numpy.array([1, 2, 3], dtype=numpy.float32), struct_arr)
  26 array2 = DoubleOpStruct(numpy.array([0, 4], dtype=numpy.float32), do2_ptr)
  27 
  28 print "original arrays"
  29 print array1
  30 print array2
  31 
  32 mod = SourceModule("""
  33     struct DoubleOperation {
  34         int datalen, __padding; // so 64-bit ptrs can be aligned
  35         float *ptr;
  36     };
  37 
  38 
  39     __global__ void double_array(DoubleOperation *a) 
  40     {
  41         a = a + blockIdx.x;
  42         for (int idx = threadIdx.x; idx < a->datalen; idx += blockDim.x) 
  43         {
  44             float *a_ptr = a->ptr;
  45             a_ptr[idx] *= 2;
  46         }
  47     }
  48     """)
  49 func = mod.get_function("double_array")
  50 func(struct_arr, block = (32, 1, 1), grid=(2, 1))
  51 
  52 print "doubled arrays"
  53 print array1
  54 print array2
  55 
  56 func(numpy.intp(do2_ptr), block = (32, 1, 1), grid=(1, 1))
  57 print "doubled second only"
  58 print array1
  59 print array2
  60 
  61 #preparing function
  62 # Passing block or shared not equal to None is djprecated as of version 2011.1.
  63 #func.prepare("P", block=(32, 1, 1))
  64 func.prepare("P")
  65 #calling function
  66 #func.prepared_call((2, 1), struct_arr)
  67 #prepared_call(grid, block, pointer)
  68 func.prepared_call((2,1),(32,1,1), struct_arr)
  69 
  70 print "doubled again"
  71 print array1
  72 print array2
  73 
  74 func.prepared_call((1, 1),(32,1,1), do2_ptr)
  75 
  76 print "doubled second only again"
  77 print array1
  78 print array2


CategoryPyCuda

PyCuda/Examples/DemoStruct (last edited 2014-12-10 11:22:09 by ::ffff:82)