Using Thrust on a PyCUDA array

License of this example: Public Domain
Date: 2012-05-22
PyCUDA version: 2011.1
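
This example shows how to run a Thrust algorithm (here, thrust::sort) on data that lives in a PyCUDA GPUArray. It uses codepy to JIT-compile two pieces: a CUDA module, built with nvcc, containing a small wrapper around thrust::sort, and a Boost.Python host module whose entry point extracts the shape and raw device pointer from the GPUArray and passes them to that wrapper. Besides PyCUDA, the script needs codepy, Boost.Python, and a working nvcc toolchain.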

import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np

from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

#Make a host module, compiled for the CPU
host_mod = BoostPythonModule()

#Make a device module, compiled with NVCC
nvcc_mod = CudaModule(host_mod)

#Describe device module code
#NVCC includes
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
    ]
#Add includes to module
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function: wraps the raw device pointer and calls thrust::sort on it
nvcc_function = FunctionBody(
    FunctionDeclaration(Value('void', 'my_sort'),
                        [Value('CUdeviceptr', 'input_ptr'),
                         Value('int', 'length')]),
    Block([Statement('thrust::device_ptr<float> thrust_ptr((float*)input_ptr)'),
           Statement('thrust::sort(thrust_ptr, thrust_ptr+length)')]))

#Add declaration to nvcc_mod
#Adds declaration to host_mod as well
nvcc_mod.add_function(nvcc_function)

host_includes = [
    'boost/python/extract.hpp',
    ]
#Add host includes to module
host_mod.add_to_preamble([Include(x) for x in host_includes])

host_namespaces = [
    'using namespace boost::python',
    ]

#Add BPL using statement
host_mod.add_to_preamble([Statement(x) for x in host_namespaces])


host_statements = [
    #Extract information from PyCUDA GPUArray
    #Get length
    'tuple shape = extract<tuple>(gpu_array.attr("shape"))',
    'int length = extract<int>(shape[0])',
    #Get data pointer
    'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"))',
    #Call Thrust routine, compiled into the CudaModule
    'my_sort(ptr, length)',
    #Return result
    'return gpu_array',
    ]

host_mod.add_function(
    FunctionBody(
        FunctionDeclaration(Value('object', 'host_entry'),
                            [Value('object', 'gpu_array')]),
        Block([Statement(x) for x in host_statements])))

#Print out generated code, to see what we're actually compiling
print("---------------------- Host code ----------------------")
print(host_mod.generate())
print("--------------------- Device code ---------------------")
print(nvcc_mod.generate())
print("-------------------------------------------------------")


#Compile modules
import codepy.jit, codepy.toolchain
gcc_toolchain = codepy.toolchain.guess_toolchain()
nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()

module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)


length = 100
a = np.array(np.random.rand(length), dtype=np.float32)
print("---------------------- Unsorted -----------------------")
print(a)
b = gpuarray.to_gpu(a)
# Call Thrust!!
c = module.host_entry(b)
print("----------------------- Sorted ------------------------")
print(c.get())
print("-------------------------------------------------------")
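
As a quick sanity check (not part of the original listing), the GPU-sorted result can be compared against NumPy sorting the same input on the host:

# Optional verification, assuming the script above ran to completion.
# Thrust sorts in ascending order, so the result should equal np.sort(a).
assert np.array_equal(np.sort(a), c.get())
print("Thrust result matches np.sort")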