Using Thrust on a PyCUDA array
Written by Bryan Catanzaro, see also https://gist.github.com/2772091
Uses CodePy, thrust, and Boost C++ (Boost.Python to be precise)
License of this example: Public Domain
Date: 2012-05-22
PyCUDA version: 2011.1
1 import pycuda
2 import pycuda.autoinit
3 import pycuda.gpuarray as gpuarray
4 import numpy as np
5
6 from codepy.cgen import *
7 from codepy.bpl import BoostPythonModule
8 from codepy.cuda import CudaModule
9
# Host-side module, compiled for the CPU.
host_mod = BoostPythonModule()

# Device-side module, compiled with NVCC and tied to host_mod.
nvcc_mod = CudaModule(host_mod)

# Headers needed by the generated device code.
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
]
# One include directive per header goes into the device module's preamble.
nvcc_mod.add_to_preamble(list(map(Include, nvcc_includes)))
25
# Device-side routine, generated as:
#     void my_sort(CUdeviceptr input_ptr, int length)
# It wraps the raw device pointer in a thrust::device_ptr<float> and sorts
# the range [thrust_ptr, thrust_ptr + length) with thrust::sort.
my_sort_signature = FunctionDeclaration(
    Value('void', 'my_sort'),
    [
        Value('CUdeviceptr', 'input_ptr'),
        Value('int', 'length'),
    ],
)
my_sort_body = Block([
    Statement('thrust::device_ptr<float> thrust_ptr((float*)input_ptr)'),
    Statement('thrust::sort(thrust_ptr, thrust_ptr+length)'),
])
nvcc_function = FunctionBody(my_sort_signature, my_sort_body)

# Registering the function with nvcc_mod also adds its declaration to
# host_mod.
nvcc_mod.add_function(nvcc_function)
37
# Headers needed by the generated host code.
host_includes = [
    'boost/python/extract.hpp',
]
host_mod.add_to_preamble(list(map(Include, host_includes)))

# Using-directive for Boost.Python, added to the host preamble after the
# includes so the host statements can use its names unqualified.
host_namespaces = [
    'using namespace boost::python',
]
host_mod.add_to_preamble(list(map(Statement, host_namespaces)))
50
51
# C++ statements that make up the body of host_entry, in execution order.
host_statements = [
    # Extract what the Thrust routine needs from the PyCUDA GPUArray:
    # the length, taken from the first entry of its shape ...
    'tuple shape = extract<tuple>(gpu_array.attr("shape"))',
    'int length = extract<int>(shape[0])',
    # ... and the device data pointer.
    'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"))',
    # Call the Thrust routine compiled into the CudaModule.
    'my_sort(ptr, length)',
    # Hand the same array object back to the caller.
    'return gpu_array',
]

# Host entry point, generated as: object host_entry(object gpu_array)
host_entry_signature = FunctionDeclaration(
    Value('object', 'host_entry'),
    [Value('object', 'gpu_array')],
)
host_entry_body = Block(list(map(Statement, host_statements)))
host_mod.add_function(FunctionBody(host_entry_signature, host_entry_body))
70
# Show the generated source of both modules, so it is visible what is
# actually being compiled.
for banner, mod in (
    ("---------------------- Host code ----------------------", host_mod),
    ("--------------------- Device code ---------------------", nvcc_mod),
):
    print(banner)
    print(mod.generate())
print("-------------------------------------------------------")
77
78
79
# Compile the modules. The codepy build helpers are imported here, at the
# point of use.
import codepy.jit
import codepy.toolchain

# Let codepy guess the host compiler toolchain and the NVCC toolchain.
gcc_toolchain = codepy.toolchain.guess_toolchain()
nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()

# Build through the device module, passing both toolchains; the resulting
# module object is what exposes host_entry to Python.
module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)
86
87
88
# Demo: sort `length` random single-precision floats on the GPU.
length = 100
# The generated my_sort casts the device pointer to float*, so the host data
# is created as float32.
a = np.array(np.random.rand(length), dtype=np.float32)
print("---------------------- Unsorted -----------------------")
print(a)
# Copy the host array to the GPU as a PyCUDA GPUArray.
b = gpuarray.to_gpu(a)
# Call Thrust!!  host_entry sorts the device data and returns the array
# object it was given.
c = module.host_entry(b)
print("----------------------- Sorted ------------------------")
# Use the print() call form, not the Python 2 print statement: it matches
# every other print in this script and is valid on Python 3.
print(c.get())
print("-------------------------------------------------------")
