Demostrating simulatneous use of 3D surfaces and 3D textures
Author: |
Ezequiel Alfie <ealfie@gmail.com> |
License of this example: |
GPLv3 |
Date: |
2012.04.21 |
PyCUDA version: |
dd12c742c6ea35cd06ce25fd17abf21c01cd6ff7 |
1 #!/usr/bin/env python
2
3 #
4 # Author: Ezequiel Alfie <ealfie@gmail.com>
5 #
6 # demonstrating simultaneous use of 3D textures and surfaces
7 #
8 #
9 #
10 # needs CUDA 4.x and pycuda with v4 launch interface
11 # (later than commit dd12c742c6ea35cd06ce25fd17abf21c01cd6ff7 Apr 21, 2012)
12 #
13
14 from __future__ import division
15 import numpy as np
16 import pycuda.driver as drv
17 from pycuda.compiler import SourceModule
18
19 import pycuda.autoinit
20 import numpy.testing
21
22
23
24 def array_format_to_dtype(af):
25 if af == drv.array_format.UNSIGNED_INT8:
26 return np.uint8
27 elif af == drv.array_format.UNSIGNED_INT16:
28 return np.uint16
29 elif af == drv.array_format.UNSIGNED_INT32:
30 return np.uint32
31 elif af == drv.array_format.SIGNED_INT8:
32 return np.int8
33 elif af == drv.array_format.SIGNED_INT16:
34 return np.int16
35 elif af == drv.array_format.SIGNED_INT32:
36 return np.int32
37 elif af == drv.array_format.FLOAT:
38 return np.float32
39 else:
40 raise TypeError(
41 "cannot convert array_format '%s' to a numpy dtype"
42 % array_format)
43
44 #
45 # numpy3d_to_array
46 # this function was
47 # taken from pycuda mailing list (striped for C ordering only)
48 #
49 def numpy3d_to_array(np_array, allow_surface_bind=True):
50
51 import pycuda.autoinit
52
53 d, h, w = np_array.shape
54
55 descr = drv.ArrayDescriptor3D()
56 descr.width = w
57 descr.height = h
58 descr.depth = d
59 descr.format = drv.dtype_to_array_format(np_array.dtype)
60 descr.num_channels = 1
61 descr.flags = 0
62
63 if allow_surface_bind:
64 descr.flags = drv.array3d_flags.SURFACE_LDST
65
66 device_array = drv.Array(descr)
67
68 copy = drv.Memcpy3D()
69 copy.set_src_host(np_array)
70 copy.set_dst_array(device_array)
71 copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
72 copy.src_height = copy.height = h
73 copy.depth = d
74
75 copy()
76
77 return device_array
78
79
80 def array_to_numpy3d(cuda_array):
81
82 import pycuda.autoinit
83
84 descriptor = cuda_array.get_descriptor_3d()
85
86 w = descriptor.width
87 h = descriptor.height
88 d = descriptor.depth
89
90 shape = d, h, w
91
92 dtype = array_format_to_dtype(descriptor.format)
93
94 numpy_array=np.zeros(shape, dtype)
95
96 copy = drv.Memcpy3D()
97 copy.set_src_array(cuda_array)
98 copy.set_dst_host(numpy_array)
99
100 itemsize = numpy_array.dtype.itemsize
101
102 copy.width_in_bytes = copy.src_pitch = w*itemsize
103 copy.src_height = copy.height = h
104 copy.depth = d
105
106 copy()
107
108 return numpy_array
109
110
111 src_module=r'''
112 #include <stdint.h>
113 #include <cuda.h>
114 #include <surface_functions.h>
115
116 texture<float, cudaTextureType3D, cudaReadModeElementType> tex_in;
117 surface<void, 3> surf_out;
118
119 __global__ void test_3d_surf(int32_t Nz, int32_t Ny, int32_t Nx)
120 {
121
122 int x = blockDim.x * blockIdx.x + threadIdx.x;
123 int y = blockDim.y * blockIdx.y + threadIdx.y;
124 int z = blockDim.z * blockIdx.z + threadIdx.z;
125
126 if (x < Nx && y < Ny && z < Nz) {
127 float value = tex3D(tex_in, (float) x, (float) y, float (z));
128
129 surf3Dwrite((float) value, surf_out, sizeof(float) * x, y, z, cudaBoundaryModeZero);
130 }
131
132 }
133 '''
134
135 mod=SourceModule(src_module, cache_dir=False, keep=False)
136
137 kernel=mod.get_function("test_3d_surf")
138 arg_types = (np.int32, np.int32, np.int32)
139
140 tex_in=mod.get_texref('tex_in')
141 surf_out=mod.get_surfref('surf_out')
142
143 # random shape
144 shape_x = np.random.randint(1,255)
145 shape_y = np.random.randint(1,255)
146 shape_z = np.random.randint(1,255)
147
148 dtype=np.float32 # should match src_module's datatype
149
150 numpy_array_in=np.random.randn(shape_z, shape_y, shape_x).astype(dtype).copy()
151 cuda_array_in = numpy3d_to_array(numpy_array_in)
152 tex_in.set_array(cuda_array_in)
153
154 zeros=np.zeros_like(numpy_array_in)
155 cuda_array_out = numpy3d_to_array(zeros,allow_surface_bind=True)
156 surf_out.set_array(cuda_array_out)
157
158
159 block_size_z, block_size_y, block_size_x = 8,8,8 #hardcoded, tune to your needs
160 gridz = shape_z // block_size_z + 1 * (shape_z % block_size_z != 0)
161 gridy = shape_y // block_size_y + 1 * (shape_y % block_size_y != 0)
162 gridx = shape_x // block_size_x + 1 * (shape_x % block_size_x != 0)
163 grid = (gridx, gridy, gridz)
164 block = (block_size_x, block_size_y, block_size_x)
165
166 kernel.prepare(arg_types,texrefs=[tex_in])
167 kernel.prepared_call(grid, block, shape_z, shape_y, shape_x)
168
169 numpy_array_out = array_to_numpy3d(cuda_array_out)
170 numpy.testing.assert_array_almost_equal(numpy_array_out, numpy_array_in)
