Demostrating simulatneous use of 3D surfaces and 3D textures

Author:

Ezequiel Alfie <ealfie@gmail.com>

License of this example:

GPLv3

Date:

2012.04.21

PyCUDA version:

dd12c742c6ea35cd06ce25fd17abf21c01cd6ff7

   1 #!/usr/bin/env python
   2 
   3 # 
   4 # Author: Ezequiel Alfie <ealfie@gmail.com>
   5 #
   6 # demonstrating simultaneous use of 3D textures and surfaces
   7 #
   8 #
   9 #
  10 # needs CUDA 4.x and pycuda with v4 launch interface
  11 # (later than commit dd12c742c6ea35cd06ce25fd17abf21c01cd6ff7 Apr 21, 2012)
  12 #
  13 
  14 from __future__ import division
  15 import numpy as np
  16 import pycuda.driver as drv
  17 from pycuda.compiler import SourceModule
  18 
  19 import pycuda.autoinit
  20 import numpy.testing
  21 
  22 
  23 
  24 def array_format_to_dtype(af):
  25     if af == drv.array_format.UNSIGNED_INT8:
  26         return np.uint8
  27     elif af == drv.array_format.UNSIGNED_INT16:
  28         return np.uint16
  29     elif af == drv.array_format.UNSIGNED_INT32:
  30         return np.uint32
  31     elif af == drv.array_format.SIGNED_INT8:
  32         return np.int8
  33     elif af == drv.array_format.SIGNED_INT16:
  34         return np.int16
  35     elif af == drv.array_format.SIGNED_INT32:
  36         return np.int32
  37     elif af == drv.array_format.FLOAT:
  38         return np.float32
  39     else:
  40         raise TypeError(
  41                 "cannot convert array_format '%s' to a numpy dtype" 
  42                 % array_format)
  43 
  44 #
  45 # numpy3d_to_array
  46 # this function was
  47 # taken from pycuda mailing list (striped for C ordering only)
  48 #
  49 def numpy3d_to_array(np_array, allow_surface_bind=True):
  50 
  51     import pycuda.autoinit
  52 
  53     d, h, w = np_array.shape
  54 
  55     descr = drv.ArrayDescriptor3D()
  56     descr.width = w
  57     descr.height = h
  58     descr.depth = d
  59     descr.format = drv.dtype_to_array_format(np_array.dtype)
  60     descr.num_channels = 1
  61     descr.flags = 0
  62 
  63     if allow_surface_bind:
  64         descr.flags = drv.array3d_flags.SURFACE_LDST
  65 
  66     device_array = drv.Array(descr)
  67 
  68     copy = drv.Memcpy3D()
  69     copy.set_src_host(np_array)
  70     copy.set_dst_array(device_array)
  71     copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
  72     copy.src_height = copy.height = h
  73     copy.depth = d
  74 
  75     copy()
  76 
  77     return device_array
  78 
  79 
  80 def array_to_numpy3d(cuda_array):
  81 
  82     import pycuda.autoinit
  83 
  84     descriptor = cuda_array.get_descriptor_3d()
  85 
  86     w = descriptor.width
  87     h = descriptor.height
  88     d = descriptor.depth
  89 
  90     shape = d, h, w
  91 
  92     dtype = array_format_to_dtype(descriptor.format)
  93 
  94     numpy_array=np.zeros(shape, dtype)
  95 
  96     copy = drv.Memcpy3D()
  97     copy.set_src_array(cuda_array)
  98     copy.set_dst_host(numpy_array)
  99 
 100     itemsize = numpy_array.dtype.itemsize
 101 
 102     copy.width_in_bytes = copy.src_pitch = w*itemsize
 103     copy.src_height = copy.height = h
 104     copy.depth = d
 105 
 106     copy()
 107 
 108     return numpy_array
 109 
 110 
 111 src_module=r'''
 112 #include <stdint.h>
 113 #include <cuda.h>
 114 #include <surface_functions.h>
 115 
 116 texture<float, cudaTextureType3D, cudaReadModeElementType> tex_in;
 117 surface<void, 3> surf_out;
 118 
 119 __global__ void test_3d_surf(int32_t Nz, int32_t Ny, int32_t Nx)
 120 {
 121 
 122   int x = blockDim.x * blockIdx.x + threadIdx.x;
 123   int y = blockDim.y * blockIdx.y + threadIdx.y;
 124   int z = blockDim.z * blockIdx.z + threadIdx.z;
 125 
 126   if (x < Nx && y < Ny && z < Nz) {
 127     float value = tex3D(tex_in, (float) x, (float) y, float (z));
 128 
 129     surf3Dwrite((float) value, surf_out, sizeof(float) * x, y, z, cudaBoundaryModeZero);
 130   }
 131 
 132 }
 133 '''
 134 
 135 mod=SourceModule(src_module, cache_dir=False, keep=False)
 136 
 137 kernel=mod.get_function("test_3d_surf")
 138 arg_types = (np.int32, np.int32, np.int32)
 139 
 140 tex_in=mod.get_texref('tex_in')
 141 surf_out=mod.get_surfref('surf_out')
 142 
 143 # random shape
 144 shape_x = np.random.randint(1,255)
 145 shape_y = np.random.randint(1,255)
 146 shape_z = np.random.randint(1,255)
 147 
 148 dtype=np.float32 # should match src_module's datatype
 149 
 150 numpy_array_in=np.random.randn(shape_z, shape_y, shape_x).astype(dtype).copy()
 151 cuda_array_in = numpy3d_to_array(numpy_array_in)
 152 tex_in.set_array(cuda_array_in)
 153 
 154 zeros=np.zeros_like(numpy_array_in)
 155 cuda_array_out = numpy3d_to_array(zeros,allow_surface_bind=True)
 156 surf_out.set_array(cuda_array_out)
 157 
 158 
 159 block_size_z, block_size_y, block_size_x = 8,8,8 #hardcoded, tune to your needs
 160 gridz = shape_z // block_size_z + 1 * (shape_z % block_size_z != 0)
 161 gridy = shape_y // block_size_y + 1 * (shape_y % block_size_y != 0)
 162 gridx = shape_x // block_size_x + 1 * (shape_x % block_size_x != 0)
 163 grid = (gridx, gridy, gridz)
 164 block = (block_size_x, block_size_y, block_size_x)
 165 
 166 kernel.prepare(arg_types,texrefs=[tex_in])
 167 kernel.prepared_call(grid, block, shape_z, shape_y, shape_x)
 168 
 169 numpy_array_out = array_to_numpy3d(cuda_array_out)
 170 numpy.testing.assert_array_almost_equal(numpy_array_out, numpy_array_in)

PyCUDA/Examples/Demo3DSurface (last edited 2012-04-22 00:19:39 by AndreasKloeckner)