Basic Arithmetic Operation in PyCuda

A basic functioning operation is carried out

If you like, you may put your contact info here. [[!table header="no" class="mointable" data=""" License of this example: | Sugeerth Murugesan,UC Davis (sugeerth@gmail.com) Date: | September 2nd,2013 PyCUDA version: |
"""]]


#!python 
#!python
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
import numpy as np

# Converting the list into numpy array for faster access and putting it into the GPU for processing... 
start = cuda.Event()
end = cuda.Event()

N = 222341

values = np.random.randn(N)
number_of_blocks=N/1024

# Calculating the (value-max)/max-min computation and storing it in a numpy array. Pre-calculating the maximum and minimum values.

# Space for the Kernel computation..

func_mod = SourceModule("""
// Needed to avoid name mangling so that PyCUDA can
// find the kernel function:
extern "C" {
__global__ void func(float *a, int N, float minval, int denom)
{
int idx = threadIdx.x+threadIdx.y*32+blockIdx.x*blockDim.x;
if (idx < N)
    a[idx] = (a[idx]-minval)/denom;
}
}
""", no_extern_c=1)

func = func_mod.get_function('func')
x = np.asarray(values, np.float32)
x_gpu = gpuarray.to_gpu(x)
h_minval = np.float32(0)
h_denom = np.int32(255)

start.record()
# a function to the GPU to calculate the computation in the GPU.
func(x_gpu.gpudata, np.uint32(N), np.float32(h_minval), np.uint32(h_denom), block=(1024, 1, 1), grid=(number_of_blocks+1,1,1))
end.record() 
end.synchronize()
secs = start.time_till(end)*1e-3

print "SourceModule time"
print "%fs" % (secs)
print 'x:       ', x[N-1]
print 'Func(x): ', x_gpu.get()[N-1],'Actual: ',(values[N-1]-0)/(h_denom)
x_colors=x_gpu.get()