2D FFT of any size using PYFFT, PYCUDA and Multiprocessing
This code computes the 2D FFT of any size using 2 GPU's using the Transpose-Split algorithm. B.J.Jackin - jackin@opt.utsunomiya-u.ac.jp
License of this example: |
Public Domain? |
Date: |
13-08-2010 |
PyCUDA version: |
0.94rc |
1 #2D FFT USING PYCUDA, PYFFT AND MULTIPROCESSING
2 #INPUT IMAGE / MATRIX IS DIVIDED INTO PARTS IN ORDER TO USE THE TRANSPOSE SPLIT METHOD THE FOR 2D FFT
3 #HOW MANY PARTS THE INPUT IMAGE SHOULD GET DIVIDED IS DECIDED BY THE USER BASED ON THE AVAILABLE MEMORY AND INPUT SIZE
4
5 import numpy
6 import scipy.misc
7 import numpy.fft as nfft
8 import multiprocessing
9
10 from pyfft.cuda import Plan
11 from pycuda.tools import make_default_context
12 import pycuda.tools as pytools
13 import pycuda.gpuarray as garray
14 import pycuda.driver as drv
15
16
17 class GPUMulti(multiprocessing.Process):
18 def __init__(self, number, input_cpu, output_cpu):
19 multiprocessing.Process.__init__(self)
20 self.number = number
21 self.input_cpu = input_cpu
22 self.output_cpu = output_cpu
23
24 def run(self):
25 drv.init()
26 a0=numpy.zeros((p,),dtype=numpy.complex64)
27 self.dev = drv.Device(self.number)
28 self.ctx = self.dev.make_context()
29 #TO VERIFY WHETHER ALL THE MEMORY IS FREED BEFORE NEXT ALLOCATION (THIS DOES NOT HAPPEN IN MULTITHREADING)
30 print drv.mem_get_info()
31 self.gpu_a = garray.empty((self.input_cpu.size,), dtype=numpy.complex64)
32 self.gpu_b = garray.zeros_like(self.gpu_a)
33 self.gpu_a = garray.to_gpu(self.input_cpu)
34 plan = Plan(a0.shape,context=self.ctx)
35 plan.execute(self.gpu_a, self.gpu_b, batch=p/m)
36 self.temp = self.gpu_b.get()
37 self.output_cpu.put(self.temp)
38 self.output_cpu.close()
39 self.ctx.pop()
40 del self.gpu_a
41 del self.gpu_b
42 del self.ctx
43
44 print "till the end %d" %self.number
45
46
47 p = 8192; # INPUT IMAGE SIZE (8192 * 8192)
48 m = 4 # TO DIVIDE THE INPUT IMAGE INTO 4 (2048 * 8192) SIZED IMAGES (Depends on the total memory of your GPU)
49 trans = 2 # THE WHOLE THING HAS TO BE REPEATED 2 TIMES FOR THE TRANSPOSE-SPLIT (TS) ALGORITHM (So trans = 2)
50
51
52 #INPUT IMAGE (GENERATE A 2d SINE WAVE PATTERN)
53 p_n = 8000 # No. OF PERIODS OF SINE WAVES
54 x=numpy.arange(0,p_n,float(p_n)/float(p))
55 a_i = 128 + 128 * numpy.sin(2*numpy.pi*x)
56 a2 = numpy.zeros([p,p],dtype=numpy.complex64)
57 a2[::]=a_i
58 scipy.misc.imsave("sine.bmp",numpy.absolute(a2)) #TEST THE GENERATION OF INPUT IMAGE
59
60 #INITIALISE THE VARIABLES
61 a2_1 = numpy.zeros([m,p*p/m],dtype = numpy.complex64) #INPUT TO THE GPU
62 #VERY IMPORTANT
63 output_cpu = multiprocessing.Queue() # GPU VARIABLE THAT STORES THE OUTPUT(MULTIPROCESSING DOES NOT ALLOW SHARING AND HENCE THIS IS NEEDED FOR COMMUNICATION OF DATA)
64
65 b2pa = numpy.zeros([p/m,p,m],dtype = numpy.complex64) #OUTPUT FROM GPU
66 b2_a = numpy.zeros([p,p],dtype = numpy.complex64) #RESHAPED OUPUT
67
68 #NOW WE ARE READY TO KICK START THE GPU
69
70 # THE NO OF GPU'S PRESENT (CHANGE ACCORDING TO THE No.OF GPUS YOU HAVE)
71 num = 2 # I KNOW THIS IS A BAD PRACTISE, BUT I COUNDN'T FIND ANY OTHER WAY(INIT CANNOT BE USED HERE)
72
73 #THE TRANSPOSE-SPLIT ALGORITHM FOR FFT
74 for t in range (0,trans):
75 for i in range (m):
76 a2_1[i,:] = a2[i*p/m:(i+1)*p/m,:].flatten()#DIVIDE AND RESHAPE THE INPUT IMAGE INTO 1D ARRAY
77
78 for j in range (m/num):
79 gpu_multi_list = []
80
81 #CREATE AND START THE MULTIPROCESS
82 for i in range (num):
83 gpu_multi = GPUMulti(i,a2_1[i+j*num,:],output_cpu) #FEED THE DATA INTO THE GPU
84 gpu_multi_list.append(gpu_multi)
85 gpu_multi.start()#THERE YOU GO
86
87 #COLLECT THE OUTPUT FROM THE RUNNING MULTIPROCESS AND RESHAPE
88 for gpu_pro in gpu_multi_list:
89 temp_b2_1 = output_cpu.get(gpu_pro)
90 b2pa[:,:,gpu_pro.number+j*num] = numpy.reshape(temp_b2_1,(p/m,p))
91 gpu_multi.terminate()
92
93 #REUNITE AGAIN TO A (p * p) IMAGE
94 for i in range(m):
95 b2_a[i*p/m:(i+1)*p/m,:] = b2pa[:,:,i]
96
97 #TRANSPOSE THE OUTPUT AND MAKE IT THE INPUT. THE WHOLE PROCESS REPEATS ONCE AGAIN (THIS IS DUE TO THE TRANSPOSE-SPLIT ALGORITHM)
98 a2 = b2_a.transpose()
99
100 #SAVE THE OUTPUT RESULT
101 scipy.misc.imsave("sine_fft.bmp",numpy.absolute(nfft.fftshift(a2)))
102
103
104
105
106 Note: please leave the '#!python' marker in place above. The script 'examples/download-examples-from-wiki.py' in the PyCUDA distribution relies on it.