Port of SobelFilter example from CUDA C SDK
Python port of SobelFilter example in NVIDIA CUDA C SDK. Show how opengl interoperability works.
License of this example: |
GPL |
Date: |
October 2011 |
PyCUDA version: |
2011.1.2 |
1 #!/usr/bin/env python
2 #-*- coding: utf-8 -*-
3 #
4 # Requires PyCuda, PyOpenGL, and Pil
5 # MAKE SURE YOU HAVE AN UPDATED VERSION OF THESE PACKAGES!!
6 #
7 # Ported to PyCUDA by
8 # Stefano Brilli: stefanobrilli@gmail.com
9 #
10 # Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
11 #
12 # This software contains source code provided by NVIDIA Corporation
13 #
14 # http://developer.download.nvidia.com/compute/cuda/2_3/sdk/docs/cudasdk_eula.pdf
15 #
16 # Please refer to the NVIDIA end user license agreement (EULA) associated
17 # with this source code for terms and conditions that govern your use of
18 # this software. Any use, reproduction, disclosure, or distribution of
19 # this software and related documentation outside the terms of the EULA
20 # is strictly prohibited.
21 #
22
23 from OpenGL.GL import *
24 from OpenGL.GLUT import *
25 from OpenGL.GLU import *
26 from OpenGL.GL.ARB.vertex_buffer_object import *
27 import numpy as np, Image
28 import sys, time, os
29 import pycuda.driver as cuda_driver
30 import pycuda.gl as cuda_gl
31 import pycuda
32 #import pycuda.gl.autoinit
33 from pycuda.compiler import SourceModule
34
35 imWidth = 0
36 imHeight = 0
37 wWidth = 0
38 wHeight = 0
39 wName = "Cuda Edge Detection:"
40 pixels = None
41 array = None
42 texid = 0
43 pbo_buffer = None
44 cuda_pbo_resource = None
45 mode = 0
46 scale = 1.0
47
48 frameCount = 0
49 fpsCount = 0
50 fpsLimit = 8
51 timer = 0.0
52 ver2011 = False
53
54 def copy2D_array_to_device(dst, src, type_sz, width, height):
55 copy = cuda_driver.Memcpy2D()
56 copy.set_src_array(src)
57 copy.set_dst_device(dst)
58 copy.height = height
59 copy.dst_pitch = copy.src_pitch = copy.width_in_bytes = width*type_sz
60 copy(aligned=True)
61
62 def computeFPS():
63 global frameCount, fpsCount, fpsLimit, timer
64 frameCount += 1
65 fpsCount += 1
66 if fpsCount == fpsLimit:
67 ifps = 1.0 /timer
68 glutSetWindowTitle("Cuda Edge Detection: %f fps" % ifps)
69 fpsCount = 0
70
71 def sobelFilter(odata, iw, ih):
72 global array, pixels, mode, scale
73 if mode == 3:
74 # Texture and shared memory with fixed BlockSize
75 sm = SourceModule("""
76 texture<unsigned char, 2> tex;
77 extern __shared__ unsigned char LocalBlock[];
78 #define RADIUS 1
79 #define BlockWidth 80
80 #define SharedPitch 384
81 __device__ unsigned char
82 ComputeSobel(unsigned char ul, // upper left
83 unsigned char um, // upper middle
84 unsigned char ur, // upper right
85 unsigned char ml, // middle left
86 unsigned char mm, // middle (unused)
87 unsigned char mr, // middle right
88 unsigned char ll, // lower left
89 unsigned char lm, // lower middle
90 unsigned char lr, // lower right
91 float fScale )
92 {
93 short Horz = ur + 2*mr + lr - ul - 2*ml - ll;
94 short Vert = ul + 2*um + ur - ll - 2*lm - lr;
95 short Sum = (short) (fScale*(::abs(int(Horz))+::abs(int(Vert))));
96 if ( Sum < 0 ) return 0; else if ( Sum > 0xff ) return 0xff;
97 return (unsigned char) Sum;
98 }
99
100 __global__ void
101 SobelShared( int* pSobelOriginal, unsigned short SobelPitch,
102 short w, short h, float fScale )
103 {
104 short u = 4*blockIdx.x*BlockWidth;
105 short v = blockIdx.y*blockDim.y + threadIdx.y;
106 short ib;
107
108 int SharedIdx = threadIdx.y * SharedPitch;
109
110 for ( ib = threadIdx.x; ib < BlockWidth+2*RADIUS; ib += blockDim.x ) {
111 LocalBlock[SharedIdx+4*ib+0] = tex2D( tex,
112 (float) (u+4*ib-RADIUS+0), (float) (v-RADIUS) );
113 LocalBlock[SharedIdx+4*ib+1] = tex2D( tex,
114 (float) (u+4*ib-RADIUS+1), (float) (v-RADIUS) );
115 LocalBlock[SharedIdx+4*ib+2] = tex2D( tex,
116 (float) (u+4*ib-RADIUS+2), (float) (v-RADIUS) );
117 LocalBlock[SharedIdx+4*ib+3] = tex2D( tex,
118 (float) (u+4*ib-RADIUS+3), (float) (v-RADIUS) );
119 }
120 if ( threadIdx.y < RADIUS*2 ) {
121 //
122 // copy trailing RADIUS*2 rows of pixels into shared
123 //
124 SharedIdx = (blockDim.y+threadIdx.y) * SharedPitch;
125 for ( ib = threadIdx.x; ib < BlockWidth+2*RADIUS; ib += blockDim.x ) {
126 LocalBlock[SharedIdx+4*ib+0] = tex2D( tex,
127 (float) (u+4*ib-RADIUS+0), (float) (v+blockDim.y-RADIUS) );
128 LocalBlock[SharedIdx+4*ib+1] = tex2D( tex,
129 (float) (u+4*ib-RADIUS+1), (float) (v+blockDim.y-RADIUS) );
130 LocalBlock[SharedIdx+4*ib+2] = tex2D( tex,
131 (float) (u+4*ib-RADIUS+2), (float) (v+blockDim.y-RADIUS) );
132 LocalBlock[SharedIdx+4*ib+3] = tex2D( tex,
133 (float) (u+4*ib-RADIUS+3), (float) (v+blockDim.y-RADIUS) );
134 }
135 }
136
137 __syncthreads();
138
139 u >>= 2; // index as uchar4 from here
140 uchar4 *pSobel = (uchar4 *) (((char *) pSobelOriginal)+v*SobelPitch);
141 SharedIdx = threadIdx.y * SharedPitch;
142
143 for ( ib = threadIdx.x; ib < BlockWidth; ib += blockDim.x ) {
144
145 unsigned char pix00 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+0];
146 unsigned char pix01 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+1];
147 unsigned char pix02 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+2];
148 unsigned char pix10 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+0];
149 unsigned char pix11 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+1];
150 unsigned char pix12 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+2];
151 unsigned char pix20 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+0];
152 unsigned char pix21 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+1];
153 unsigned char pix22 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+2];
154
155 uchar4 out;
156
157 out.x = ComputeSobel(pix00, pix01, pix02,
158 pix10, pix11, pix12,
159 pix20, pix21, pix22, fScale );
160
161 pix00 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+3];
162 pix10 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+3];
163 pix20 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+3];
164 out.y = ComputeSobel(pix01, pix02, pix00,
165 pix11, pix12, pix10,
166 pix21, pix22, pix20, fScale );
167
168 pix01 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+4];
169 pix11 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+4];
170 pix21 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+4];
171 out.z = ComputeSobel( pix02, pix00, pix01,
172 pix12, pix10, pix11,
173 pix22, pix20, pix21, fScale );
174
175 pix02 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+5];
176 pix12 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+5];
177 pix22 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+5];
178 out.w = ComputeSobel( pix00, pix01, pix02,
179 pix10, pix11, pix12,
180 pix20, pix21, pix22, fScale );
181 if ( u+ib < w/4 && v < h ) {
182 pSobel[u+ib] = out;
183 }
184 }
185
186 __syncthreads();
187 }
188 """)
189 cuda_function = sm.get_function("SobelShared")
190 elif mode == 2:
191 # Texture and shared memory with variable BlockSize
192 sm = SourceModule("""
193 #define RADIUS 1
194 texture<unsigned char, 2> tex;
195 extern __shared__ unsigned char LocalBlock[];
196 __device__ unsigned char
197 ComputeSobel(unsigned char ul, // upper left
198 unsigned char um, // upper middle
199 unsigned char ur, // upper right
200 unsigned char ml, // middle left
201 unsigned char mm, // middle (unused)
202 unsigned char mr, // middle right
203 unsigned char ll, // lower left
204 unsigned char lm, // lower middle
205 unsigned char lr, // lower right
206 float fScale )
207 {
208 short Horz = ur + 2*mr + lr - ul - 2*ml - ll;
209 short Vert = ul + 2*um + ur - ll - 2*lm - lr;
210 short Sum = (short) (fScale*(::abs(int(Horz))+::abs(int(Vert))));
211 if ( Sum < 0 ) return 0; else if ( Sum > 0xff ) return 0xff;
212 return (unsigned char) Sum;
213 }
214
215 __global__ void
216 SobelShared( int* pSobelOriginal, unsigned short SobelPitch,
217 short BlockWidth, short SharedPitch,
218 short w, short h, float fScale )
219 {
220 short u = 4*blockIdx.x*BlockWidth;
221 short v = blockIdx.y*blockDim.y + threadIdx.y;
222 short ib;
223
224 int SharedIdx = threadIdx.y * SharedPitch;
225
226 for ( ib = threadIdx.x; ib < BlockWidth+2*RADIUS; ib += blockDim.x ) {
227 LocalBlock[SharedIdx+4*ib+0] = tex2D( tex,
228 (float) (u+4*ib-RADIUS+0), (float) (v-RADIUS) );
229 LocalBlock[SharedIdx+4*ib+1] = tex2D( tex,
230 (float) (u+4*ib-RADIUS+1), (float) (v-RADIUS) );
231 LocalBlock[SharedIdx+4*ib+2] = tex2D( tex,
232 (float) (u+4*ib-RADIUS+2), (float) (v-RADIUS) );
233 LocalBlock[SharedIdx+4*ib+3] = tex2D( tex,
234 (float) (u+4*ib-RADIUS+3), (float) (v-RADIUS) );
235 }
236 if ( threadIdx.y < RADIUS*2 ) {
237 //
238 // copy trailing RADIUS*2 rows of pixels into shared
239 //
240 SharedIdx = (blockDim.y+threadIdx.y) * SharedPitch;
241 for ( ib = threadIdx.x; ib < BlockWidth+2*RADIUS; ib += blockDim.x ) {
242 LocalBlock[SharedIdx+4*ib+0] = tex2D( tex,
243 (float) (u+4*ib-RADIUS+0), (float) (v+blockDim.y-RADIUS) );
244 LocalBlock[SharedIdx+4*ib+1] = tex2D( tex,
245 (float) (u+4*ib-RADIUS+1), (float) (v+blockDim.y-RADIUS) );
246 LocalBlock[SharedIdx+4*ib+2] = tex2D( tex,
247 (float) (u+4*ib-RADIUS+2), (float) (v+blockDim.y-RADIUS) );
248 LocalBlock[SharedIdx+4*ib+3] = tex2D( tex,
249 (float) (u+4*ib-RADIUS+3), (float) (v+blockDim.y-RADIUS) );
250 }
251 }
252
253 __syncthreads();
254
255 u >>= 2; // index as uchar4 from here
256 uchar4 *pSobel = (uchar4 *) (((char *) pSobelOriginal)+v*SobelPitch);
257 SharedIdx = threadIdx.y * SharedPitch;
258
259 for ( ib = threadIdx.x; ib < BlockWidth; ib += blockDim.x ) {
260
261 unsigned char pix00 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+0];
262 unsigned char pix01 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+1];
263 unsigned char pix02 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+2];
264 unsigned char pix10 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+0];
265 unsigned char pix11 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+1];
266 unsigned char pix12 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+2];
267 unsigned char pix20 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+0];
268 unsigned char pix21 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+1];
269 unsigned char pix22 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+2];
270
271 uchar4 out;
272
273 out.x = ComputeSobel(pix00, pix01, pix02,
274 pix10, pix11, pix12,
275 pix20, pix21, pix22, fScale );
276
277 pix00 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+3];
278 pix10 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+3];
279 pix20 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+3];
280 out.y = ComputeSobel(pix01, pix02, pix00,
281 pix11, pix12, pix10,
282 pix21, pix22, pix20, fScale );
283
284 pix01 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+4];
285 pix11 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+4];
286 pix21 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+4];
287 out.z = ComputeSobel( pix02, pix00, pix01,
288 pix12, pix10, pix11,
289 pix22, pix20, pix21, fScale );
290
291 pix02 = LocalBlock[SharedIdx+4*ib+0*SharedPitch+5];
292 pix12 = LocalBlock[SharedIdx+4*ib+1*SharedPitch+5];
293 pix22 = LocalBlock[SharedIdx+4*ib+2*SharedPitch+5];
294 out.w = ComputeSobel( pix00, pix01, pix02,
295 pix10, pix11, pix12,
296 pix20, pix21, pix22, fScale );
297 if ( u+ib < w/4 && v < h ) {
298 pSobel[u+ib] = out;
299 }
300 }
301
302 __syncthreads();
303 }
304
305 """)
306 cuda_function = sm.get_function("SobelShared")
307 if mode == 1:
308 # Just Texture
309 sm = SourceModule("""
310 texture<unsigned char, 2> tex;
311 __device__ unsigned char ComputeSobel(unsigned char ul, // upper left
312 unsigned char um, // upper middle
313 unsigned char ur, // upper right
314 unsigned char ml, // middle left
315 unsigned char mm, // middle (unused)
316 unsigned char mr, // middle right
317 unsigned char ll, // lower left
318 unsigned char lm, // lower middle
319 unsigned char lr, // lower right
320 float fScale )
321 {
322 short Horz = ur + 2*mr + lr - ul - 2*ml - ll;
323 short Vert = ul + 2*um + ur - ll - 2*lm - lr;
324 short Sum = (short) (fScale*(::abs(int(Horz))+::abs(int(Vert))));
325 if ( Sum < 0 ) return 0; else if ( Sum > 0xff ) return 0xff;
326 return (unsigned char) Sum;
327 }
328 __global__ void SobelTex( int* pSobelOriginal, unsigned int Pitch,
329 int w, int h, float fScale )
330 {
331 unsigned char *pSobel =
332 (unsigned char *) (((char *) pSobelOriginal)+blockIdx.x*Pitch);
333 for ( int i = threadIdx.x; i < w; i += blockDim.x ) {
334 unsigned char pix00 = tex2D( tex, (float) i-1, (float) blockIdx.x-1 );
335 unsigned char pix01 = tex2D( tex, (float) i+0, (float) blockIdx.x-1 );
336 unsigned char pix02 = tex2D( tex, (float) i+1, (float) blockIdx.x-1 );
337 unsigned char pix10 = tex2D( tex, (float) i-1, (float) blockIdx.x+0 );
338 unsigned char pix11 = tex2D( tex, (float) i+0, (float) blockIdx.x+0 );
339 unsigned char pix12 = tex2D( tex, (float) i+1, (float) blockIdx.x+0 );
340 unsigned char pix20 = tex2D( tex, (float) i-1, (float) blockIdx.x+1 );
341 unsigned char pix21 = tex2D( tex, (float) i+0, (float) blockIdx.x+1 );
342 unsigned char pix22 = tex2D( tex, (float) i+1, (float) blockIdx.x+1 );
343 pSobel[i] = ComputeSobel(pix00, pix01, pix02,
344 pix10, pix11, pix12,
345 pix20, pix21, pix22, fScale );
346 }
347 }
348 """)
349 cuda_function = sm.get_function("SobelTex")
350 elif mode == 0:
351 # Just Copy
352 sm = SourceModule("""
353 texture<unsigned char, 2> tex;
354 __global__ void SobelCopyImage(int* pSobelOriginal, unsigned int Pitch, int w, int h, float fscale )
355 {
356 unsigned char *pSobel =
357 (unsigned char *) (((unsigned char *) pSobelOriginal)+blockIdx.x*Pitch);
358 for ( int i = threadIdx.x; i < w; i += blockDim.x ) {
359 pSobel[i] = min( max((tex2D( tex, (float)i, (float)blockIdx.x ) * fscale), 0.f), 255.f);
360 }
361 }
362 """)
363 cuda_function = sm.get_function("SobelCopyImage")
364 texref = sm.get_texref("tex")
365 texref.set_array(array)
366 texref.set_flags(cuda_driver.TRSA_OVERRIDE_FORMAT)
367 if mode == 3:
368 # fixed BlockSize Launch
369 RADIUS = 1
370 threads = (16, 4, 1)
371 BlockWidth = 80 # Do not change!
372 blocks = (iw/(4*BlockWidth)+(0!=iw%(4*BlockWidth)),
373 ih/threads[1]+(0!=ih%threads[1]) )
374 SharedPitch = ~0x3f & (4*(BlockWidth+2*RADIUS)+0x3f);
375 sharedMem = SharedPitch*(threads[1]+2*RADIUS);
376 iw = iw & ~3
377 cuda_function(np.intp(odata), np.uint16(iw), np.int16(iw), np.int16(ih), np.float32(scale), texrefs=[texref],block=threads, grid=blocks, shared=sharedMem)
378 elif mode == 2:
379 # variable BlockSize launch
380 RADIUS = 1
381 threads = (16, 4, 1)
382 BlockWidth = 80 # Change only with divisible by 16 values!
383 blocks = (iw/(4*BlockWidth)+(0!=iw%(4*BlockWidth)),
384 ih/threads[1]+(0!=ih%threads[1]) )
385 SharedPitch = ~0x3f & (4*(BlockWidth+2*RADIUS)+0x3f);
386 sharedMem = SharedPitch*(threads[1]+2*RADIUS);
387 iw = iw & ~3
388 cuda_function(np.intp(odata), np.uint16(iw), np.int16(BlockWidth), np.int16(SharedPitch), np.int16(iw), np.int16(ih), np.float32(scale), texrefs=[texref],block=threads, grid=blocks, shared=sharedMem)
389 else:
390 BlockWidth = 384
391 cuda_function(np.intp(odata), np.uint32(iw), np.int32(iw), np.int32(ih), np.float32(scale), texrefs=[texref],block=(BlockWidth,1,1),grid=(ih,1))
392
393 def initGL():
394 global wWidth, wHeight, wName
395 glutInit(sys.argv)
396 glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA)
397 glutInitWindowSize(wWidth, wHeight)
398 glutCreateWindow(wName)
399 import pycuda.gl.autoinit
400
401 def loadImage(fn=None):
402 global pixels, imWidth, imHeight, wWidth, wHeight
403 try:
404 im = Image.open(fn) # Open the image
405 except IOError:
406 print "Usage:", os.path.basename(sys.argv[0]), "[IMAGE=defaultimage.jpg]"
407 print "Can't open", fn
408 sys.exit(1)
409 imWidth, imHeight = im.size # Window size is set to image size
410 wWidth, wHeight = im.size
411 im.draft("L", im.size) # L-flag is for Luminance
412 pixels = np.fromstring(im.tostring(), dtype=np.uint8) # Got the array
413 pixels.resize((imHeight, imWidth)) # Resize to 2d array
414 print "Reading image:", fn, "size:", imWidth, "x", imHeight
415
416 def initData(fn=None):
417 global pixels, array, pbo_buffer, cuda_pbo_resource, imWidth, imHeight, texid
418
419 # Cuda array initialization
420 array = cuda_driver.matrix_to_array(pixels, "C") # C-style instead of Fortran-style: row-major
421
422 pixels.fill(0) # Resetting the array to 0
423
424 pbo_buffer = glGenBuffers(1) # generate 1 buffer reference
425 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer) # binding to this buffer
426 glBufferData(GL_PIXEL_UNPACK_BUFFER, imWidth*imHeight, pixels, GL_STREAM_DRAW) # Allocate the buffer
427 bsize = glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE) # Check allocated buffer size
428 assert(bsize == imWidth*imHeight)
429 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0) # Unbind
430
431 if ver2011:
432 cuda_pbo_resource = pycuda.gl.RegisteredBuffer(int(pbo_buffer), cuda_gl.graphics_map_flags.WRITE_DISCARD)
433 else:
434 cuda_pbo_resource = cuda_gl.BufferObject(int(pbo_buffer)) # Mapping GLBuffer to cuda_resource
435
436
437 glGenTextures(1, texid); # generate 1 texture reference
438 glBindTexture(GL_TEXTURE_2D, texid); # binding to this texture
439 glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, imWidth, imHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, None); # Allocate the texture
440 glBindTexture(GL_TEXTURE_2D, 0) # Unbind
441
442 glPixelStorei(GL_UNPACK_ALIGNMENT, 1) # 1-byte row alignment
443 glPixelStorei(GL_PACK_ALIGNMENT, 1) # 1-byte row alignment
444
445
446 def display():
447 global cuda_pbo_resource, pbo_buffer, texid, imWidth, imHeight, timer
448
449 timer = time.time() # Starting timer
450 mapping_obj = cuda_pbo_resource.map() # Map the GlBuffer
451 if ver2011:
452 data, sz = mapping_obj.device_ptr_and_size() # Got the CUDA pointer to GlBuffer
453 else:
454 data = mapping_obj.device_ptr()
455 sobelFilter(data, imWidth, imHeight) # Writing to "data"
456 mapping_obj.unmap() # Unmap the GlBuffer
457
458 glClear(GL_COLOR_BUFFER_BIT) # Clear
459 glBindTexture(GL_TEXTURE_2D, texid)
460 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer)
461 # Copyng from buffer to texture
462 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imWidth, imHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, None)
463 #glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, imWidth, imHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, None);
464 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0) # Unbind
465
466 glDisable(GL_DEPTH_TEST)
467 glEnable(GL_TEXTURE_2D)
468 glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)
469 glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)
470 glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT)
471 glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT)
472
473 glBegin(GL_QUADS)
474 glVertex2f(0, 0)
475 glTexCoord2f(0, 0)
476 glVertex2f(0, 1)
477 glTexCoord2f(1, 0)
478 glVertex2f(1, 1)
479 glTexCoord2f(1, 1)
480 glVertex2f(1, 0)
481 glTexCoord2f(0, 1)
482 glEnd()
483 glBindTexture(GL_TEXTURE_2D, 0)
484 glutSwapBuffers()
485 timer = time.time()-timer
486 computeFPS()
487 glutPostRedisplay()
488
489 def reshape(x, y):
490 glViewport(0, 0, x, y)
491 glMatrixMode(GL_PROJECTION)
492 glLoadIdentity()
493 glOrtho(0, 1, 0, 1, 0, 1)
494 glMatrixMode(GL_MODELVIEW)
495 glLoadIdentity()
496 glutPostRedisplay()
497
498 def keyboard(key, x=0, y=0):
499 global mode, scale
500 if key=="q":
501 sys.exit(0)
502 elif key=="I" or key=="i":
503 mode = 0
504 elif key=="T" or key=="t":
505 mode = 1
506 elif key=="S" or key=="s":
507 mode = 2
508 elif key=="D" or key=="d":
509 mode = 3
510 elif key == "-":
511 scale -= 0.1
512 elif key == "=":
513 scale += 0.1
514
515 def idle():
516 glutPostRedisplay()
517
518 def main(argv):
519 fn = "defaultimage.jpg"
520 if len(argv) > 1:
521 fn = argv[1]
522
523 loadImage(fn) # Loading the image
524
525 initGL()
526 initData(fn)
527 print """
528 Q: Exit
529 I: display image
530 T: display Sobel edge detection (computed with tex)
531 S: display Sobel edge detection (computed with tex+shared memory)
532 D: display Sobel edge detection (computed with tex+shared memory+fixed block size)
533 Use the '-' and '=' keys to change the brightness.
534
535 TESTED WITH IMAGE SIZE OF 512x512... just like the original demo.
536 Other image sizes may not work
537 """
538 glutDisplayFunc(display)
539 glutKeyboardFunc(keyboard)
540 glutReshapeFunc(reshape)
541 glutIdleFunc(idle)
542 glutMainLoop();
543
544 if __name__ == "__main__":
545 if pycuda.VERSION[0] >= 2011:
546 ver2011 = True
547 main(sys.argv)
