1
2
3
4 """
5 Multiples two square matrices together using a *single* block of threads and
6 global memory only. Each thread computes one element of the resulting matrix.
7 """
8
9 import numpy as np
10 from pycuda import driver, compiler, gpuarray, tools
11
12
13 import pycuda.autoinit
14
15 kernel_code_template = """
16 __global__ void MatrixMulKernel(float *a, float *b, float *c)
17 {
18 // 2D Thread ID (assuming that only *one* block will be executed)
19 int tx = threadIdx.x;
20 int ty = threadIdx.y;
21
22 // Pvalue is used to store the element of the matrix
23 // that is computed by the thread
24 float Pvalue = 0;
25
26 // Each thread loads one row of M and one column of N,
27 // to produce one element of P.
28 for (int k = 0; k < %(MATRIX_SIZE)s; ++k) {
29 float Aelement = a[ty * %(MATRIX_SIZE)s + k];
30 float Belement = b[k * %(MATRIX_SIZE)s + tx];
31 Pvalue += Aelement * Belement;
32 }
33
34 // Write the matrix to device memory;
35 // each thread writes one element
36 c[ty * %(MATRIX_SIZE)s + tx] = Pvalue;
37 }
38 """
39
40
41
42
43
44
45 MATRIX_SIZE = 2
46
47
48 a_cpu = np.random.randn(MATRIX_SIZE, MATRIX_SIZE).astype(np.float32)
49 b_cpu = np.random.randn(MATRIX_SIZE, MATRIX_SIZE).astype(np.float32)
50
51
52 c_cpu = np.dot(a_cpu, b_cpu)
53
54
55 a_gpu = gpuarray.to_gpu(a_cpu)
56 b_gpu = gpuarray.to_gpu(b_cpu)
57
58
59 c_gpu = gpuarray.empty((MATRIX_SIZE, MATRIX_SIZE), np.float32)
60
61
62
63 kernel_code = kernel_code_template % {
64 'MATRIX_SIZE': MATRIX_SIZE
65 }
66
67
68 mod = compiler.SourceModule(kernel_code)
69
70
71 matrixmul = mod.get_function("MatrixMulKernel")
72
73
74 matrixmul(
75
76 a_gpu, b_gpu,
77
78 c_gpu,
79
80 block = (MATRIX_SIZE, MATRIX_SIZE, 1),
81 )
82
83
84 print "-" * 80
85 print "Matrix A (GPU):"
86 print a_gpu.get()
87
88 print "-" * 80
89 print "Matrix B (GPU):"
90 print b_gpu.get()
91
92 print "-" * 80
93 print "Matrix C (GPU):"
94 print c_gpu.get()
95
96 print "-" * 80
97 print "CPU-GPU difference:"
98 print c_cpu - c_gpu.get()
99
100 np.allclose(c_cpu, c_gpu.get())
101
PyCuda/Examples/MatrixmulSimple (last edited 2010-01-31 16:29:22 by ip72-221-120-106)