1 from __future__ import division
2 import pycuda.autoinit
3 import pycuda.driver as drv
4 import pycuda.gpuarray as gpuarray
5 import numpy
6 import numpy.linalg as la
7
8
9
10
11 def main_cg():
12 from optparse import OptionParser
13
14 parser = OptionParser(
15 usage="%prog [options] MATRIX-MARKET-FILE")
16 parser.add_option("-s", "--is-symmetric", action="store_true",
17 help="Specify that the input matrix is already symmetric")
18 options, args = parser.parse_args()
19
20 from pycuda.tools import DeviceMemoryPool, PageLockedMemoryPool
21 dev_pool = DeviceMemoryPool()
22 pagelocked_pool = PageLockedMemoryPool()
23
24 from scipy.io import mmread
25 csr_mat = mmread(args[0]).tocsr().astype(numpy.float32)
26
27 inv_mat_diag = 1/csr_mat.diagonal()
28
29 print "building..."
30 from pycuda.sparse.packeted import PacketedSpMV
31 spmv = PacketedSpMV(csr_mat, options.is_symmetric, csr_mat.dtype)
32 rhs = numpy.random.rand(spmv.shape[0]).astype(spmv.dtype)
33
34 from pycuda.sparse.operator import DiagonalPreconditioner
35 if True:
36 precon = DiagonalPreconditioner(
37 spmv.permute(gpuarray.to_gpu(
38 inv_mat_diag, allocator=dev_pool.allocate)))
39 else:
40 precon = None
41
42 from pycuda.sparse.cg import solve_pkt_with_cg
43 print "start solve"
44 for i in range(4):
45 start = drv.Event()
46 stop = drv.Event()
47 start.record()
48
49 rhs_gpu = gpuarray.to_gpu(rhs, dev_pool.allocate)
50 res_gpu, it_count, res_count = \
51 solve_pkt_with_cg(spmv, rhs_gpu, precon,
52 tol=1e-7 if spmv.dtype == numpy.float64 else 5e-5,
53 pagelocked_allocator=pagelocked_pool.allocate)
54 res = res_gpu.get()
55
56 stop.record()
57 stop.synchronize()
58
59 elapsed = stop.time_since(start)*1e-3
60 est_flops = (csr_mat.nnz*2*(it_count+res_count)
61 + csr_mat.shape[0]*(2+2+2+2+2)*it_count)
62
63 if precon is not None:
64 est_flops += csr_mat.shape[0] * it_count
65
66 print "residual norm: %g" % (la.norm(csr_mat*res - rhs)/la.norm(rhs))
67 print ("size: %d, elapsed: %g s, %d it, %d residual, it/second: %g, "
68 "%g gflops/s" % (
69 csr_mat.shape[0],
70 elapsed, it_count, res_count, it_count/elapsed,
71 est_flops/elapsed/1e9))
72
73
74
75 pagelocked_pool.stop_holding()
76 dev_pool.stop_holding()
77
78
79
80
81
82 if __name__ == "__main__":
83 print "starting..."
84 main_cg()