1 from __future__ import division
   2 import pycuda.autoinit
   3 import pycuda.driver as drv
   4 import pycuda.gpuarray as gpuarray
   5 import numpy
   6 import numpy.linalg as la
   7 
   8 
   9 
  10 
  11 def main_cg():
  12     from optparse import OptionParser
  13 
  14     parser = OptionParser(
  15             usage="%prog [options] MATRIX-MARKET-FILE")
  16     parser.add_option("-s", "--is-symmetric", action="store_true",
  17             help="Specify that the input matrix is already symmetric")
  18     options, args = parser.parse_args()
  19 
  20     from pycuda.tools import DeviceMemoryPool, PageLockedMemoryPool
  21     dev_pool = DeviceMemoryPool()
  22     pagelocked_pool = PageLockedMemoryPool()
  23 
  24     from scipy.io import mmread
  25     csr_mat = mmread(args[0]).tocsr().astype(numpy.float32)
  26 
  27     inv_mat_diag = 1/csr_mat.diagonal()
  28 
  29     print "building..."
  30     from pycuda.sparse.packeted import PacketedSpMV
  31     spmv = PacketedSpMV(csr_mat, options.is_symmetric, csr_mat.dtype)
  32     rhs = numpy.random.rand(spmv.shape[0]).astype(spmv.dtype)
  33 
  34     from pycuda.sparse.operator import DiagonalPreconditioner
  35     if True:
  36         precon = DiagonalPreconditioner(
  37                 spmv.permute(gpuarray.to_gpu(
  38                     inv_mat_diag, allocator=dev_pool.allocate)))
  39     else:
  40         precon = None
  41 
  42     from pycuda.sparse.cg import solve_pkt_with_cg
  43     print "start solve"
  44     for i in range(4):
  45         start = drv.Event()
  46         stop = drv.Event()
  47         start.record()
  48 
  49         rhs_gpu = gpuarray.to_gpu(rhs, dev_pool.allocate)
  50         res_gpu, it_count, res_count = \
  51                 solve_pkt_with_cg(spmv, rhs_gpu, precon,
  52                         tol=1e-7 if spmv.dtype == numpy.float64 else 5e-5,
  53                         pagelocked_allocator=pagelocked_pool.allocate)
  54         res = res_gpu.get()
  55 
  56         stop.record()
  57         stop.synchronize()
  58 
  59         elapsed = stop.time_since(start)*1e-3
  60         est_flops = (csr_mat.nnz*2*(it_count+res_count)
  61             + csr_mat.shape[0]*(2+2+2+2+2)*it_count)
  62 
  63         if precon is not None:
  64             est_flops += csr_mat.shape[0] * it_count
  65 
  66         print "residual norm: %g" % (la.norm(csr_mat*res - rhs)/la.norm(rhs))
  67         print ("size: %d, elapsed: %g s, %d it, %d residual, it/second: %g, "
  68                 "%g gflops/s" % (
  69                     csr_mat.shape[0],
  70                     elapsed, it_count, res_count, it_count/elapsed,
  71                     est_flops/elapsed/1e9))
  72 
  73     # TODO: mixed precision
  74     # TODO: benchmark
  75     pagelocked_pool.stop_holding()
  76     dev_pool.stop_holding()
  77 
  78 
  79 
  80 
  81 
  82 if __name__ == "__main__":
  83     print "starting..."
  84     main_cg()