ocl_kernels_dia.hpp

// **************************************************************************
//
//    PARALUTION   www.paralution.com
//
//    Copyright (C) 2015  PARALUTION Labs UG (haftungsbeschränkt) & Co. KG
//                        Am Hasensprung 6, 76571 Gaggenau
//                        Handelsregister: Amtsgericht Mannheim, HRA 706051
//                        Vertreten durch:
//                        PARALUTION Labs Verwaltungs UG (haftungsbeschränkt)
//                        Am Hasensprung 6, 76571 Gaggenau
//                        Handelsregister: Amtsgericht Mannheim, HRB 721277
//                        Geschäftsführer: Dimitar Lukarski, Nico Trost
//
//    This program is free software: you can redistribute it and/or modify
//    it under the terms of the GNU General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    (at your option) any later version.
//
//    This program is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU General Public License for more details.
//
//    You should have received a copy of the GNU General Public License
//    along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// **************************************************************************


// PARALUTION version 1.1.0 


#ifndef PARALUTION_OCL_KERNELS_DIA_HPP_
#define PARALUTION_OCL_KERNELS_DIA_HPP_

namespace paralution {

// OpenCL C source text for the DIA (diagonal storage) sparse matrix-vector
// kernels. The adjacent string literals below are concatenated by the C++
// compiler into one string; the C++ comments placed between them are not part
// of that string.
//
// The kernel text uses the type names IndexType and ValueType but does not
// define them; presumably they are supplied elsewhere (e.g. a prepended
// typedef block or compiler -D options) before this source is built -- TODO
// confirm against the code that compiles this string.
//
// NOTE(review): this is a non-const pointer with external linkage defined in a
// header, so including the header from more than one translation unit would
// yield multiple definitions -- confirm it is included in exactly one place.
const char *ocl_kernels_dia =
// kernel_dia_spmv: y = A * x for a matrix A in DIA format.
//
// Reference: Nathan Bell and Michael Garland,
// "Efficient Sparse Matrix-Vector Multiplication on CUDA",
// NVIDIA Technical Report NVR-2008-004.
//
// Arguments:
//   num_rows  - number of rows; work-items whose global id is >= num_rows
//               do nothing
//   num_cols  - number of columns; used to bounds-check the column index
//   num_diags - number of stored diagonals
//   Aoffsets  - per-diagonal offset; column of diagonal n in a given row is
//               row + Aoffsets[n]
//   Aval      - diagonal values, indexed as n * num_rows + row
//   x         - input vector, read at the computed column index
//   y         - output vector; y[row] is overwritten
	"__kernel void kernel_dia_spmv(         const IndexType num_rows,\n"
	"                                       const IndexType num_cols,\n"
	"                                       const IndexType num_diags,\n"
	"                              __global const IndexType *Aoffsets,\n"
	"                              __global const ValueType *Aval,\n"
	"                              __global const ValueType *x,\n"
	"                              __global       ValueType *y) {\n"
	"\n"
// One work-item handles one row, selected by its global id in dimension 0.
	"  IndexType row = get_global_id(0);\n"
	"\n"
	"  if (row < num_rows) {\n"
	"\n"
	"    ValueType sum = (ValueType) 0;\n"
	"\n"
// Accumulate this row's contribution from every stored diagonal.
	"    for (IndexType n=0; n<num_diags; ++n) {\n"
	"\n"
	"      const IndexType ind = n * num_rows + row;\n"
	"      const IndexType col = row + Aoffsets[n];\n"
	"      \n"
// Entries whose column falls outside [0, num_cols) are skipped.
	"      if ((col >= 0) && (col < num_cols))\n"
	"        sum += Aval[ind] * x[col];\n"
	"\n"
	"    }\n"
	"        \n"
	"    y[row] = sum;\n"
	"\n"
	"  }\n"
	"\n"
	"}\n"
	"\n"
// kernel_dia_add_spmv: y = y + scalar * (A * x) for a matrix A in DIA format.
//
// Reference: Nathan Bell and Michael Garland,
// "Efficient Sparse Matrix-Vector Multiplication on CUDA",
// NVIDIA Technical Report NVR-2008-004.
//
// Arguments are the same as for kernel_dia_spmv, plus:
//   scalar - factor applied to the row's dot product before it is added
//            to the existing value of y[row]
	"__kernel void kernel_dia_add_spmv(         const IndexType num_rows,\n"
	"                                           const IndexType num_cols,\n"
	"                                           const IndexType num_diags,\n"
	"                                  __global const IndexType *Aoffsets,\n"
	"                                  __global const ValueType *Aval,\n"
	"                                           const ValueType scalar,\n"
	"                                  __global const ValueType *x,\n"
	"                                  __global       ValueType *y) {\n"
	"\n"
// One work-item handles one row, selected by its global id in dimension 0.
	"  IndexType row = get_global_id(0);\n"
	"\n"
	"  if (row < num_rows) {\n"
	"\n"
	"    ValueType sum = (ValueType) 0;\n"
	"\n"
// Same per-row accumulation over the stored diagonals as in kernel_dia_spmv.
	"    for (IndexType n=0; n<num_diags; ++n) {\n"
	"\n"
	"      const IndexType ind = n * num_rows + row;\n"
	"      const IndexType col = row + Aoffsets[n];\n"
	"      \n"
// Entries whose column falls outside [0, num_cols) are skipped.
	"      if ((col >= 0) && (col < num_cols))\n"
	"        sum += Aval[ind] * x[col];\n"
	"\n"
	"    }\n"
	"\n"
// Unlike kernel_dia_spmv, the result is scaled and added to y[row] rather
// than overwriting it.
	"    y[row] += scalar * sum;\n"
	"\n"
	"  }\n"
	"\n"
	"}\n"
	"\n"
	"\n"
;
}

#endif // PARALUTION_OCL_KERNELS_DIA_HPP_