// **************************************************************************
//
// PARALUTION www.paralution.com
//
// Copyright (C) 2015 PARALUTION Labs UG (haftungsbeschränkt) & Co. KG
// Am Hasensprung 6, 76571 Gaggenau
// Handelsregister: Amtsgericht Mannheim, HRA 706051
// Vertreten durch:
// PARALUTION Labs Verwaltungs UG (haftungsbeschränkt)
// Am Hasensprung 6, 76571 Gaggenau
// Handelsregister: Amtsgericht Mannheim, HRB 721277
// Geschäftsführer: Dimitar Lukarski, Nico Trost
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// **************************************************************************

// PARALUTION version 1.1.0

#ifndef PARALUTION_GPU_CUDA_KERNELS_MCSR_HPP_
#define PARALUTION_GPU_CUDA_KERNELS_MCSR_HPP_

#include "../matrix_formats_ind.hpp"

namespace paralution {

// Sparse matrix-vector product for a matrix in MCSR storage, one thread per row.
//
// For every row ai < nrow the kernel computes
//
//   out[ai] = val[ai]*in[ai] + sum_{aj = row_offset[ai]}^{row_offset[ai+1]-1} val[aj]*in[col[aj]]
//
// The leading val[ai]*in[ai] term is presumably the diagonal entry, which the
// MCSR layout keeps in the first nrow slots of val -- confirm against the
// host-side format conversion.
//
// Parameters:
//   nrow        number of rows to process
//   row_offset  per-row [begin, end) ranges into col/val; entries ai and ai+1 are read
//   col         column index of each stored entry
//   val         matrix values, addressed both by row index and by entry index
//   in          input vector
//   out         output vector, overwritten for rows ai < nrow
//
// Launch layout: only the x dimension of the grid/block is used, and there is
// no striding over rows, so the launch must provide at least nrow threads.
// Surplus threads (ai >= nrow) exit without touching memory.
template <typename ValueType, typename IndexType>
__global__ void kernel_mcsr_spmv_scalar(const IndexType nrow, const IndexType *row_offset,
                                        const IndexType *col, const ValueType *val,
                                        const ValueType *in, ValueType *out) {

  const IndexType ai = blockIdx.x*blockDim.x+threadIdx.x;

  // Tail guard: the grid rarely divides nrow evenly.
  if (!(ai < nrow))
    return;

  const IndexType row_begin = row_offset[ai];
  const IndexType row_end   = row_offset[ai+1];

  // Start from the val[ai]*in[ai] term, then add the remaining entries of the row.
  ValueType sum = val[ai] * in[ai];

  for (IndexType aj = row_begin; aj < row_end; ++aj)
    sum = sum + val[aj]*in[col[aj]];

  out[ai] = sum;

}

// Scaled accumulate variant of the MCSR sparse matrix-vector product, one
// thread per row.
//
// For every row ai < nrow the kernel updates
//
//   out[ai] = out[ai] + scalar*val[ai]*in[ai]
//                     + sum_{aj = row_offset[ai]}^{row_offset[ai+1]-1} scalar*val[aj]*in[col[aj]]
//
// The terms are added one by one in exactly this order. As in
// kernel_mcsr_spmv_scalar, val[ai] is presumably the diagonal entry of row ai
// -- confirm against the host-side format conversion.
//
// Parameters:
//   nrow        number of rows to process
//   row_offset  per-row [begin, end) ranges into col/val; entries ai and ai+1 are read
//   col         column index of each stored entry
//   val         matrix values, addressed both by row index and by entry index
//   scalar      factor applied to every product before it is accumulated
//   in          input vector
//   out         vector that is read and updated in place for rows ai < nrow
//
// Launch layout: only the x dimension of the grid/block is used, and there is
// no striding over rows, so the launch must provide at least nrow threads.
//
// NOTE(review): the running value is kept in a local and written back once,
// instead of re-reading and re-writing out[ai] for every stored entry. This
// assumes out does not overlap in, val, col or row_offset; an overlapping call
// would already have raced between threads -- confirm no caller relies on it.
template <typename ValueType, typename IndexType>
__global__ void kernel_mcsr_add_spmv_scalar(const IndexType nrow, const IndexType *row_offset,
                                            const IndexType *col, const ValueType *val,
                                            const ValueType scalar,
                                            const ValueType *in, ValueType *out) {

  const IndexType ai = blockIdx.x*blockDim.x+threadIdx.x;

  // Tail guard: the grid rarely divides nrow evenly.
  if (!(ai < nrow))
    return;

  // Load the row bounds once; the loop below no longer stores to memory, so
  // there is nothing that could invalidate them.
  const IndexType row_begin = row_offset[ai];
  const IndexType row_end   = row_offset[ai+1];

  // Same expression and same summation order as a per-entry update of out[ai],
  // but accumulated in a local so out[ai] is loaded and stored exactly once.
  ValueType acc = out[ai];

  acc = acc + scalar*val[ai] * in[ai];

  for (IndexType aj = row_begin; aj < row_end; ++aj)
    acc = acc + scalar*val[aj]*in[col[aj]];

  out[ai] = acc;

}


}

#endif