// cuda_kernels_dense.hpp
// **************************************************************************
//
//    PARALUTION   www.paralution.com
//
//    Copyright (C) 2015  PARALUTION Labs UG (haftungsbeschränkt) & Co. KG
//                        Am Hasensprung 6, 76571 Gaggenau
//                        Handelsregister: Amtsgericht Mannheim, HRA 706051
//                        Vertreten durch:
//                        PARALUTION Labs Verwaltungs UG (haftungsbeschränkt)
//                        Am Hasensprung 6, 76571 Gaggenau
//                        Handelsregister: Amtsgericht Mannheim, HRB 721277
//                        Geschäftsführer: Dimitar Lukarski, Nico Trost
//
//    This program is free software: you can redistribute it and/or modify
//    it under the terms of the GNU General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    (at your option) any later version.
//
//    This program is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU General Public License for more details.
//
//    You should have received a copy of the GNU General Public License
//    along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// **************************************************************************



// PARALUTION version 1.1.0 


#ifndef PARALUTION_GPU_CUDA_KERNELS_DENSE_HPP_
#define PARALUTION_GPU_CUDA_KERNELS_DENSE_HPP_

#include "../matrix_formats_ind.hpp"

namespace paralution {

// Replace column vector
//
// Overwrites one column of the dense matrix `mat` with the entries of `vec`:
// for every ai in [0, nrow), mat[DENSE_IND(ai, idx, nrow, ncol)] = vec[ai].
//
//   vec  : source values, read at positions 0 .. nrow-1
//   idx  : column index handed to DENSE_IND; it is not range-checked here,
//          so the caller must pass a valid column
//   nrow : row count handed to DENSE_IND; also the number of entries copied
//   ncol : column count, only forwarded to DENSE_IND
//   mat  : destination matrix, addressed exclusively through DENSE_IND
//
// Launch: only the x dimension of the grid and block is used and each thread
// handles at most one entry, so the launch has to provide at least nrow
// threads along x. Surplus threads do nothing.
template <typename ValueType, typename IndexType>
__global__ void kernel_dense_replace_column_vector(const ValueType *vec, const IndexType idx, const IndexType nrow,
                                                   const IndexType ncol, ValueType *mat) {

  // blockIdx/blockDim/threadIdx are 32-bit unsigned. Converting the first
  // operand to IndexType before multiplying keeps the product from wrapping
  // at 2^32 when IndexType is a 64-bit type; for a 32-bit int the arithmetic
  // is the same as without the cast.
  const IndexType ai = static_cast<IndexType>(blockIdx.x) * blockDim.x + threadIdx.x;

  // Threads past the last row have nothing to copy
  if (ai < nrow) {
    mat[DENSE_IND(ai, idx, nrow, ncol)] = vec[ai];
  }

}

// Replace row vector
//
// Overwrites one row of the dense matrix `mat` with the entries of `vec`:
// for every aj in [0, ncol), mat[DENSE_IND(idx, aj, nrow, ncol)] = vec[aj].
//
//   vec  : source values, read at positions 0 .. ncol-1
//   idx  : row index handed to DENSE_IND; it is not range-checked here,
//          so the caller must pass a valid row
//   nrow : row count, only forwarded to DENSE_IND
//   ncol : column count handed to DENSE_IND; also the number of entries copied
//   mat  : destination matrix, addressed exclusively through DENSE_IND
//
// Launch: only the x dimension of the grid and block is used and each thread
// handles at most one entry, so the launch has to provide at least ncol
// threads along x. Surplus threads do nothing.
template <typename ValueType, typename IndexType>
__global__ void kernel_dense_replace_row_vector(const ValueType *vec, const IndexType idx, const IndexType nrow,
                                                const IndexType ncol, ValueType *mat) {

  // blockIdx/blockDim/threadIdx are 32-bit unsigned. Converting the first
  // operand to IndexType before multiplying keeps the product from wrapping
  // at 2^32 when IndexType is a 64-bit type; for a 32-bit int the arithmetic
  // is the same as without the cast.
  const IndexType aj = static_cast<IndexType>(blockIdx.x) * blockDim.x + threadIdx.x;

  // Threads past the last column have nothing to copy
  if (aj < ncol) {
    mat[DENSE_IND(idx, aj, nrow, ncol)] = vec[aj];
  }

}

// Extract column vector
//
// Copies one column of the dense matrix `mat` into `vec`:
// for every ai in [0, nrow), vec[ai] = mat[DENSE_IND(ai, idx, nrow, ncol)].
//
//   vec  : destination, written at positions 0 .. nrow-1
//   idx  : column index handed to DENSE_IND; it is not range-checked here,
//          so the caller must pass a valid column
//   nrow : row count handed to DENSE_IND; also the number of entries copied
//   ncol : column count, only forwarded to DENSE_IND
//   mat  : source matrix, read exclusively through DENSE_IND
//
// Launch: only the x dimension of the grid and block is used and each thread
// handles at most one entry, so the launch has to provide at least nrow
// threads along x. Surplus threads do nothing.
template <typename ValueType, typename IndexType>
__global__ void kernel_dense_extract_column_vector(ValueType *vec, const IndexType idx, const IndexType nrow,
                                                   const IndexType ncol, const ValueType *mat) {

  // blockIdx/blockDim/threadIdx are 32-bit unsigned. Converting the first
  // operand to IndexType before multiplying keeps the product from wrapping
  // at 2^32 when IndexType is a 64-bit type; for a 32-bit int the arithmetic
  // is the same as without the cast.
  const IndexType ai = static_cast<IndexType>(blockIdx.x) * blockDim.x + threadIdx.x;

  // Threads past the last row have nothing to copy
  if (ai < nrow) {
    vec[ai] = mat[DENSE_IND(ai, idx, nrow, ncol)];
  }

}

// Extract row vector
//
// Copies one row of the dense matrix `mat` into `vec`:
// for every aj in [0, ncol), vec[aj] = mat[DENSE_IND(idx, aj, nrow, ncol)].
//
//   vec  : destination, written at positions 0 .. ncol-1
//   idx  : row index handed to DENSE_IND; it is not range-checked here,
//          so the caller must pass a valid row
//   nrow : row count, only forwarded to DENSE_IND
//   ncol : column count handed to DENSE_IND; also the number of entries copied
//   mat  : source matrix, read exclusively through DENSE_IND
//
// Launch: only the x dimension of the grid and block is used and each thread
// handles at most one entry, so the launch has to provide at least ncol
// threads along x. Surplus threads do nothing.
template <typename ValueType, typename IndexType>
__global__ void kernel_dense_extract_row_vector(ValueType *vec, const IndexType idx, const IndexType nrow,
                                                const IndexType ncol, const ValueType *mat) {

  // blockIdx/blockDim/threadIdx are 32-bit unsigned. Converting the first
  // operand to IndexType before multiplying keeps the product from wrapping
  // at 2^32 when IndexType is a 64-bit type; for a 32-bit int the arithmetic
  // is the same as without the cast.
  const IndexType aj = static_cast<IndexType>(blockIdx.x) * blockDim.x + threadIdx.x;

  // Threads past the last column have nothing to copy
  if (aj < ncol) {
    vec[aj] = mat[DENSE_IND(idx, aj, nrow, ncol)];
  }

}


}

#endif