// **************************************************************************
//
//    PARALUTION   www.paralution.com
//
//    Copyright (C) 2015  PARALUTION Labs UG (haftungsbeschränkt) & Co. KG
//                        Am Hasensprung 6, 76571 Gaggenau
//                        Handelsregister: Amtsgericht Mannheim, HRA 706051
//                        Vertreten durch:
//                        PARALUTION Labs Verwaltungs UG (haftungsbeschränkt)
//                        Am Hasensprung 6, 76571 Gaggenau
//                        Handelsregister: Amtsgericht Mannheim, HRB 721277
//                        Geschäftsführer: Dimitar Lukarski, Nico Trost
//
//    This program is free software: you can redistribute it and/or modify
//    it under the terms of the GNU General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    (at your option) any later version.
//
//    This program is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//    GNU General Public License for more details.
//
//    You should have received a copy of the GNU General Public License
//    along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// **************************************************************************

// PARALUTION version 1.1.0

#include "../../utils/def.hpp"
#include "host_conversion.hpp"
#include "../matrix_formats.hpp"
#include "../matrix_formats_ind.hpp"
#include "../../utils/allocate_free.hpp"
#include "../../utils/log.hpp"

#include <stdlib.h>
#include <complex>

#ifdef _OPENMP
#include <omp.h>
#else
#define omp_set_num_threads(num) ;
#endif

namespace paralution {

template <typename ValueType, typename IndexType>
bool csr_to_dense(const int omp_threads,
                  const IndexType nnz, const IndexType nrow, const IndexType ncol,
                  const MatrixCSR<ValueType, IndexType> &src, MatrixDENSE<ValueType> *dst) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  allocate_host(nrow*ncol, &dst->val);
  set_to_zero_host(nrow*ncol, dst->val);

#pragma omp parallel for
  for (IndexType i = 0; i < nrow; ++i)
    for (IndexType j = src.row_offset[i]; j < src.row_offset[i+1]; ++j)
      dst->val[DENSE_IND(i, src.col[j], nrow, ncol)] = src.val[j];

  return true;

}

template <typename ValueType, typename IndexType>
bool dense_to_csr(const int omp_threads,
                  const IndexType nrow, const IndexType ncol,
                  const MatrixDENSE<ValueType> &src, MatrixCSR<ValueType, IndexType> *dst,
                  IndexType *nnz) {

  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  allocate_host(nrow+1, &dst->row_offset);
  set_to_zero_host(nrow+1, dst->row_offset);

  // count the non-zero entries of each row
#pragma omp parallel for
  for (IndexType i = 0; i < nrow; ++i)
    for (IndexType j = 0; j < ncol; ++j)
      if (src.val[DENSE_IND(i, j, nrow, ncol)] != ValueType(0.0))
        dst->row_offset[i] += 1;

  // exclusive prefix sum to obtain the row offsets
  *nnz = 0;
  for (IndexType i = 0; i < nrow; ++i) {
    IndexType tmp = dst->row_offset[i];
    dst->row_offset[i] = *nnz;
    *nnz += tmp;
  }

  dst->row_offset[nrow] = *nnz;

  allocate_host(*nnz, &dst->col);
  allocate_host(*nnz, &dst->val);
  set_to_zero_host(*nnz, dst->col);
  set_to_zero_host(*nnz, dst->val);

  // fill column indices and values
#pragma omp parallel for
  for (IndexType i = 0; i < nrow; ++i) {

    IndexType ind = dst->row_offset[i];

    for (IndexType j = 0; j < ncol; ++j)
      if (src.val[DENSE_IND(i, j, nrow, ncol)] != ValueType(0.0)) {
        dst->val[ind] = src.val[DENSE_IND(i, j, nrow, ncol)];
        dst->col[ind] = j;
        ++ind;
      }

  }

  return true;

}
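// Illustration (a sketch, not part of the library): for the 3x3 example matrix
//
//         [ 4 0 1 ]
//     A = [ 0 5 0 ]
//         [ 2 0 6 ]
//
// dense_to_csr produces
//
//     row_offset = { 0, 2, 3, 5 }
//     col        = { 0, 2, 1, 0, 2 }
//     val        = { 4, 1, 5, 2, 6 }
//
// and csr_to_dense scatters these entries back through DENSE_IND(i, j, nrow, ncol),
// whose storage order is defined in matrix_formats_ind.hpp.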
template <typename ValueType, typename IndexType>
bool csr_to_mcsr(const int omp_threads,
                 const IndexType nnz, const IndexType nrow, const IndexType ncol,
                 const MatrixCSR<ValueType, IndexType> &src, MatrixMCSR<ValueType, IndexType> *dst) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  // No support for non-square matrices
  if (nrow != ncol)
    return false;

  omp_set_num_threads(omp_threads);

  // Pre-analysis step to check for zero diagonal entries
  IndexType diag_entries = 0;

  for (int i=0; i<nrow; ++i)
    for (int j=src.row_offset[i]; j<src.row_offset[i+1]; ++j)
      if (i == src.col[j])
        ++diag_entries;

  IndexType zero_diag_entries = nrow - diag_entries;

  // MCSR does not support zero diagonal entries (yet)
  if (zero_diag_entries > 0)
    return false;

  allocate_host(nrow+1, &dst->row_offset);
  allocate_host(nnz, &dst->col);
  allocate_host(nnz, &dst->val);

  set_to_zero_host(nrow+1, dst->row_offset);
  set_to_zero_host(nnz, dst->col);
  set_to_zero_host(nnz, dst->val);

  for (IndexType ai = 0; ai < nrow+1; ++ai)
    dst->row_offset[ai] = nrow + src.row_offset[ai] - ai;

#pragma omp parallel for
  for (IndexType ai=0; ai<nrow; ++ai) {

    IndexType correction = ai;

    for (IndexType aj=src.row_offset[ai]; aj<src.row_offset[ai+1]; ++aj)
      if (ai != src.col[aj]) {

        // non-diag
        IndexType ind = nrow + aj - correction;
        dst->col[ind] = src.col[aj];
        dst->val[ind] = src.val[aj];

      } else {

        // diag
        dst->val[ai] = src.val[aj];
        ++correction;

      }

  }

  if (dst->row_offset[nrow] != src.row_offset[nrow])
    return false;

  return true;

}

template <typename ValueType, typename IndexType>
bool mcsr_to_csr(const int omp_threads,
                 const IndexType nnz, const IndexType nrow, const IndexType ncol,
                 const MatrixMCSR<ValueType, IndexType> &src, MatrixCSR<ValueType, IndexType> *dst) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  // Only support square matrices
  if (nrow != ncol)
    return false;

  omp_set_num_threads(omp_threads);

  allocate_host(nrow+1, &dst->row_offset);
  allocate_host(nnz, &dst->col);
  allocate_host(nnz, &dst->val);

  set_to_zero_host(nrow+1, dst->row_offset);
  set_to_zero_host(nnz, dst->col);
  set_to_zero_host(nnz, dst->val);

  for (IndexType ai = 0; ai < nrow+1; ++ai)
    dst->row_offset[ai] = src.row_offset[ai] - nrow + ai;

#pragma omp parallel for
  for (IndexType ai=0; ai<nrow; ++ai) {

    // non-diag
    for (IndexType aj=src.row_offset[ai]; aj<src.row_offset[ai+1]; ++aj) {
      IndexType ind = aj - nrow + ai;
      dst->col[ind] = src.col[aj];
      dst->val[ind] = src.val[aj];
    }

    // diag
    IndexType diag_ind = src.row_offset[ai+1] - nrow + ai;
    dst->val[diag_ind] = src.val[ai];
    dst->col[diag_ind] = ai;

  }

  if (dst->row_offset[nrow] != src.row_offset[nrow])
    return false;

  // Sort the column indices of each row (bubble sort algorithm)
#pragma omp parallel for
  for (IndexType i=0; i<nrow; ++i)
    for (IndexType j=dst->row_offset[i]; j<dst->row_offset[i+1]; ++j)
      for (IndexType jj=dst->row_offset[i]; jj<dst->row_offset[i+1]-1; ++jj)
        if (dst->col[jj] > dst->col[jj+1]) {
          // swap elements
          IndexType ind = dst->col[jj];
          ValueType val = dst->val[jj];
          dst->col[jj] = dst->col[jj+1];
          dst->val[jj] = dst->val[jj+1];
          dst->col[jj+1] = ind;
          dst->val[jj+1] = val;
        }

  return true;

}
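// Illustration (a sketch, not part of the library): MCSR keeps the full
// diagonal in val[0..nrow-1] and shifts the row pointers past it.  For the
// example matrix A above, with CSR arrays row_offset = {0,2,3,5},
// col = {0,2,1,0,2}, val = {4,1,5,2,6}, csr_to_mcsr yields
//
//     row_offset = { 3, 4, 4, 5 }      // nrow + csr_row_offset[i] - i
//     val        = { 4, 5, 6, 1, 2 }   // diagonal first, then off-diagonals
//     col        = { 0, 0, 0, 2, 0 }   // col[0..nrow-1] are unused (zeroed)
//
// mcsr_to_csr inverts this mapping and then bubble-sorts the column indices
// of each row.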
template <typename ValueType, typename IndexType>
bool csr_to_coo(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixCSR<ValueType, IndexType> &src, MatrixCOO<ValueType, IndexType> *dst) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  allocate_host(nnz, &dst->row);
  allocate_host(nnz, &dst->col);
  allocate_host(nnz, &dst->val);

  set_to_zero_host(nnz, dst->row);
  set_to_zero_host(nnz, dst->col);
  set_to_zero_host(nnz, dst->val);

  // expand the row pointers into explicit row indices
#pragma omp parallel for
  for (IndexType i = 0; i < nrow; ++i)
    for (IndexType j = src.row_offset[i]; j < src.row_offset[i+1]; ++j)
      dst->row[j] = i;

#pragma omp parallel for
  for (IndexType i = 0; i < nnz; ++i)
    dst->col[i] = src.col[i];

#pragma omp parallel for
  for (IndexType i = 0; i < nnz; ++i)
    dst->val[i] = src.val[i];

  return true;

}

template <typename ValueType, typename IndexType>
bool csr_to_ell(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixCSR<ValueType, IndexType> &src, MatrixELL<ValueType, IndexType> *dst,
                IndexType *nnz_ell) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  // determine the maximum number of non-zeros per row
  dst->max_row = 0;
  for (IndexType i = 0; i < nrow; ++i) {
    IndexType max_row = src.row_offset[i+1] - src.row_offset[i];
    if (max_row > dst->max_row)
      dst->max_row = max_row;
  }

  *nnz_ell = dst->max_row * nrow;

  allocate_host(*nnz_ell, &dst->val);
  allocate_host(*nnz_ell, &dst->col);

  set_to_zero_host(*nnz_ell, dst->val);
  set_to_zero_host(*nnz_ell, dst->col);

#pragma omp parallel for
  for (IndexType i = 0; i < nrow; ++i) {

    IndexType n = 0;

    for (IndexType j = src.row_offset[i]; j < src.row_offset[i+1]; ++j) {
      IndexType ind = ELL_IND(i, n, nrow, dst->max_row);
      dst->val[ind] = src.val[j];
      dst->col[ind] = src.col[j];
      ++n;
    }

    // pad the remaining slots of the row
    for (IndexType j = src.row_offset[i+1]-src.row_offset[i]; j < dst->max_row; ++j) {
      IndexType ind = ELL_IND(i, n, nrow, dst->max_row);
      dst->val[ind] = ValueType(0.0);
      dst->col[ind] = IndexType(-1);
      ++n;
    }

  }

  return true;

}

template <typename ValueType, typename IndexType>
bool ell_to_csr(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixELL<ValueType, IndexType> &src, MatrixCSR<ValueType, IndexType> *dst,
                IndexType *nnz_csr) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  allocate_host(nrow+1, &dst->row_offset);
  set_to_zero_host(nrow+1, dst->row_offset);

#pragma omp parallel for
  for (IndexType ai=0; ai<nrow; ++ai) {
    for (IndexType n=0; n<src.max_row; ++n) {
      IndexType aj = ELL_IND(ai, n, nrow, src.max_row);
      if ((src.col[aj] >= 0) && (src.col[aj] < ncol))
        dst->row_offset[ai] += 1;
    }
  }

  *nnz_csr = 0;
  for (IndexType i = 0; i < nrow; ++i) {
    IndexType tmp = dst->row_offset[i];
    dst->row_offset[i] = *nnz_csr;
    *nnz_csr += tmp;
  }

  dst->row_offset[nrow] = *nnz_csr;

  allocate_host(*nnz_csr, &dst->col);
  allocate_host(*nnz_csr, &dst->val);

  set_to_zero_host(*nnz_csr, dst->col);
  set_to_zero_host(*nnz_csr, dst->val);

#pragma omp parallel for
  for (IndexType ai=0; ai<nrow; ++ai) {

    IndexType ind = dst->row_offset[ai];

    for (IndexType n=0; n<src.max_row; ++n) {
      IndexType aj = ELL_IND(ai, n, nrow, src.max_row);
      if ((src.col[aj] >= 0) && (src.col[aj] < ncol)) {
        dst->col[ind] = src.col[aj];
        dst->val[ind] = src.val[aj];
        ++ind;
      }
    }

  }

  return true;

}
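// Illustration (a sketch, not part of the library): csr_to_coo simply expands
// the row pointers, so the example matrix A above becomes
//
//     row = { 0, 0, 1, 2, 2 },  col = { 0, 2, 1, 0, 2 },  val = { 4, 1, 5, 2, 6 }.
//
// For ELL, the widest row of A has 2 entries, so csr_to_ell sets max_row = 2
// and nnz_ell = 2*nrow = 6; each row is padded to max_row slots
// (padding: col = -1, val = 0):
//
//     row 0:  (col 0, val 4)   (col 2, val 1)
//     row 1:  (col 1, val 5)   (col -1, val 0)
//     row 2:  (col 0, val 2)   (col 2, val 6)
//
// The position of slot (i, n) in the flat col/val arrays is given by
// ELL_IND(i, n, nrow, max_row) from matrix_formats_ind.hpp; ell_to_csr copies
// back only the slots whose column index lies inside [0, ncol).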
template <typename ValueType, typename IndexType>
bool hyb_to_csr(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const IndexType nnz_ell, const IndexType nnz_coo,
                const MatrixHYB<ValueType, IndexType> &src, MatrixCSR<ValueType, IndexType> *dst,
                IndexType *nnz_csr) {

  assert(nnz > 0);
  assert(nnz == nnz_ell + nnz_coo);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  allocate_host(nrow+1, &dst->row_offset);
  set_to_zero_host(nrow+1, dst->row_offset);

  IndexType start;
  start = 0;

  // count entries per row (ELL part + COO part)
  // TODO
  // #pragma omp parallel for private(start)
  for (IndexType ai=0; ai<nrow; ++ai) {

    // ELL
    for (IndexType n=0; n<src.ELL.max_row; ++n) {
      IndexType aj = ELL_IND(ai, n, nrow, src.ELL.max_row);
      if ((src.ELL.col[aj] >= 0) && (src.ELL.col[aj] < ncol))
        dst->row_offset[ai] += 1;
    }

    // COO
    for (IndexType i = start; i < nnz_coo; ++i) {

      if (src.COO.row[i] == ai) {
        dst->row_offset[ai] += 1;
        ++start;
      }

      if (src.COO.row[i] > ai)
        break;

    }

  }

  *nnz_csr = 0;
  for (IndexType i = 0; i < nrow; ++i) {
    IndexType tmp = dst->row_offset[i];
    dst->row_offset[i] = *nnz_csr;
    *nnz_csr += tmp;
  }

  dst->row_offset[nrow] = *nnz_csr;

  allocate_host(*nnz_csr, &dst->col);
  allocate_host(*nnz_csr, &dst->val);

  set_to_zero_host(*nnz_csr, dst->col);
  set_to_zero_host(*nnz_csr, dst->val);

  start = 0;

  // TODO
  //#pragma omp parallel for private(start)
  for (IndexType ai=0; ai<nrow; ++ai) {

    IndexType ind = dst->row_offset[ai];

    // ELL
    for (IndexType n=0; n<src.ELL.max_row; ++n) {
      IndexType aj = ELL_IND(ai, n, nrow, src.ELL.max_row);
      if ((src.ELL.col[aj] >= 0) && (src.ELL.col[aj] < ncol)) {
        dst->col[ind] = src.ELL.col[aj];
        dst->val[ind] = src.ELL.val[aj];
        ++ind;
      }
    }

    // COO
    for (IndexType i = start; i < nnz_coo; ++i) {

      if (src.COO.row[i] == ai) {
        dst->col[ind] = src.COO.col[i];
        dst->val[ind] = src.COO.val[i];
        ++ind;
        ++start;
      }

      if (src.COO.row[i] > ai)
        break;

    }

  }

  return true;

}

// ----------------------------------------------------------
// function coo_to_csr(...)
// ----------------------------------------------------------
// Modified and adapted from CUSP 0.3.1,
// http://code.google.com/p/cusp-library/
// NVIDIA, APACHE LICENSE 2.0
// ----------------------------------------------------------
// CHANGELOG
// - adapted interface
// - Bubble sort the column indices of each row after conversion
// - OpenMP pragma for
// ----------------------------------------------------------
template <typename ValueType, typename IndexType>
bool coo_to_csr(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixCOO<ValueType, IndexType> &src, MatrixCSR<ValueType, IndexType> *dst) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  allocate_host(nrow+1, &dst->row_offset);
  allocate_host(nnz, &dst->col);
  allocate_host(nnz, &dst->val);

  set_to_zero_host(nrow+1, dst->row_offset);
  set_to_zero_host(nnz, dst->col);
  set_to_zero_host(nnz, dst->val);

  // compute the number of entries per row of the CSR matrix
  for (IndexType n = 0; n < nnz; ++n)
    dst->row_offset[src.row[n]] = dst->row_offset[src.row[n]] + 1;

  // cumulative sum of the entries per row to get dst->row_offset[]
  IndexType cumsum = 0;
  for (IndexType i = 0; i < nrow; ++i) {
    IndexType temp = dst->row_offset[i];
    dst->row_offset[i] = cumsum;
    cumsum += temp;
  }
  dst->row_offset[nrow] = cumsum;

  // write the COO column indices and values into dst->col, dst->val
  for (IndexType n = 0; n < nnz; ++n) {
    IndexType row  = src.row[n];
    IndexType dest = dst->row_offset[row];

    dst->col[dest] = src.col[n];
    dst->val[dest] = src.val[n];

    dst->row_offset[row] = dst->row_offset[row] + 1;
  }

  // restore the row offsets (the scatter above advanced them to the row ends)
  IndexType last = 0;
  for (IndexType i = 0; i <= nrow; ++i) {
    IndexType temp = dst->row_offset[i];
    dst->row_offset[i] = last;
    last = temp;
  }

  // Sort the column indices of each row (bubble sort algorithm)
#pragma omp parallel for
  for (IndexType i=0; i<nrow; ++i)
    for (IndexType j=dst->row_offset[i]; j<dst->row_offset[i+1]; ++j)
      for (IndexType jj=dst->row_offset[i]; jj<dst->row_offset[i+1]-1; ++jj)
        if (dst->col[jj] > dst->col[jj+1]) {
          // swap elements
          IndexType ind = dst->col[jj];
          ValueType val = dst->val[jj];
          dst->col[jj] = dst->col[jj+1];
          dst->val[jj] = dst->val[jj+1];
          dst->col[jj+1] = ind;
          dst->val[jj+1] = val;
        }

  return true;

}
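// Illustration (a sketch, not part of the library): coo_to_csr follows the
// usual CUSP three-step scheme.  For the COO input row = {0,0,1,2,2},
// col = {0,2,1,0,2}, val = {4,1,5,2,6}:
//
//   1. count entries per row:      row_offset = { 2, 1, 2, 0 }
//   2. exclusive prefix sum:       row_offset = { 0, 2, 3, 5 }
//   3. scatter col/val using the row pointers as write cursors, restore the
//      offsets (each cursor ends up at its row's end), and bubble-sort the
//      column indices of each row.
//
// Since this COO input is already row-ordered, the result equals the CSR
// arrays shown earlier.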
// ----------------------------------------------------------
// function csr_to_dia(...)
// ----------------------------------------------------------
// Modified and adapted from CUSP 0.3.1,
// http://code.google.com/p/cusp-library/
// NVIDIA, APACHE LICENSE 2.0
// ----------------------------------------------------------
// CHANGELOG
// - adapted interface
// - OpenMP pragma for
// ----------------------------------------------------------
template <typename ValueType, typename IndexType>
bool csr_to_dia(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixCSR<ValueType, IndexType> &src, MatrixDIA<ValueType, IndexType> *dst,
                IndexType *nnz_dia) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  if (nrow != ncol)
    return false;

  omp_set_num_threads(omp_threads);

  // compute the number of occupied diagonals and enumerate them
  dst->num_diag = 0;

  IndexType *diag_map = NULL;
  allocate_host(nrow+ncol, &diag_map);
  set_to_zero_host(nrow+ncol, diag_map);

  for (IndexType i = 0; i < nrow; i++) {
    for (IndexType jj = src.row_offset[i]; jj < src.row_offset[i+1]; jj++) {
      IndexType j = src.col[jj];
      IndexType map_index = (nrow - i) + j; // offset shifted by + num_rows

      if (diag_map[map_index] == 0) {
        diag_map[map_index] = 1;
        ++dst->num_diag;
      }
    }
  }

  if (nrow < ncol) {
    *nnz_dia = ncol * dst->num_diag;
  } else {
    *nnz_dia = nrow * dst->num_diag;
  }

  // Conversion fails if the number of diagonals is too large
  if (dst->num_diag > 200) {
    free_host(&diag_map);
    return false;
  }

  // allocate DIA structure
  allocate_host(dst->num_diag, &dst->offset);
  set_to_zero_host(dst->num_diag, dst->offset);

  allocate_host(*nnz_dia, &dst->val);
  set_to_zero_host(*nnz_dia, dst->val);

  // fill in the diagonal offsets array
  for (IndexType n = 0, diag = 0; n < nrow + ncol; n++) {
    if (diag_map[n] == 1) {
      diag_map[n] = diag;
      dst->offset[diag] = IndexType(n) - IndexType(nrow);
      diag++;
    }
  }

#pragma omp parallel for
  for (IndexType i = 0; i < nrow; i++) {
    for (IndexType jj = src.row_offset[i]; jj < src.row_offset[i+1]; jj++) {
      IndexType j = src.col[jj];
      IndexType map_index = (nrow - i) + j; // offset shifted by + num_rows
      IndexType diag = diag_map[map_index];

      dst->val[DIA_IND(i, diag, nrow, dst->num_diag)] = src.val[jj];
    }
  }

  free_host(&diag_map);

  return true;

}
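// Illustration (a sketch, not part of the library): a DIA diagonal is
// addressed by its offset j - i.  For the example matrix A above the occupied
// diagonals are {-2, 0, +2}, so csr_to_dia sets num_diag = 3,
// offset = { -2, 0, 2 } and nnz_dia = nrow * num_diag = 9; every diagonal is
// stored padded to nrow entries and addressed through
// DIA_IND(i, diag, nrow, num_diag).  The hard limit of 200 diagonals above
// keeps the format from exploding for matrices with many scattered diagonals.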
// ----------------------------------------------------------
// function dia_to_csr(...)
// ----------------------------------------------------------
// Modified and adapted from CUSP 0.3.1,
// http://code.google.com/p/cusp-library/
// NVIDIA, APACHE LICENSE 2.0
// ----------------------------------------------------------
// CHANGELOG
// - adapted interface
// ----------------------------------------------------------
template <typename ValueType, typename IndexType>
bool dia_to_csr(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixDIA<ValueType, IndexType> &src, MatrixCSR<ValueType, IndexType> *dst,
                IndexType *nnz_csr) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  *nnz_csr = 0;

  // count nonzero entries
  for (IndexType i = 0; i < nrow; i++) {
    for (IndexType n = 0; n < src.num_diag; n++) {

      const IndexType j = i + src.offset[n];

      // if(j >= IndexType(0) && j < IndexType(ncol) && src.val[i*src.num_diag+n] != ValueType(0.0))
      if (j >= IndexType(0) && j < IndexType(ncol) && src.val[DIA_IND(i, n, nrow, src.num_diag)] != ValueType(0.0))
        ++(*nnz_csr);

    }
  }

  allocate_host(nrow+1, &dst->row_offset);
  allocate_host(*nnz_csr, &dst->col);
  allocate_host(*nnz_csr, &dst->val);

  set_to_zero_host(nrow+1, dst->row_offset);
  set_to_zero_host(*nnz_csr, dst->col);
  set_to_zero_host(*nnz_csr, dst->val);

  *nnz_csr = 0;
  dst->row_offset[0] = 0;

  // copy nonzero entries to CSR structure
  for (IndexType i = 0; i < nrow; i++) {

    for (IndexType n = 0; n < src.num_diag; n++) {

      const IndexType j = i + src.offset[n];

      if (j >= IndexType(0) && j < IndexType(ncol)) {

        IndexType ind = DIA_IND(i, n, nrow, src.num_diag);
        const ValueType value = src.val[ind];

        if (value != ValueType(0.0)) {
          dst->col[*nnz_csr] = j;
          dst->val[*nnz_csr] = value;
          ++(*nnz_csr);
        }

      }

    }

    dst->row_offset[i+1] = *nnz_csr;

  }

  return true;

}
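// Illustration (a sketch, not part of the library): dia_to_csr walks the same
// DIA_IND(i, n, nrow, num_diag) slots but skips positions that fall outside
// the matrix (j < 0 or j >= ncol) as well as stored zeros, so nnz_csr can be
// smaller than nnz_dia.  Applied to the DIA data above it recovers exactly
// the CSR arrays row_offset = {0,2,3,5}, col = {0,2,1,0,2}, val = {4,1,5,2,6}.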
// ----------------------------------------------------------
// function csr_to_hyb(...)
// ----------------------------------------------------------
// Modified and adapted from CUSP 0.3.1,
// http://code.google.com/p/cusp-library/
// NVIDIA, APACHE LICENSE 2.0
// ----------------------------------------------------------
// CHANGELOG
// - adapted interface
// - the entries per row are defined outside or as nnz/nrow
// ----------------------------------------------------------
template <typename ValueType, typename IndexType>
bool csr_to_hyb(const int omp_threads,
                const IndexType nnz, const IndexType nrow, const IndexType ncol,
                const MatrixCSR<ValueType, IndexType> &src, MatrixHYB<ValueType, IndexType> *dst,
                IndexType *nnz_hyb, IndexType *nnz_ell, IndexType *nnz_coo) {

  assert(nnz  > 0);
  assert(nrow > 0);
  assert(ncol > 0);

  omp_set_num_threads(omp_threads);

  if (dst->ELL.max_row == 0)
    dst->ELL.max_row = (nnz / nrow);

  // count the entries that overflow into the COO part
  *nnz_coo = 0;
  for (IndexType i=0; i<nrow; ++i) {
    IndexType nnz_per_row = src.row_offset[i+1] - src.row_offset[i];
    if (nnz_per_row > dst->ELL.max_row)
      *nnz_coo += nnz_per_row - dst->ELL.max_row;
  }

  *nnz_ell = dst->ELL.max_row * nrow;
  *nnz_hyb = *nnz_coo + *nnz_ell;

  if (*nnz_coo <= 0 || *nnz_ell <= 0 || *nnz_hyb <= 0)
    return false;

  // ELL
  allocate_host(*nnz_ell, &dst->ELL.val);
  allocate_host(*nnz_ell, &dst->ELL.col);

  set_to_zero_host(*nnz_ell, dst->ELL.val);

#pragma omp parallel for
  for (IndexType i = 0; i < *nnz_ell; i++)
    dst->ELL.col[i] = IndexType(-1);

  // COO
  allocate_host(*nnz_coo, &dst->COO.row);
  allocate_host(*nnz_coo, &dst->COO.col);
  allocate_host(*nnz_coo, &dst->COO.val);

  set_to_zero_host(*nnz_coo, dst->COO.row);
  set_to_zero_host(*nnz_coo, dst->COO.col);
  set_to_zero_host(*nnz_coo, dst->COO.val);

  IndexType *nnzcoo = NULL;
  IndexType *nnzell = NULL;
  allocate_host(nrow, &nnzcoo);
  allocate_host(nrow, &nnzell);

  // copy up to max_row values of row i into the ELL
#pragma omp parallel for
  for (IndexType i=0; i<nrow; ++i) {

    IndexType n = 0;
    nnzcoo[i] = 0;

    for (IndexType j=src.row_offset[i]; j<src.row_offset[i+1]; ++j) {

      if (n >= dst->ELL.max_row) {
        nnzcoo[i] = src.row_offset[i+1] - j;
        break;
      }

      IndexType ind = ELL_IND(i, n, nrow, dst->ELL.max_row);
      dst->ELL.col[ind] = src.col[j];
      dst->ELL.val[ind] = src.val[j];
      ++n;

    }

    nnzell[i] = n;

  }

  for (int i=1; i<nrow; ++i)
    nnzell[i] += nnzell[i-1];

  // copy any remaining values of row i into the COO
#pragma omp parallel for
  for (IndexType i=0; i<nrow; ++i) {
    for (IndexType j=src.row_offset[i+1] - nnzcoo[i]; j<src.row_offset[i+1]; ++j) {
      dst->COO.row[j-nnzell[i]] = i;
      dst->COO.col[j-nnzell[i]] = src.col[j];
      dst->COO.val[j-nnzell[i]] = src.val[j];
    }
  }

  free_host(&nnzcoo);
  free_host(&nnzell);

/*
  for(IndexType i = 0, coo_nnz = 0; i < nrow; i++) {

    IndexType n = 0;
    IndexType jj = src.row_offset[i];

    // copy up to num_cols_per_row values of row i into the ELL
    while(jj < src.row_offset[i+1] && n < dst->ELL.max_row) {
      IndexType ind = ELL_IND(i, n, nrow, dst->ELL.max_row);
      dst->ELL.col[ind] = src.col[jj];
      dst->ELL.val[ind] = src.val[jj];
      jj++, n++;
    }

    // copy any remaining values in row i into the COO
    while(jj < src.row_offset[i+1]) {
      dst->COO.row[coo_nnz] = i;
      dst->COO.col[coo_nnz] = src.col[jj];
      dst->COO.val[coo_nnz] = src.val[jj];
      jj++; coo_nnz++;
    }

  }
*/

  return true;

}
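// Illustration (a sketch, not part of the library): with the default split
// ELL.max_row = nnz / nrow, the example matrix A above (nnz = 5, nrow = 3)
// gets max_row = 1.  The first entry of every row lands in the ELL part and
// the overflow lands in the COO part:
//
//     ELL:  col = { 0, 1, 0 },  val = { 4, 5, 2 }       // one slot per row
//     COO:  row = { 0, 2 },  col = { 2, 2 },  val = { 1, 6 }
//
// so nnz_ell = 3, nnz_coo = 2, nnz_hyb = 5.  Note that the conversion returns
// false if either part would be empty (e.g. when no row overflows max_row).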
template bool csr_to_coo(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<double, int> &src, MatrixCOO<double, int> *dst);
template bool csr_to_coo(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<float, int> &src, MatrixCOO<float, int> *dst);
#ifdef SUPPORT_COMPLEX
template bool csr_to_coo(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<double>, int> &src, MatrixCOO<std::complex<double>, int> *dst);
template bool csr_to_coo(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<float>, int> &src, MatrixCOO<std::complex<float>, int> *dst);
#endif
template bool csr_to_coo(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<int, int> &src, MatrixCOO<int, int> *dst);

template bool csr_to_mcsr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<double, int> &src, MatrixMCSR<double, int> *dst);
template bool csr_to_mcsr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<float, int> &src, MatrixMCSR<float, int> *dst);
#ifdef SUPPORT_COMPLEX
template bool csr_to_mcsr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<double>, int> &src, MatrixMCSR<std::complex<double>, int> *dst);
template bool csr_to_mcsr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<float>, int> &src, MatrixMCSR<std::complex<float>, int> *dst);
#endif
template bool csr_to_mcsr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<int, int> &src, MatrixMCSR<int, int> *dst);

template bool mcsr_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR<double, int> &src, MatrixCSR<double, int> *dst);
template bool mcsr_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR<float, int> &src, MatrixCSR<float, int> *dst);
#ifdef SUPPORT_COMPLEX
template bool mcsr_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR<std::complex<double>, int> &src, MatrixCSR<std::complex<double>, int> *dst);
template bool mcsr_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR<std::complex<float>, int> &src, MatrixCSR<std::complex<float>, int> *dst);
#endif
template bool mcsr_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR<int, int> &src, MatrixCSR<int, int> *dst);

template bool csr_to_dia(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<double, int> &src, MatrixDIA<double, int> *dst, int *nnz_dia);
template bool csr_to_dia(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<float, int> &src, MatrixDIA<float, int> *dst, int *nnz_dia);
#ifdef SUPPORT_COMPLEX
template bool csr_to_dia(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<double>, int> &src, MatrixDIA<std::complex<double>, int> *dst, int *nnz_dia);
template bool csr_to_dia(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<float>, int> &src, MatrixDIA<std::complex<float>, int> *dst, int *nnz_dia);
#endif
template bool csr_to_dia(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<int, int> &src, MatrixDIA<int, int> *dst, int *nnz_dia);
template bool csr_to_hyb(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<double, int> &src, MatrixHYB<double, int> *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo);
template bool csr_to_hyb(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<float, int> &src, MatrixHYB<float, int> *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo);
#ifdef SUPPORT_COMPLEX
template bool csr_to_hyb(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<double>, int> &src, MatrixHYB<std::complex<double>, int> *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo);
template bool csr_to_hyb(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<float>, int> &src, MatrixHYB<std::complex<float>, int> *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo);
#endif
template bool csr_to_hyb(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<int, int> &src, MatrixHYB<int, int> *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo);

template bool csr_to_ell(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<double, int> &src, MatrixELL<double, int> *dst, int *nnz_ell);
template bool csr_to_ell(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<float, int> &src, MatrixELL<float, int> *dst, int *nnz_ell);
#ifdef SUPPORT_COMPLEX
template bool csr_to_ell(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<double>, int> &src, MatrixELL<std::complex<double>, int> *dst, int *nnz_ell);
template bool csr_to_ell(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<float>, int> &src, MatrixELL<std::complex<float>, int> *dst, int *nnz_ell);
#endif
template bool csr_to_ell(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<int, int> &src, MatrixELL<int, int> *dst, int *nnz_ell);

template bool csr_to_dense(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<double, int> &src, MatrixDENSE<double> *dst);
template bool csr_to_dense(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<float, int> &src, MatrixDENSE<float> *dst);
#ifdef SUPPORT_COMPLEX
template bool csr_to_dense(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<double>, int> &src, MatrixDENSE<std::complex<double> > *dst);
template bool csr_to_dense(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<std::complex<float>, int> &src, MatrixDENSE<std::complex<float> > *dst);
#endif
template bool csr_to_dense(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR<int, int> &src, MatrixDENSE<int> *dst);

template bool dense_to_csr(const int omp_threads, const int nrow, const int ncol, const MatrixDENSE<double> &src, MatrixCSR<double, int> *dst, int *nnz);
template bool dense_to_csr(const int omp_threads, const int nrow, const int ncol, const MatrixDENSE<float> &src, MatrixCSR<float, int> *dst, int *nnz);
#ifdef SUPPORT_COMPLEX
template bool dense_to_csr(const int omp_threads, const int nrow, const int ncol, const MatrixDENSE<std::complex<double> > &src, MatrixCSR<std::complex<double>, int> *dst, int *nnz);
template bool dense_to_csr(const int omp_threads, const int nrow, const int ncol, const MatrixDENSE<std::complex<float> > &src, MatrixCSR<std::complex<float>, int> *dst, int *nnz);
#endif
template bool dense_to_csr(const int omp_threads, const int nrow, const int ncol, const MatrixDENSE<int> &src, MatrixCSR<int, int> *dst, int *nnz);
template bool dia_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA<double, int> &src, MatrixCSR<double, int> *dst, int *nnz_csr);
template bool dia_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA<float, int> &src, MatrixCSR<float, int> *dst, int *nnz_csr);
#ifdef SUPPORT_COMPLEX
template bool dia_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA<std::complex<double>, int> &src, MatrixCSR<std::complex<double>, int> *dst, int *nnz_csr);
template bool dia_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA<std::complex<float>, int> &src, MatrixCSR<std::complex<float>, int> *dst, int *nnz_csr);
#endif
template bool dia_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA<int, int> &src, MatrixCSR<int, int> *dst, int *nnz_csr);

template bool ell_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL<double, int> &src, MatrixCSR<double, int> *dst, int *nnz_csr);
template bool ell_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL<float, int> &src, MatrixCSR<float, int> *dst, int *nnz_csr);
#ifdef SUPPORT_COMPLEX
template bool ell_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL<std::complex<double>, int> &src, MatrixCSR<std::complex<double>, int> *dst, int *nnz_csr);
template bool ell_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL<std::complex<float>, int> &src, MatrixCSR<std::complex<float>, int> *dst, int *nnz_csr);
#endif
template bool ell_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL<int, int> &src, MatrixCSR<int, int> *dst, int *nnz_csr);

template bool coo_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO<double, int> &src, MatrixCSR<double, int> *dst);
template bool coo_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO<float, int> &src, MatrixCSR<float, int> *dst);
#ifdef SUPPORT_COMPLEX
template bool coo_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO<std::complex<double>, int> &src, MatrixCSR<std::complex<double>, int> *dst);
template bool coo_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO<std::complex<float>, int> &src, MatrixCSR<std::complex<float>, int> *dst);
#endif
template bool coo_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO<int, int> &src, MatrixCSR<int, int> *dst);
template bool hyb_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB<double, int> &src, MatrixCSR<double, int> *dst, int *nnz_csr);
template bool hyb_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB<float, int> &src, MatrixCSR<float, int> *dst, int *nnz_csr);
#ifdef SUPPORT_COMPLEX
template bool hyb_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB<std::complex<double>, int> &src, MatrixCSR<std::complex<double>, int> *dst, int *nnz_csr);
template bool hyb_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB<std::complex<float>, int> &src, MatrixCSR<std::complex<float>, int> *dst, int *nnz_csr);
#endif
template bool hyb_to_csr(const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB<int, int> &src, MatrixCSR<int, int> *dst, int *nnz_csr);

}