// ************************************************************************** // // PARALUTION www.paralution.com // // Copyright (C) 2015 PARALUTION Labs UG (haftungsbeschränkt) & Co. KG // Am Hasensprung 6, 76571 Gaggenau // Handelsregister: Amtsgericht Mannheim, HRA 706051 // Vertreten durch: // PARALUTION Labs Verwaltungs UG (haftungsbeschränkt) // Am Hasensprung 6, 76571 Gaggenau // Handelsregister: Amtsgericht Mannheim, HRB 721277 // Geschäftsführer: Dimitar Lukarski, Nico Trost // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. // // ************************************************************************** // PARALUTION version 1.1.0 #include "../../utils/def.hpp" #include "host_matrix_dense.hpp" #include "host_matrix_csr.hpp" #include "host_conversion.hpp" #include "host_vector.hpp" #include "../../utils/log.hpp" #include "../../utils/allocate_free.hpp" #include "../../utils/math_functions.hpp" #include "../matrix_formats_ind.hpp" #include <math.h> #include <complex> #ifdef _OPENMP #include <omp.h> #else #define omp_set_num_threads(num) ; #endif #ifdef SUPPORT_MKL #include <mkl.h> #include <mkl_spblas.h> #endif namespace paralution { template <typename ValueType> HostMatrixDENSE<ValueType>::HostMatrixDENSE() { // no default constructors LOG_INFO("no default constructor"); FATAL_ERROR(__FILE__, __LINE__); } template <typename ValueType> HostMatrixDENSE<ValueType>::HostMatrixDENSE(const Paralution_Backend_Descriptor local_backend) { LOG_DEBUG(this, "HostMatrixDENSE::HostMatrixDENSE()", "constructor with local_backend"); this->mat_.val = NULL; this->set_backend(local_backend); } template <typename ValueType> HostMatrixDENSE<ValueType>::~HostMatrixDENSE() { LOG_DEBUG(this, "HostMatrixDENSE::~HostMatrixDENSE()", "destructor"); this->Clear(); } template <typename ValueType> void HostMatrixDENSE<ValueType>::info(void) const { LOG_INFO("HostMatrixDENSE<ValueType>"); if (DENSE_IND_BASE == 0) { LOG_INFO("Dense matrix - row-based"); } else { assert(DENSE_IND_BASE == 1); LOG_INFO("Dense matrix - column-based"); } } template <typename ValueType> void HostMatrixDENSE<ValueType>::Clear() { if (this->nnz_ > 0) { free_host(&this->mat_.val); this->nrow_ = 0; this->ncol_ = 0; this->nnz_ = 0; } } template <typename ValueType> void HostMatrixDENSE<ValueType>::AllocateDENSE(const int nrow, const int ncol) { assert( ncol >= 0); assert( nrow >= 0); if (this->nnz_ > 0) this->Clear(); if (nrow*ncol > 0) { allocate_host(nrow*ncol, &this->mat_.val); set_to_zero_host(nrow*ncol, mat_.val); this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nrow*ncol; } } template <typename ValueType> void HostMatrixDENSE<ValueType>::SetDataPtrDENSE(ValueType **val, const int nrow, const int ncol) { assert(*val != NULL); assert(nrow > 0); assert(ncol > 0); this->Clear(); this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nrow*ncol; this->mat_.val = *val; } template <typename ValueType> void HostMatrixDENSE<ValueType>::LeaveDataPtrDENSE(ValueType **val) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); assert(this->nnz_ > 0); assert(this->nnz_ == this->nrow_*this->ncol_); *val = this->mat_.val; this->mat_.val = NULL; this->nrow_ = 0; this->ncol_ = 0; this->nnz_ = 0; } template <typename ValueType> void HostMatrixDENSE<ValueType>::CopyFrom(const BaseMatrix<ValueType> &mat) { // copy only in the same format assert(this->get_mat_format() == mat.get_mat_format()); if (const HostMatrixDENSE<ValueType> *cast_mat = dynamic_cast<const HostMatrixDENSE<ValueType>*> (&mat)) { this->AllocateDENSE(cast_mat->nrow_, cast_mat->ncol_); assert((this->nnz_ == cast_mat->nnz_) && (this->nrow_ == cast_mat->nrow_) && (this->ncol_ == cast_mat->ncol_) ); if (this->nnz_ > 0) { _set_omp_backend_threads(this->local_backend_, this->nnz_); #pragma omp parallel for for (int j=0; j<this->nnz_; ++j) this->mat_.val[j] = cast_mat->mat_.val[j]; } } else { // Host matrix knows only host matrices // -> dispatching mat.CopyTo(this); } } template <typename ValueType> void HostMatrixDENSE<ValueType>::CopyTo(BaseMatrix<ValueType> *mat) const { mat->CopyFrom(*this); } template <typename ValueType> bool HostMatrixDENSE<ValueType>::ConvertFrom(const BaseMatrix<ValueType> &mat) { this->Clear(); // empty matrix is empty matrix if (mat.get_nnz() == 0) return true; if (const HostMatrixDENSE<ValueType> *cast_mat = dynamic_cast<const HostMatrixDENSE<ValueType>*> (&mat)) { this->CopyFrom(*cast_mat); return true; } if (const HostMatrixCSR<ValueType> *cast_mat = dynamic_cast<const HostMatrixCSR<ValueType>*> (&mat)) { this->Clear(); if (csr_to_dense(this->local_backend_.OpenMP_threads, cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_, cast_mat->mat_, &this->mat_) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = this->nrow_ * this->ncol_; return true; } } return false; } #ifdef SUPPORT_MKL template <> void HostMatrixDENSE<double>::Apply(const BaseVector<double> &in, BaseVector<double> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<double> *cast_in = dynamic_cast<const HostVector<double>*> (&in); HostVector<double> *cast_out = dynamic_cast< HostVector<double>*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); double alpha = double(1.0); double beta = double(0.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_dgemv(CblasColMajor, CblasNoTrans, nrow, ncol, alpha, this->mat_.val, nrow, cast_in->vec_, 1, beta, cast_out->vec_, 1); } else { cblas_dgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, alpha, this->mat_.val, ncol, cast_in->vec_, 1, beta, cast_out->vec_, 1); } } template <> void HostMatrixDENSE<float>::Apply(const BaseVector<float> &in, BaseVector<float> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<float> *cast_in = dynamic_cast<const HostVector<float>*> (&in); HostVector<float> *cast_out = dynamic_cast< HostVector<float>*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); float alpha = float(1.0); float beta = float(0.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_sgemv(CblasColMajor, CblasNoTrans, nrow, ncol, alpha, this->mat_.val, nrow, cast_in->vec_, 1, beta, cast_out->vec_, 1); } else { cblas_sgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, alpha, this->mat_.val, ncol, cast_in->vec_, 1, beta, cast_out->vec_, 1); } } template <> void HostMatrixDENSE<std::complex<double> >::Apply(const BaseVector<std::complex<double> > &in, BaseVector<std::complex<double> > *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<std::complex<double> > *cast_in = dynamic_cast<const HostVector<std::complex<double> >*> (&in); HostVector<std::complex<double> > *cast_out = dynamic_cast< HostVector<std::complex<double> >*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); std::complex<double> alpha = std::complex<double>(1.0, 0.0); std::complex<double> beta = std::complex<double>(0.0, 0.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_zgemv(CblasColMajor, CblasNoTrans, nrow, ncol, &alpha, this->mat_.val, nrow, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } else { cblas_zgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, &alpha, this->mat_.val, ncol, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } } template <> void HostMatrixDENSE<std::complex<float> >::Apply(const BaseVector<std::complex<float> > &in, BaseVector<std::complex<float> > *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<std::complex<float> > *cast_in = dynamic_cast<const HostVector<std::complex<float> >*> (&in); HostVector<std::complex<float> > *cast_out = dynamic_cast< HostVector<std::complex<float> >*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); std::complex<float> alpha = std::complex<float>(1.0, 0.0); std::complex<float> beta = std::complex<float>(0.0, 0.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_cgemv(CblasColMajor, CblasNoTrans, nrow, ncol, &alpha, this->mat_.val, nrow, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } else { cblas_cgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, &alpha, this->mat_.val, ncol, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } } #else template <typename ValueType> void HostMatrixDENSE<ValueType>::Apply(const BaseVector<ValueType> &in, BaseVector<ValueType> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<ValueType> *cast_in = dynamic_cast<const HostVector<ValueType>*> (&in); HostVector<ValueType> *cast_out = dynamic_cast< HostVector<ValueType>*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); _set_omp_backend_threads(this->local_backend_, this->nnz_); #pragma omp parallel for for (int ai=0; ai<this->nrow_; ++ai) { cast_out->vec_[ai] = ValueType(0.0); for (int aj=0; aj<this->ncol_; ++aj) cast_out->vec_[ai] += this->mat_.val[DENSE_IND(ai,aj,this->nrow_,this->ncol_)] * cast_in->vec_[aj]; } } #endif #ifdef SUPPORT_MKL template <> void HostMatrixDENSE<double>::ApplyAdd(const BaseVector<double> &in, const double scalar, BaseVector<double> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<double> *cast_in = dynamic_cast<const HostVector<double>*> (&in); HostVector<double> *cast_out = dynamic_cast< HostVector<double>*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); double beta = double(1.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_dgemv(CblasColMajor, CblasNoTrans, nrow, ncol, scalar, this->mat_.val, nrow, cast_in->vec_, 1, beta, cast_out->vec_, 1); } else { cblas_dgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, scalar, this->mat_.val, ncol, cast_in->vec_, 1, beta, cast_out->vec_, 1); } } template <> void HostMatrixDENSE<float>::ApplyAdd(const BaseVector<float> &in, const float scalar, BaseVector<float> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<float> *cast_in = dynamic_cast<const HostVector<float>*> (&in); HostVector<float> *cast_out = dynamic_cast< HostVector<float>*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); float beta = float(1.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_sgemv(CblasColMajor, CblasNoTrans, nrow, ncol, scalar, this->mat_.val, nrow, cast_in->vec_, 1, beta, cast_out->vec_, 1); } else { cblas_sgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, scalar, this->mat_.val, ncol, cast_in->vec_, 1, beta, cast_out->vec_, 1); } } template <> void HostMatrixDENSE<std::complex<double> >::ApplyAdd(const BaseVector<std::complex<double> > &in, const std::complex<double> scalar, BaseVector<std::complex<double> > *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<std::complex<double> > *cast_in = dynamic_cast<const HostVector<std::complex<double> >*> (&in); HostVector<std::complex<double> > *cast_out = dynamic_cast< HostVector<std::complex<double> >*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); std::complex<double> beta = std::complex<double>(1.0, 0.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_zgemv(CblasColMajor, CblasNoTrans, nrow, ncol, &scalar, this->mat_.val, nrow, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } else { cblas_zgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, &scalar, this->mat_.val, ncol, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } } template <> void HostMatrixDENSE<std::complex<float> >::ApplyAdd(const BaseVector<std::complex<float> > &in, const std::complex<float> scalar, BaseVector<std::complex<float> > *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<std::complex<float> > *cast_in = dynamic_cast<const HostVector<std::complex<float> >*> (&in); HostVector<std::complex<float> > *cast_out = dynamic_cast< HostVector<std::complex<float> >*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); std::complex<float> beta = std::complex<float>(1.0, 0.0); int nrow = this->nrow_; int ncol = this->ncol_; if (DENSE_IND_BASE == 0) { cblas_cgemv(CblasColMajor, CblasNoTrans, nrow, ncol, &scalar, this->mat_.val, nrow, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } else { cblas_cgemv(CblasRowMajor, CblasNoTrans, nrow, ncol, &scalar, this->mat_.val, ncol, cast_in->vec_, 1, &beta, cast_out->vec_, 1); } } #else template <typename ValueType> void HostMatrixDENSE<ValueType>::ApplyAdd(const BaseVector<ValueType> &in, const ValueType scalar, BaseVector<ValueType> *out) const { if (this->nnz_ > 0) { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->ncol_); assert(out->get_size() == this->nrow_); const HostVector<ValueType> *cast_in = dynamic_cast<const HostVector<ValueType>*> (&in); HostVector<ValueType> *cast_out = dynamic_cast< HostVector<ValueType>*> (out); assert(cast_in != NULL); assert(cast_out!= NULL); _set_omp_backend_threads(this->local_backend_, this->nnz_); #pragma omp parallel for for (int ai=0; ai<this->nrow_; ++ai) for (int aj=0; aj<this->ncol_; ++aj) cast_out->vec_[ai] += scalar * this->mat_.val[DENSE_IND(ai,aj,this->nrow_,this->ncol_)] * cast_in->vec_[aj]; } } #endif #ifdef SUPPORT_MKL template <> bool HostMatrixDENSE<double>::MatMatMult(const BaseMatrix<double> &A, const BaseMatrix<double> &B) { assert((this != &A) && (this != &B)); assert(&A != NULL); assert(&B != NULL); const HostMatrixDENSE<double> *cast_mat_A = dynamic_cast<const HostMatrixDENSE<double>*> (&A); const HostMatrixDENSE<double> *cast_mat_B = dynamic_cast<const HostMatrixDENSE<double>*> (&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); int m = cast_mat_A->nrow_; int n = cast_mat_B->ncol_; int k = cast_mat_A->ncol_; double alpha = double(1.0); double beta = double(0.0); if (DENSE_IND_BASE == 0) { cblas_dgemm (CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, cast_mat_A->mat_.val, m, cast_mat_B->mat_.val, k, beta, this->mat_.val, m); } else { cblas_dgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, cast_mat_A->mat_.val, k, cast_mat_B->mat_.val, n, beta, this->mat_.val, m); } return true; } template <> bool HostMatrixDENSE<float>::MatMatMult(const BaseMatrix<float> &A, const BaseMatrix<float> &B) { assert((this != &A) && (this != &B)); assert(&A != NULL); assert(&B != NULL); const HostMatrixDENSE<float> *cast_mat_A = dynamic_cast<const HostMatrixDENSE<float>*> (&A); const HostMatrixDENSE<float> *cast_mat_B = dynamic_cast<const HostMatrixDENSE<float>*> (&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); int m = cast_mat_A->nrow_; int n = cast_mat_B->ncol_; int k = cast_mat_A->ncol_; float alpha = float(1.0); float beta = float(0.0); if (DENSE_IND_BASE == 0) { cblas_sgemm (CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, cast_mat_A->mat_.val, m, cast_mat_B->mat_.val, k, beta, this->mat_.val, m); } else { cblas_sgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, cast_mat_A->mat_.val, k, cast_mat_B->mat_.val, n, beta, this->mat_.val, m); } return true; } template <> bool HostMatrixDENSE<std::complex<double> >::MatMatMult(const BaseMatrix<std::complex<double> > &A, const BaseMatrix<std::complex<double> > &B) { assert((this != &A) && (this != &B)); assert(&A != NULL); assert(&B != NULL); const HostMatrixDENSE<std::complex<double> > *cast_mat_A = dynamic_cast<const HostMatrixDENSE<std::complex<double> >*> (&A); const HostMatrixDENSE<std::complex<double> > *cast_mat_B = dynamic_cast<const HostMatrixDENSE<std::complex<double> >*> (&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); int m = cast_mat_A->nrow_; int n = cast_mat_B->ncol_; int k = cast_mat_A->ncol_; std::complex<double> alpha = std::complex<double>(1.0, 0.0); std::complex<double> beta = std::complex<double>(0.0, 0.0); if (DENSE_IND_BASE == 0) { cblas_zgemm (CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, &alpha, cast_mat_A->mat_.val, m, cast_mat_B->mat_.val, k, &beta, this->mat_.val, m); } else { cblas_zgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, &alpha, cast_mat_A->mat_.val, k, cast_mat_B->mat_.val, n, &beta, this->mat_.val, m); } return true; } template <> bool HostMatrixDENSE<std::complex<float> >::MatMatMult(const BaseMatrix<std::complex<float> > &A, const BaseMatrix<std::complex<float> > &B) { assert((this != &A) && (this != &B)); assert(&A != NULL); assert(&B != NULL); const HostMatrixDENSE<std::complex<float> > *cast_mat_A = dynamic_cast<const HostMatrixDENSE<std::complex<float> >*> (&A); const HostMatrixDENSE<std::complex<float> > *cast_mat_B = dynamic_cast<const HostMatrixDENSE<std::complex<float> >*> (&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); int m = cast_mat_A->nrow_; int n = cast_mat_B->ncol_; int k = cast_mat_A->ncol_; std::complex<float> alpha = std::complex<float>(1.0, 0.0); std::complex<float> beta = std::complex<float>(0.0, 0.0); if (DENSE_IND_BASE == 0) { cblas_cgemm (CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, &alpha, cast_mat_A->mat_.val, m, cast_mat_B->mat_.val, k, &beta, this->mat_.val, m); } else { cblas_cgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, &alpha, cast_mat_A->mat_.val, k, cast_mat_B->mat_.val, n, &beta, this->mat_.val, m); } return true; } #else template <typename ValueType> bool HostMatrixDENSE<ValueType>::MatMatMult(const BaseMatrix<ValueType> &A, const BaseMatrix<ValueType> &B) { assert((this != &A) && (this != &B)); assert(&A != NULL); assert(&B != NULL); const HostMatrixDENSE<ValueType> *cast_mat_A = dynamic_cast<const HostMatrixDENSE<ValueType>*> (&A); const HostMatrixDENSE<ValueType> *cast_mat_B = dynamic_cast<const HostMatrixDENSE<ValueType>*> (&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); #pragma omp parallel for for (int i=0; i<cast_mat_A->nrow_; ++i) { for (int j=0; j<cast_mat_B->ncol_; ++j) { ValueType sum = ValueType(0.0); for (int k=0; k<cast_mat_A->ncol_; ++k) { sum += cast_mat_A->mat_.val[DENSE_IND(i,k,cast_mat_A->nrow_,cast_mat_A->ncol_)] * cast_mat_B->mat_.val[DENSE_IND(k,j,cast_mat_B->nrow_,cast_mat_B->ncol_)]; } this->mat_.val[DENSE_IND(i,j,cast_mat_A->nrow_,cast_mat_B->ncol_)] = sum; } } return true; } #endif template <typename ValueType> bool HostMatrixDENSE<ValueType>::Householder(const int idx, ValueType &beta, BaseVector<ValueType> *vec) const { HostVector<ValueType> *cast_vec = dynamic_cast<HostVector<ValueType>*> (vec); assert(cast_vec != NULL); assert(cast_vec->get_size() >= this->nrow_-idx); ValueType s = ValueType(0.0); ValueType mu; for (int i=0; i<this->nrow_-idx; ++i) cast_vec->vec_[i] = this->mat_.val[DENSE_IND(i+idx, idx, this->nrow_, this->ncol_)]; ValueType y1 = cast_vec->vec_[0]; for (int i=1; i<this->nrow_-idx; ++i) s += cast_vec->vec_[i] * cast_vec->vec_[i]; cast_vec->vec_[0] = ValueType(1.0); if (s == ValueType(0.0)) { beta = ValueType(0.0); } else { mu = sqrt(y1 * y1 + s); if (y1 <= ValueType(0.0)) cast_vec->vec_[0] = y1 - mu; else cast_vec->vec_[0] = -s / (y1 + mu); ValueType y0sq = cast_vec->vec_[0] * cast_vec->vec_[0]; beta = ValueType(2.0) * y0sq / (s + y0sq); ValueType norm = cast_vec->vec_[0]; for (int i=0; i<this->nrow_-idx; ++i) cast_vec->vec_[i] /= norm; } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::QRDecompose(void) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); assert(this->nnz_ > 0); int size = (this->nrow_ < this->ncol_) ? this->nrow_ : this->ncol_; ValueType beta; HostVector<ValueType> v(this->local_backend_); v.Allocate(this->nrow_); for (int i=0; i<size; ++i) { this->Householder(i, beta, &v); if (beta != ValueType(0.0)) { for (int aj=i; aj<this->ncol_; ++aj) { ValueType sum = ValueType(0.0); for (int ai=i; ai<this->nrow_; ++ai) sum += v.vec_[ai-i] * this->mat_.val[DENSE_IND(ai, aj, this->nrow_, this->ncol_)]; sum *= beta; for (int ai=i; ai<this->nrow_; ++ai) this->mat_.val[DENSE_IND(ai, aj, this->nrow_, this->ncol_)] -= sum * v.vec_[ai-i]; } for (int k=i+1; k<this->nrow_; ++k) this->mat_.val[DENSE_IND(k, i, this->nrow_, this->ncol_)] = v.vec_[k-i]; } } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::QRSolve(const BaseVector<ValueType> &in, BaseVector<ValueType> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->nrow_); assert(out->get_size() == this->ncol_); HostVector<ValueType> *cast_out = dynamic_cast<HostVector<ValueType>*>(out); assert(cast_out!= NULL); HostVector<ValueType> copy_in(this->local_backend_); copy_in.CopyFrom(in); int size = (this->nrow_ < this->ncol_) ? this->nrow_ : this->ncol_; ValueType *v = NULL; allocate_host(this->nrow_, &v); // Apply Q^T on copy_in v[0] = ValueType(1.0); for (int i=0; i<size; ++i) { ValueType sum = ValueType(1.0); for (int j=1; j<this->nrow_-i; ++j) { v[j] = this->mat_.val[DENSE_IND(j+i,i,this->nrow_, this->ncol_)]; sum += v[j] * v[j]; } sum = ValueType(2.0) / sum; if (sum != ValueType(2.0)) { ValueType sum2 = ValueType(0.0); for (int j=0; j<this->nrow_-i; ++j) sum2 += v[j] * copy_in.vec_[j+i]; for (int j=0; j<this->nrow_-i; ++j) copy_in.vec_[j+i] -= sum * sum2 * v[j]; } } free_host(&v); // Backsolve Rx = Q^T b for (int i=size-1; i>=0; --i) { ValueType sum = ValueType(0.0); for (int j=i+1; j<this->ncol_; ++j) sum += this->mat_.val[DENSE_IND(i, j, this->nrow_, this->ncol_)] * cast_out->vec_[j]; cast_out->vec_[i] = (copy_in.vec_[i] - sum) / this->mat_.val[DENSE_IND(i, i, this->nrow_, this->ncol_)]; } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::Invert(void) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); assert(this->nnz_ > 0); assert(this->nrow_ == this->ncol_); ValueType *val = NULL; allocate_host(this->nrow_ * this->ncol_, &val); this->QRDecompose(); #pragma omp parallel for for (int i=0; i<this->nrow_; ++i) { HostVector<ValueType> sol(this->local_backend_); HostVector<ValueType> rhs(this->local_backend_); sol.Allocate(this->nrow_); rhs.Allocate(this->nrow_); rhs.vec_[i] = ValueType(1.0); this->QRSolve(rhs, &sol); for (int j=0; j<this->ncol_; ++j) val[DENSE_IND(j, i, this->nrow_, this->ncol_)] = sol.vec_[j]; } free_host(&this->mat_.val); this->mat_.val = val; return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::LUFactorize(void) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); assert(this->nnz_ > 0); assert(this->nrow_ == this->ncol_); for (int i=0; i<this->nrow_-1; ++i) { for (int j=i+1; j<this->nrow_; ++j) { this->mat_.val[DENSE_IND(j, i, this->nrow_, this->ncol_)] /= this->mat_.val[DENSE_IND(i, i, this->nrow_, this->ncol_)]; for (int k=i+1; k<this->ncol_; ++k) this->mat_.val[DENSE_IND(j, k, this->nrow_, this->ncol_)] -= this->mat_.val[DENSE_IND(j, i, this->nrow_, this->ncol_)] * this->mat_.val[DENSE_IND(i, k, this->nrow_, this->ncol_)]; } } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::LUSolve(const BaseVector<ValueType> &in, BaseVector<ValueType> *out) const { assert(in. get_size() >= 0); assert(out->get_size() >= 0); assert(in. get_size() == this->nrow_); assert(out->get_size() == this->ncol_); HostVector<ValueType> *cast_out = dynamic_cast<HostVector<ValueType>*>(out); const HostVector<ValueType> *cast_in = dynamic_cast<const HostVector<ValueType>*>(&in); assert(cast_out!= NULL); // fill solution vector for (int i=0; i<this->nrow_; ++i) cast_out->vec_[i] = cast_in->vec_[i]; // forward sweeps for (int i=0; i<this->nrow_-1; ++i) { for (int j=i+1; j<this->nrow_; ++j) cast_out->vec_[j] -= cast_out->vec_[i] * this->mat_.val[DENSE_IND(j, i, this->nrow_, this->ncol_)]; } // backward sweeps for (int i=this->nrow_-1; i>=0; --i) { cast_out->vec_[i] /= this->mat_.val[DENSE_IND(i, i, this->nrow_, this->ncol_)]; for (int j=0; j<i; ++j) cast_out->vec_[j] -= cast_out->vec_[i] * this->mat_.val[DENSE_IND(j, i, this->nrow_, this->ncol_)]; } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::ReplaceColumnVector(const int idx, const BaseVector<ValueType> &vec) { assert(&vec != NULL); assert(vec.get_size() == this->nrow_); if (this->get_nnz() > 0) { const HostVector<ValueType> *cast_vec = dynamic_cast<const HostVector<ValueType>*> (&vec); assert(cast_vec != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #pragma omp parallel for for (int i=0; i<this->nrow_; ++i) this->mat_.val[DENSE_IND(i, idx, this->nrow_, this->ncol_)] = cast_vec->vec_[i]; } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::ReplaceRowVector(const int idx, const BaseVector<ValueType> &vec) { assert(&vec != NULL); assert(vec.get_size() == this->ncol_); if (this->get_nnz() > 0) { const HostVector<ValueType> *cast_vec = dynamic_cast<const HostVector<ValueType>*> (&vec); assert(cast_vec != NULL); _set_omp_backend_threads(this->local_backend_, this->ncol_); #pragma omp parallel for for (int i=0; i<this->ncol_; ++i) this->mat_.val[DENSE_IND(idx, i, this->nrow_, this->ncol_)] = cast_vec->vec_[i]; } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::ExtractColumnVector(const int idx, BaseVector<ValueType> *vec) const { assert(vec != NULL); assert(vec->get_size() == this->nrow_); if (this->get_nnz() > 0) { HostVector<ValueType> *cast_vec = dynamic_cast<HostVector<ValueType>*> (vec); assert(cast_vec != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #pragma omp parallel for for (int i=0; i<this->nrow_; ++i) cast_vec->vec_[i] = this->mat_.val[DENSE_IND(i, idx, this->nrow_, this->ncol_)]; } return true; } template <typename ValueType> bool HostMatrixDENSE<ValueType>::ExtractRowVector(const int idx, BaseVector<ValueType> *vec) const { assert(vec != NULL); assert(vec->get_size() == this->ncol_); if (this->get_nnz() > 0) { HostVector<ValueType> *cast_vec = dynamic_cast<HostVector<ValueType>*> (vec); assert(cast_vec != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #pragma omp parallel for for (int i=0; i<this->nrow_; ++i) cast_vec->vec_[i] = this->mat_.val[DENSE_IND(idx, i, this->nrow_, this->ncol_)]; } return true; } template class HostMatrixDENSE<double>; template class HostMatrixDENSE<float>; #ifdef SUPPORT_COMPLEX template class HostMatrixDENSE<std::complex<double> >; template class HostMatrixDENSE<std::complex<float> >; #endif }