математическая библиотека и easylogging++ (2 этап)

68e008d9 · Arkadiy Marinenko · 48920dc3 · 68e008d9 · 68e008d9 · 68e008d9
Commit 68e008d9 authored Mar 01, 2018 by Arkadiy Marinenko
15 changed files
--- a/README.md
+++ b/README.md
@@ -3,3 +3,7 @@
 В настройках CMAKE можно выбрать, с какой технологией распараллеливания будут работать решатели: OpenMP, OpenCL или CUDA.
 При этом OpenMP работает только на CPU, OpenCL работает как на CPU, так и на GPU, а CUDA работает только на GPU от Nvidia.
 По умолчанию всегда используется технология OpenMP.
+2. С помощью проекта CodeBlocks (файл math.cbp) или make-файла откомпилировать библиотеку math, находящуюся в папке math.
+По умолчанию включена поддержка как решателей paralution (-DNLA3D_USE_PARALUTION), так и решателей mkl (-DNLA3D_USE_MKL). При необходимости, ненужное отключить в соответствующих настройках проекта CodeBlocks или в make-файле.
+Особое внимание уделить файлу EquationSolver.h и значениям абсолютной невязки "double abs_tol = 1.0E-06" и относительной невязки "double rel_tol = 1.0E-06" для решателей paralution. Это критерии выхода для итерационного решателя. В зависимости от требуемой точности решения их нужно изменять.
+
--- a/easylogging/easylogging++.h
+++ b/easylogging/easylogging++.h
--- a/math/BlockSparseMatrix.h
+++ b/math/BlockSparseMatrix.h
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#pragma once
+
+#include "sys.h"
+#include "math/SparseMatrix.h"
+
+namespace nla3d {
+namespace math {
+
+
+// BlockSparseSymMatrix it is useful wrap over several SparseMatrix (and SparseSymMatrix) when your
+// global sparse matrix is represented by different blocks (`nb` is number of blocks). Here is a
+// illustration:
+// Here is global symmetric matrix A `n` by `n`, it consists of 3 distinct sparse matrix blocks:
+//     | A1  | A12 |    
+// A = |-----------|,
+//     |A12^T| A2  |
+//
+// where A1 is `m` by `m` symmetric sparse matrix, A2 is `l` by `l` symmetric sparse matrix and A12
+// `m` by `l` sparse matrix. `n` = `m` + `l`.
+//
+// Such matrix consisted of distinct blocks can be created with:
+// BlockSparseSymMatrix<2> A(m, n);
+//
+// Different blocks can be accessed by block() function:
+// SparseSymMatrix* A1 = A.block(1);
+// SparseSymMatrix* A2 = A.block(2);
+// SparseMatrix* A12 = A.block(1, 2);
+//
+// All others methods like zero(), compress() and others directly trigger the same methods of blocks
+// matrices.
+//
+// Methods addEntry(), addValue() works with global indices (in our example it's from 1 to `n`).
+// Under the hood these methods decide in which block translate your request, then modify indices to
+// local and call block's method.
+template <uint16 nb>
+class BlockSparseSymMatrix {
+  public:
+    BlockSparseSymMatrix(std::initializer_list<uint32> _rows_in_block, uint32 max_in_row = 100);
+    BlockSparseSymMatrix(BlockSparseSymMatrix* ex);
+    
+    // add non-zero entry to sparse matrix. This should be called before compress(). 
+    void addEntry(uint32 _i, uint32 _j);
+
+    // add value to the _i, _j entry. This should be called after compress().
+    void addValue(uint32 _i, uint32 _j, double value);
+
+    SparseSymMatrix* block(uint16 _i);
+    SparseMatrix* block(uint16 _i, uint16 _j);
+
+    void zero();
+    void compress();
+    bool isCompressed();
+
+    uint32 nRows();
+
+  private:
+    void getBlockAndPosition(uint32 _i, uint16* block, uint32* pos);
+
+    SparseSymMatrix diag[nb];
+    SparseMatrix upper[nb * (nb + 1) / 2 - nb];
+
+    std::vector<uint32> rows_in_block;
+    uint32 total_rows = 0;
+    bool compressed = false;
+};
+
+
+template<uint16 nb>
+BlockSparseSymMatrix<nb>::BlockSparseSymMatrix(std::initializer_list<uint32> _rows_in_block, uint32 _mat_in_row) {
+  rows_in_block = std::vector<uint32>(_rows_in_block);
+
+  assert(rows_in_block.size() == nb);
+
+  for (uint16 i = 0; i < nb; i++) {
+    block(i+1)->reinit(rows_in_block[i], _mat_in_row);
+    for (uint16 j = i+1; j < nb; j++) {
+      block(i+1, j+1)->reinit(rows_in_block[i], rows_in_block[j], _mat_in_row);
+    }
+    total_rows += rows_in_block[i];
+  }
+}
+
+
+template<uint16 nb>
+BlockSparseSymMatrix<nb>::BlockSparseSymMatrix(BlockSparseSymMatrix<nb>* ex) {
+  // assign sparsity to all underlying matrices
+  rows_in_block = ex->rows_in_block; // TODO: need copy here..
+  assert(rows_in_block.size() == nb);
+
+  total_rows = ex->total_rows;
+  compressed = ex->compressed;
+
+  for (uint16 i = 0; i < nb; i++) {
+    block(i+1)->setSparsityInfo(ex->block(i+1)->getSparsityInfo());
+    for (uint16 j = i+1; j < nb; j++) {
+      block(i+1, j+1)->setSparsityInfo(ex->block(i+1, j+1)->getSparsityInfo());
+    }
+    total_rows += rows_in_block[i];
+  }
+}
+
+
+template<uint16 nb>
+inline SparseSymMatrix* BlockSparseSymMatrix<nb>::block(uint16 _i) {
+  assert(_i > 0 && _i <= nb);
+  return &diag[_i-1];
+}
+
+
+template<uint16 nb>
+inline SparseMatrix* BlockSparseSymMatrix<nb>::block(uint16 _i, uint16 _j) {
+  CHECK(_i != _j);
+  if (_i > _j) std::swap(_i, _j);
+
+  uint16 ind = nb*(_i-1) - (_i-2)*(_i-1)/2 + (_j-_i) - _i;
+  assert(ind >= 0 && ind < nb * (nb + 1) / 2 - nb);
+
+  return &upper[ind];
+}
+
+
+template<uint16 nb>
+inline void BlockSparseSymMatrix<nb>::getBlockAndPosition(uint32 _i, uint16* block, uint32* pos) {
+  assert(_i > 0 && _i <= total_rows);
+  *block = 1;
+  *pos = _i;
+  for (auto blocknum : rows_in_block) {
+    if (*pos <= blocknum) break;
+    *pos -= blocknum;
+    *block += 1;
+  }
+}
+
+
+template<uint16 nb>
+void BlockSparseSymMatrix<nb>::addEntry(uint32 _i, uint32 _j) {
+  uint16 block_i, block_j;
+  uint32 pos_i, pos_j;
+  if (_i > _j) std::swap(_i, _j);
+  getBlockAndPosition(_i, &block_i, &pos_i);
+  getBlockAndPosition(_j, &block_j, &pos_j);
+
+  if (block_i == block_j) {
+    block(block_i)->addEntry(pos_i, pos_j);
+  } else {
+    block(block_i, block_j)->addEntry(pos_i, pos_j);
+  }
+}
+
+
+template<uint16 nb>
+void BlockSparseSymMatrix<nb>::addValue(uint32 _i, uint32 _j, double value) {
+  uint16 block_i, block_j;
+  uint32 pos_i, pos_j;
+  if (_i > _j) std::swap(_i, _j);
+  getBlockAndPosition(_i, &block_i, &pos_i);
+  getBlockAndPosition(_j, &block_j, &pos_j);
+
+  if (block_i == block_j) {
+    block(block_i)->addValue(pos_i, pos_j, value);
+  } else {
+    block(block_i, block_j)->addValue(pos_i, pos_j, value);
+  }
+}
+
+template<uint16 nb>
+void BlockSparseSymMatrix<nb>::zero() {
+  for (uint16 i = 0; i < nb; i++) {
+    diag[i].zero();
+  }
+
+  for (uint16 i = 0; i < nb * (nb + 1) / 2 - nb; i++) {
+    upper[i].zero();
+  }
+}
+
+
+template<uint16 nb>
+void BlockSparseSymMatrix<nb>::compress() {
+  // NOTE: now this function can be called multiply times. We rely on underlying
+  // SparseMatrix::compress()
+  for (uint16 i = 0; i < nb; i++) {
+    diag[i].compress();
+  }
+
+  for (uint16 i = 0; i < nb * (nb + 1) / 2 - nb; i++) {
+    upper[i].compress();
+  }
+
+  compressed = true;
+}
+
+template<uint16 nb>
+inline bool BlockSparseSymMatrix<nb>::isCompressed(){
+  return compressed;
+}
+
+template<uint16 nb>
+uint32 BlockSparseSymMatrix<nb>::nRows() {
+  return total_rows;
+}
+
+
+} // math
+} // nla3d
--- a/math/EquationSolver.cpp
+++ b/math/EquationSolver.cpp
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d
+
+#include "math/EquationSolver.h"
+
+#ifdef NLA3D_USE_MKL
+#include <mkl.h>
+#endif //NLA3D_USE_MKL
+
+#ifdef NLA3D_USE_PARALUTION
+#include <paralution.hpp>
+using namespace paralution;
+#endif //NLA3D_USE_PARALUTION
+
+bool firstRunParalution = true;
+
+namespace nla3d {
+
+namespace math {
+
+EquationSolver* defaultEquationSolver = new GaussDenseEquationSolver;
+
+void EquationSolver::setSymmetric (bool symmetric) {
+  isSymmetric = symmetric;
+}
+
+void EquationSolver::setPositive (bool positive) {
+  isPositive = positive;
+}
+
+
+void GaussDenseEquationSolver::solveEquations (math::SparseSymMatrix* matrix, double* rhs, double* unknowns) {
+  TIMED_SCOPE(t, "solveEquations");
+  factorizeEquations(matrix);
+  substituteEquations(matrix, rhs, unknowns);
+}
+
+
+void GaussDenseEquationSolver::factorizeEquations(math::SparseSymMatrix* matrix) {
+  // nothing to do here..
+  nEq = matrix->nRows();
+  CHECK(nEq < 1000) << "GaussDenseEquationSolver works only with number of equations less than 1000";
+}
+
+
+void GaussDenseEquationSolver::substituteEquations(math::SparseSymMatrix* matrix,
+                                                   double* rhs, double* unknowns) {
+  // NOTE: actually here we perform all steps factorization and substitution
+  // because this is simplest solver dedicated to perform functional tests
+  //
+  CHECK(nrhs == 1) << "GaussDenseEquationSolver support only 1 set of rhs values";
+
+
+  if (matA.dM() == nEq && matA.dN() == nEq) {
+    matA.zero();
+  } else {
+    matA.resize(nEq, nEq);
+  }
+
+  // fill dense matA from sparse matrix
+  for (uint16 i = 0; i < nEq; i++)
+    for (uint16 j = 0; j < nEq; j++)
+      // NOTE: SparseMatrix getters works with indexes started from 1
+      // TODO: we can fill dense matrix from sparse one in a more optimized way
+      matA[i][j] = matrix->value(i+1, j+1);
+  bool res = _solve(unknowns, matA.ptr(), rhs, nEq);
+  CHECK(res == true) << "ERROR during solution";
+}
+
+bool GaussDenseEquationSolver::_solve(double* X, double* A,
+                               double* B, int n)
+{
+    // Gaussian elimination, with partial pivoting. It's an error if the
+    // matrix is singular, because that means two constraints are
+    // equivalent.
+    int i, j, ip, jp, imax;
+    double max, temp;
+
+    for(i = 0; i < n; i++) {
+        // We are trying eliminate the term in column i, for rows i+1 and
+        // greater. First, find a pivot (between rows i and N-1).
+        max = 0;
+        for(ip = i; ip < n; ip++) {
+          //if(ffabs(A[ip][i]) > max) {
+            if(fabs(A[ip*n+i]) > max) {
+                imax = ip;
+              //max = ffabs(A[ip][i]);
+                max = fabs(A[ip*n+i]);
+            }
+        }
+        // Don't give up on a singular matrix unless it's really bad; the
+        // assumption code is responsible for identifying that condition,
+        // so we're not responsible for reporting that error.
+        if(fabs(max) < 1e-20) return false;
+
+        // Swap row imax with row i
+        for(jp = 0; jp < n; jp++) {
+          //SWAP(double, A[i][jp], A[imax][jp]);
+            double tmp;
+            tmp = A[i*n+jp];
+            A[i*n+jp] = A[imax*n+jp];
+            A[imax*n+jp] = tmp;
+        }
+      //SWAP(double, B[i], B[imax]);
+        double tmp;
+        tmp = B[i];
+        B[i] = B[imax];
+        B[imax] = tmp;
+
+        // For rows i+1 and greater, eliminate the term in column i.
+        for(ip = i+1; ip < n; ip++) {
+          //temp = A[ip][i]/A[i][i];
+            temp = A[ip*n+i]/A[i*n+i];
+
+            for(jp = i; jp < n; jp++) {
+              //A[ip][jp] -= temp*(A[i][jp]);
+                A[ip*n+jp] -= temp*(A[i*n+jp]);
+            }
+            B[ip] -= temp*B[i];
+        }
+    }
+
+    // We've put the matrix in upper triangular form, so at this point we
+    // can solve by back-substitution.
+    for(i = n - 1; i >= 0; i--) {
+      //if(fabs(A[i][i]) < 1e-20) return false;
+        if(fabs(A[i*n+i]) < 1e-20) return false;
+
+        temp = B[i];
+        for(j = n - 1; j > i; j--) {
+          //temp -= X[j]*A[i][j];
+            temp -= X[j]*A[i*n+j];
+        }
+      //X[i] = temp / A[i][i];
+        X[i] = temp / A[i*n+i];
+    }
+
+    return true;
+}
+
+
+#ifdef NLA3D_USE_MKL
+PARDISO_equationSolver::~PARDISO_equationSolver () {
+  releasePARDISO();
+}
+
+void PARDISO_equationSolver::solveEquations (math::SparseSymMatrix* matrix, double* rhs, double* unknowns) {
+  TIMED_SCOPE(t, "solveEquations");
+
+  factorizeEquations(matrix);
+  substituteEquations(matrix, rhs, unknowns);
+}
+
+
+void PARDISO_equationSolver::substituteEquations(math::SparseSymMatrix* matrix,
+                                                 double* rhs, double* unknowns) {
+
+	//Back substitution and iterative refinement
+
+  // initialize error code
+	int error = 0;
+	int phase = 33;
+  int n = static_cast<int> (nEq);
+	PARDISO(pt, &maxfct, &mnum, &mtype, &phase,	&n, matrix->getValuesArray(),
+      (int*) matrix->getIofeirArray(),
+      (int*) matrix->getColumnsArray(),
+			NULL, &nrhs, iparm, &msglvl, rhs, unknowns, &error);
+
+	CHECK(error == 0) << "ERROR during solution. Error code = " << error;
+}
+
+
+void PARDISO_equationSolver::factorizeEquations(math::SparseSymMatrix* matrix) {
+  TIMED_SCOPE(t, "factorizeEquations");
+  if (firstRun) {
+    initializePARDISO(matrix);
+  }
+  firstRun = false;
+
+  CHECK(nEq == matrix->nRows());
+
+	int phase;
+
+  // initialize error code
+	int error = 0;
+
+	// phase 22 is the numerical factorization
+	phase = 22;
+  int n = static_cast<int> (nEq);
+
+	PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, matrix->getValuesArray(),
+      (int*) matrix->getIofeirArray(),
+      (int*) matrix->getColumnsArray(),
+			NULL, &nrhs, iparm, &msglvl, NULL, NULL, &error);
+  CHECK(error == 0) << "ERROR during numerical factorization. Error code = " << error;
+
+}
+
+void PARDISO_equationSolver::initializePARDISO (math::SparseSymMatrix* matrix) {
+	for (uint16 i = 0; i < 64; i++) {
+    iparm[i]=0;
+  }
+
+  for (uint16 i = 0; i < 64; i++) {
+    pt[i]=0;
+  }
+
+	iparm[0] = 1; //no solver default
+	iparm[1] = 2; //fill-in reordering from meris
+	iparm[2] = MKL_Get_Max_Threads();
+	iparm[3] = 0; //no iterative-direct algorithm
+	iparm[4] = 0; //no user fill-in reducing permutation
+	iparm[5] = 0; //write solution into x
+	iparm[6] = 16; //default logical fortran unit number for output
+	iparm[7] = 2; //max numbers of iterative refinement steps
+	iparm[9] = 13; //pertrub the pivor elements with 1e-13
+	iparm[10] = 1; //use nonsymmetric permutation  and scaling MPS
+	iparm[13]=0; //output: number of perturbed pivots
+	iparm[17]=-1; //output: number of nonzeros in the factor LU
+	iparm[18]=-1; //output: MFLOPS for LU factorization
+	iparm[19] = 0; //output: number of CG Iterations
+
+  LOG_IF(!isSymmetric, FATAL) << "For now PARDISO_equationSolver doesn't support non-symmetric matrices";
+	if (isPositive) {
+    mtype = 2;
+    LOG(INFO) << "EquationSolver will use positive symmetric solver";
+  } else {
+    LOG(INFO) << "EquationSolver will use non-positive symmetric solver";
+    mtype = -2;
+  }
+
+  nEq = matrix->nRows();
+  int n = static_cast<int> (nEq);
+
+  // initialize error code
+	int error = 0;
+
+  int phase = 11;
+
+  PARDISO(pt, &maxfct, &mnum, &mtype,&phase, &n, matrix->getValuesArray(),
+     (int*) matrix->getIofeirArray(),
+     (int*) matrix->getColumnsArray(),
+			NULL, &nrhs, iparm, &msglvl, NULL, NULL, &error);
+  CHECK(error == 0) << "ERROR during symbolic factorization. Error code = " << error;
+  LOG(INFO) << "Number of nonzeros in factors = " << iparm[17] << ", number of factorization MFLOPS = " << iparm[18];
+}
+
+void PARDISO_equationSolver::releasePARDISO () {
+  int phase = -1;
+  int n = static_cast<int> (nEq);
+
+  // initialize error code
+	int error = 0;
+	//Termination and release of memory
+	PARDISO(pt, &maxfct, &mnum, &mtype, &phase,	&n, NULL, NULL, NULL, NULL, &nrhs, iparm, &msglvl, NULL, NULL, &error);
+  LOG_IF (error != 0, WARNING) << "ERROR during PARDISO termination. Error code = " << error;
+}
+#endif //NLA3D_USE_MKL
+
+
+#ifdef NLA3D_USE_PARALUTION
+PARALUTION_equationSolver::~PARALUTION_equationSolver() {
+//	std::cout << "STOP paralution kernel..." << std::endl;
+//	stop_paralution();
+}
+
+void PARALUTION_equationSolver::solveEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns) {
+	TIMED_SCOPE(t, "solveEquations");
+
+	factorizeEquations(matrix);
+	substituteEquations(matrix, rhs, unknowns);
+}
+
+void PARALUTION_equationSolver::substituteEquations(math::SparseSymMatrix* matrix,
+	double* rhs, double* unknowns) {
+	_solve(matrix, (int*)matrix->getIofeirArray(), (int*)matrix->getColumnsArray(), matrix->getValuesArray(), rhs, unknowns);
+}
+
+void PARALUTION_equationSolver::_solve(math::SparseSymMatrix* matrix, int* row_offsets, int* col, double* val,
+	double* rhs, double* unknowns) {
+
+	int* IA = new int[2 * matrix->nValues() - matrix->nRows()];
+	int* JA = new int[2 * matrix->nValues() - matrix->nRows()];
+	double* VALS = new double[2 * matrix->nValues() - matrix->nRows()];
+
+	int k = 0;
+	for (int i = 1; i <= matrix->nRows(); i++)
+	{
+		for (int j = row_offsets[i - 1] - 1; j < row_offsets[i] - 1; j++)
+		{
+			if (i == col[j])
+			{
+				IA[k] = i - 1;
+				JA[k] = i - 1;
+				VALS[k] = val[j];
+				k++;
+			}
+		}
+	}
+
+	for (int i = 1; i <= matrix->nRows(); i++)
+	{
+		for (int j = row_offsets[i - 1] - 1; j < row_offsets[i] - 1; j++)
+		{
+			if (i != col[j])
+			{
+				IA[k] = i - 1;
+				JA[k] = col[j] - 1;
+				VALS[k] = val[j];
+				k++;
+			}
+		}
+	}
+
+	for (int i = 1; i <= matrix->nRows(); i++)
+	{
+		for (int j = row_offsets[i - 1] - 1; j < row_offsets[i] - 1; j++)
+		{
+			if (i != col[j])
+			{
+				IA[k] = col[j] - 1;
+				JA[k] = i - 1;
+				VALS[k] = val[j];
+				k++;
+			}
+		}
+	}
+
+
+	LocalVector<double> x;
+	LocalVector<double> pr;
+	LocalMatrix<double> mat;
+
+//	std::cout << "Assembling matrix and vectors..." << std::endl;
+	mat.Assemble(IA, JA, VALS, 2 * matrix->nValues() - matrix->nRows(), "A");
+
+	x.Allocate("x", mat.get_nrow());
+	x.Zeros();
+//	x.Ones();
+
+	pr.Allocate("pr", mat.get_nrow());
+	for (int i = 0; i < x.get_size(); i++)
+	{
+		pr[i] = rhs[i];
+	}
+
+//	mat.info();
+
+	mat.MoveToAccelerator();
+	x.MoveToAccelerator();
+	pr.MoveToAccelerator();
+
+	// Linear Solver
+	CG<LocalMatrix<double>, LocalVector<double>, double > ls;
+
+	// Preconditioner
+	MultiColoredILU<LocalMatrix<double>, LocalVector<double>, double > p;
+
+	ls.SetOperator(mat);
+	ls.SetPreconditioner(p);
+
+	ls.Init(abs_tol, rel_tol, div_tol, max_iter);
+
+	ls.Build();
+
+	ls.Verbose(verb);
+
+	double tick, tack;
+	tick = paralution_time();
+
+	ls.Solve(pr, &x);
+
+	tack = paralution_time();
+	std::cout << "Solver execution:" << (tack - tick) / 1000000 << " sec" << std::endl;
+
+	x.MoveToHost();
+
+	for (int i = 0; i < x.get_size(); i++)
+	{
+		unknowns[i] = x[i];
+	}
+
+	ls.Clear();
+	p.Clear();
+
+	x.Clear();
+	mat.Clear();
+	pr.Clear();
+
+	//	stop_paralution();
+
+	delete[] IA;
+	delete[] JA;
+	delete[] VALS;
+}
+
+void PARALUTION_equationSolver::factorizeEquations(math::SparseSymMatrix* matrix) {
+	TIMED_SCOPE(t, "factorizeEquations");
+
+//	init_paralution();
+	if (firstRunParalution) {
+        std::cout << "INITIALIZE paralution kernel..." << std::endl;
+		init_paralution();
+//		info_paralution();
+	}
+	firstRunParalution = false;
+}
+#endif //NLA3D_USE_PARALUTION
+
+
+} //namespace math
+
+} //namespace nla3d
--- a/math/EquationSolver.h
+++ b/math/EquationSolver.h
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d
+
+#pragma once
+#include "sys.h"
+#include "math/Mat.h"
+#include "math/SparseMatrix.h"
+
+namespace nla3d {
+
+namespace math {
+
+class SparseSymMatrix;
+
+// EquationSolver - abstract class for solving a system of linear equations.
+// This class primarly used in FESolver class.
+
+class EquationSolver {
+public:
+  virtual ~EquationSolver() { };
+  virtual void solveEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns) = 0;
+  virtual void factorizeEquations(math::SparseSymMatrix* matrix) = 0;
+  virtual void substituteEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns) = 0;
+  void setSymmetric (bool symmetric = true);
+  void setPositive (bool positive = true);
+protected:
+  uint32 nEq = 0;
+
+  // number of rhs
+	int nrhs = 1;
+  bool isSymmetric = true;
+  bool isPositive = true;
+};
+
+class GaussDenseEquationSolver : public EquationSolver {
+public:
+  virtual ~GaussDenseEquationSolver() { };
+  virtual void solveEquations (math::SparseSymMatrix* matrix, double* rhs, double* unknowns);
+  virtual void factorizeEquations(math::SparseSymMatrix* matrix);
+  virtual void substituteEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns);
+  static bool _solve(double* X, double* A, double* B, int n);
+protected:
+
+  dMat matA = dMat(1, 1);
+};
+
+#ifdef NLA3D_USE_MKL
+class PARDISO_equationSolver : public EquationSolver {
+public:
+  virtual ~PARDISO_equationSolver();
+  virtual void solveEquations (math::SparseSymMatrix* matrix, double* rhs, double* unknowns);
+  virtual void factorizeEquations(math::SparseSymMatrix* matrix);
+  virtual void substituteEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns);
+protected:
+  void initializePARDISO (math::SparseSymMatrix* matrix);
+  void releasePARDISO ();
+
+  // Internal solver memory pointer pt
+	void *pt[64];
+  // Paramaters for PARDISO solver (see MKL manual for clarifications)
+	int iparm[64];
+
+  // maximum number of numerical factorizations
+	int maxfct = 1;
+
+  // which factorization to use
+	int mnum = 1;
+  // don't print statistical information in file
+	int msglvl = 0;
+//	int msglvl = 1;
+
+  bool firstRun = true;
+  // real symmetric undifinite defined matrix
+	int mtype = -2;
+};
+#endif //NLA3D_USE_MKL
+
+
+#ifdef NLA3D_USE_PARALUTION
+class PARALUTION_equationSolver : public EquationSolver {
+public:
+	virtual ~PARALUTION_equationSolver();
+	virtual void solveEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns);
+	virtual void factorizeEquations(math::SparseSymMatrix* matrix);
+	virtual void substituteEquations(math::SparseSymMatrix* matrix, double* rhs, double* unknowns);
+protected:
+	void _solve(math::SparseSymMatrix* matrix, int* row_offsets, int* col, double* val, double* rhs, double* unknowns);
+
+//	double abs_tol = 1.0E-08;
+	double abs_tol = 1.0E-06;
+//	double rel_tol = 1.0E-12;
+	double rel_tol = 1.0E-06;
+	double div_tol = 1.0E+10;
+	int max_iter = 1000000;
+//	int verb = 0;
+	int verb = 1;
+//	bool firstRun = true;
+};
+#endif //NLA3D_USE_PARALUTION
+
+
+extern EquationSolver* defaultEquationSolver;
+
+} // namespace math
+
+} //namespace nla3d
--- a/math/Mat.cpp
+++ b/math/Mat.cpp
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#include "Mat.h"
+
+namespace nla3d {
+namespace math {
+
+//dMat dMat_tmp(1,1);
+
+template<>
+double Mat<1,1>::det()
+{
+	return data[0][0];
+}
+template<>
+double Mat<2,2>::det()
+{
+	return data[0][0]*data[1][1]-data[0][1]*data[1][0];
+}
+template<>
+double Mat<3,3>::det()
+{
+	return data[0][0]*(data[1][1]*data[2][2]-data[1][2]*data[2][1])-data[0][1]*(data[1][0]*data[2][2]-data[1][2]*data[2][0])+data[0][2]*(data[1][0]*data[2][1]-data[1][1]*data[2][0]);
+}
+
+template<>
+Mat<1,1> Mat<1,1>::inv(double det) {
+    assert(det);
+    return (1.0/det);
+}
+template<>
+Mat<2,2> Mat<2,2>::inv(double det) {
+    assert(det);
+	Mat<2,2> tmp(data[1][1],-data[0][1],-data[1][0],data[0][0]);
+	return tmp*(1.0/det);
+}
+template<>
+Mat<3,3> Mat<3,3>::inv(double det) {
+    assert(det);
+	Mat<3,3> tmp(data[1][1]*data[2][2]-data[1][2]*data[2][1],data[0][2]*data[2][1]-data[0][1]*data[2][2],data[0][1]*data[1][2]-data[0][2]*data[1][1],
+				data[2][0]*data[1][2]-data[1][0]*data[2][2],data[0][0]*data[2][2]-data[0][2]*data[2][0],data[1][0]*data[0][2]-data[0][0]*data[1][2],
+				data[1][0]*data[2][1]-data[1][1]*data[2][0],data[0][1]*data[2][0]-data[0][0]*data[2][1],data[0][0]*data[1][1]-data[0][1]*data[1][0]);
+	return tmp*(1.0/det);
+}
+
+
+//---------operator<<----------------------------------------------------------
+std::ostream &operator<<(std::ostream &stream, dMat &obj) {
+	for (uint16 i = 0; i < obj.dimM; i++)
+	{
+		std::cout << "[";
+		for (uint16 j = 0; j < obj.dimN; j++)
+			std::cout << obj[i][j] << " ";
+		std::cout << "]" << endl;
+	}
+	return stream;
+}
+
+} // namespace math
+} // namespace nla3d
--- a/math/Mat.h
+++ b/math/Mat.h
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#pragma once
+#include "sys.h"
+#include <iostream>
+#include "math/Vec.h"
+
+#ifdef NLA3D_USE_BLAS
+  #include <mkl.h>
+#endif
+
+namespace nla3d {
+namespace math {
+
+template<uint16 dimM, uint16 dimN>
+class Mat
+{
+public:
+	Mat() {
+		assert(dimM && dimN);
+	}
+	Mat(double first, ...) {
+		assert(dimM && dimN);
+		va_list argp;
+		va_start(argp, first);
+		for (uint16 i=0; i < dimM; i++)
+			for (uint16 j=0; j < dimN; j++)
+			{
+				if (i == 0 && j == 0) data[i][j]=first;
+				else data[i][j]=va_arg(argp, double);
+			}
+		va_end(argp);
+	}
+	~Mat() { }
+
+	Vec<dimN>& operator[] (uint16 n);
+	const Vec<dimN>& operator[] (uint16 n) const;
+	Mat<dimN,dimM> transpose ();
+	void display ();
+	Mat operator+ (const Mat &op);
+	Mat& operator+= (const Mat &op);
+	Mat operator- ();
+	Mat& operator= (const Mat &op);
+	Mat<dimM,1>& operator= (const Vec<dimM> &op); //TODO: CHECK this operation
+	Mat operator- (const Mat &op);
+	Mat operator* (const double op);
+	void Identity ();
+	void zero ();
+	double det();
+	Vec<dimM> eigenvalues();
+	Mat<dimM,dimN> inv(double det);
+	Mat<dimM-1,dimN-1> cross_cut (uint16 cuti, uint16 cutj);
+	string toString ();
+	double* ptr ();
+	bool compare (const Mat<dimM,dimN> &B, double eps = 0.00001);
+	void simple_read (std::istream &st);
+	//friend функции
+	template <uint16 dimM1, uint16 dimN1> 
+	friend std::ostream& operator<< (std::ostream& stream, const Mat<dimM1,dimN1> &obj);
+	template <uint16 dimM1, uint16 dimN1, uint16 dimM2, uint16 dimN2> 
+    friend Mat<dimM1,dimN2> operator* (const Mat<dimM1,dimN1> &op1, const Mat<dimM2,dimN2> &op2);
+	template <uint16 dimM1, uint16 dimN1, uint16 dimM2> 
+    friend Vec<dimM1> operator* (const Mat<dimM1,dimN1> &op1, const Vec<dimM2> &op2);
+    // template<uint16 dimM1, uint16 dimN1>
+    // friend bool matCompare (const Mat<dimM1, dimN>& mat1, const Mat<dimM1, dimN1>& mat2, const double eps);
+
+	Vec<dimN> data[dimM];
+private:
+    //data was moved from here to public
+};
+
+//---------operator<<----------------------------------------------------------
+template<uint16 dimM, uint16 dimN> std::ostream &operator<<(std::ostream &stream, const Mat<dimM,dimN> &obj) {
+	for (uint16 i = 0; i < dimM; i++) {
+		stream << "[" << obj.data[i] << "]" << std::endl;
+	}
+	return stream;
+}
+
+//----------operator[]---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+inline Vec<dimN>& Mat<dimM,dimN>::operator[] (uint16 n) {
+		assert(n < dimM);
+		return data[n];
+}
+template<uint16 dimM, uint16 dimN>
+inline const Vec<dimN>& Mat<dimM,dimN>::operator[] (uint16 n) const {
+	assert(n < dimM);
+	return data[n];
+}
+//----------Identity ()---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+void Mat<dimM,dimN>::Identity () {
+	assert (dimM==dimN);
+	for (uint16 i=0; i < dimM; i++)
+		for (uint16 j=0; j < dimN; j++)
+			if (i==j)
+				data[i][j]=1.0f;
+			else
+				data[i][j]=0.0f;
+}
+//----------zero()---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+void Mat<dimM,dimN>::zero() {
+	memset(data,0,sizeof(double)*dimM*dimN);
+}
+//-------------display()------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+void Mat<dimM,dimN>::display() {
+	for (unsigned int i = 0; i < dimM; i++) {
+		std::cout << "[";
+		data[i].display();
+		std::cout << "]" << std::endl;
+	} 
+}
+//----------transpose()-------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+Mat<dimN,dimM> Mat<dimM,dimN>::transpose () {
+	Mat<dimN,dimM> p;
+	for (uint16 i=0; i < dimM; i++)
+		for (uint16 j=0; j < dimN; j++)
+			p[j][i]=this->data[i][j];
+	return p;
+}
+//----------operator+(Mat)---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+Mat<dimM,dimN> Mat<dimM,dimN>::operator+ (const Mat<dimM,dimN> &op) {
+	Mat<dimM,dimN> p;
+	for (uint16 i=0; i < dimM; i++)
+		for (uint16 j=0; j < dimN; j++)
+			p[i][j]=this->data[i][j]+op.data[i][j];
+	return p;
+}
+//----------operator-(Mat)---------------------------------------------------------
+//template<uint16 dimM, uint16 dimN>
+//Mat<dimM,dimN> Mat<dimM,dimN>::operator- (const Mat<dimM,dimN> &op) {
+//	Mat<dimM,dimN> p;
+//	for (uint16 i=0; i < dimM; i++)
+//		for (uint16 j=0; j < dimN; j++)
+//			p[i][j]=this->data[i][j]-op.data[i][j];
+//	return p;
+//}
+//-----------operator*(double)----------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+Mat<dimM,dimN> Mat<dimM,dimN>::operator*(const double op)
+{
+	Mat<dimM,dimN> p;
+	for (uint16 i=0; i < dimM; i++)
+		for (uint16 j=0; j < dimN; j++)
+			p[i][j]=this->data[i][j]*op;
+	return p;
+}
+//----------operator+=---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+Mat<dimM,dimN>& Mat<dimM,dimN>::operator+= (const Mat<dimM,dimN> &op) {
+	for (uint16 i=0; i < dimM; i++)
+		for (uint16 j=0; j < dimN; j++)
+			this->data[i][j]+=op.data[i][j];
+	return *this;
+}
+//----------operator=(Mat)---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+Mat<dimM,dimN>& Mat<dimM,dimN>::operator= (const Mat<dimM,dimN> &op)
+{
+	for (uint16 i = 0; i < dimM; i++)
+		this->data[i]=op.data[i];
+	return *this;
+}
+//----------operator=(Vec)---------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+Mat<dimM,1>& Mat<dimM,dimN>::operator= (const Vec<dimM> &op)
+{
+	assert(dimN == 1); //только для матрицы-столбца
+	for (uint16 i = 0; i < dimM; i++)
+		this->data[i][0] = op[i];
+	return *this;
+}
+//----------operator*(Mat)---------------------------------------------------------
+template<uint16 dimM1, uint16 dimN1, uint16 dimM2, uint16 dimN2> Mat<dimM1,dimN2> operator*(const Mat<dimM1,dimN1> &op1, const Mat<dimM2,dimN2> &op2) {
+	assert(dimN1 == dimM2);
+	Mat<dimM1, dimN2> p;
+	double element;
+	double *ptr1 = (double*) op1.data;
+	double *ptr2 = (double*) op2.data;
+	for (uint16 i=0; i < dimM1; i++)
+		for (uint16 j=0; j < dimN2; j++)
+		{
+			element=0.0f;
+			for (uint16 l=0; l < dimN1; l++) element+=ptr1[i*dimN1+l]*ptr2[l*dimN2+j];
+				//(op1.data[i])[l]*(op2.data[l])[j];
+			p[i][j]=element;
+		}
+	return p;
+
+
+	//assert(dimN == dimM2);
+	//Mat<dimM, dimN2> p;
+	//double element;
+	//for (uint16 i=0; i < dimM; i++)
+	//	for (uint16 j=0; j < dimN2; j++)
+	//	{
+	//		element=0.0f;
+	//		for (uint16 l=0; l < dimN; l++) element+=(op1.data[i])[l]*(op2.data[l])[j];
+	//		p[i][j]=element;
+	//	}
+	//return p;
+}
+//-----------operator*(Vec)---------------------------------------------------------
+template <uint16 dimM1, uint16 dimN1, uint16 dimM2> Vec<dimM1> operator* (const Mat<dimM1,dimN1> &op1, const Vec<dimM2> &op2) {
+	assert(dimN1==dimM2);
+	Vec<dimM1> p;
+	double el;
+	for (uint16 i=0; i < dimM1; i++)
+	{
+		el = 0.0f;
+		for (uint16 j=0; j < dimN1; j++)
+			el += op1.data[i][j]*op2[j];
+		p[i] = el;
+	}
+	return p;
+}
+//--------------------------------------------------------------------------------
+template<uint16 dimM, uint16 dimN>
+string Mat<dimM,dimN>::toString()
+{
+	string p;
+	for (unsigned int i = 0; i < dimM; i++) {
+		p+= "[";
+		p+=data[i].toString();
+		p+= "]";
+		if (i!=dimM-1) p+= "\n";
+	} 
+	return p;
+}
+//-------------------------------------------------------------------------------
+template<> double Mat<1,1>::det();
+template<> double Mat<2,2>::det();
+template<> double Mat<3,3>::det();
+
+template<uint16 dimM, uint16 dimN>
+double Mat<dimM,dimN>::det()
+{
+	//разложение по строке i=0
+	assert(dimM == dimN);
+	double det = 0.0;
+	double koef = 1;
+	for (uint16 j=0; j < dimN; j++)
+	{
+		Mat<dimM-1,dimM-1> tmp;
+		det += koef*data[0][j]*cross_cut(0,j).det();
+		koef *= -1;
+	}
+	return det;
+}
+
+template<> Mat<1, 1> Mat<1,1>::inv(double det);
+template<> Mat<2, 2> Mat<2,2>::inv(double det);
+template<> Mat<3, 3> Mat<3,3>::inv(double det);
+
+template<uint16 dimM, uint16 dimN>
+Mat<dimM, dimN> Mat<dimM,dimN>::inv(double det)
+{
+	//через матрицу алг. дополнений
+	assert(dimM == dimN);
+	Mat<dimM,dimN> C;
+	for (uint16 i=0; i < dimM; i++)
+		for (uint16 j=0; j < dimN; j++)
+			C[i][j] = npow(-1,i+j)*cross_cut(i,j).det();  //CHECK
+	C = C.transpose()*(1/det);
+	return C;
+}
+
+template<uint16 dimM, uint16 dimN>
+Mat<dimM-1,dimN-1> Mat<dimM,dimN>::cross_cut (uint16 cuti, uint16 cutj)
+{
+	Mat<dimM-1, dimN-1> tmp;
+	for (uint16 ii=0; ii < dimM-1; ii++)
+		for (uint16 jj=0; jj < dimN-1; jj++)
+			tmp[ii][jj] = data[(ii>=cuti)?ii+1:ii][(jj>=cutj)?jj+1:jj];
+	return tmp;
+}
+template<uint16 dimM, uint16 dimN>
+Vec<dimM>  Mat<dimM,dimN>::eigenvalues()
+{
+	assert(dimM == 3 && dimN == 3); //TODO: пока только для матриц 3 на 3
+	double I1 = data[0][0] + data[1][1] + data[2][2];
+	double I2 = data[0][0]*data[1][1] + data[1][1]*data[2][2] + data[0][0]*data[2][2] - 
+		data[0][1]*data[0][1] - data[0][2]*data[0][2] - data[1][2]*data[1][2];
+	double I3 = data[0][0]*data[1][1]*data[2][2] + 2*data[0][1]*data[1][2]*data[0][2] - 
+		data[0][0]*data[1][2]*data[1][2]-data[1][1]*data[0][2]*data[0][2]-data[2][2]*data[0][1]*data[0][1];
+
+	double a = -I1;
+	double b = I2;
+	double c = -I3;
+	 // код из http://ru.wikipedia.org/wiki/%D0%A2%D1%80%D0%B8%D0%B3%D0%BE%D0%BD%D0%BE%D0%BC%D0%B5%D1%82%D1%80%D0%B8%D1%87%D0%B5%D1%81%D0%BA%D0%B0%D1%8F_%D1%84%D0%BE%D1%80%D0%BC%D1%83%D0%BB%D0%B0_%D0%92%D0%B8%D0%B5%D1%82%D0%B0
+	// для трех вещественных корней
+	// x*x*x + a * x * x + b * x  + c == 0
+	double p = b - a * a / 3;
+	double q = 2 * a * a * a / 27 - a * b / 3 + c;
+	double A = sqrt(- 4 * p / 3); 
+ 
+	double c3phi = - 4 * q / (A * A * A);
+ 
+	double phi = acos(c3phi) / 3;  
+ 
+	double root1 = A * cos(phi) - a / 3;
+	double root2 = A * cos(phi + 2 * M_PI / 3) - a / 3;
+	double root3 = A * cos(phi - 2 * M_PI / 3) - a / 3;
+
+	//сортируем
+	double s1,s2,s3;
+	if (root1 > root2 && root1 > root3)
+	{
+		s1 = root1;
+		if (root2 > root3)
+		{
+			s2 = root2;
+			s3 = root3;
+		}
+		else
+		{
+			s2 = root3;
+			s3 = root2;
+		}
+	}
+	else if (root2 > root1 && root2 > root3)
+	{
+		s1 = root2;
+		if (root1 > root3)
+		{
+			s2 = root1;
+			s3 = root3;
+		}
+		else
+		{
+			s2 = root3;
+			s3 = root1;
+		}
+	}
+	else
+	{
+		s1 = root3;
+		if (root1 > root2)
+		{
+			s2 = root1;
+			s3 = root2;
+		}
+		else
+		{
+			s2 = root2;
+			s3 = root1;
+		}
+	}
+	return Vec<3>(s1,s2,s3);
+}
+
+template<uint16 dimM, uint16 dimN>
+double* Mat<dimM,dimN>::ptr ()
+{
+	return data[0].ptr();
+}
+
+
+template<uint16 dimM, uint16 dimN>
+bool Mat<dimM,dimN>::compare (const Mat<dimM,dimN> &B, double eps) {
+	double *Dp = (double*) data;
+	// double *Bp = B.ptr();
+    double *Bp = (double*) B.data;
+	for (uint16 i = 0; i < dimM; i++) {
+		for (uint16 j = 0; j < dimN; j++) {
+			if (fabs(*Dp-*Bp) > eps) {
+        LOG(INFO) << "Mat[" << i << "][" << j << "]: " << *Dp << " != " << *Bp;
+				return false;
+			}
+			Dp++;
+			Bp++;
+		}
+	}
+	return true;
+}
+
+
+template<uint16 dimM, uint16 dimN>
+void Mat<dimM,dimN>::simple_read (std::istream &st)
+{
+	double *Bp = (double*) data;
+	for (uint16 i=0;i<dimM;i++)
+	{
+		for (uint16 j=0;j<dimN;j++)
+		{
+			st >> *Bp;
+			Bp++;
+		}
+	}
+}
+
+
+class dMat;
+class dMat_interface
+{
+public:
+	dMat_interface ():ptr(NULL),M(0),N(0),row(0) 
+	{	}
+	double& operator[] (uint16 col)
+	{
+		assert(ptr);
+		assert (col < N);
+		assert (row < M);
+		return ptr[row*N + col];
+	}
+	friend class dMat;
+private:
+	uint16 M, N;
+	uint32 row;
+	double *ptr;
+};
+
+//dynamic matrix to pass arbitrary matrix to functions as arguments
+class dMat
+{
+public:
+	dMat(uint16 dim_m, uint16 dim_n) : dimM(0),dimN(0),data(NULL)
+	{
+		if (dim_m && dim_n)
+			resize(dim_m, dim_n);
+	}
+	dMat(uint16 dim_m, uint16 dim_n, double first, ...) : dimM(0),dimN(0),data(NULL) {
+    va_list argp;
+		if (dim_m && dim_n)
+		{
+			resize(dim_m, dim_n);
+			va_start(argp, first);
+			data[0] = first;
+			for (uint16 i=1; i < dimM*dimN; i++)
+				data[i]=va_arg(argp, double);
+			va_end(argp);
+		}
+	}
+	dMat(const dMat &from) : dimM(0),dimN(0),data(NULL)
+	{
+		operator=(from);
+	}
+	dMat_interface& operator[] (uint16 row) {
+        //TODO: it seems unsafe for parallel read access to dMat..
+		dmat_int.row = row;
+		return dmat_int;
+	}
+	void resize (uint16 dim_m, uint16 dim_n)
+	{
+		if (data) delete[] data;
+		data = new double[dim_m*dim_n];
+		dmat_int.ptr = data;
+		dmat_int.M = dim_m;
+		dmat_int.N = dim_n;
+		dimM = dim_m;
+		dimN = dim_n;
+		zero();
+	}
+
+	void fill (double first, ...) {
+		va_list argp;
+		va_start(argp, first);
+		data[0] = first;
+		for (uint16 i=1; i < dimM*dimN; i++)
+			data[i]=va_arg(argp, double);
+		va_end(argp);
+	}
+	~dMat()
+	{
+		if (data) delete[] data;
+		data = NULL;
+	}
+
+	void zero ()
+	{
+		memset(data,0,sizeof(double)*dimM*dimN);
+	}
+	template <uint16 M, uint16 N> Mat<M,N> toMat();
+	template <uint16 M> Vec<M> toVec(uint16 col=0);
+	template <uint16 M, uint16 N> void cpMat(Mat<M,N> &mat);
+	template <uint16 M> void cpVec(Vec<M> &vec, uint16 col=0);
+	dMat& operator= (const dMat &from)
+	{
+		resize(from.dimM, from.dimN);
+		memcpy(this->data, from.data, sizeof(double)*dimM*dimN);
+		return *this;
+	}
+
+	uint16 dM ()
+	{
+		return dimM;
+	}
+	uint16 dN ()
+	{
+		return dimN;
+	}
+
+    double* ptr()
+    {
+        return data;
+    }
+
+	friend std::ostream& operator<< (std::ostream& stream, dMat &obj);
+private:
+	uint16 dimM;
+	uint16 dimN;
+	double *data;
+	dMat_interface dmat_int;
+};
+
+
+//копирует dMat в правый верхний угол Mat<M,N>
+template <uint16 M, uint16 N>
+Mat<M,N> dMat::toMat()
+{
+	assert(M >= dimM && N >= dimN); //Mat >= dMat
+	Mat<M,N> tmp;
+	for (uint16 i = 0; i < M; i++)
+		for (uint16 j = 0; j < N; j++)
+			tmp[i][j] = (*this)[i][j];
+	return tmp;
+}
+
+template <uint16 M> 
+Vec<M> dMat::toVec(uint16 col)
+{
+	assert(M >= dimM && dimN > col); //Vec >= dMat по M 
+	Vec<M> tmp;
+	for (uint16 i = 0; i < M; i++)
+		tmp[i] = (*this)[i][col];
+	return tmp;
+}
+
+template <uint16 M, uint16 N> 
+void dMat::cpMat(Mat<M,N> &mat)
+{
+	assert(dimM >= M && dimN >= N); //dMat >= Mat
+	for (uint16 i = 0; i < M; i++)
+		for (uint16 j = 0; j < N; j++)
+			(*this)[i][j] = mat[i][j];
+}
+
+//функция копирует вектор в столбец col матрицы dMat
+// col - от нуля
+template <uint16 M> 
+void dMat::cpVec(Vec<M> &vec, uint16 col)
+{
+	assert(dimM >= M && dimN > col);
+	for (uint16 i = 0; i < M; i++)
+		(*this)[i][col] = vec[i];
+}
+
+
+//-------------------------------------------------------------
+//	MatSym
+//-------------------------------------------------------------
+template<uint16 dimM>
+class MatSym {
+public:
+	MatSym () {
+		assert(dimM);
+	}
+	double* ptr() {
+		return data;
+	}
+
+	const double* ptr() const {
+		return data;
+	}
+
+	void zero() {
+		memset((void*) data, 0, sizeof(double)*getLength());
+	}
+	
+  // indexing from 0
+	double& comp (uint16 i, uint16 j) {
+		uint16 b;
+		if (i > j) {
+			b = j;
+			j = i;
+			i = b;
+		}
+		b = (2*dimM-(i-1))/2*i + (j-i);
+		b = dimM*i - (i-1)*i/2 + (j-i);
+		return data[b];
+	}
+	
+	uint16 getLength () {
+		return dimM*(dimM+1)/2;
+	}
+  Mat<dimM, dimM> toMat();
+	void simple_read (std::istream &st);
+	bool compare (MatSym<dimM> &B, double eps = 0.00001);
+	MatSym& operator+= (const MatSym &op);
+
+	double data[dimM*(dimM+1)/2];
+};
+
+template<uint16 dimM>
+void MatSym<dimM>::simple_read (std::istream &st)
+{
+	double *Bp = (double*) data;
+	uint16 l = getLength();
+	for (uint16 i=0;i<l;i++)
+	{
+			st >> *Bp;
+			Bp++;
+	}
+}
+
+template<uint16 dimM>
+bool MatSym<dimM>::compare (MatSym<dimM> &B, double eps)
+{
+	double *Dp = (double*) data;
+	double *Bp = B.ptr();
+	uint16 l = getLength();
+	for (uint16 i=0;i<l;i++)
+	{
+		if (fabs(*Dp-*Bp) > eps) {
+			return false;
+		}
+		Dp++;
+		Bp++;
+	}
+	return true;
+}
+
+template<uint16 dimM>
+Mat<dimM, dimM> MatSym<dimM>::toMat() {
+  uint16 ind = 0;
+  Mat<dimM,dimM> mat;
+  for (uint16 i = 0; i < dimM; i++) {
+    for (uint16 j = i; j < dimM; j++) {
+      mat[i][j] = data[ind];
+      mat[j][i] = data[ind];
+      ind++;
+    }
+  }
+  return mat;
+}
+
+
+template<uint16 dimM>
+MatSym<dimM>& MatSym<dimM>::operator+= (const MatSym<dimM> &op) {
+  double* thisp = this->ptr();
+  const double* opp = op.ptr();
+  for (uint16 i = 0; i < getLength(); i++) {
+    *thisp += *opp;
+    thisp++;
+    opp++;
+  }
+
+	return *this;
+}
+
+// [R] = coef * [B]^T*[D]*[B]
+// coef - double scalar
+// [B] - common matrix (dimM x dimN)
+// [D] - symmetric matrix (dimM x dimM)
+// [R] - symmetrix matrix (dimN x dimN)
+template<uint16 dimM,uint16 dimN>
+void matBTDBprod (Mat<dimM,dimN> &B, MatSym<dimM> &D, double coef, MatSym<dimN> &R) 
+{
+	Mat<dimN,dimM> A;
+	A.zero();
+	double *Ap = A.ptr();
+	double *Bp = B.ptr();
+	double *Dp = D.ptr();
+	double *Rp = R.ptr();
+	uint16 i,j,k;
+
+	//A = BT*D
+		// BT*Du + BT*Dd
+	for (k=0;k<dimM;k++)
+	{
+		j = k; // include diag elem
+		while (j < dimM)
+		{
+			for (i=0;i<dimN;i++)
+			{
+				Ap[i*dimM+j] += Bp[k*dimN+i]*(*Dp);
+			}
+			Dp++;
+			j++;
+		}
+	}
+		// BT*DuT
+		Dp = D.ptr();
+	for (j=0;j<dimM;j++)
+	{
+		k = j + 1; Dp++; //leave the diagonal
+		while (k < dimM)
+		{
+			for (i=0;i<dimN;i++)
+			{
+				Ap[i*dimM+j] += Bp[k*dimN+i]*(*Dp);
+			}
+			Dp++;
+			k++;
+		}
+	}
+
+	//R = A*B
+	for (i=0;i<dimN;i++)
+	{
+		for (j=i;j<dimN;j++)
+		{
+			for (k=0;k<dimM;k++)
+			{
+				*Rp += Ap[i*dimM+k]*Bp[k*dimN+j]*coef;
+			}
+			Rp++;
+		}
+	}
+
+}
+
+
+template<uint16 dimM,uint16 dimN>
+void matBTVprod(Mat<dimM,dimN> &B, Vec<dimM> &V, double coef, Vec<dimN> &R)
+{
+#ifndef NLA3D_USE_BLAS
+	double *Bp = B.ptr();
+	uint16 i,j;
+	for (i=0;i<dimN;i++)
+		for (j=0;j<dimM;j++)
+			R[i] += Bp[j*dimN+i]*V[j]*coef;
+#else
+  cblas_dgemv(CblasRowMajor, CblasTrans, dimM, dimN, coef, B.ptr(), dimN, V.ptr(), 1, 0.0, R.ptr(), 1);
+#endif
+}
+
+template<uint16 dimM,uint16 dimN>
+void matBVprod(Mat<dimM,dimN> &B,Vec<dimN> &V, double coef, Vec<dimM> &R) {
+#ifndef NLA3D_USE_BLAS
+	double *Bp = B.ptr();
+	uint16 i,j;
+	for (i=0;i<dimM;i++)
+		for (j=0;j<dimN;j++)
+			R[i] += Bp[i*dimN+j]*V[j]*coef;
+#else
+  cblas_dgemv(CblasRowMajor, CblasNoTrans, dimM, dimN, coef, B.ptr(), dimN, V.ptr(), 1, 0.0, R.ptr(), 1);
+#endif
+}
+
+template<uint16 dimM>
+void matBVprod(MatSym<dimM> &B,Vec<dimM> &V, double coef, Vec<dimM> &R) {
+  // TODO: this this inefficient version
+  // convert to regular matrix
+  Mat<dimM, dimM> mB = B.toMat();
+	double *Bp = mB.ptr();
+	uint16 i,j;
+	for (i=0;i<dimM;i++)
+		for (j=0;j<dimM;j++)
+			R[i] += Bp[i*dimM+j]*V[j]*coef;
+}
+
+
+template<uint16 dimM1,uint16 dimN1,uint16 dimN2>
+void matABprod(Mat<dimM1,dimN1> &A, Mat<dimN1,dimN2> &B, const double coef, Mat<dimM1,dimN2> &R) {
+#ifndef NLA3D_USE_BLAS
+	uint16 i,j,k;
+	double *Rp = R.ptr();
+	double *Ap = A.ptr();
+	double *Bp = B.ptr();
+	for (i=0;i<dimM1;i++) {
+		for (j=0;j<dimN2;j++) {
+			for (k=0;k<dimN1;k++)
+				*Rp += Ap[i*dimN1+k]*Bp[k*dimN2+j]*coef;
+			Rp++;
+		}
+	}
+#else
+  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, dimM1, dimN2, dimN1, coef, A.ptr(), dimN1, B.ptr(), dimN2, 0.0, R.ptr(), dimN2);
+#endif
+}
+
+template<uint16 dimM1,uint16 dimN1,uint16 dimN2>
+void matATBprod(Mat<dimM1,dimN1> &A, Mat<dimM1,dimN2> &B, const double coef, Mat<dimN1,dimN2> &R)  {
+#ifndef NLA3D_USE_BLAS
+	uint16 i,j,k;
+	double *Rp = R.ptr();
+	double *Ap = A.ptr();
+	double *Bp = B.ptr();
+	for (i=0;i<dimN1;i++)
+	{
+		for (j=0;j<dimN2;j++)
+		{
+			for (k=0;k<dimM1;k++)
+				*Rp += Ap[k*dimN1+i]*Bp[k*dimN2+j]*coef;
+			Rp++;
+		}
+	}
+#else
+  cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, dimN1, dimN2, dimM1, coef, A.ptr(), dimN1, B.ptr(), dimN2, 0.0, R.ptr(), dimN2);
+#endif
+}
+
+} // namespace math
+} // namespace nla3d
--- a/math/SparseMatrix.cpp
+++ b/math/SparseMatrix.cpp
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#include "SparseMatrix.h"
+
+namespace nla3d {
+namespace math {
+
+
+const uint32 SparsityInfo::invalid = 0xFFFFFFFF;
+
+SparsityInfo::SparsityInfo() {
+
+}
+
+
+SparsityInfo::SparsityInfo(uint32 _nrows, uint32 _ncols, uint32 _max_in_row) {
+  reinit(_nrows, _ncols, _max_in_row);
+}
+
+
+SparsityInfo::~SparsityInfo() {
+  clear();
+}
+
+
+void SparsityInfo::clear() {
+  if (iofeir) {
+    delete[] iofeir;
+  }
+  iofeir = nullptr;
+
+  if (columns) {
+    delete[] columns;
+  }
+  columns = nullptr;
+
+  compressed = false;
+  numberOfValues = 0;
+}
+
+
+void SparsityInfo::reinit(uint32 _nrows, uint32 _ncols, uint32 _max_in_row) {
+  assert (_max_in_row > 0);
+  clear();
+
+  nRows = _nrows;
+  nColumns = _ncols;
+
+  maxInRow = _max_in_row < nColumns  ? _max_in_row : nColumns;
+	columns = new uint32[nRows * maxInRow];
+  std::fill_n(columns, nRows * maxInRow, invalid);
+
+	iofeir = new uint32[nRows+1];
+  iofeir[0] = 1;
+  for (uint32 i = 1; i <= nRows; i++) {
+    iofeir[i] = iofeir[i-1] + maxInRow;
+  }
+}
+
+
+// row and column positions are started from 1
+void SparsityInfo::addEntry(uint32 _i, uint32 _j) {
+  assert(iofeir != nullptr);
+  assert(columns != nullptr);
+  assert(_i > 0 && _i <= nRows);
+  assert(_j > 0 && _j <= nColumns);
+  assert(compressed == false);
+  // NOTE: before compressions columns are not sorted
+  uint32 st = iofeir[_i-1] - 1;
+  uint32 en = iofeir[_i] - 1;
+  for (uint32 i = st; i < en; i++) {
+    // if element already in matrix
+    if (columns[i] == _j) return;
+    // have a space to store new entry in the row
+    if (columns[i] == invalid) {
+      columns[i] = _j;
+      numberOfValues++;
+      return;
+    }
+  }
+  // no more room for new entry
+  LOG(FATAL) << "maxInRow overflow!";
+}
+
+
+void SparsityInfo::compress() {
+  if (compressed) return;
+
+  uint32 next = 0;
+  uint32 nextRow = 0;
+  uint32* old_columns = columns;
+  columns = new uint32[numberOfValues];
+
+  for (uint32 i = 0; i < nRows; i++) {
+    for (uint32 j = iofeir[i] - 1; j < iofeir[i+1] - 1; j++) {
+      if (old_columns[j] == invalid) break;
+      columns[next++] = old_columns[j];
+    }
+    if ((next - nextRow) > 1) {
+      std::sort(&columns[nextRow], &columns[next]);
+    }
+
+    iofeir[i] = nextRow + 1;
+    nextRow = next;
+  }
+  // check that we used all space in columns
+  assert(next == numberOfValues);
+  iofeir[nRows] = next + 1;
+
+  delete[] old_columns;
+  compressed = true;
+}
+
+
+uint32 SparsityInfo::getIndex(uint32 _i, uint32 _j) {
+  assert(columns);
+  assert(iofeir);
+	assert(_i > 0 && _i <= nRows);
+	assert(_j > 0 && _j <= nColumns);
+
+  uint32 ind;
+	uint32 st = iofeir[_i-1] - 1;
+	uint32 en = iofeir[_i] - 1;
+
+  if (st == en) return invalid;
+
+  en--;
+
+	while(1) {
+		if (en - st == 1) {
+			if (columns[st] == _j)
+        return st;
+			if (columns[en] == _j)
+        return en;
+			return invalid;
+		}
+		ind = (uint32) ((en+st)*0.5);
+		
+		if (columns[ind] == _j)
+      return ind;
+
+		if (en == st)
+      return invalid;
+
+		if (columns[ind] > _j) {
+			en = ind;
+    } else {
+			st = ind;
+    }
+	}
+  LOG(FATAL) << "What i'm doing here..?";
+}
+
+
+void BaseSparseMatrix::printInternalData(std::ostream& out) {
+  assert(si);
+  assert(values);
+  assert(si->compressed);
+
+	out << "values = {";
+	for (uint32 i = 0; i < si->numberOfValues; i++) {
+		out << values[i] << "\t";
+  }
+	out << "}" << std::endl;
+
+	out << "columns = {";
+	for (uint32 i = 0; i < si->nRows; i++) {
+		for (uint32 j = si->iofeir[i] - 1; j < si->iofeir[i + 1] - 1; j++) {
+			out << si->columns[j] << "\t";
+    }
+	}
+	out << "}" << std::endl;
+
+	out << "iofeir = {";
+	for (uint32 i = 0; i <= si->nRows; i++) {
+		out << si->iofeir[i] << "\t";
+  }
+	out << "}" << std::endl;
+}
+
+
+void BaseSparseMatrix::writeCoordinateTextFormat(std::ostream& out) {
+  assert(si);
+  assert(values);
+  assert(si->compressed);
+  // we need to return back old preferences after usage
+  auto old_precision = out.precision(15);
+  auto old_flags = out.setf(std::ios_base::scientific, std::ios_base::floatfield);
+  // write header: nRows, nColumns, numberOfValues
+  out << si->nRows << ' ' << si->nColumns << ' ' << si->numberOfValues << std::endl;
+  uint32 total = 0;
+  for (uint32 i = 1; i <= si->nRows; i++) {
+    for (uint32 j = si->iofeir[i - 1] - 1; j < si->iofeir[i] - 1; j++) {
+        out << i << ' ' << si->columns[j] << ' ' << values[j] << std::endl;
+        total++;
+    }
+  }
+  assert(total == si->numberOfValues);
+  // return back old setup
+  out.precision(old_precision);
+  out.setf(old_flags);
+}
+
+
+void BaseSparseMatrix::readCoordinateTextFormat(std::istream& in) {
+  std::vector<SparseEntry> entries;
+  uint32 _nrows, _ncols, _nvalues;
+  // read header: nRows, nColumns, numberOfValues
+  in >> _nrows >> _ncols >> _nvalues;
+  entries.reserve(_nvalues);
+  for (uint32 i = 1; i <= _nvalues; i++) {
+    SparseEntry entry;
+    in >> entry.i >> entry.j >> entry.v;
+    entries.push_back(entry);
+  }
+  reinit(_nrows, _ncols, entries);
+}
+
+
+bool BaseSparseMatrix::compare(const BaseSparseMatrix& op2, double th) {
+  // this function conduct strict comparison: matrices should have the same sparsity and the same
+  // non-zero values
+  BaseSparseMatrix& op1 = *this;
+  assert(op1.si && op2.si);
+  assert(op1.values && op2.values);
+  assert(op1.si->compressed && op2.si->compressed);
+
+  // first round. compare shapes
+  if (op1.nRows() != op2.nRows() || op1.nColumns() != op2.nColumns()) {
+    return false;
+  }
+
+  // second round. compare number of values
+  if (op1.nValues() != op2.nValues()) {
+    return false;
+  }
+
+  // third round. compare sparsity patterns
+  for (uint32 i = 0; i <= op1.nRows(); i++) {
+    if (op1.si->iofeir[i] != op2.si->iofeir[i]) {
+      return false;
+    }
+  }
+  for (uint32 i = 0; i < op1.nValues(); i++) {
+    if (op1.si->columns[i] != op2.si->columns[i]) {
+      return false;
+    }
+  }
+
+  // fourth round. compare values with thresholds
+  for (uint32 i = 0; i < op1.nValues(); i++) {
+    if (fabs(op1.values[i] - op2.values[i]) > th) {
+      return false;
+    }
+  }
+
+  // all rounds are passed. The matrices are the same
+  return true;
+}
+
+
+BaseSparseMatrix::BaseSparseMatrix() {
+
+}
+
+BaseSparseMatrix::BaseSparseMatrix(uint32 _nrows, uint32 _ncolumns, uint32 _max_in_row) {
+  // create it's own SparsityInfo instance
+  si = std::shared_ptr<SparsityInfo>(new SparsityInfo(_nrows, _ncolumns, _max_in_row));
+}
+
+BaseSparseMatrix::BaseSparseMatrix(std::shared_ptr<SparsityInfo> spar_info) {
+  setSparsityInfo(spar_info);
+}
+
+BaseSparseMatrix::~BaseSparseMatrix() {
+  if (values) {
+      delete[] values;
+      values = nullptr;
+    }
+}
+
+
+void BaseSparseMatrix::reinit(uint32 _nrows, uint32 _ncols, const std::vector<SparseEntry>& entries) {
+  // after this procedure we will obtain matrix in already compressed state
+  // TODO: this is not optimal way to fill new Sparse Matrix, actually we can allocate exactly
+  // number of non-zeros at once, as far as we know all entries before initialization
+  // NOTE: For SparseSymMatrix the caller should be sure that all entries are in upper triangle.
+  //
+  // in case of all zeros matrix
+  if (entries.size() == 0) {
+    si = std::shared_ptr<SparsityInfo>(new SparsityInfo(_nrows, _ncols, 1));
+    compress();
+    return;
+  }
+  // calculate number of entries in every row
+  std::vector<uint32> entriesInRow(_nrows, 0);
+  for (auto v : entries) {
+    entriesInRow[v.i - 1] += 1;
+  }
+  uint32 _max_in_row = *std::max_element(entriesInRow.begin(), entriesInRow.end());
+  // init new SparsityInfo
+  si = std::shared_ptr<SparsityInfo>(new SparsityInfo(_nrows, _ncols, _max_in_row));
+
+  // add non-zero entries into SparsityInfo
+  for (auto v : entries) {
+    si->addEntry(v.i, v.j);
+  }
+
+  // compress matrix
+  compress();
+
+  // write non-zero values into matrix
+  for (auto v : entries) {
+    //addValue(v.i, v.j, v.v);
+    uint32 index = si->getIndex(v.i, v.j);
+    if (index == SparsityInfo::invalid) {
+      LOG(FATAL) << "The position(" << v.i << ", " << v.j << ") is absent in the matrix";
+    }
+    values[index] = v.v;
+  }
+}
+
+void BaseSparseMatrix::compress() {
+  assert(si);
+  if (si->compressed == true) {
+    if (values)
+      // compressed has been finished and values allocated - nothing to do..
+      return;
+  } else {
+    si->compress();
+  }
+  
+  if (values) delete[] values;
+  values = new double[si->numberOfValues];
+  zero();
+}
+
+
+void BaseSparseMatrix::setSparsityInfo(std::shared_ptr<SparsityInfo> spar_info) {
+  assert(spar_info);
+  // use already exist SparsityInfo instance. It can in compressed = false state or already
+  // compressed = true state
+  si = spar_info;
+  // if we add SparsityInfo with alreade fixed number of entries we are ready to allocate values
+  if (si->compressed == true) 
+    compress();
+}
+
+
+SparseMatrix::SparseMatrix() {
+
+}
+
+
+SparseMatrix::SparseMatrix(uint32 _nrows, uint32 _ncolumns, uint32 _max_in_row) :
+    BaseSparseMatrix(_nrows, _ncolumns, _max_in_row)
+{
+
+}
+
+SparseMatrix::SparseMatrix(std::shared_ptr<SparsityInfo> spar_info) :
+    BaseSparseMatrix(spar_info)
+{
+
+}
+
+
+void SparseMatrix::reinit(uint32 _nrows, uint32 _ncols, uint32 _max_in_row) {
+  si = std::shared_ptr<SparsityInfo>(new SparsityInfo(_nrows, _ncols, _max_in_row));
+  if (values) {
+    delete[] values;
+    values = nullptr;
+  }
+}
+
+
+void SparseMatrix::print(std::ostream& out) {
+  assert(si);
+  assert(si->compressed);
+  assert(values);
+
+	uint32 ind;
+	for (uint32 i = 1; i <= si->nRows; i++) {
+		out << "[";
+		for (uint32 j = 1; j <= si->nColumns; j++) {
+      out << value(i, j) << '\t';
+    }
+		out << "]" << std::endl;
+	}
+}
+
+
+double& SparseMatrix::operator()(uint32 _i, uint32 _j) {
+  assert(values);
+  assert(si);
+
+	uint32 index = si->getIndex(_i, _j);
+  if (index == SparsityInfo::invalid) {
+    LOG(FATAL) << "The position(" << _i << ", " << _j << ") is absent in the matrix";
+  }
+  return values[index];
+}
+
+double SparseMatrix::value(uint32 _i, uint32 _j) const {
+  assert(values);
+  assert(si);
+
+	uint32 index = si->getIndex(_i, _j);
+  if (index == SparsityInfo::invalid) {
+    return 0.0;
+  }
+  return values[index];
+}
+
+
+SparseSymMatrix::SparseSymMatrix() {
+
+}
+
+
+SparseSymMatrix::SparseSymMatrix(uint32 _nrows, uint32 _max_in_row) :
+    BaseSparseMatrix(_nrows, _nrows, _max_in_row)
+{
+  assert(si);
+  // NOTE: need to add diagonal elements because MKL PARDISO need it anyway
+  for (uint32 i = 1; i <= si->nRows; i++) {
+    si->addEntry(i, i);
+  }
+}
+
+SparseSymMatrix::SparseSymMatrix(std::shared_ptr<SparsityInfo> spar_info) :
+    BaseSparseMatrix(spar_info)
+{
+  // TODO: we need to check that spar_info meets symmetry requirements
+  // Let's at least check that is is rectangular
+
+  assert(spar_info->nRows == spar_info->nColumns);
+}
+
+void SparseSymMatrix::reinit(uint32 _nrows, uint32 _max_in_row) {
+  si = std::shared_ptr<SparsityInfo>(new SparsityInfo(_nrows, _nrows, _max_in_row));
+  if (values) {
+    delete[] values;
+    values = nullptr;
+  }
+}
+
+
+void SparseSymMatrix::print(std::ostream& out) {
+  assert(si);
+  assert(si->compressed);
+  assert(values);
+
+	uint32 ind;
+	for (uint32 i = 1; i <= si->nRows; i++) {
+		// out << "[";
+		for (uint32 j = 1; j <= si->nColumns; j++) {
+      out << value(i, j) << '\t';
+    }
+		// out << "]" << std::endl;
+    out << std::endl;
+	}
+}
+
+double& SparseSymMatrix::operator() (uint32 _i, uint32 _j) {
+  assert(values);
+  assert(si);
+
+  // ensure that we work in upper triangle
+	if (_i > _j) std::swap(_i, _j);
+
+	uint32 index = si->getIndex(_i, _j);
+  if (index == SparsityInfo::invalid) {
+    LOG(FATAL) << "The position(" << _i << ", " << _j << ") is absent in the matrix";
+  }
+  return values[index];
+}
+
+
+double SparseSymMatrix::value(uint32 _i, uint32 _j) const {
+  assert(values);
+  assert(si);
+
+  // ensure that we work in upper triangle
+	if (_i > _j) std::swap(_i, _j);
+
+	uint32 index = si->getIndex(_i, _j);
+  if (index == SparsityInfo::invalid) {
+    return 0.0;
+  }
+  return values[index];
+}
+
+
+void matBVprod(SparseSymMatrix &B, const dVec &V, const double coef, dVec &R) {
+  assert(B.si);
+  assert(B.si->compressed);
+  assert(B.values);
+  assert(B.nRows() == V.size());
+  assert(R.size() >= B.nRows());
+  // TODO: Try to use BLAS routines and measure speedup
+
+	const double eps = 1e-20;
+
+  for (uint32 i = 1; i <= B.nRows(); i++) {
+    // walk on lower triangle
+    for (uint32 j = 1; j <= i; j++)
+      if (fabs(V[j-1]) > eps)
+        R[i-1] += B.value(j, i) * V[j-1] * coef;
+
+    // walk on upper triangle
+    // TODO: could be done in more efficient way
+    for (uint32 j = i + 1; j <= B.nRows(); j++)
+      if (fabs(V[j-1]) > eps)
+        R[i-1] += B.value(i, j) * V[j-1] * coef;
+  }
+}
+
+
+void matBVprod(SparseMatrix &B, const dVec &V, const double coef, dVec &R) {
+  assert(B.si);
+  assert(B.si->compressed);
+  assert(B.values);
+  assert(B.nColumns() == V.size());
+  assert(R.size() >= B.nRows());
+  // TODO: Try to use BLAS routines and measure speedup
+
+  for (uint32 i = 1; i <= B.nRows(); i++) {
+
+    //if no elements in the current row return zero res
+    if (B.si->iofeir[i] - B.si->iofeir[i-1] == 0)
+      continue;
+
+    uint32 st = B.si->iofeir[i-1] - 1;
+    uint32 en = B.si->iofeir[i] - 2;
+
+    for (uint32 j = st; j <= en; j++)
+      R[i-1] += B.values[j] * V[B.si->columns[j]-1] * coef;
+  }
+}
+
+void matBTVprod(SparseMatrix &B, const dVec &V, const double coef, dVec &R) {
+  assert(B.si);
+  assert(B.si->compressed);
+  assert(B.values);
+  assert(B.nRows() == V.size());
+  assert(R.size() >= B.nColumns());
+  // TODO: Try to use BLAS routines and measure speedup
+
+	for (uint32 i = 1; i <= B.si->nRows; i++) {
+		if (B.si->iofeir[i] - B.si->iofeir[i-1] == 0) {
+			continue;
+    }
+		uint32 st = B.si->iofeir[i-1] - 1;
+		uint32 en = B.si->iofeir[i] - 2;
+		for (uint32 j = st; j <= en; j++)
+      R[B.si->columns[j] - 1] += B.values[j] * V[i-1] * coef;
+	}
+}
+  
+} // namespace math
+} // namespace nla3d
--- a/math/SparseMatrix.h
+++ b/math/SparseMatrix.h
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#pragma once
+
+#include "sys.h"
+#include "math/Vec.h"
+
+namespace nla3d {
+namespace math {
+
+// NOTE: Sparse Matrices entries have indexes started from 1 !
+
+class SparseMatrix;
+class SparseSymMatrix;
+
+// TODO: Sparse Matrices in nla3d directly used in MKLs' PARDISO equation solver. That's why we use
+// uint32 for value indexes. More general way is to use templates with arbitrary type for entities
+// and for indexes.
+
+// Class to hold columns and iofeir (Index Of First Element In the Row) of 3-arrays CSR storage
+// format. SparsityInfo can be shared between sparse matrices.
+class SparsityInfo {
+  public:
+    SparsityInfo();
+    SparsityInfo(uint32 _nrows, uint32 _ncols, uint32 _max_in_row);
+    ~SparsityInfo();
+
+    void reinit(uint32 _nrows, uint32 _ncols, uint32 _max_in_row);
+    // add information than (_row, _column) entries has non-zero value
+    // _i, _j indexes are started from 1
+    void addEntry(uint32 _i, uint32 _j);
+    // perform compression procedure. After that positions of non-zero entries can't be changed
+    void compress();
+    bool isCompressed();
+
+    // return number of entries in the _row (_row > 0)
+    uint32 nElementsInRow(uint32 _row);
+    // get index in values array (see SparseMatrix implementation) for entry position _i, _j
+    uint32 getIndex(uint32 _i, uint32 _j);
+
+    friend class BaseSparseMatrix;
+    friend class SparseMatrix;
+    friend class SparseSymMatrix;
+    friend void matBVprod(SparseSymMatrix &B, const dVec &V, const double coef, dVec &R);
+    friend void matBVprod(SparseMatrix &B, const dVec &V, const double coef, dVec &R);
+    friend void matBTVprod(SparseMatrix &B, const dVec &V, const double coef, dVec &R);
+
+  private:
+    void clear();
+
+    // Data arrays to implement compressed sparse row format with 3 arrays (3-array CSR).
+    // Format was implemented by using MKL manual.
+    // Here we don't need to know exactly values. We need to store only info about non-zeros
+    // entries in matrix.
+    // Columns array stores column number of entry
+    uint32* columns = nullptr;
+    // index of first entry in row: iofeir[_row-1] first entry in _row,
+    // iofeir[_row]-1 - last entry in _row
+    uint32* iofeir = nullptr;
+    // number of non-zero entries in the matrix. Also, the size of values array.
+    uint32 numberOfValues = 0;
+    uint32 maxInRow = 0;
+    bool compressed = false;
+
+    // size of the matrix
+    uint32 nRows = 0;
+    uint32 nColumns = 0;
+
+    static const uint32 invalid;
+};
+
+// structure to describe entry in sparse matrix. Used in one of several matrix initialization
+// approaches.
+struct SparseEntry {
+    uint32 i;
+    uint32 j;
+    double v;
+};
+
+
+// Base class for Sparse Matrix in a row-oriented data format. This class doesn't have particular
+// meaning. See SparseMatrix and SparseSymMatrix for practical usage.
+// The implementation of compressed sparse row format with 3 arrays (3-array CSR).
+class BaseSparseMatrix {
+  public:
+    BaseSparseMatrix();
+    BaseSparseMatrix(uint32 _nrows, uint32 _ncolumns, uint32 _max_in_row = 100);
+    BaseSparseMatrix(std::shared_ptr<SparsityInfo> spar_info);
+    ~BaseSparseMatrix();
+    
+    void reinit(uint32 _nrows, uint32 _ncols, const std::vector<SparseEntry>& entries);
+    // perform compression procedure, after this new entries can't be added to matrix
+    void compress();
+
+    // for debug purpose
+    void printInternalData (std::ostream& out);
+    // write matrix non-zero elements in Coordinate Text Format, details could be found here:
+    // http://math.nist.gov/MatrixMarket/formats.html . NOTE: only upper triangle entries will be
+    // stored for symmetric matrix.
+    void writeCoordinateTextFormat(std::ostream& out);
+    // read sparse matrix from Coordinate Text Format. The old data will be dropped. New `si` will
+    // be created. NOTE: For symmetric matrices only upper triangle entries should be in `in`
+    void readCoordinateTextFormat(std::istream& in);
+    // compare `this` with `op2` matrices. Matrices Sparsity should be exactly the same for
+    // true result.
+    bool compare(const BaseSparseMatrix& op2, double th = 1.0e-10);
+
+    // zero all entries
+    void zero();
+
+    // getters
+    double* getValuesArray();
+    uint32* getColumnsArray();
+    uint32* getIofeirArray();
+    uint32 nValues() const;
+    uint32 nRows() const;
+    uint32 nColumns() const;
+    std::shared_ptr<SparsityInfo> getSparsityInfo();
+    void setSparsityInfo(std::shared_ptr<SparsityInfo> spar_info);
+
+    bool isCompressed();
+
+  protected:
+    // particular values array. Part of compressed sparse format with 3 arrays (3-array CSR).
+    double *values = nullptr;
+
+    std::shared_ptr<SparsityInfo> si;
+};
+
+
+
+class SparseMatrix : public BaseSparseMatrix {
+  public:
+    SparseMatrix();
+    SparseMatrix(uint32 nrows, uint32 ncolumns, uint32 max_in_row = 100);
+    SparseMatrix(std::shared_ptr<SparsityInfo> spar_info);
+    
+    void reinit(uint32 _nrows, uint32 _ncols, uint32 _max_in_row = 100);
+
+    // add non-zero entry to sparse matrix. This should be called before compress(). 
+    void addEntry(uint32 _i, uint32 _j);
+
+    // add value to the _i, _j entry. This should be called after compress().
+    void addValue(uint32 _i, uint32 _j, double value);
+
+    // debug output
+    void print (std::ostream& out);
+
+    // return reference to the entry, if the entry doesn't exist in the matrix - fatal error
+    double& operator()(uint32 _i, uint32 _j);
+
+    // return the value of the entry, if the entry doesn't exists - return 0.0
+    double value(uint32 _i, uint32 _j) const;
+
+    friend void matBVprod(SparseMatrix &B, const dVec &V, const double coef, dVec &R);
+    friend void matBTVprod(SparseMatrix &B, const dVec &V, const double coef, dVec &R);
+};
+
+
+class SparseSymMatrix : public BaseSparseMatrix {
+  public:
+    SparseSymMatrix();
+    SparseSymMatrix(uint32 nrows, uint32 max_in_row = 100);
+    SparseSymMatrix(std::shared_ptr<SparsityInfo> spar_info);
+
+    void reinit(uint32 _nrows, uint32 _max_in_row = 100);
+    
+    // add non-zero entry to sparse matrix. This should be called before compress(). 
+    void addEntry(uint32 _i, uint32 _j);
+
+    // add value to the _i, _j entry. This should be called after compress().
+    void addValue(uint32 _i, uint32 _j, double value);
+
+    // debug methods
+    void print (std::ostream& out);
+
+    // return reference to the entry, if the entry doesn't exist in the matrix - fatal error
+    double& operator()(uint32 _i, uint32 _j);
+
+    // return the value of the entry, if the entry doesn't exists - return 0.0
+    double value(uint32 _i, uint32 _j) const;
+
+    friend void matBVprod(SparseSymMatrix &B, const dVec &V, const double coef, dVec &R);
+};
+
+
+inline bool SparsityInfo::isCompressed() {
+    return compressed;
+}
+
+inline uint32 SparsityInfo::nElementsInRow(uint32 _row) {
+    assert(iofeir != nullptr);
+    assert(_row > 0 && _row <= nRows);
+    return iofeir[_row] - iofeir[_row-1];
+}
+
+
+inline void BaseSparseMatrix::zero() {
+  assert(si);
+  assert(values);
+  std::fill_n(&values[0], si->numberOfValues, 0.0);
+}
+
+inline double* BaseSparseMatrix::getValuesArray() {
+  assert(values != nullptr);
+  return values;
+}
+
+inline uint32* BaseSparseMatrix::getColumnsArray() {
+  assert(si);
+  assert(si->compressed);
+  return si->columns;
+}
+
+inline uint32* BaseSparseMatrix::getIofeirArray() {
+  assert(si);
+  assert(si->compressed);
+  return si->iofeir;
+}
+
+inline uint32 BaseSparseMatrix::nValues() const {
+  assert(si);
+  return si->numberOfValues;
+}
+
+inline uint32 BaseSparseMatrix::nRows() const {
+  assert(si);
+  return si->nRows;
+}
+
+inline uint32 BaseSparseMatrix::nColumns() const {
+  assert(si);
+  return si->nColumns;
+}
+
+inline std::shared_ptr<SparsityInfo> BaseSparseMatrix::getSparsityInfo() {
+  assert(si);
+  return si;
+}
+
+inline bool BaseSparseMatrix::isCompressed() {
+  assert(si);
+  return si->compressed;
+}
+
+
+inline void SparseMatrix::addEntry(uint32 _i, uint32 _j) {
+  assert(si);
+  si->addEntry(_i, _j);
+}
+
+inline void SparseMatrix::addValue(uint32 _i, uint32 _j, double value) {
+  this->operator()(_i, _j) += value;
+}
+
+
+inline void SparseSymMatrix::addEntry(uint32 _i, uint32 _j) {
+  assert(si);
+
+  // ensure that we work in upper triangle
+	if (_i > _j) std::swap(_i, _j);
+  si->addEntry(_i, _j);
+}
+
+inline void SparseSymMatrix::addValue(uint32 _i, uint32 _j, double value) {
+  this->operator()(_i, _j) += value;
+}
+
+ 
+} // namespace math
+} // namespace nla3d
--- a/math/Vec.cpp
+++ b/math/Vec.cpp
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#include "math/Vec.h"
+
+namespace nla3d {
+namespace math {
+
+
+dVec::dVec() {
+
+}
+
+
+dVec::dVec(uint32 _n, double _val) {
+  reinit(_n, _val);
+}
+
+
+dVec::dVec(dVec& _ref, uint32 _start, uint32 _size) {
+  reinit(_ref, _start, _size);
+}
+
+
+void dVec::reinit(uint32 _n, double _val) {
+  clear();
+  data = new double[_n];
+  memory_owner = true;
+  _size = _n;
+  fill(_val);
+}
+
+void dVec::reinit(dVec& _ref, uint32 _start, uint32 _size) {
+  clear();
+
+  assert(_ref.data);
+  assert(_start + _size <= _ref.size());
+  data = _ref.data + _start;
+  this->_size = _size;
+  memory_owner = false;
+}
+
+
+dVec::~dVec() {
+  clear();  
+}
+
+uint32 dVec::size() const {
+  return _size;
+}
+
+
+void dVec::zero() {
+  fill(0.0);
+}
+
+
+double* dVec::ptr() {
+  assert(data);
+  return data;
+}
+
+
+void dVec::clear() {
+  if (memory_owner && data) {
+    delete[] data;
+  }
+  memory_owner = false;
+  data = nullptr;
+  _size = 0;
+}
+
+
+void dVec::fill(double val) {
+  assert(data);
+  std::fill_n(data, _size, val);
+
+}
+
+
+bool dVec::isInit() {
+  return (data != nullptr);
+}
+
+
+double& dVec::operator[](uint32 _n) {
+  assert(data);
+  assert(_n < _size);
+  return data[_n];
+}
+
+
+double dVec::operator[](uint32 _n) const {
+  assert(data);
+  assert(_n < _size);
+  return data[_n];
+}
+
+
+dVec dVec::operator-() {
+  assert(data);
+  dVec p(size());
+	for (uint32 i = 0; i < size(); i++) {
+    p[i] = -data[i];
+  }
+  return p;
+}
+
+
+dVec dVec::operator+(const dVec& op) {
+  assert(data);
+  assert(op.data);
+  assert(size() == op.size());
+
+  dVec p(size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    p[i] = data[i] + op.data[i];
+  }
+  return p;
+
+}
+
+
+dVec dVec::operator-(const dVec& op) {
+  assert(data);
+  assert(op.data);
+  assert(size() == op.size());
+
+  dVec p(size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    p[i] = data[i] - op.data[i];
+  }
+  return p;
+}
+
+
+dVec dVec::operator*(const double op) {
+  assert(data);
+
+  dVec p(size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    p[i] = data[i] * op;
+  }
+  return p;
+}
+
+dVec operator* (const double op1, const dVec& op2) {
+  assert(op2.data);
+
+  dVec p(op2.size());
+
+	for (uint32 i = 0; i < op2.size(); i++) {
+    p[i] = op2.data[i] * op1;
+  }
+  return p;
+}
+
+
+dVec dVec::operator/(const double op) {
+  assert(data);
+
+  dVec p(size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    p[i] = data[i] / op;
+  }
+  return p;
+
+}
+
+
+dVec& dVec::operator+=(const dVec& op) {
+  assert(data);
+  assert(op.data);
+  assert(size() == op.size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    data[i] += op.data[i];
+  }
+  return *this;
+}
+
+
+dVec& dVec::operator-=(const dVec& op) {
+  assert(data);
+  assert(op.data);
+  assert(size() == op.size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    data[i] -= op.data[i];
+  }
+  return *this;
+}
+
+dVec& dVec::operator=(const dVec& op) {
+  assert(data);
+  assert(op.data);
+  assert(size() == op.size());
+
+	for (uint32 i = 0; i < size(); i++) {
+    data[i] = op.data[i];
+  }
+  return *this;
+}
+
+
+bool dVec::compare(const dVec& op2, double th) {
+  dVec& op1 = *this;
+  assert(op1.data && op2.data);
+
+  // first round. compare lengths
+  if (op1.size() != op2.size()) {
+    return false;
+  }
+
+  // second round. compare values
+  for (uint32 i = 0; i < op1.size(); i++) {
+    if (fabs(op1.data[i] - op2.data[i]) > th) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+
+void dVec::writeTextFormat(std::ostream& out) {
+  out << size() << std::endl;
+  for (uint32 i = 0; i < size(); i++) {
+    out << data[i] << endl;
+  }
+}
+
+
+void dVec::readTextFormat(std::istream& in) {
+  uint32 _len;
+  in >> _len;
+  reinit(_len);
+  for (uint32 i = 0; i < _len; i++) {
+    in >> data[i];
+  }
+}
+
+} // namespace math
+} // namespace nla3d
--- a/math/Vec.h
+++ b/math/Vec.h
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#pragma once
+#include "sys.h"
+#include <iostream>
+#include <string>
+
+namespace nla3d {
+namespace math {
+
+using namespace std;
+
+template<uint16 dim> 
+class Vec
+{
+public:
+	Vec() {
+		assert(dim);
+		memset(data,0,sizeof(double)*dim); //не очень красиво
+	}
+	Vec(double first, ...) {
+		assert(dim);
+		va_list argp;
+		va_start(argp, first);
+		data[0]=first;
+		for (uint16 i=1; i < dim; i++) data[i]=va_arg(argp, double);
+		va_end(argp);
+	}
+	~Vec(void) { }
+  // TODO: this should be size_t instead of uint16 
+	double& operator[] (uint16 n);
+	const double operator[] (uint16 n) const;
+	void display ();
+	void zero() {
+		memset(data,0,sizeof(double)*dim);
+	}
+	double length ();
+	double qlength ();
+	Vec operator+ (const Vec<dim> &op);
+	Vec& operator+= (const Vec<dim> &op);
+	Vec operator- ();
+	Vec& operator= (const Vec<dim> &op);
+	Vec operator- (const Vec<dim> &op);
+	Vec operator* (const double op);
+	string toString ();
+	bool compare (Vec<dim>& V, double eps = 0.00005);
+	void simple_read(std::istream& st);
+	double operator* (const Vec<dim> &op);
+  template <uint16 dim1>
+    friend std::ostream& operator<< (std::ostream& stream,const Vec<dim1>& obj);
+  template <uint16 dim1>
+    friend Vec operator* (const double op1, const Vec<dim1> &op2);
+	double* ptr ();
+private:
+	double data[dim];
+};
+//------operator[]------------------------------------------------------
+template<uint16 dim>
+double& Vec<dim>::operator[] (uint16 n) {
+	assert(n < dim);
+	return data[n];
+}
+//-------operator[]-const-----------------------------------------------
+template<uint16 dim>
+const double Vec<dim>::operator[] (uint16 n) const {
+	assert(n < dim);
+	return data[n];
+}
+//--------display----------------------------------------------------
+template<uint16 dim>
+void Vec<dim>::display () {
+	for (uint16 i = 0; i < dim; i++) {
+		std::cout << data[i];
+		if (i < dim-1) std::cout << ", ";
+	}
+}
+//-------operator+-----------------------------------------------------
+template<uint16 dim>
+Vec<dim> Vec<dim>::operator+(const Vec<dim> &op) {
+	Vec<dim> p;
+	for (uint16 i=0; i < dim; i++) p[i]=data[i]+op.data[i];
+	return p;
+}
+//-------operator+=-----------------------------------------------------
+template<uint16 dim>
+Vec<dim>& Vec<dim>::operator+= (const Vec<dim> &op) {
+	for (uint16 i=0; i < dim; i++) this->data[i]+=op.data[i];
+	return *this;
+}
+//-------operator- ----------------------------------------------------
+template<uint16 dim>
+Vec<dim> Vec<dim>::operator-() {
+	Vec<dim> p;
+	for (uint16 i=0; i < dim; i++) p[i]=-data[i];
+	return p;
+}
+//------operator- -----------------------------------------------------
+template<uint16 dim>
+Vec<dim> Vec<dim>::operator-(const Vec<dim> &op) {
+	Vec<dim> p;
+	for (uint16 i=0; i < dim; i++) p[i]=data[i]-op.data[i];
+	return p;
+}
+//------operator*------------------------------------------------------
+template<uint16 dim>
+Vec<dim> Vec<dim>::operator*(const double op) {
+	Vec<dim> p;
+	for (uint16 i=0; i < dim; i++) p[i]=data[i]*op;
+	return p;
+}
+//------operator*------------------------------------------------------
+template<uint16 dim>
+double Vec<dim>::operator*(const Vec<dim> &op) {
+	double p=0;
+	for (uint16 i=0; i < dim; i++) p+=data[i]*op.data[i];
+	return p;
+}
+//---------qlength----------------------------------------------------
+template<uint16 dim>
+double Vec<dim>::qlength() {
+	double p=0;
+	for (uint16 i=0; i < dim; i++) p+=data[i]*data[i];
+	return p;
+}
+//---------length----------------------------------------------------
+template<uint16 dim>
+double Vec<dim>::length() {
+	return sqrt(qlength());
+}
+//---------operator*----------------------------------------------------
+template<uint16 dim1> Vec<dim1> operator*(const double op1, const Vec<dim1> &op2) {
+	Vec<dim1> p;
+	for (uint16 i=0; i < dim1; i++) p[i]=op2.data[i]*op1;
+	return p;
+}
+//--------operator=------------------------------------------------------
+template<uint16 dim> Vec<dim>& Vec<dim>::operator= (const Vec<dim> &op)
+{
+	memcpy(this->data, op.data, sizeof(double)*dim);
+	return *this;
+}
+//---------operator<<----------------------------------------------------
+template<uint16 dim1> std::ostream &operator<<(std::ostream &stream, const Vec<dim1> &obj) {
+    stream << obj.data[0];
+	for (uint16 i = 1; i < dim1; i++) {
+		stream << " " << obj.data[i];
+	}
+	return stream;
+}
+//-------------------------------------------------------------
+template<uint16 dim> string Vec<dim>::toString ()
+{
+	string p;
+	char buff[100];
+	for (uint16 i = 0; i < dim; i++) {
+		sprintf_s(buff,100,"%8.5e",this->data[i]);
+		p+=buff;
+		if (i < dim-1) p+= ", ";
+	}
+	return p;
+}
+//-------------------------------------------------------
+template<uint16 dim> double* Vec<dim>::ptr ()
+{
+	return data;
+}
+//-------------------------------------------------------
+template<uint16 dim> bool Vec<dim>::compare (Vec<dim>& V, double eps)
+{
+	double *Dp  = (double*) data;
+	double *Vp = V.ptr();
+	for (uint16 j=0;j<dim;j++)
+	{
+		if (fabs((double)*Dp-*Vp) > eps) {
+			return false;
+		}
+		Dp++;
+		Vp++;
+	}
+	return true;
+}
+//-------------------------------------------------------
+template<uint16 dim> void Vec<dim>::simple_read(std::istream& st)
+{
+	double *Dp = data;
+	for (uint16 j=0;j<dim;j++)
+	{
+		st >> *Dp;
+		Dp++;
+	}
+}
+
+
+
+// dVec class represents a dynamically allocated double arrays with math operations support
+// dVec has to modes: 1. dVec owns allocated memory; 2. dVec just point to double array memory
+// allocated by another dVec. memory_owner variable is used to distinct these modes
+class dVec {
+  public:
+    dVec();
+    dVec(uint32 _n, double _val = 0.0);
+    dVec(dVec& _ref, uint32 _start, uint32 _size);
+    void reinit(uint32 _n, double _val = 0.0);
+    void reinit(dVec& _ref, uint32 _start, uint32 _size);
+    ~dVec();
+
+    uint32 size() const;
+    void zero();
+    double* ptr();
+    void clear();
+    void fill(double val);
+
+    bool isInit();
+
+    double& operator[](uint32 _n);
+    double operator[](uint32 _n) const;
+
+    dVec operator-();
+
+    dVec operator+(const dVec& op);
+    dVec operator-(const dVec& op);
+    dVec operator*(const double op);
+    friend dVec operator* (const double op1, const dVec& op2);
+    dVec operator/(const double op);
+
+    dVec& operator+=(const dVec& op);
+    dVec& operator-=(const dVec& op);
+
+    dVec& operator=(const dVec& op);
+
+    // for debug purpose:
+    bool compare(const dVec& op2, double th = 1.0e-10);
+    // write and read to/from simple text format:
+    // n
+    // value1
+    // value2
+    // ...
+    // valuen
+    void writeTextFormat(std::ostream& out);
+    void readTextFormat(std::istream& in);
+
+  private:
+
+    bool memory_owner = false;
+    double* data = nullptr;
+    uint32 _size = 0;
+};
+
+
+} // namespace math
+} // namespace nla3d
--- a/math/makefile
+++ b/math/makefile
+#------------------------------------------------------------------------------#
+# This makefile was generated by 'cbp2make' tool rev.147                       #
+#------------------------------------------------------------------------------#
+
+
+WORKDIR = `pwd`
+
+CC = gcc
+CXX = g++
+AR = ar
+LD = g++
+WINDRES = windres
+
+INC = 
+CFLAGS = 
+RESINC = 
+LIBDIR = 
+LIB = 
+LDFLAGS = 
+
+INC_DEBUG = $(INC) -I../../cuda_nla3d -I../math -I../paralution-1.1.0/src -I/opt/intel/mkl/include -I../easylogging
+CFLAGS_DEBUG = $(CFLAGS) -Wall -m64 -g -fPIC -DNLA3D_USE_MKL -DNLA3D_USE_PARALUTION
+RESINC_DEBUG = $(RESINC)
+RCFLAGS_DEBUG = $(RCFLAGS)
+LIBDIR_DEBUG = $(LIBDIR)
+LIB_DEBUG = $(LIB)
+LDFLAGS_DEBUG = $(LDFLAGS) -m64
+OBJDIR_DEBUG = obj/Debug
+DEP_DEBUG = 
+OUT_DEBUG = bin/Debug/libmath.a
+
+INC_RELEASE = $(INC) -I../../cuda_nla3d -I../math -I../paralution-1.1.0/src -I/opt/intel/mkl/include -I../easylogging
+CFLAGS_RELEASE = $(CFLAGS) -O2 -Wall -m64 -fPIC -DNLA3D_USE_MKL -DNLA3D_USE_PARALUTION
+RESINC_RELEASE = $(RESINC)
+RCFLAGS_RELEASE = $(RCFLAGS)
+LIBDIR_RELEASE = $(LIBDIR)
+LIB_RELEASE = $(LIB)
+LDFLAGS_RELEASE = $(LDFLAGS) -s -m64
+OBJDIR_RELEASE = obj/Release
+DEP_RELEASE = 
+OUT_RELEASE = bin/Release/libmath.a
+
+OBJ_DEBUG = $(OBJDIR_DEBUG)/EquationSolver.o $(OBJDIR_DEBUG)/Mat.o $(OBJDIR_DEBUG)/SparseMatrix.o $(OBJDIR_DEBUG)/Vec.o $(OBJDIR_DEBUG)/sys.o
+
+OBJ_RELEASE = $(OBJDIR_RELEASE)/EquationSolver.o $(OBJDIR_RELEASE)/Mat.o $(OBJDIR_RELEASE)/SparseMatrix.o $(OBJDIR_RELEASE)/Vec.o $(OBJDIR_RELEASE)/sys.o
+
+all: debug release
+
+clean: clean_debug clean_release
+
+before_debug: 
+	test -d bin/Debug || mkdir -p bin/Debug
+	test -d $(OBJDIR_DEBUG) || mkdir -p $(OBJDIR_DEBUG)
+
+after_debug: 
+
+debug: before_debug out_debug after_debug
+
+out_debug: before_debug $(OBJ_DEBUG) $(DEP_DEBUG)
+	$(AR) rcs $(OUT_DEBUG) $(OBJ_DEBUG)
+
+$(OBJDIR_DEBUG)/EquationSolver.o: EquationSolver.cpp
+	$(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c EquationSolver.cpp -o $(OBJDIR_DEBUG)/EquationSolver.o
+
+$(OBJDIR_DEBUG)/Mat.o: Mat.cpp
+	$(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c Mat.cpp -o $(OBJDIR_DEBUG)/Mat.o
+
+$(OBJDIR_DEBUG)/SparseMatrix.o: SparseMatrix.cpp
+	$(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c SparseMatrix.cpp -o $(OBJDIR_DEBUG)/SparseMatrix.o
+
+$(OBJDIR_DEBUG)/Vec.o: Vec.cpp
+	$(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c Vec.cpp -o $(OBJDIR_DEBUG)/Vec.o
+
+$(OBJDIR_DEBUG)/sys.o: sys.cpp
+	$(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c sys.cpp -o $(OBJDIR_DEBUG)/sys.o
+
+clean_debug: 
+	rm -f $(OBJ_DEBUG) $(OUT_DEBUG)
+	rm -rf bin/Debug
+	rm -rf $(OBJDIR_DEBUG)
+
+before_release: 
+	test -d bin/Release || mkdir -p bin/Release
+	test -d $(OBJDIR_RELEASE) || mkdir -p $(OBJDIR_RELEASE)
+
+after_release: 
+
+release: before_release out_release after_release
+
+out_release: before_release $(OBJ_RELEASE) $(DEP_RELEASE)
+	$(AR) rcs $(OUT_RELEASE) $(OBJ_RELEASE)
+
+$(OBJDIR_RELEASE)/EquationSolver.o: EquationSolver.cpp
+	$(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c EquationSolver.cpp -o $(OBJDIR_RELEASE)/EquationSolver.o
+
+$(OBJDIR_RELEASE)/Mat.o: Mat.cpp
+	$(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c Mat.cpp -o $(OBJDIR_RELEASE)/Mat.o
+
+$(OBJDIR_RELEASE)/SparseMatrix.o: SparseMatrix.cpp
+	$(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c SparseMatrix.cpp -o $(OBJDIR_RELEASE)/SparseMatrix.o
+
+$(OBJDIR_RELEASE)/Vec.o: Vec.cpp
+	$(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c Vec.cpp -o $(OBJDIR_RELEASE)/Vec.o
+
+$(OBJDIR_RELEASE)/sys.o: sys.cpp
+	$(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c sys.cpp -o $(OBJDIR_RELEASE)/sys.o
+
+clean_release: 
+	rm -f $(OBJ_RELEASE) $(OUT_RELEASE)
+	rm -rf bin/Release
+	rm -rf $(OBJDIR_RELEASE)
+
+.PHONY: before_debug after_debug clean_debug before_release after_release clean_release
+
--- a/math/math.cbp
+++ b/math/math.cbp
+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<CodeBlocks_project_file>
+	<FileVersion major="1" minor="6" />
+	<Project>
+		<Option title="math" />
+		<Option pch_mode="2" />
+		<Option compiler="gcc" />
+		<Build>
+			<Target title="Debug">
+				<Option output="bin/Debug/math" prefix_auto="1" extension_auto="1" />
+				<Option working_dir="" />
+				<Option object_output="obj/Debug/" />
+				<Option type="2" />
+				<Option compiler="gcc" />
+				<Option createDefFile="1" />
+				<Compiler>
+					<Add option="-Wall" />
+					<Add option="-m64" />
+					<Add option="-g" />
+					<Add option="-fPIC" />
+					<Add option="-DNLA3D_USE_MKL" />
+					<Add option="-DNLA3D_USE_PARALUTION" />
+					<Add directory="../../cuda_nla3d" />
+					<Add directory="../math" />
+					<Add directory="../paralution-1.1.0/src" />
+					<Add directory="/opt/intel/mkl/include" />
+					<Add directory="../easylogging" />
+				</Compiler>
+				<Linker>
+					<Add option="-m64" />
+				</Linker>
+			</Target>
+			<Target title="Release">
+				<Option output="bin/Release/math" prefix_auto="1" extension_auto="1" />
+				<Option working_dir="" />
+				<Option object_output="obj/Release/" />
+				<Option type="2" />
+				<Option compiler="gcc" />
+				<Option createDefFile="1" />
+				<Compiler>
+					<Add option="-O2" />
+					<Add option="-Wall" />
+					<Add option="-m64" />
+					<Add option="-fPIC" />
+					<Add option="-DNLA3D_USE_MKL" />
+					<Add option="-DNLA3D_USE_PARALUTION" />
+					<Add directory="../../cuda_nla3d" />
+					<Add directory="../math" />
+					<Add directory="../paralution-1.1.0/src" />
+					<Add directory="/opt/intel/mkl/include" />
+					<Add directory="../easylogging" />
+				</Compiler>
+				<Linker>
+					<Add option="-s" />
+					<Add option="-m64" />
+				</Linker>
+			</Target>
+		</Build>
+		<Unit filename="BlockSparseMatrix.h" />
+		<Unit filename="EquationSolver.cpp" />
+		<Unit filename="EquationSolver.h" />
+		<Unit filename="Mat.cpp" />
+		<Unit filename="Mat.h" />
+		<Unit filename="SparseMatrix.cpp" />
+		<Unit filename="SparseMatrix.h" />
+		<Unit filename="Vec.cpp" />
+		<Unit filename="Vec.h" />
+		<Unit filename="sys.cpp" />
+		<Unit filename="sys.h" />
+		<Extensions>
+			<code_completion />
+			<envvars />
+			<lib_finder disable_auto="1" />
+			<debugger />
+		</Extensions>
+	</Project>
+</CodeBlocks_project_file>
--- a/math/sys.cpp
+++ b/math/sys.cpp
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d 
+
+#include "sys.h"
+
+// obligatory macros to initialize easyloggingpp system
+INITIALIZE_EASYLOGGINGPP
+
+namespace nla3d {
+
+static LogInitializer logInitializer;
+
+uint32 tick() {
+	return (uint32) clock();
+}
+
+int32 npow(int16 dig, uint16 power) {
+	int32 res=1; //TODO: if too big number?
+	for (uint16 i=0; i < power; i++) {
+		res *= dig;
+  }
+	return res;
+}
+
+std::vector<std::string> read_tokens(char *input) {
+	std::vector<std::string> vec;
+	std::string tmp("");
+	char delimeters[]="(),";
+	char *p=input;
+	char *start=p;
+	while (*p) {
+		if (*p=='!') {
+      break;
+    }
+		bool isfind=false;
+		char* delp=delimeters;
+		while (*delp) {
+			if (*delp==*p) {
+				isfind=true;
+				break;
+			}
+			delp++;
+		}
+		if (isfind) {
+			tmp.assign(start, (int16) (p-start));
+			del_spaces(tmp);
+			vec.push_back(tmp);
+			vec.push_back(std::string(delp,1));
+			p++;
+			start=p;
+			continue;
+		}
+		p++;
+	}
+	tmp.assign(start, (int16) (p-start));
+	del_spaces(tmp);
+	vec.push_back(tmp);
+	return vec;
+}
+
+void del_spaces (std::string &str) {
+	uint16 start = 0;
+	if (str.length()==0) return;
+	while (str[start]==' ' ) {
+		start++;
+		if (start == str.length()) {
+			str=" ";
+			return;
+		}
+	}
+	uint16 end = static_cast<uint16> (str.length()-1);
+	while (str[end] ==' ') {
+    end--;
+  }
+	str=std::string(str,start, end-start+1);
+}
+
+char* getCmdOption(char ** begin, char ** end, const std::string & option) {
+    char ** itr = std::find(begin, end, option);
+    if (itr != end && ++itr != end) {
+        return *itr;
+    }
+    return 0;
+}
+
+std::vector<char*> getCmdManyOptions(char ** begin, char ** end, const std::string & option) {
+    std::vector<char*> _vec;
+    char ** itr = std::find(begin, end, option);
+    if (itr != end && ++itr != end) {
+      while(!(itr[0][0] == '-' && (itr[0][1] < '0' || itr[0][1] > '9'))) {
+        _vec.push_back(*itr);
+        if (++itr == end) {
+          break;
+        }
+      }
+    }
+    return _vec;
+}
+
+bool cmdOptionExists(char** begin, char** end, const std::string& option) {
+    return std::find(begin, end, option) != end;
+}
+
+//TODO: this functions only truncate file extension. 
+//But it was intended to leave only a file name (delete path and extension)
+std::string getFileNameFromPath(const std::string filename) {
+    std::string::const_reverse_iterator pivot = 
+          std::find( filename.rbegin(), filename.rend(), '.' );
+    return pivot == filename.rend()
+        ? filename
+        : std::string( filename.begin(), pivot.base() - 1 );
+}
+
+} // namespace nla3d
--- a/math/sys.h
+++ b/math/sys.h
+// This file is a part of nla3d project. For information about authors and
+// licensing go to project's repository on github:
+// https://github.com/dmitryikh/nla3d
+
+//#define NLA3D_USE_MKL
+//#define NLA3D_USE_PARALUTION
+
+#pragma once
+// before any include we need to define NOMINMAX to avoid redefining `max` with macros
+#ifdef WIN32
+  #define NOMINMAX
+#endif
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <strstream>
+#include <stdarg.h>
+#include <assert.h>
+#include <time.h>
+#include <sstream>
+#include <vector>
+#include <set>
+#include <list>
+#include <algorithm>
+#include <stdlib.h>
+
+#define ELPP_STL_LOGGING
+#include "easylogging++.h"
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+#ifdef linux
+  #include <string.h>
+#endif
+
+// MSVC doent's support C99 standard
+#ifdef linux
+  #define sprintf_s snprintf
+#endif
+
+#ifdef __APPLE__
+  #define sprintf_s snprintf
+#endif
+
+// TODO:
+// for some reasons CHECK_NOTNULL from easylogging++ doesn't work on Apple's clang and g++
+#undef CHECK_NOTNULL
+#define CHECK_NOTNULL(x) x
+
+// usefull macros to check floats with threshold
+#define CHECK_EQTH(a, b, th) CHECK(fabs(a-b) < th)
+
+
+#define int8 char //-127 to +127
+#define uint8 unsigned char //0 to +255
+#define int16 short //-32 767 to +32 767
+#define uint16 unsigned short //0 to +65 535
+#define int32 int //-2 147 483 647 to +2 147 483 647
+#define uint32 unsigned int // 0 to +4 294 967 295
+#define int64 long long //-9 223 372 036 854 775 807 to +9 223 372 036 854 775 807
+#define uint64 unsigned long long //0 to +18 446 744 073 709 551 615
+
+
+namespace nla3d {
+
+const char SYS_VERSION[] = "1.3";
+const char SYS_DATA[] = "16.03.15";
+
+// singleton for configuring the logger
+class LogInitializer {
+public:
+  LogInitializer () {
+    el::Configurations conf;
+    conf.setToDefault();
+    conf.setGlobally(el::ConfigurationType::Format, "%datetime{%H:%m:%s.%g} [%level] %msg");
+    conf.setGlobally(el::ConfigurationType::Enabled, "true");
+    conf.setGlobally(el::ConfigurationType::ToFile, "true");
+    conf.setGlobally(el::ConfigurationType::Filename, "nla3d.log");
+    conf.setGlobally(el::ConfigurationType::ToStandardOutput, "true");
+    conf.setGlobally(el::ConfigurationType::MillisecondsWidth, "3");
+    conf.setGlobally(el::ConfigurationType::PerformanceTracking, "true");
+    conf.setGlobally(el::ConfigurationType::MaxLogFileSize, "20971520"); // 20 MB
+    conf.setGlobally(el::ConfigurationType::LogFlushThreshold, "10");
+
+    conf.set(el::Level::Debug, el::ConfigurationType::Format, "%datetime{%H:%m:%s.%g} [%level] %func %msg");
+
+    conf.set(el::Level::Error, el::ConfigurationType::Format, "%datetime{%H:%m:%s.%g} [%level] %func %msg");
+
+    // reconfigure all loggers
+    el::Loggers::reconfigureAllLoggers(conf);
+    el::Loggers::addFlag(el::LoggingFlag::ColoredTerminalOutput);
+  };
+};
+
+uint32 tick();
+
+// template class to do convert from
+// different types (mainly numerical)
+// to string. It should be just a wrapper
+// on C/C++ capabilities of conversation.
+// There are two options:
+// 1. use stringstream (C++ 03)
+// 2. use std::to_string() (C++ 11)
+// for more read here:
+// http://stackoverflow.com/questions/332111/how-do-i-convert-a-double-into-a-string-in-c
+template <class T>
+std::string toStr (const T& param) {
+  std::stringstream ss;
+  ss << param;
+  return ss.str();
+}
+
+int32 npow(int16 dig, uint16 power);
+
+std::vector<std::string> read_tokens(char *input);
+void del_spaces (std::string &str);
+
+class Timer {
+public:
+  Timer(bool _start = false) : start_time(0), end_time(0) {
+    if (_start) {
+      start();
+    }
+  }
+
+  void start() {
+    start_time = clock();
+    end_time = start_time;
+  }
+
+  double stop() {
+    end_time = clock();
+    return time();
+  }
+
+  double time() {
+    return ((double)end_time - start_time) / CLOCKS_PER_SEC;
+  }
+private:
+  clock_t start_time;
+  clock_t end_time;
+};
+
+// this function is used by FEStorage::read_ans_data funciton
+uint16 str2dof (std::string dof_name);
+
+char* getCmdOption (char** begin, char** end, const std::string& option);
+bool cmdOptionExists (char** begin, char** end, const std::string& option);
+std::vector<char*> getCmdManyOptions (char** begin, char** end, const std::string& option);
+
+struct MatchPathSeparator {
+    bool operator()( char ch ) const {
+        return ch == '\\' || ch == '/';
+    }
+};
+
+std::string getFileNameFromPath(const std::string filename);
+
+enum class ElementType {
+  TRUSS3 = 0,
+  PLANE41,
+  SOLID81,
+  TETRA0,
+  TETRA1,
+  QUADTH,
+  SurfaceLINETH,
+  TRIANGLE4,
+  UNDEFINED
+};
+
+
+const char* const elTypeLabels[] = {
+  "TRUSS3",
+  "PLANE41",
+  "SOLID81",
+  "TETRA0",
+  "TETRA1",
+  "QUADTH",
+  "SurfaceLINETH",
+  "TRIANGLE4",
+  "UNDEFINED"
+};
+
+static_assert((int)ElementType::UNDEFINED == sizeof(elTypeLabels)/sizeof(elTypeLabels[0]) - 1,
+    "ElementType enumeration and elTypeLabels must have the same number of entries");
+
+} // namespace nla3d