diff --git a/flow/src/flow.h b/flow/src/flow.h
index 20e132f04f430d60cc087f26a27ee0e1a45be84a..351ca549ddaf0367b01d74162f0a773b07b5060f 100644
--- a/flow/src/flow.h
+++ b/flow/src/flow.h
@@ -51,6 +51,14 @@ class KuttaElement;
 class Blowing;
 class Boundary;
 
+// Formulation
+class PotentialResidual;
+class FreestreamResidual;
+class WakeResidual;
+class KuttaResidual;
+class BlowingResidual;
+class LoadFunctional;
+
 // Solver
 class Solver;
 class Picard;
diff --git a/flow/src/wAdjoint.cpp b/flow/src/wAdjoint.cpp
index 225327f0f7dab9393b056f3c9be17be04e10595b..6774d1bb73a923f294fd7e37d33670468c4bb5cb 100644
--- a/flow/src/wAdjoint.cpp
+++ b/flow/src/wAdjoint.cpp
@@ -22,12 +22,15 @@
 #include "wKutta.h"
 #include "wBoundary.h"
 #include "wNewton.h"
+#include "wPotentialResidual.h"
+#include "wFreestreamResidual.h"
+#include "wWakeResidual.h"
+#include "wKuttaResidual.h"
+#include "wLoadFunctional.h"
 
 #include "wMshData.h"
 #include "wNode.h"
 #include "wElement.h"
-#include "wCache.h"
-#include "wMem.h"
 #include "wLinearSolver.h"
 #include "wMshDeform.h"
 #include "wResults.h"
@@ -74,11 +77,10 @@ Adjoint::Adjoint(std::shared_ptr<Newton> _sol, std::shared_ptr<tbox::MshDeform>
 }
 
 /**
- * @brief Run the linear solver
+ * @brief Run the adjoint method
  *
  * Solve the adjoint steady transonic Full Potential Equation
- * i.e. computes sensitivites for lift and drag cost functions
- * @authors Adrien Crovato
+ * and compute lift and drag sensitivites
  */
 void Adjoint::run()
 {
@@ -149,6 +151,7 @@ void Adjoint::solve()
 
 /**
  * @brief Compute total gradients of the loads with respect to angle of attack
+ * @todo one gradient for each surface node
  */
 void Adjoint::computeGradientAoa()
 {
@@ -237,6 +240,7 @@ void Adjoint::computeGradientMesh()
 
 /**
  * @brief Build the gradients of the lift and drag with respect to the flow variables
+ * @todo one gradient for each surface node
  */
 void Adjoint::buildGradientLoadsFlow(Eigen::VectorXd &dL, Eigen::VectorXd &dD)
 {
@@ -247,32 +251,30 @@ void Adjoint::buildGradientLoadsFlow(Eigen::VectorXd &dL, Eigen::VectorXd &dD)
     dL.setZero();
     dD.setZero();
 
+    // Lift and drag normalized directions
+    Eigen::RowVector3d dirL, dirD;
+    std::tie(dirD, std::ignore, dirL) = sol->pbl->computeDirections();
+
     // Gradient of lift and drag
     for (auto sur : sol->pbl->bnds)
     {
         tbb::parallel_do(sur->groups[0]->tag->elems.begin(), sur->groups[0]->tag->elems.end(), [&](Element *e) {
-            // Build flux vector b = dCp * V * [inv(J)*dN]
+            // Associated volume element
             Element *eV = sur->svMap.at(e);
-            Eigen::RowVectorXd be = sol->pbl->medium->cP.evalD(*eV, sol->phi, 0) * eV->computeGrad(sol->phi, 0).transpose() * eV->getVMem().getJinv(0) * eV->getVCache().getDsf(0) * e->getVMem().getVol();
-            // Rotate to flow direction
-            double dirL, dirD;
-            if (sol->pbl->nDim == 2)
-            {
-                dirL = -e->normal().dot(Eigen::Vector3d(-sin(sol->pbl->alpha), cos(sol->pbl->alpha), 0)) / sol->pbl->S_ref;
-                dirD = -e->normal().dot(Eigen::Vector3d(cos(sol->pbl->alpha), sin(sol->pbl->alpha), 0)) / sol->pbl->S_ref;
-            }
-            else
-            {
-                dirL = -e->normal().dot(Eigen::Vector3d(-sin(sol->pbl->alpha), 0, cos(sol->pbl->alpha))) / sol->pbl->S_ref;
-                dirD = -e->normal().dot(Eigen::Vector3d(cos(sol->pbl->alpha) * cos(sol->pbl->beta), sin(sol->pbl->beta), sin(sol->pbl->alpha) * cos(sol->pbl->beta))) / sol->pbl->S_ref;
-            }
+            // Build
+            Eigen::MatrixXd Ae = LoadFunctional::buildGradientFlow(*e, *eV, sol->phi, sol->pbl->medium->cP);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
-            for (size_t ii = 0; ii < eV->nodes.size(); ++ii)
+            for (size_t i = 0; i < e->nodes.size(); ++i)
             {
-                Node *nodi = eV->nodes[ii];
-                dL(nodi->row) += dirL * be(ii);
-                dD(nodi->row) += dirD * be(ii);
+                Eigen::RowVectorXd AeL = -dirL * Ae.block(i * 3, 0, 3, eV->nodes.size());
+                Eigen::RowVectorXd AeD = -dirD * Ae.block(i * 3, 0, 3, eV->nodes.size());
+                for (size_t j = 0; j < eV->nodes.size(); ++j)
+                {
+                    Node *nodj = eV->nodes[j];
+                    dL(nodj->row) += AeL(j);
+                    dD(nodj->row) += AeD(j);
+                }
             }
         });
     }
@@ -289,24 +291,12 @@ void Adjoint::buildGradientResidualAoa(Eigen::VectorXd &dR)
     // Reset gradient
     dR.setZero();
 
-    // Gradient of freestream velocity
-    Eigen::Vector3d dvi;
-    if (sol->pbl->nDim == 2)
-        dvi = Eigen::Vector3d(-sin(sol->pbl->alpha), cos(sol->pbl->alpha), 0);
-    else
-        dvi = Eigen::Vector3d(-sin(sol->pbl->alpha) * cos(sol->pbl->beta), 0, cos(sol->pbl->alpha) * cos(sol->pbl->beta));
-
     // Freestream contribution
     for (auto nBC : sol->pbl->fBCs)
     {
         tbb::parallel_do(nBC->tag->elems.begin(), nBC->tag->elems.end(), [&](Element *e) {
             // Build elementary flux vector
-            Cache &cache = e->getVCache();
-            Gauss &gauss = cache.getVGauss();
-            Mem &mem = e->getVMem();
-            Eigen::VectorXd be = Eigen::VectorXd::Zero(e->nodes.size());
-            for (size_t k = 0; k < gauss.getN(); ++k)
-                be += cache.getSf(k) * dvi.dot(e->normal()) * gauss.getW(k) * mem.getDetJ(k);
+            Eigen::VectorXd be = FreestreamResidual::buildGradientAoa(*e, sol->phi, *nBC);
 
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
@@ -322,10 +312,11 @@ void Adjoint::buildGradientResidualAoa(Eigen::VectorXd &dR)
     // @todo very slight influence, can be removed by using Vi = [1,0,0]. Same results, same cost, less general but less code
     for (auto wake : sol->pbl->wBCs)
     {
+        Eigen::VectorXd dvi = sol->pbl->computeGradientVi(); // gradient of freestream velocity
         tbb::parallel_do(wake->wEle.begin(), wake->wEle.end(), [&](WakeElement *we) {
             // Build bu, bl
             Eigen::VectorXd bue, ble;
-            we->buildGradientResidualAoA(bue, ble, sol->phi, dvi.block(0, 0, sol->pbl->nDim, 1));
+            std::tie(bue, ble) = WakeResidual::build(*we, sol->phi, dvi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < we->nRow; ++ii)
@@ -340,33 +331,25 @@ void Adjoint::buildGradientResidualAoa(Eigen::VectorXd &dR)
 
 /**
  * @brief Compute gradients of the lift and drag with respect to angle of attack
- * @todo compute one gradient per boundary
+ * @todo one gradient for each surface node
  */
 void Adjoint::buildGradientLoadsAoa(double &dL, double &dD)
 {
-    // Reste gradients
-    dL = 0, dD = 0;
-    // Compute aerodynamic load coefficients (i.e. Load / pressure / surface) along x and vertical (y in 2D, z in 3D) directions
+    // Gradient of lift and drag normalized directions
+    Eigen::RowVector3d dirL, dirD;
+    std::tie(dirD, std::ignore, dirL) = sol->pbl->computeGradientDirections();
+    // Compute integrated aerodynamic load coefficients (normalized by freestream dynamic pressure and reference area)
     Eigen::Vector3d Cf(0, 0, 0);
     for (auto bnd : sol->pbl->bnds)
-        for (auto e : bnd->groups[0]->tag->elems)
-            Cf -= bnd->integrate(e, sol->phi, sol->pbl->medium->cP) * e->normal();
+        for (size_t i = 0; i < bnd->nodes.size(); ++i)
+            Cf -= bnd->nLoads[i];
     // Rotate to gradient of flow direction
-    if (sol->pbl->nDim == 2)
-    {
-        dD += (-Cf(0) * sin(sol->pbl->alpha) + Cf(1) * cos(sol->pbl->alpha)) / sol->pbl->S_ref;
-        dL += (-Cf(0) * cos(sol->pbl->alpha) - Cf(1) * sin(sol->pbl->alpha)) / sol->pbl->S_ref;
-    }
-    else
-    {
-        dD += (-Cf(0) * sin(sol->pbl->alpha) * cos(sol->pbl->beta) + Cf(2) * cos(sol->pbl->alpha) * cos(sol->pbl->beta)) / sol->pbl->S_ref;
-        dL += (-Cf(0) * cos(sol->pbl->alpha) - Cf(2) * sin(sol->pbl->alpha)) / sol->pbl->S_ref;
-    }
+    dD = Cf.dot(dirD);
+    dL = Cf.dot(dirL);
 }
 
 /**
  * @brief Compute gradients of the residual with respect to mesh nodes
- * @todo compute constant variables outside GP loop
  */
 void Adjoint::buildGradientResidualMesh(Eigen::SparseMatrix<double, Eigen::RowMajor> &dR)
 {
@@ -383,130 +366,31 @@ void Adjoint::buildGradientResidualMesh(Eigen::SparseMatrix<double, Eigen::RowMa
         Element *e = p.first;
         //Upwind element
         Element *eU = p.second[0];
+        // Build elementary matrices
+        Eigen::MatrixXd Ae1, Ae2;
+        std::tie(Ae1, Ae2) = PotentialResidual::buildGradientMesh(*e, *eU, sol->phi, *fluid, sol->pbl->nDim, sol->muCv, sol->mCOv);
 
-        // Get Jacobian, shape functions and gauss points
-        Cache &cache = e->getVCache();
-        Mem &mem = e->getVMem();
-        Gauss &gauss = cache.getVGauss();
-
-        // Subsonic contribution
-        Eigen::MatrixXd Ae1 = Eigen::MatrixXd::Zero(e->nodes.size(), sol->pbl->nDim * e->nodes.size());
-        for (size_t k = 0; k < gauss.getN(); ++k)
+        // Assembly (subsonic)
+        tbb::spin_mutex::scoped_lock lock(mutex);
+        for (size_t i = 0; i < e->nodes.size(); ++i)
         {
-            // @todo, use first GP only? -> better: have 1GP per element?
-            Eigen::VectorXd dPhi = e->computeGrad(sol->phi, k);
-            double rho = fluid->rho.eval(*e, sol->phi, k);
-            double dRho = fluid->rho.evalD(*e, sol->phi, k);
-            Eigen::MatrixXd const &dSf = cache.getDsf(k);
-            Eigen::MatrixXd const &iJ = mem.getJinv(k);
-            // Jacobian gradients
-            std::vector<Eigen::MatrixXd> const &dJ = mem.getGradJ(k);
-            std::vector<double> const &dDetJ = mem.getGradDetJ(k);
-            //
-            double w = gauss.getW(k);
-            double detJ = mem.getDetJ(k);
-            for (size_t i = 0; i < e->nodes.size(); ++i)
+            size_t rowi = e->nodes[i]->row;
+            for (size_t j = 0; j < e->nodes.size(); ++j)
             {
-                for (size_t m = 0; m < sol->pbl->nDim; ++m)
+                for (int n = 0; n < sol->pbl->nDim; ++n)
                 {
-                    size_t idx = i * sol->pbl->nDim + m;
-                    Ae1.col(idx) += w * dRho * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * (iJ * dSf).transpose() * dPhi * detJ; // drho * grad_phi*grad_psi * detj
-                    Ae1.col(idx) += w * rho * (iJ * dSf).transpose() * (-iJ * dJ[idx] * dPhi) * detJ;                            // rho * dgrad_phi * grad_psi * detj
-                    Ae1.col(idx) += w * rho * (-iJ * dJ[idx] * iJ * dSf).transpose() * dPhi * detJ;                              // rho * grad_phi * dgrad_psi * detj
-                    Ae1.col(idx) += w * rho * (iJ * dSf).transpose() * dPhi * dDetJ[idx];                                        // rho * grad_phi * grad_psi * ddetj
+                    for (size_t jj = 0; jj < arows[e->nodes[j]->row].size(); ++jj)
+                    {
+                        size_t rowj = sol->pbl->nDim * (arows[e->nodes[j]->row][jj]) + n;
+                        T.push_back(Eigen::Triplet<double>(sol->rows[rowi], rowj, Ae1(i, sol->pbl->nDim * j + n)));
+                    }
                 }
             }
         }
-        // Supersonic contribution
+        // Assembly (supersonic)
         double mach = fluid->mach.eval(*e, sol->phi, 0);
         if (mach > sol->mCOv)
         {
-            // switching function and gradient
-            double mu = sol->muCv * (1 - (sol->mCOv * sol->mCOv) / (mach * mach));
-            double dmu = 2 * sol->muCv * sol->mCOv * sol->mCOv / (mach * mach * mach);
-            // upwind density and gradient
-            Eigen::VectorXd dPhiU = eU->computeGrad(sol->phi, 0);
-            double rhoU = fluid->rho.eval(*eU, sol->phi, 0);
-            double dRhoU = fluid->rho.evalD(*eU, sol->phi, 0);
-            Eigen::MatrixXd const &iJU = eU->getVMem().getJinv(0);
-
-            // scale subsonic terms
-            Ae1 *= 1 - mu;
-
-            // compute upwind part of subsonic terms
-            for (size_t k = 0; k < gauss.getN(); ++k)
-            {
-                // @todo, use first GP only? -> better: have 1GP per element?
-                Eigen::VectorXd dPhi = e->computeGrad(sol->phi, k);
-                Eigen::MatrixXd const &dSf = cache.getDsf(k);
-                Eigen::MatrixXd const &iJ = mem.getJinv(k);
-                // Jacobian gradients
-                std::vector<Eigen::MatrixXd> const &dJ = mem.getGradJ(k);
-                std::vector<double> const &dDetJ = mem.getGradDetJ(k);
-                //
-                double w = gauss.getW(k);
-                double detJ = mem.getDetJ(k);
-                for (size_t i = 0; i < e->nodes.size(); ++i)
-                {
-                    for (size_t m = 0; m < sol->pbl->nDim; ++m)
-                    {
-                        size_t idx = i * sol->pbl->nDim + m;
-                        Ae1.col(idx) += w * mu * rhoU * (iJ * dSf).transpose() * (-iJ * dJ[idx] * dPhi) * detJ; // mu * rhoU * dgrad_phi * grad_psi * detj
-                        Ae1.col(idx) += w * mu * rhoU * (-iJ * dJ[idx] * iJ * dSf).transpose() * dPhi * detJ;   // mu * rhoU * grad_phi * dgrad_psi * detj
-                        Ae1.col(idx) += w * mu * rhoU * (iJ * dSf).transpose() * dPhi * dDetJ[idx];             // mu * rhoU * grad_phi * grad_psi * ddetj
-                    }
-                }
-            }
-
-            // compute derivative of switching function terms
-            for (size_t k = 0; k < gauss.getN(); ++k)
-            {
-                // @todo, use first GP only? -> better: have 1GP per element!
-                Eigen::VectorXd dPhi = e->computeGrad(sol->phi, k);
-                double rho = fluid->rho.eval(*e, sol->phi, k);
-                double dM = fluid->mach.evalD(*e, sol->phi, k);
-                Eigen::MatrixXd const &dSf = cache.getDsf(k);
-                Eigen::MatrixXd const &iJ = mem.getJinv(k);
-                // Jacobian gradients
-                std::vector<Eigen::MatrixXd> const &dJ = mem.getGradJ(k);
-                //
-                double w = gauss.getW(k);
-                double detJ = mem.getDetJ(k);
-                for (size_t i = 0; i < e->nodes.size(); ++i)
-                {
-                    for (size_t m = 0; m < sol->pbl->nDim; ++m)
-                    {
-                        size_t idx = i * sol->pbl->nDim + m;
-                        Ae1.col(idx) += w * (-rho + rhoU) * dmu * dM * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * (iJ * dSf).transpose() * dPhi * detJ; // (rhoU-rho) * dmu * grad_phi * grad_psi * detj
-                    }
-                }
-            }
-
-            // compute derivative of upwind density terms (on upwind element)
-            Eigen::MatrixXd Ae2 = Eigen::MatrixXd::Zero(e->nodes.size(), sol->pbl->nDim * eU->nodes.size());
-            for (size_t k = 0; k < gauss.getN(); ++k)
-            {
-                // @todo, use first GP only? -> better: have 1GP per element!
-                Eigen::VectorXd dPhi = e->computeGrad(sol->phi, k);
-                Eigen::MatrixXd const &dSf = cache.getDsf(k);
-                Eigen::MatrixXd const &iJ = mem.getJinv(k);
-                // Jacobian gradients
-                std::vector<Eigen::MatrixXd> const &dJ = mem.getGradJ(k);
-                //
-                double w = gauss.getW(k);
-                double detJ = mem.getDetJ(k);
-                for (size_t i = 0; i < eU->nodes.size(); ++i)
-                {
-                    for (size_t m = 0; m < sol->pbl->nDim; ++m)
-                    {
-                        size_t idx = i * sol->pbl->nDim + m;
-                        Ae2.col(idx) += w * mu * dRhoU * dPhiU.transpose() * (-iJU * dJ[idx] * dPhiU) * (iJ * dSf).transpose() * dPhi * detJ; // mu * drhoU * grad_phi * grad_psi * detj
-                    }
-                }
-            }
-
-            // Assembly (supersonic)
-            tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t i = 0; i < e->nodes.size(); ++i)
             {
                 size_t rowi = e->nodes[i]->row;
@@ -523,23 +407,6 @@ void Adjoint::buildGradientResidualMesh(Eigen::SparseMatrix<double, Eigen::RowMa
                 }
             }
         }
-        // Assembly (subsonic)
-        tbb::spin_mutex::scoped_lock lock(mutex);
-        for (size_t i = 0; i < e->nodes.size(); ++i)
-        {
-            size_t rowi = e->nodes[i]->row;
-            for (size_t j = 0; j < e->nodes.size(); ++j)
-            {
-                for (int n = 0; n < sol->pbl->nDim; ++n)
-                {
-                    for (size_t jj = 0; jj < arows[e->nodes[j]->row].size(); ++jj)
-                    {
-                        size_t rowj = sol->pbl->nDim * (arows[e->nodes[j]->row][jj]) + n;
-                        T.push_back(Eigen::Triplet<double>(sol->rows[rowi], rowj, Ae1(i, sol->pbl->nDim * j + n)));
-                    }
-                }
-            }
-        }
     });
     // Farfield B.C. are not taken into account because they have no influence on the solution
 
@@ -550,7 +417,7 @@ void Adjoint::buildGradientResidualMesh(Eigen::SparseMatrix<double, Eigen::RowMa
         tbb::parallel_do(wake->wEle.begin(), wake->wEle.end(), [&](WakeElement *we) {
             // Build elementary matrices
             Eigen::MatrixXd Aeu, Ael, Aes;
-            we->buildGradientResidualMesh(Aeu, Ael, Aes, sol->phi, vi);
+            std::tie(Aeu, Ael, Aes) = WakeResidual::buildGradientMesh(*we, sol->phi, vi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t i = 0; i < we->nRow; ++i)
@@ -598,7 +465,7 @@ void Adjoint::buildGradientResidualMesh(Eigen::SparseMatrix<double, Eigen::RowMa
         tbb::parallel_do(kutta->kEle.begin(), kutta->kEle.end(), [&](KuttaElement *ke) {
             // Build elementary matrices
             Eigen::MatrixXd Ae, Aes;
-            ke->buildGradientResidualMesh(Ae, Aes, sol->phi);
+            std::tie(Ae, Aes) = KuttaResidual::buildGradientMesh(*ke, sol->phi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t i = 0; i < ke->nRow; ++i)
@@ -637,7 +504,7 @@ void Adjoint::buildGradientResidualMesh(Eigen::SparseMatrix<double, Eigen::RowMa
 
 /**
  * @brief Compute gradients of the lift and drag with respect to mesh nodes
- * @todo compute one gradient per boundary
+ * @todo one gradient for each surface node
  */
 void Adjoint::buildGradientLoadsMesh(Eigen::RowVectorXd &dL, Eigen::RowVectorXd &dD)
 {
@@ -648,68 +515,54 @@ void Adjoint::buildGradientLoadsMesh(Eigen::RowVectorXd &dL, Eigen::RowVectorXd
     dL.setZero();
     dD.setZero();
 
-    Eigen::MatrixXd dCf = Eigen::MatrixXd::Zero(3, dL.size());
+    // Lift and drag normalized directions
+    Eigen::RowVector3d dirL, dirD;
+    std::tie(dirD, std::ignore, dirL) = sol->pbl->computeDirections();
+
+    // Gradient of lift and drag
     for (auto bnd : sol->pbl->bnds)
     {
         tbb::parallel_do(bnd->groups[0]->tag->elems.begin(), bnd->groups[0]->tag->elems.end(), [&](Element *e) {
             // Volume element
             Element *eV = bnd->svMap.at(e);
-            // Variables
-            double cp = sol->pbl->medium->cP.eval(*eV, sol->phi, 0); // pressure coefficient
-            Eigen::Vector3d nrm = e->normal();                       // unit normal
-            double surf = e->getVMem().getVol();                     // surface
-            // Gradients
-            double dCp = sol->pbl->medium->cP.evalD(*eV, sol->phi, 0);          // pressure coefficient
-            Eigen::VectorXd dPhi = eV->computeGrad(sol->phi, 0);                // potential
-            Eigen::MatrixXd const &iJ = eV->getVMem().getJinv(0);               // inverse Jacobian
-            std::vector<Eigen::MatrixXd> const &dJ = eV->getVMem().getGradJ(0); // Jacobian
-            std::vector<Eigen::Vector3d> dNrm = e->gradientNormal();            // unit normal
-            std::vector<double> dSurf = e->getVMem().getGradVol();              // surface
-
-            // Build element contributions
-            Eigen::MatrixXd dCfV = Eigen::MatrixXd::Zero(3, eV->nodes.size() * sol->pbl->nDim);
-            Eigen::MatrixXd dCfS = Eigen::MatrixXd::Zero(3, e->nodes.size() * sol->pbl->nDim);
-            for (size_t m = 0; m < sol->pbl->nDim; ++m)
-            {
-                // volume
-                for (size_t i = 0; i < eV->nodes.size(); ++i)
-                {
-                    size_t idx = i * sol->pbl->nDim + m;
-                    dCfV.col(idx) = dCp * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * nrm * surf; // dCp * n * S
-                }
-                // surface
-                for (size_t i = 0; i < e->nodes.size(); ++i)
-                {
-                    size_t idx = i * sol->pbl->nDim + m;
-                    dCfS.col(idx) = cp * dNrm[idx] * surf + cp * nrm * dSurf[idx]; // Cp * dn * S + Cp * n * dS
-                }
-            }
+            // Build
+            Eigen::MatrixXd Aes, Aev;
+            std::tie(Aes, Aev) = LoadFunctional::buildGradientMesh(*e, *eV, sol->phi, sol->pbl->medium->cP, sol->pbl->nDim);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
-            for (size_t m = 0; m < sol->pbl->nDim; ++m)
+            for (size_t i = 0; i < e->nodes.size(); ++i)
             {
-                // volume
-                for (size_t i = 0; i < eV->nodes.size(); ++i)
-                    for (size_t ii = 0; ii < arows[eV->nodes[i]->row].size(); ++ii)
-                        dCf.col((arows[eV->nodes[i]->row][ii]) * sol->pbl->nDim + m) -= dCfV.col(i * sol->pbl->nDim + m);
-                // surface
-                for (size_t i = 0; i < e->nodes.size(); ++i)
-                    for (size_t ii = 0; ii < arows[e->nodes[i]->row].size(); ++ii)
-                        dCf.col((arows[e->nodes[i]->row][ii]) * sol->pbl->nDim + m) -= dCfS.col(i * sol->pbl->nDim + m);
+                // rotate to flow direction
+                Eigen::RowVectorXd AesL = -dirL * Aes.block(i * 3, 0, 3, sol->pbl->nDim * e->nodes.size());
+                Eigen::RowVectorXd AesD = -dirD * Aes.block(i * 3, 0, 3, sol->pbl->nDim * e->nodes.size());
+                Eigen::RowVectorXd AevL = -dirL * Aev.block(i * 3, 0, 3, sol->pbl->nDim * eV->nodes.size());
+                Eigen::RowVectorXd AevD = -dirD * Aev.block(i * 3, 0, 3, sol->pbl->nDim * eV->nodes.size());
+                for (size_t m = 0; m < sol->pbl->nDim; ++m)
+                {
+                    // surface
+                    for (size_t j = 0; j < e->nodes.size(); ++j)
+                    {
+                        size_t rowj = e->nodes[j]->row;
+                        for (size_t jj = 0; jj < arows[rowj].size(); ++jj)
+                        {
+                            dL.col((arows[rowj][jj]) * sol->pbl->nDim + m) += AesL.col(j * sol->pbl->nDim + m);
+                            dD.col((arows[rowj][jj]) * sol->pbl->nDim + m) += AesD.col(j * sol->pbl->nDim + m);
+                        }
+                    }
+                    // volume
+                    for (size_t j = 0; j < eV->nodes.size(); ++j)
+                    {
+                        size_t rowj = eV->nodes[j]->row;
+                        for (size_t jj = 0; jj < arows[rowj].size(); ++jj)
+                        {
+                            dL.col((arows[rowj][jj]) * sol->pbl->nDim + m) += AevL.col(j * sol->pbl->nDim + m);
+                            dD.col((arows[rowj][jj]) * sol->pbl->nDim + m) += AevD.col(j * sol->pbl->nDim + m);
+                        }
+                    }
+                }
             }
         });
     }
-    // Rotate to flow direction
-    if (sol->pbl->nDim == 2)
-    {
-        dD = Eigen::RowVector3d(cos(sol->pbl->alpha), sin(sol->pbl->alpha), 0.) * dCf / sol->pbl->S_ref;
-        dL = Eigen::RowVector3d(-sin(sol->pbl->alpha), cos(sol->pbl->alpha), 0.) * dCf / sol->pbl->S_ref;
-    }
-    else
-    {
-        dD = Eigen::RowVector3d(cos(sol->pbl->alpha) * cos(sol->pbl->beta), sin(sol->pbl->beta), sin(sol->pbl->alpha) * cos(sol->pbl->beta)) * dCf / sol->pbl->S_ref;
-        dL = Eigen::RowVector3d(-sin(sol->pbl->alpha), 0., cos(sol->pbl->alpha)) * dCf / sol->pbl->S_ref;
-    }
 }
 
 /**
diff --git a/flow/src/wBlowingResidual.cpp b/flow/src/wBlowingResidual.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..09dfcf30ec0814d621557706f060a450e658788d
--- /dev/null
+++ b/flow/src/wBlowingResidual.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wBlowingResidual.h"
+#include "wFct0.h"
+
+#include "wElement.h"
+#include "wCache.h"
+#include "wGauss.h"
+#include "wMem.h"
+
+using namespace tbox;
+using namespace flow;
+
+/**
+ * @brief Build the residual vector, on one boundary element
+ *
+ * b = \int psi * vn dV
+ */
+Eigen::VectorXd BlowingResidual::build(Element const &e, std::vector<double> const &phi, Fct0 const &f)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Blowing velocity
+    double vn = f.eval(e, phi, 0);
+
+    // Integral of (normal) transpiration velocity
+    Eigen::VectorXd b = Eigen::VectorXd::Zero(e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+        b += cache.getSf(k) * vn * gauss.getW(k) * mem.getDetJ(k);
+    return b;
+}
diff --git a/flow/src/wBlowingResidual.h b/flow/src/wBlowingResidual.h
new file mode 100644
index 0000000000000000000000000000000000000000..95e09a3e1b12f7ade26c923324c344fed722a424
--- /dev/null
+++ b/flow/src/wBlowingResidual.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WBLOWINGRESIDUAL_H
+#define WBLOWINGRESIDUAL_H
+
+#include "flow.h"
+
+#include <vector>
+#include <Eigen/Dense>
+
+namespace flow
+{
+
+/**
+ * @brief Formulation of transpiration boundary terms
+ */
+class FLOW_API BlowingResidual
+{
+public:
+    // Newton
+    static Eigen::VectorXd build(tbox::Element const &e, std::vector<double> const &phi, tbox::Fct0 const &f);
+};
+
+} // namespace flow
+#endif //WBLOWINGRESIDUAL_H
diff --git a/flow/src/wBoundary.cpp b/flow/src/wBoundary.cpp
index ff6822b48708b8e662a838d855a3c5782491420d..61e3bfb1b218df2a993fc8ab662457950bdf66ee 100644
--- a/flow/src/wBoundary.cpp
+++ b/flow/src/wBoundary.cpp
@@ -68,11 +68,9 @@ void Boundary::create()
     std::sort(nodes.begin(), nodes.end());
     auto iterator = std::unique(nodes.begin(), nodes.end());
     nodes.resize(std::distance(nodes.begin(), iterator));
-
-    // Create a map between the boundary (surface) nodes and elements
-    for (auto e : groups[0]->tag->elems)
-        for (auto n : e->nodes)
-            neMap[n].push_back(e);
+    // Create a map between the boundary (surface) nodes and their local index
+    for (size_t i = 0; i < nodes.size(); ++i)
+        nMap[nodes[i]] = i;
 
     // Create unordered set of surface faces
     std::unordered_set<Face *, EquFace, EquFace> bndFaces;
@@ -113,22 +111,9 @@ void Boundary::create()
         svMap[f->el0] = f->el1;
         delete f;
     }
-    // Resize load vectors
-    cLoadX.resize(nodes.size());
-    cLoadY.resize(nodes.size());
-    cLoadZ.resize(nodes.size());
-}
 
-/*
- * @brief Integrate f over an element
- * @authors Adrien Crovato
- */
-double Boundary::integrate(Element *e, std::vector<double> const &u, tbox::Fct0 const &f) const
-{
-    // evaluate the function in the volume element
-    double fk = f.eval(*svMap.at(e), u, 0);
-    // integrate the function on the surface element
-    return fk * e->getVMem().getVol(); // since fk is constant: sum(fk*wk*dtmk) = fk*V
+    // Resize loads vector
+    nLoads.resize(nodes.size(), Eigen::Vector3d::Zero());
 }
 
 /**
diff --git a/flow/src/wBoundary.h b/flow/src/wBoundary.h
index b40e70c1e2fd23b5ee54e9ae2fd474167c549c3b..1d4a1285712da5c50903a3f2087ea11ab5111460 100644
--- a/flow/src/wBoundary.h
+++ b/flow/src/wBoundary.h
@@ -19,8 +19,10 @@
 
 #include "flow.h"
 #include "wGroups.h"
+
 #include <vector>
 #include <map>
+#include <Eigen/Dense>
 
 namespace flow
 {
@@ -32,22 +34,19 @@ namespace flow
 class FLOW_API Boundary : public tbox::Groups
 {
 public:
-    std::vector<tbox::Node *> nodes;                            ///< nodes of Boundary
-    std::map<tbox::Node *, std::vector<tbox::Element *>> neMap; ///< node to element map
-    std::map<tbox::Element *, tbox::Element *> svMap;           ///< surface to volume element map
-    double Cl;                                                  ///< lift coefficient
-    double Cd;                                                  ///< drag coefficient
-    double Cs;                                                  ///< sideforce coefficient
-    double Cm;                                                  ///< pitch moment coefficient (positive nose-up)
-    std::vector<double> cLoadX;                                 ///< x-component of aerodynamic load coefficient on boundary
-    std::vector<double> cLoadY;                                 ///< y-component of aerodynamic load coefficient on boundary
-    std::vector<double> cLoadZ;                                 ///< z-component of aerodynamic load coefficient on boundary
+    std::vector<tbox::Node *> nodes;                  ///< nodes of Boundary
+    std::map<tbox::Node *, size_t> nMap;              ///< node to local index map
+    std::map<tbox::Element *, tbox::Element *> svMap; ///< surface to volume element map
+    double Cl;                                        ///< lift coefficient
+    double Cd;                                        ///< drag coefficient
+    double Cs;                                        ///< sideforce coefficient
+    double Cm;                                        ///< pitch moment coefficient (positive nose-up)
+    std::vector<Eigen::Vector3d> nLoads;              ///< nodal (normalized) aerodynamic loads on boundary
 
     Boundary(std::shared_ptr<tbox::MshData> _msh, std::vector<int> const &nos);
     Boundary(std::shared_ptr<tbox::MshData> _msh, std::vector<std::string> const &names);
     virtual ~Boundary() { std::cout << "~Boundary()\n"; }
 
-    double integrate(tbox::Element *e, std::vector<double> const &u, tbox::Fct0 const &f) const;
     void save(std::string const &name, tbox::Results const &res);
 
 #ifndef SWIG
diff --git a/flow/src/wF0El.cpp b/flow/src/wF0El.cpp
index f4222fbd068d601ba59bcd3be0b11ebc62633288..45a27a4c9a9cf0bf07eb4a03522be0241d7896ec 100644
--- a/flow/src/wF0El.cpp
+++ b/flow/src/wF0El.cpp
@@ -22,7 +22,7 @@ using namespace flow;
  * @brief Evaluate the nonlinear density (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElRho::eval(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElRho::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     double gradU = e.computeGrad(u, k).norm(); // norm of velocity
 
@@ -43,7 +43,7 @@ double F0ElRho::eval(Element &e, std::vector<double> const &u, size_t k) const
  * @brief Evaluate the nonlinear density derivative (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElRho::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElRho::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     double gradU = e.computeGrad(u, k).norm(); // norm of velocity
 
@@ -69,7 +69,7 @@ void F0ElRho::write(std::ostream &out) const
  * @brief Evaluate the linear density (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElRhoL::eval(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElRhoL::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return 1.;
 }
@@ -77,7 +77,7 @@ double F0ElRhoL::eval(Element &e, std::vector<double> const &u, size_t k) const
  * @brief Evaluate the linear density derivative (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElRhoL::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElRhoL::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return 0.;
 }
@@ -90,7 +90,7 @@ void F0ElRhoL::write(std::ostream &out) const
  * @brief Evaluate the nonlinear Mach number (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElMach::eval(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElMach::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     double gradU2 = e.computeGrad2(u, k);                             // velocity squared
     double a2 = 1 / (mInf * mInf) + 0.5 * (gamma - 1) * (1 - gradU2); // speed of sound squared
@@ -101,7 +101,7 @@ double F0ElMach::eval(Element &e, std::vector<double> const &u, size_t k) const
  * @brief Evaluate the nonlinear Mach number derivative (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElMach::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElMach::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     double gradU2 = e.computeGrad2(u, k);
     double a2 = 1 / (mInf * mInf) + 0.5 * (gamma - 1) * (1 - gradU2);
@@ -117,7 +117,7 @@ void F0ElMach::write(std::ostream &out) const
  * @brief Evaluate the linear Mach number (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElMachL::eval(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElMachL::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return 0.;
 }
@@ -125,7 +125,7 @@ double F0ElMachL::eval(Element &e, std::vector<double> const &u, size_t k) const
  * @brief Evaluate the linear Mach number derivative (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElMachL::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElMachL::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return 0.;
 }
@@ -138,7 +138,7 @@ void F0ElMachL::write(std::ostream &out) const
  * @brief Evaluate the nonlinear pressure coefficient (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElCp::eval(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElCp::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     double gradU2 = e.computeGrad2(u, k); // velocity squared
     //particularized: 2 / (gamma * mInf * mInf) * (pow(1 + 0.5 * (gamma - 1) * mInf * mInf * (1 - gradU2), gamma / (gamma - 1)) - 1);
@@ -153,7 +153,7 @@ double F0ElCp::eval(Element &e, std::vector<double> const &u, size_t k) const
  * @brief Evaluate the nonlinear pressure coefficient derivative (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElCp::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElCp::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     double gradU2 = e.computeGrad2(u, k); // velocity squared
     //particularized: -2 * pow(1 + 0.5 * (gamma - 1) * mInf * mInf * (1 - gradU2), 1 / (gamma - 1));
@@ -173,11 +173,11 @@ void F0ElCp::write(std::ostream &out) const
  * @brief Evaluate the linear pressure coefficient (constant over element)
  * @authors Adrien Crovato
  */
-double F0ElCpL::eval(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElCpL::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return 1 - e.computeGrad2(u, k);
 }
-double F0ElCpL::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double F0ElCpL::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return -2.; // has to be multiplied by grad u to recover true derivative
 }
diff --git a/flow/src/wF0El.h b/flow/src/wF0El.h
index 021554493c4976cdb67e61c7dd06f261ace72e3a..a92e4d4b5410eb3eea924ac8bcfec9f2987ade06 100644
--- a/flow/src/wF0El.h
+++ b/flow/src/wF0El.h
@@ -46,10 +46,10 @@ public:
     }
 
 #ifndef SWIG
-    virtual double eval(tbox::Element &e,
+    virtual double eval(tbox::Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
-    virtual double evalD(tbox::Element &e,
+    virtual double evalD(tbox::Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override;
     virtual void write(std::ostream &out) const override;
@@ -66,10 +66,10 @@ public:
     F0ElRhoL() : Fct0() {}
 
 #ifndef SWIG
-    virtual double eval(tbox::Element &e,
+    virtual double eval(tbox::Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
-    virtual double evalD(tbox::Element &e,
+    virtual double evalD(tbox::Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override;
     virtual void write(std::ostream &out) const override;
@@ -91,10 +91,10 @@ public:
     F0ElMach(double _mInf) : Fct0(), gamma(1.4), mInf(_mInf) {}
 
 #ifndef SWIG
-    virtual double eval(tbox::Element &e,
+    virtual double eval(tbox::Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
-    virtual double evalD(tbox::Element &e,
+    virtual double evalD(tbox::Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override;
     virtual void write(std::ostream &out) const override;
@@ -111,10 +111,10 @@ public:
     F0ElMachL() : Fct0() {}
 
 #ifndef SWIG
-    virtual double eval(tbox::Element &e,
+    virtual double eval(tbox::Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
-    virtual double evalD(tbox::Element &e,
+    virtual double evalD(tbox::Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override;
     virtual void write(std::ostream &out) const override;
@@ -136,10 +136,10 @@ public:
     F0ElCp(double _mInf) : Fct0(), gamma(1.4), mInf(_mInf) {}
 
 #ifndef SWIG
-    virtual double eval(tbox::Element &e,
+    virtual double eval(tbox::Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
-    virtual double evalD(tbox::Element &e,
+    virtual double evalD(tbox::Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override;
     virtual void write(std::ostream &out) const override;
@@ -157,10 +157,10 @@ public:
     F0ElCpL() : Fct0() {}
 
 #ifndef SWIG
-    virtual double eval(tbox::Element &e,
+    virtual double eval(tbox::Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
-    virtual double evalD(tbox::Element &e,
+    virtual double evalD(tbox::Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override;
     virtual void write(std::ostream &out) const override;
diff --git a/flow/src/wF1El.cpp b/flow/src/wF1El.cpp
index 6cba8d940031a917db05f648efcd64200d63e368..a65f35c86e7aedf64c7c30ff0c140394269c0d0c 100644
--- a/flow/src/wF1El.cpp
+++ b/flow/src/wF1El.cpp
@@ -39,18 +39,31 @@ F1ElVi::F1ElVi(int _nDim, double alpha, double beta) : Fct1(), nDim(_nDim)
 void F1ElVi::update(double alpha, double beta)
 {
     if (nDim == 2)
+    {
         v << cos(alpha), sin(alpha), 0;
+        dv << -sin(alpha), cos(alpha), 0;
+    }
     else
+    {
         v << cos(alpha) * cos(beta), sin(beta), sin(alpha) * cos(beta);
+        dv << -sin(alpha) * cos(beta), 0, cos(alpha) * cos(beta);
+    }
 }
 /**
  * @brief Return the freestream velocity
  * @authors Adrien Crovato
  */
-Eigen::Vector3d F1ElVi::eval(Element &e, std::vector<double> const &u, size_t k) const
+Eigen::Vector3d F1ElVi::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     return v;
 }
+/**
+ * @brief Return the gradient of freestream velocity with respect to angle of attack
+ */
+Eigen::Vector3d F1ElVi::evalD(Element const &e, std::vector<double> const &u, size_t k) const
+{
+    return dv;
+}
 void F1ElVi::write(std::ostream &out) const
 {
     out << "F1ElVi";
diff --git a/flow/src/wF1El.h b/flow/src/wF1El.h
index 065dc22b7bfbe7a17d2fb8902e43bb551954fec6..7d86971217a83bf1705fa96dc3911fc1b48f3523 100644
--- a/flow/src/wF1El.h
+++ b/flow/src/wF1El.h
@@ -31,17 +31,21 @@ namespace flow
  */
 class FLOW_API F1ElVi : public tbox::Fct1
 {
-    int nDim;          ///< number of dimensions
-    Eigen::Vector3d v; ///< freestream velocity vector
+    int nDim;           ///< number of dimensions
+    Eigen::Vector3d v;  ///< freestream velocity vector
+    Eigen::Vector3d dv; ///< freestream velocity vector gradient (wrt alpha)
 
 public:
     F1ElVi(int _nDim, double alpha, double beta = 0);
     void update(double alpha, double beta = 0);
 
 #ifndef SWIG
-    virtual Eigen::Vector3d eval(tbox::Element &e,
+    virtual Eigen::Vector3d eval(tbox::Element const &e,
                                  std::vector<double> const &u,
                                  size_t k) const override;
+    virtual Eigen::Vector3d evalD(tbox::Element const &e,
+                                  std::vector<double> const &u,
+                                  size_t k) const override;
     virtual void write(std::ostream &out) const override;
 #endif
 };
diff --git a/flow/src/wFreestreamResidual.cpp b/flow/src/wFreestreamResidual.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..afdd43aebe18a3e3501ea4328fd8b2ac47a5fb4b
--- /dev/null
+++ b/flow/src/wFreestreamResidual.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wFreestreamResidual.h"
+#include "wFreestream.h"
+
+#include "wElement.h"
+#include "wCache.h"
+#include "wGauss.h"
+#include "wMem.h"
+
+using namespace tbox;
+using namespace flow;
+
+/**
+ * @brief Build the residual vector, on one freestream boundary element
+ *
+ * b = \int psi * v_inf * n dV
+ */
+Eigen::VectorXd FreestreamResidual::build(Element const &e, std::vector<double> const &phi, Freestream const &fs)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Normal velocity
+    double vn = fs.f.eval(e, phi, 0).dot(e.normal());
+
+    // Integral of mass flux (density times normal velocity) through freestream boundary
+    Eigen::VectorXd b = Eigen::VectorXd::Zero(e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+        b += cache.getSf(k) * vn * gauss.getW(k) * mem.getDetJ(k);
+    return b;
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the angle of attack, on one freestream boundary element
+ *
+ * b = \int psi * dv_inf * n dV
+ */
+Eigen::VectorXd FreestreamResidual::buildGradientAoa(tbox::Element const &e, std::vector<double> const &phi, Freestream const &fs)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Gradient of normal velocity
+    double dvn = fs.f.evalD(e, phi, 0).dot(e.normal());
+
+    // Build mass flux gradient
+    Eigen::VectorXd b = Eigen::VectorXd::Zero(e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+        b += cache.getSf(k) * dvn * gauss.getW(k) * mem.getDetJ(k);
+    return b;
+}
diff --git a/flow/src/wFreestreamResidual.h b/flow/src/wFreestreamResidual.h
new file mode 100644
index 0000000000000000000000000000000000000000..abaa783e8514fe846ab5bb7be9beb0eb05df780e
--- /dev/null
+++ b/flow/src/wFreestreamResidual.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WFREESTREAMRESIDUAL_H
+#define WFREESTREAMRESIDUAL_H
+
+#include "flow.h"
+
+#include <vector>
+#include <Eigen/Dense>
+
+namespace flow
+{
+
+/**
+ * @brief Formulation of freestream boundary residuals
+ */
+class FLOW_API FreestreamResidual
+{
+public:
+    // Newton
+    static Eigen::VectorXd build(tbox::Element const &e, std::vector<double> const &phi, Freestream const &fs);
+    // Adjoint
+    static Eigen::VectorXd buildGradientAoa(tbox::Element const &e, std::vector<double> const &phi, Freestream const &fs);
+};
+
+} // namespace flow
+#endif //WFREESTREAMRESIDUAL_H
diff --git a/flow/src/wKuttaElement.cpp b/flow/src/wKuttaElement.cpp
index 16a9f5fe96751c7a58f57ecb5a1374df46dbc283..798fd7784d9f3e1228fefdb9ca0ec474b5b83493 100644
--- a/flow/src/wKuttaElement.cpp
+++ b/flow/src/wKuttaElement.cpp
@@ -51,170 +51,6 @@ KuttaElement::KuttaElement(size_t _no, Element *&_surE, Element *&_volE,
     nRow = teN.size();
 }
 
-/**
- * @brief Compute stiffness matrices associated to Kutta
- *
- * K_ij = int V*dN_j * N_i dS
- *
- *              3
- *              o
- *             / \
- *            /   \
- *           /     \
- *     1,1' o-x---x-o 2,2'
- *            A   B
- *
- *     I = sum_j w'_j psi_j N'_j V_j invJ_j dN_j phi_j dtm(J')_j
- *       = psi sum_j (w'_j N'_j V_j invJ_j dN_j dtm(J')_j) phi
- *       = (psi)|1X2 (sum_j K_j)|2X3 (phi)|3X1
- *
- *     K = w'[N'1][Vx Vy]inv([dx/dxi dy/dxi])[dxiN1 dxiN2 dxiN3] dtm([dx/dxi' dy/dxi']^2)
- *           [N'2]          ([dx/det dy/det])[detN1 detN2 detN3]
- *
- *             phi1  phi2  phi3
- *     K = 1' [ .     .     .  ]
- *         2' [ .     .     .  ]
- *
- *     NB: K should be eval at GP A and B, for upper and lower element
- *     NB: V = grad(phi)
- * @authors Adrien Crovato
- */
-void KuttaElement::buildK(Eigen::MatrixXd &K, std::vector<double> const &u)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    Eigen::MatrixXd const &volDff = volE->getVCache().getDsf(0);
-    // Get Jacobian
-    Mem &surMem = surE->getVMem();
-    Eigen::MatrixXd const &volJ = volE->getVMem().getJinv(0);
-    // Reset matrix
-    K = Eigen::MatrixXd::Zero(nRow, nCol);
-
-    // Volume contribution
-    // V & dN
-    // A = V*[inv(J)*dN]
-    Eigen::RowVectorXd A(nCol);
-    A = volE->computeGrad(u, 0).transpose() * volJ * volDff;
-
-    // Build
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // N
-        Eigen::VectorXd const &ff = surCache.getSf(k);
-        Eigen::VectorXd N(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            N(i) = ff(teN[i].first);
-
-        // K = K + N*B*w*dtm
-        K += N * A * sign * surGauss.getW(k) * surMem.getDetJ(k);
-    }
-}
-
-/**
- * @brief Compute stiffness matrices associated to Kutta for Newton-Raphson
- *
- * K_ij = int 2*V*dN_j * N_i dS
- * @authors Adrien Crovato
- */
-void KuttaElement::buildNK(Eigen::MatrixXd &K, std::vector<double> const &u)
-{
-    // Build
-    buildK(K, u);
-    K *= 2.;
-}
-
-/**
- * @brief Compute flux vectors associated to Kutta for Newton-Raphson
- *
- * s_i = int V*V * N_i dS
- * @authors Adrien Crovato
- */
-void KuttaElement::buildNs(Eigen::VectorXd &b, std::vector<double> const &u)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    // Get Jacobian
-    Mem &surMem = surE->getVMem();
-    // Reset matrices
-    b = Eigen::VectorXd::Zero(nRow);
-
-    // Build
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // N
-        Eigen::VectorXd const &ff = surCache.getSf(k);
-        Eigen::VectorXd N(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            N(i) = ff(teN[i].first);
-        // V2
-        double grad2 = volE->computeGrad2(u, 0);
-
-        // Compute flux vector
-        b += sign * N * grad2 * surGauss.getW(k) * surMem.getDetJ(k);
-    }
-}
-
-/**
- * @brief Compute stiffness matrices associated to Kutta for Newton-Raphson
- *
- * K_ij = int d(2*V*dN_j * N_i) dS
- *      = sum 2*V*dgrad_phi * N_i * detJ
- *      + sum grad_phi^2 * N_i ddetJ
- */
-void KuttaElement::buildGradientResidualMesh(Eigen::MatrixXd &K, Eigen::MatrixXd &Ks, std::vector<double> const &u)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    // Get Jacobian
-    Mem &surMem = surE->getVMem();
-    Mem &volMem = volE->getVMem();
-    Eigen::MatrixXd const &iJ = volMem.getJinv(0);
-    std::vector<Eigen::MatrixXd> const &dJ = volMem.getGradJ(0);
-    // Reset matrices
-    K = Eigen::MatrixXd::Zero(surE->nodes.size(), nDim * volE->nodes.size());
-    Ks = Eigen::MatrixXd::Zero(surE->nodes.size(), nDim * surE->nodes.size());
-
-    // Gradient of potential
-    Eigen::VectorXd dPhi = volE->computeGrad(u, 0);
-
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // weight and Jacobian determinant
-        double w = surGauss.getW(k);
-        double detJ = surMem.getDetJ(k);
-        std::vector<double> dDetJ;
-        surE->buildGradientJ(k, dDetJ);
-        // shape functions
-        Eigen::VectorXd const &sf = surCache.getSf(k);
-        Eigen::VectorXd n(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            n(i) = sf(teN[i].first);
-
-        for (size_t i = 0; i < volE->nodes.size(); ++i)
-        {
-            for (size_t m = 0; m < nDim; ++m)
-            {
-                size_t idx = i * nDim + m;
-                // derivative of velocity jump
-                K.col(idx) += sign * w * n * 2 * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * detJ; // psi * dgrad_phi^2
-            }
-        }
-        // Surface
-        for (size_t i = 0; i < surE->nodes.size(); ++i)
-        {
-            for (size_t m = 0; m < nDim; ++m)
-            {
-                size_t idx = i * nDim + m;
-                // derivative of jacobian determinant
-                Ks.col(idx) += sign * w * n * dPhi.dot(dPhi) * dDetJ[idx]; // psi * grad_phi^2 * ddetJ
-            }
-        }
-    }
-}
-
 void KuttaElement::write(std::ostream &out) const
 {
     out << "KuttaElement #" << no
diff --git a/flow/src/wKuttaElement.h b/flow/src/wKuttaElement.h
index 9a15f94aeb4d4c2513db4cd7d7efcc84a0dffc40..4103cc393a793d246c245e8af54c28b29c97efcd 100644
--- a/flow/src/wKuttaElement.h
+++ b/flow/src/wKuttaElement.h
@@ -48,23 +48,18 @@ public:
     tbox::Element *surE; ///< Surface element
     tbox::Element *volE; ///< Connected volume element
 
-    size_t nRow;                                      ///< number of rows in stiffness matrix
-    size_t nCol;                                      ///< number of columns in stiffness matrix
+    size_t nRow; ///< number of rows in stiffness matrix
+    size_t nCol; ///< number of columns in stiffness matrix
+    size_t nDim; ///< dimension of volume element
+
     std::vector<std::pair<size_t, tbox::Node *>> teN; ///< Map of local node indices and trailing edge nodes
+    int sign;                                         ///< +1 if upper element, -1 if lower element
 
     KuttaElement(size_t _no, tbox::Element *&_surE, tbox::Element *&_volE, std::vector<std::pair<size_t, tbox::Node *>> &_teN, int _sign);
 
 #ifndef SWIG
-    void buildK(Eigen::MatrixXd &K, std::vector<double> const &u);
-    void buildNK(Eigen::MatrixXd &K, std::vector<double> const &u);
-    void buildNs(Eigen::VectorXd &b, std::vector<double> const &u);
-    void buildGradientResidualMesh(Eigen::MatrixXd &K, Eigen::MatrixXd &Ks, std::vector<double> const &u);
     virtual void write(std::ostream &out) const override;
 #endif
-
-private:
-    size_t nDim; ///< dimension of volume element
-    int sign;    ///< +1 if upper element, -1 if lower element
 };
 
 } // namespace flow
diff --git a/flow/src/wKuttaResidual.cpp b/flow/src/wKuttaResidual.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..59c2010a5453f0d76a357af6d4f6b32e23aa9b57
--- /dev/null
+++ b/flow/src/wKuttaResidual.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wKuttaResidual.h"
+#include "wKuttaElement.h"
+
+#include "wElement.h"
+#include "wCache.h"
+#include "wGauss.h"
+#include "wMem.h"
+
+using namespace tbox;
+using namespace flow;
+
+/**
+ * @brief Build the residual matrix for a fixed-point iteration, on one Kutta element
+ *
+ * A = \int psi * v_u*grad_phi dS
+ */
+Eigen::MatrixXd KuttaResidual::buildFixed(KuttaElement const &ke, std::vector<double> const &phi)
+{
+    // Get shape functions, Jacobian and Gauss points
+    Cache &sCache = ke.surE->getVCache();
+    Gauss &sGauss = sCache.getVGauss();
+    Mem &sMem = ke.surE->getVMem();
+    // Get size
+    size_t nRow = ke.nRow;
+
+    // Velocity
+    Eigen::RowVectorXd Aup = ke.volE->computeGrad(phi, 0).transpose() * ke.volE->getVMem().getJinv(0) * ke.volE->getVCache().getDsf(0);
+
+    // Build
+    Eigen::MatrixXd A = Eigen::MatrixXd::Zero(nRow, ke.nCol);
+    for (size_t k = 0; k < sGauss.getN(); ++k)
+    {
+        // shape functions
+        Eigen::VectorXd const &sf = sCache.getSf(k);
+        Eigen::VectorXd sfp(nRow);
+        for (size_t i = 0; i < nRow; ++i)
+            sfp(i) = sf(ke.teN[i].first);
+        // wake contribution
+        A += ke.sign * sfp * Aup * sGauss.getW(k) * sMem.getDetJ(k);
+    }
+    return A;
+}
+
+/**
+ * @brief Build the residual vector, on one Kutta element
+ *
+ * b = \int psi * (grad_phi)^2 dS
+ */
+Eigen::VectorXd KuttaResidual::build(KuttaElement const &ke, std::vector<double> const &phi)
+{
+    // Get shape functions, Jacobian and Gauss points
+    Cache &sCache = ke.surE->getVCache();
+    Gauss &sGauss = sCache.getVGauss();
+    Mem &sMem = ke.surE->getVMem();
+    // Get size
+    size_t nRow = ke.nRow;
+
+    // Velocity
+    double dPhi2 = ke.volE->computeGrad2(phi, 0);
+
+    // Build
+    Eigen::VectorXd b = Eigen::VectorXd::Zero(nRow);
+    for (size_t k = 0; k < sGauss.getN(); ++k)
+    {
+        // shape functions
+        Eigen::VectorXd const &sf = sCache.getSf(k);
+        Eigen::VectorXd sfp(nRow);
+        for (size_t i = 0; i < nRow; ++i)
+            sfp(i) = sf(ke.teN[i].first);
+        // wake contribution
+        b += ke.sign * sfp * dPhi2 * sGauss.getW(k) * sMem.getDetJ(k);
+    }
+    return b;
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the flow variable (jacobian), on one Kutta element
+ *
+ * A = \int psi * 2*(grad_phi_u)*dgrad_phi_u dS
+ */
+Eigen::MatrixXd KuttaResidual::buildGradientFlow(KuttaElement const &ke, std::vector<double> const &phi)
+{
+    // Build
+    Eigen::MatrixXd A = KuttaResidual::buildFixed(ke, phi);
+    return 2 * A;
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the nodes, on one Kutta element
+ *
+ * A = d( \int psi * (grad_phi)^2 dS )
+ *   = \int psi * d(grad_phi)^2 dS
+ *   + \int psi * (grad_phi)^2 ddS
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> KuttaResidual::buildGradientMesh(KuttaElement const &ke, std::vector<double> const &phi)
+{
+    // Get shape functions and Gauss points
+    Cache &surCache = ke.surE->getVCache();
+    Gauss &surGauss = surCache.getVGauss();
+    // Get Jacobian
+    Mem &surMem = ke.surE->getVMem();
+    Mem &volMem = ke.volE->getVMem();
+    Eigen::MatrixXd const &iJ = volMem.getJinv(0);
+    std::vector<Eigen::MatrixXd> const &dJ = volMem.getGradJ(0);
+    // Get size
+    size_t nRow = ke.nRow;
+    size_t nDim = ke.nDim;
+    size_t nCol = ke.nCol;
+    size_t nSur = ke.surE->nodes.size();
+
+    // velocity
+    Eigen::VectorXd dPhi = ke.volE->computeGrad(phi, 0);
+
+    // Build
+    Eigen::MatrixXd Av = Eigen::MatrixXd::Zero(nRow, nDim * nCol);
+    Eigen::MatrixXd As = Eigen::MatrixXd::Zero(nRow, nDim * nSur);
+    for (size_t k = 0; k < surGauss.getN(); ++k)
+    {
+        // weight and Jacobian determinant
+        double w = surGauss.getW(k);
+        double detJ = surMem.getDetJ(k);
+        std::vector<double> dDetJ = surMem.getGradDetJ(k);
+        // shape functions
+        Eigen::VectorXd const &sf = surCache.getSf(k);
+        Eigen::VectorXd sfp(nRow);
+        for (size_t i = 0; i < nRow; ++i)
+            sfp(i) = sf(ke.teN[i].first);
+
+        // Volume
+        for (size_t i = 0; i < nCol; ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                size_t idx = i * nDim + m;
+                // gradient of velocity jump
+                Av.col(idx) += ke.sign * w * sfp * 2 * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * detJ; // psi * dgrad_phi^2
+            }
+        }
+        // Surface
+        for (size_t i = 0; i < nSur; ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                size_t idx = i * nDim + m;
+                // gradient of jacobian determinant
+                As.col(idx) += ke.sign * w * sfp * dPhi.dot(dPhi) * dDetJ[idx]; // psi * grad_phi^2 * ddetJ
+            }
+        }
+    }
+    return std::make_tuple(Av, As);
+}
diff --git a/flow/src/wKuttaResidual.h b/flow/src/wKuttaResidual.h
new file mode 100644
index 0000000000000000000000000000000000000000..562b3dcc277d890e2a3f59f4c512c016f7cef7c8
--- /dev/null
+++ b/flow/src/wKuttaResidual.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WKUTTARESIDUAL_H
+#define WKUTTARESIDUAL_H
+
+#include "flow.h"
+
+#include <vector>
+#include <Eigen/Dense>
+
+namespace flow
+{
+
+/**
+ * @brief Formulation of Kutta residuals
+ */
+class FLOW_API KuttaResidual
+{
+public:
+    // Picard
+    static Eigen::MatrixXd buildFixed(KuttaElement const &ke, std::vector<double> const &phi);
+    // Newton
+    static Eigen::VectorXd build(KuttaElement const &ke, std::vector<double> const &phi);
+    static Eigen::MatrixXd buildGradientFlow(KuttaElement const &ke, std::vector<double> const &phi);
+    // Adjoint
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> buildGradientMesh(KuttaElement const &ke, std::vector<double> const &phi);
+};
+
+} // namespace flow
+#endif //WKUTTARESIDUAL_H
diff --git a/flow/src/wLoadFunctional.cpp b/flow/src/wLoadFunctional.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a251f54f689d3fe9ec25a4f6112e467844faf324
--- /dev/null
+++ b/flow/src/wLoadFunctional.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wLoadFunctional.h"
+#include "wFct0.h"
+
+#include "wElement.h"
+#include "wCache.h"
+#include "wGauss.h"
+#include "wMem.h"
+
+using namespace tbox;
+using namespace flow;
+
+/**
+ * @brief Compute the loads due to pressure normalized by the freestream dynamic pressure, on one boundary element
+ *
+ * b = \int psi * cp * n dS
+ */
+Eigen::VectorXd LoadFunctional::build(Element const &e, Element const &eV, std::vector<double> const &phi, Fct0 const &cp)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Pressure force coefficient in associated volume
+    Eigen::Vector3d fcp = cp.eval(eV, phi, 0) * e.normal();
+
+    // Integrate cp on the surface
+    Eigen::VectorXd b = Eigen::VectorXd::Zero(3 * e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+    {
+        Eigen::VectorXd sf = cache.getSf(k);
+        double wdj = gauss.getW(k) * mem.getDetJ(k);
+        for (size_t i = 0; i < e.nodes.size(); ++i)
+            b.block<3, 1>(i * 3, 0) += sf(i) * fcp * wdj; // stack nodal force vectors [fx1, fy1, fz1, fx2, fy2, fz2, ...]
+    }
+    return b;
+}
+
+/**
+ * @brief Build the gradient of the loads with respect to the flow variable, on one boundary element
+ *
+ * A = \int psi * dcp * n dS
+ */
+Eigen::MatrixXd LoadFunctional::buildGradientFlow(Element const &e, Element const &eV, std::vector<double> const &phi, Fct0 const &cp)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Gradient of pressure force coefficient in associated volume
+    Eigen::MatrixXd dfcp = e.normal() * cp.evalD(eV, phi, 0) * eV.computeGrad(phi, 0).transpose() * eV.getVMem().getJinv(0) * eV.getVCache().getDsf(0);
+
+    // Integrate cp on the surface
+    Eigen::MatrixXd A = Eigen::MatrixXd::Zero(3 * e.nodes.size(), eV.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+    {
+        Eigen::VectorXd sf = cache.getSf(k);
+        double wdj = gauss.getW(k) * mem.getDetJ(k);
+        for (size_t i = 0; i < e.nodes.size(); ++i)
+            A.block(i * 3, 0, 3, eV.nodes.size()) += sf(i) * dfcp * wdj;
+    }
+    return A;
+}
+
+/**
+ * @brief Build the gradient of the loads with respect to the nodes, on one boundary element
+ *
+ * A = d( \int psi * cp * n dS )
+ *   = \int psi * dcp * n * dS
+ *   + \int psi * cp * dn * dS
+ *   + \int psi * cp * n * ddS
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> LoadFunctional::buildGradientMesh(Element const &e, Element const &eV, std::vector<double> const &phi, Fct0 const &cp, int nDim)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Variables
+    double cP = cp.eval(eV, phi, 0);  // pressure coefficient
+    Eigen::Vector3d nrm = e.normal(); // unit normal
+    // Gradients
+    double dCp = cp.evalD(eV, phi, 0);                                 // pressure coefficient
+    Eigen::VectorXd dPhi = eV.computeGrad(phi, 0);                     // potential
+    Eigen::MatrixXd const &iJ = eV.getVMem().getJinv(0);               // inverse Jacobian
+    std::vector<Eigen::MatrixXd> const &dJ = eV.getVMem().getGradJ(0); // Jacobian
+    std::vector<Eigen::Vector3d> dNrm = e.gradientNormal();            // unit normal
+
+    // Build element contributions
+    Eigen::MatrixXd As = Eigen::MatrixXd::Zero(3 * e.nodes.size(), e.nodes.size() * nDim);
+    Eigen::MatrixXd Av = Eigen::MatrixXd::Zero(3 * e.nodes.size(), eV.nodes.size() * nDim);
+    for (size_t k = 0; k < gauss.getN(); ++k)
+    {
+        Eigen::VectorXd sf = cache.getSf(k);
+        double w = gauss.getW(k);
+        double detJ = mem.getDetJ(k);
+        std::vector<double> dDetJ = mem.getGradDetJ(k);
+        for (size_t i = 0; i < e.nodes.size(); ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                // surface
+                for (size_t j = 0; j < e.nodes.size(); ++j)
+                {
+                    size_t idx = j * nDim + m;
+                    As.block<3, 1>(i * 3, idx) += sf(i) * cP * w * (dNrm[idx] * detJ + nrm * dDetJ[idx]); // Cp * dn * S + Cp * n * dS
+                }
+                // volume
+                for (size_t j = 0; j < eV.nodes.size(); ++j)
+                {
+                    size_t idx = j * nDim + m;
+                    Av.block<3, 1>(i * 3, idx) += sf(i) * dCp * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * nrm * w * detJ; // dCp * n * S
+                }
+            }
+        }
+    }
+    return std::make_tuple(As, Av);
+}
diff --git a/flow/src/wLoadFunctional.h b/flow/src/wLoadFunctional.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a1a25a8ca3497dc18c1210d49b61dd20cb4c425
--- /dev/null
+++ b/flow/src/wLoadFunctional.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WLOADFUNCTIONAL_H
+#define WLOADFUNCTIONAL_H
+
+#include "flow.h"
+
+#include <vector>
+#include <Eigen/Dense>
+
+namespace flow
+{
+
+/**
+ * @brief Formulation of load functional
+ */
+class FLOW_API LoadFunctional
+{
+public:
+    // Solver
+    static Eigen::VectorXd build(tbox::Element const &e, tbox::Element const &eV, std::vector<double> const &phi, tbox::Fct0 const &cp);
+    // Adjoint
+    static Eigen::MatrixXd buildGradientFlow(tbox::Element const &e, tbox::Element const &eV, std::vector<double> const &phi, tbox::Fct0 const &cp);
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> buildGradientMesh(tbox::Element const &e, tbox::Element const &eV, std::vector<double> const &phi, tbox::Fct0 const &cp, int nDim);
+};
+
+} // namespace flow
+#endif //WLOADFUNCTIONAL_H
diff --git a/flow/src/wNewton.cpp b/flow/src/wNewton.cpp
index 4b24dc088b56b2c1f94154103d465179525852eb..4ac08c51708639d0c16c8e06b8a2aabda7e75665 100644
--- a/flow/src/wNewton.cpp
+++ b/flow/src/wNewton.cpp
@@ -23,13 +23,17 @@
 #include "wKutta.h"
 #include "wBlowing.h"
 #include "wFpeLSFunction.h"
+#include "wPotentialResidual.h"
+#include "wFreestreamResidual.h"
+#include "wWakeResidual.h"
+#include "wKuttaResidual.h"
+#include "wBlowingResidual.h"
 
 #include "wMshData.h"
 #include "wTag.h"
 #include "wNode.h"
 #include "wElement.h"
 #include "wMem.h"
-#include "wCache.h"
 #include "wLinesearch.h"
 #include "wLinearSolver.h"
 
@@ -66,7 +70,7 @@ Newton::Newton(std::shared_ptr<Problem> _pbl, std::shared_ptr<tbox::LinearSolver
 
 /**
  * @brief Run the Newton solver
- * 
+ *
  * Solve the steady transonic Full Potential Equation
  * @authors Adrien Crovato
  */
@@ -268,60 +272,35 @@ void Newton::buildJac(Eigen::SparseMatrix<double, Eigen::RowMajor> &J)
         Element *e = p.first;
         //Upwind element
         Element *eU = p.second[0];
+        // Build elementary matrices
+        Eigen::MatrixXd Ae1, Ae2;
+        std::tie(Ae1, Ae2) = PotentialResidual::buildGradientFlow(*e, *eU, phi, *fluid, muCv, mCOv);
 
-        // Subsonic contribution (drho*grad_phi*grad_psi + rho*grad_phi*grad_psi)
-        Eigen::MatrixXd Ae1 = e->buildDK(phi, fluid->rho);
-        Eigen::MatrixXd Ae2 = e->buildK(phi, fluid->rho);
-
-        // Supersonic contribution
+        // Assembly (subsonic)
+        tbb::spin_mutex::scoped_lock lock(mutex);
+        for (size_t ii = 0; ii < e->nodes.size(); ++ii)
+        {
+            Node *nodi = e->nodes[ii];
+            for (size_t jj = 0; jj < e->nodes.size(); ++jj)
+            {
+                Node *nodj = e->nodes[jj];
+                T.push_back(Eigen::Triplet<double>(rows[nodi->row], nodj->row, Ae1(ii, jj)));
+            }
+        }
+        // Assembly (supersonic)
         double mach = fluid->mach.eval(*e, phi, 0);
         if (mach > mCOv)
         {
-            // switching function and derivative
-            double mu = muCv * (1 - (mCOv * mCOv) / (mach * mach));
-            double dmu = 2 * muCv * mCOv * mCOv / (mach * mach * mach);
-
-            // scale 1st and 2nd terms
-            Ae1 *= 1 - mu;
-            Ae2 *= 1 - mu;
-
-            // 3rd term (mu*drhoU*grad_phi*grad_psi)
-            Eigen::MatrixXd Ae3 = mu * fluid->rho.evalD(*eU, phi, 0) * e->buildDs(phi, Fct0C(1.)) * eU->computeGrad(phi, 0).transpose() * eU->getVMem().getJinv(0) * eU->getVCache().getDsf(0);
-
-            // 4th term (mu*rhoU*grad_phi*grad_psi)
-            Eigen::MatrixXd Ae4 = mu * fluid->rho.eval(*eU, phi, 0) * e->buildK(phi, Fct0C(1.));
-
-            // 5th term dmu*(rhoU-rho)*grad_phi*grad_psi
-            Eigen::MatrixXd Ae5 = dmu * (fluid->rho.eval(*eU, phi, 0) - fluid->rho.eval(*e, phi, 0)) * e->buildDK(phi, fluid->mach);
-
-            // Assembly (supersonic)
-            tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < e->nodes.size(); ++ii)
             {
                 Node *nodi = e->nodes[ii];
-                for (size_t jj = 0; jj < e->nodes.size(); ++jj)
-                {
-                    Node *nodj = e->nodes[jj];
-                    T.push_back(Eigen::Triplet<double>(rows[nodi->row], nodj->row, Ae4(ii, jj) + Ae5(ii, jj)));
-                }
                 for (size_t jj = 0; jj < eU->nodes.size(); ++jj)
                 {
                     Node *nodj = eU->nodes[jj];
-                    T.push_back(Eigen::Triplet<double>(rows[nodi->row], nodj->row, Ae3(ii, jj)));
+                    T.push_back(Eigen::Triplet<double>(rows[nodi->row], nodj->row, Ae2(ii, jj)));
                 }
             }
         }
-        // Assembly (subsonic)
-        tbb::spin_mutex::scoped_lock lock(mutex);
-        for (size_t ii = 0; ii < e->nodes.size(); ++ii)
-        {
-            Node *nodi = e->nodes[ii];
-            for (size_t jj = 0; jj < e->nodes.size(); ++jj)
-            {
-                Node *nodj = e->nodes[jj];
-                T.push_back(Eigen::Triplet<double>(rows[nodi->row], nodj->row, Ae1(ii, jj) + Ae2(ii, jj)));
-            }
-        }
     });
     tms["0-Jbase"].stop();
 
@@ -335,7 +314,7 @@ void Newton::buildJac(Eigen::SparseMatrix<double, Eigen::RowMajor> &J)
         tbb::parallel_do(wake->wEle.begin(), wake->wEle.end(), [&](WakeElement *we) {
             // Build Ku, Kl
             Eigen::MatrixXd Aue, Ale;
-            we->buildNK(Aue, Ale, phi, vi);
+            std::tie(Aue, Ale) = WakeResidual::buildGradientFlow(*we, phi, vi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < we->nRow; ++ii)
@@ -361,8 +340,7 @@ void Newton::buildJac(Eigen::SparseMatrix<double, Eigen::RowMajor> &J)
     {
         tbb::parallel_do(kutta->kEle.begin(), kutta->kEle.end(), [&](KuttaElement *ke) {
             // Build K
-            Eigen::MatrixXd Ae;
-            ke->buildNK(Ae, phi);
+            Eigen::MatrixXd Ae = KuttaResidual::buildGradientFlow(*ke, phi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < ke->nRow; ++ii)
@@ -425,28 +403,8 @@ void Newton::buildRes(Eigen::Map<Eigen::VectorXd> &R)
         Element *e = p.first;
         // Upwind element
         Element *eU = p.second[0];
-
-        // Subsonic contribution
-        Eigen::VectorXd be = e->buildDs(phi, fluid->rho);
-
-        // Supersonic contribution
-        double mach = fluid->mach.eval(*e, phi, 0);
-        if (mach > mCOv)
-        {
-            double mu = muCv * (1 - (mCOv * mCOv) / (mach * mach));
-            double rhoU = fluid->rho.eval(*eU, phi, 0);
-            Eigen::VectorXd beU = e->buildDs(phi, Fct0C(1.));
-            // scale 1st and 2nd terms
-            be *= 1 - mu;
-            beU *= mu * rhoU;
-            // Assembly (supersonic)
-            tbb::spin_mutex::scoped_lock lock(mutex);
-            for (size_t ii = 0; ii < e->nodes.size(); ++ii)
-            {
-                Node *nodi = e->nodes[ii];
-                R(rows[nodi->row]) -= beU(ii);
-            }
-        }
+        // Build elementary vector
+        Eigen::VectorXd be = PotentialResidual::build(*e, *eU, phi, *fluid, muCv, mCOv);
         // Assembly (subsonic)
         tbb::spin_mutex::scoped_lock lock(mutex);
         for (size_t ii = 0; ii < e->nodes.size(); ++ii)
@@ -460,7 +418,7 @@ void Newton::buildRes(Eigen::Map<Eigen::VectorXd> &R)
     {
         tbb::parallel_do(nBC->tag->elems.begin(), nBC->tag->elems.end(), [&](Element *e) {
             // Build elementary flux vector
-            Eigen::VectorXd be = e->builds(phi, nBC->f);
+            Eigen::VectorXd be = FreestreamResidual::build(*e, phi, *nBC);
 
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
@@ -476,7 +434,7 @@ void Newton::buildRes(Eigen::Map<Eigen::VectorXd> &R)
     {
         tbb::parallel_do(bBC->uE.begin(), bBC->uE.end(), [&](std::pair<Element *, Fct0C *> p) {
             // Build elementary flux vector
-            Eigen::VectorXd be = p.first->builds(phi, *(p.second));
+            Eigen::VectorXd be = BlowingResidual::build(*p.first, phi, *p.second);
 
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
@@ -498,7 +456,7 @@ void Newton::buildRes(Eigen::Map<Eigen::VectorXd> &R)
         tbb::parallel_do(wake->wEle.begin(), wake->wEle.end(), [&](WakeElement *we) {
             // Build bu, bl
             Eigen::VectorXd bue, ble;
-            we->buildNs(bue, ble, phi, vi);
+            std::tie(bue, ble) = WakeResidual::build(*we, phi, vi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < we->nRow; ++ii)
@@ -516,8 +474,7 @@ void Newton::buildRes(Eigen::Map<Eigen::VectorXd> &R)
     {
         tbb::parallel_do(kutta->kEle.begin(), kutta->kEle.end(), [&](KuttaElement *ke) {
             // Build b
-            Eigen::VectorXd be;
-            ke->buildNs(be, phi);
+            Eigen::VectorXd be = KuttaResidual::build(*ke, phi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < ke->nRow; ++ii)
diff --git a/flow/src/wPicard.cpp b/flow/src/wPicard.cpp
index 7849d56cf322599d1537cff5911aa8b2269cc6fd..8b66292b19a49e4be04a2355e71ddbb91718ea9d 100644
--- a/flow/src/wPicard.cpp
+++ b/flow/src/wPicard.cpp
@@ -22,6 +22,11 @@
 #include "wWake.h"
 #include "wKutta.h"
 #include "wBlowing.h"
+#include "wPotentialResidual.h"
+#include "wFreestreamResidual.h"
+#include "wWakeResidual.h"
+#include "wKuttaResidual.h"
+#include "wBlowingResidual.h"
 
 #include "wMshData.h"
 #include "wTag.h"
@@ -58,7 +63,7 @@ Picard::Picard(std::shared_ptr<Problem> _pbl, std::shared_ptr<tbox::LinearSolver
 
 /**
  * @brief Run the Picard sovler
- * 
+ *
  * Solve the steady subsonic Full Potential Equation
  * @authors Adrien Crovato
  */
@@ -199,7 +204,7 @@ void Picard::build(Eigen::SparseMatrix<double, Eigen::RowMajor> &A, std::vector<
         Element *e = p.first;
 
         // Elementary stiffness
-        Eigen::MatrixXd Ae = e->buildK(phi, fluid->rho);
+        Eigen::MatrixXd Ae = PotentialResidual::buildFixed(*e, phi, *fluid);
 
         // Assembly
         tbb::spin_mutex::scoped_lock lock(mutex);
@@ -225,7 +230,7 @@ void Picard::build(Eigen::SparseMatrix<double, Eigen::RowMajor> &A, std::vector<
         tbb::parallel_do(wake->wEle.begin(), wake->wEle.end(), [&](WakeElement *we) {
             // Build Ku, Kl
             Eigen::MatrixXd Aue, Ale;
-            we->buildK(Aue, Ale, phi, vi);
+            std::tie(Aue, Ale) = WakeResidual::buildFixed(*we, phi, vi);
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < we->nRow; ++ii)
@@ -251,8 +256,8 @@ void Picard::build(Eigen::SparseMatrix<double, Eigen::RowMajor> &A, std::vector<
     {
         tbb::parallel_do(kutta->kEle.begin(), kutta->kEle.end(), [&](KuttaElement *ke) {
             // Build K
-            Eigen::MatrixXd Ae;
-            ke->buildK(Ae, phi);
+            Eigen::MatrixXd Ae = KuttaResidual::buildFixed(*ke, phi);
+
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
             for (size_t ii = 0; ii < ke->nRow; ++ii)
@@ -274,7 +279,7 @@ void Picard::build(Eigen::SparseMatrix<double, Eigen::RowMajor> &A, std::vector<
     {
         tbb::parallel_do(nBC->tag->elems.begin(), nBC->tag->elems.end(), [&](Element *e) {
             // Build elementary flux vector
-            Eigen::VectorXd be = e->builds(phi, nBC->f);
+            Eigen::VectorXd be = FreestreamResidual::build(*e, phi, *nBC);
 
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
@@ -290,7 +295,7 @@ void Picard::build(Eigen::SparseMatrix<double, Eigen::RowMajor> &A, std::vector<
     {
         tbb::parallel_do(bBC->uE.begin(), bBC->uE.end(), [&](std::pair<Element *, Fct0C *> p) {
             // Build elementary flux vector
-            Eigen::VectorXd be = p.first->builds(phi, *(p.second));
+            Eigen::VectorXd be = BlowingResidual::build(*p.first, phi, *p.second);
 
             // Assembly
             tbb::spin_mutex::scoped_lock lock(mutex);
diff --git a/flow/src/wPotentialResidual.cpp b/flow/src/wPotentialResidual.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ff2054788bdefe5fd12d4ebcfcbdaffb128f1552
--- /dev/null
+++ b/flow/src/wPotentialResidual.cpp
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wPotentialResidual.h"
+#include "wMedium.h"
+
+#include "wElement.h"
+#include "wCache.h"
+#include "wGauss.h"
+#include "wMem.h"
+
+using namespace tbox;
+using namespace flow;
+
+/**
+ * @brief Build the residual matrix for a fixed-point iteration, on one element
+ *
+ * A = \int rho * grad_phi * grad_psi dV
+ */
+Eigen::MatrixXd PotentialResidual::buildFixed(Element const &e, std::vector<double> const &phi, Medium const &fluid)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Build
+    Eigen::MatrixXd A = Eigen::MatrixXd::Zero(e.nodes.size(), e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+    {
+        // Shape functions gradient
+        Eigen::MatrixXd const &dSf = mem.getJinv(k) * cache.getDsf(k);
+        // Elementary stiffness matrix
+        A += dSf.transpose() * dSf * fluid.rho.eval(e, phi, k) * gauss.getW(k) * mem.getDetJ(k);
+    }
+    return A;
+}
+
+/**
+ * @brief Build the residual vector, on one element
+ *
+ * b = \int ( (1-mu)*rho + mu*rhoU ) * grad_phi * grad_psi dV
+ * @todo remove muC, mCO + mu as function?
+ */
+Eigen::VectorXd PotentialResidual::build(Element const &e, Element const &eU, std::vector<double> const &phi, Medium const &fluid, double muC, double mCO)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Subsonic contribution
+    Eigen::VectorXd b = Eigen::VectorXd::Zero(e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+        b += fluid.rho.eval(e, phi, k) * (mem.getJinv(k) * cache.getDsf(k)).transpose() * e.computeGrad(phi, k) * gauss.getW(k) * mem.getDetJ(k);
+
+    // Supersonic contribution
+    double mach = fluid.mach.eval(e, phi, 0);
+    if (mach > mCO)
+    {
+        double mu = muC * (1 - (mCO * mCO) / (mach * mach)); // switching  function
+        double rhoU = fluid.rho.eval(eU, phi, 0);            // density on upwind element
+        // scale 1st term
+        b *= 1 - mu;
+        // contribution of upwind element
+        for (size_t k = 0; k < gauss.getN(); ++k)
+            b += mu * rhoU * (mem.getJinv(k) * cache.getDsf(k)).transpose() * e.computeGrad(phi, k) * gauss.getW(k) * mem.getDetJ(k);
+    }
+    return b;
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the flow variable (jacobian), on one element
+ *
+ * A = \int d( (1-mu)*rho + mu*rhoU ) * grad_phi * grad_psi dV
+ *   = \int (1-mu)*drho * grad_phi * grad_psi dV
+ *   + \int mu*drhoU * grad_phi * grad_psi dV
+ *   + \int ( (1-mu)*rho + mu*rhoU ) * dgrad_phi * grad_psi dV
+ *   + \int (rhoU - rho)*dmu * grad_phi * grad_psi dV
+ * @todo remove muC, mCO + mu as function?
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> PotentialResidual::buildGradientFlow(tbox::Element const &e, tbox::Element const &eU, std::vector<double> const &phi, Medium const &fluid, double muC, double mCO)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Subsonic contribution
+    Eigen::MatrixXd A1 = Eigen::MatrixXd::Zero(e.nodes.size(), e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+    {
+        // Gauss point and determinant
+        double wdj = gauss.getW(k) * mem.getDetJ(k);
+        // Shape functions and solution gradient
+        Eigen::MatrixXd const &dSf = mem.getJinv(k) * cache.getDsf(k);
+        Eigen::VectorXd dPhi = e.computeGrad(phi, k);
+
+        // rho * grad_phi*grad_psi + drho * grad_phi*grad_psi
+        A1 += fluid.rho.eval(e, phi, k) * dSf.transpose() * dSf * wdj;
+        A1 += fluid.rho.evalD(e, phi, k) * dSf.transpose() * dPhi * dPhi.transpose() * dSf * wdj;
+    }
+
+    // Supersonic contribution
+    Eigen::MatrixXd A2;
+    double mach = fluid.mach.eval(e, phi, 0);
+    if (mach > mCO)
+    {
+        // switching function and gradient
+        double mu = muC * (1 - (mCO * mCO) / (mach * mach));
+        double dmu = 2 * muC * mCO * mCO / (mach * mach * mach);
+        // density and gradients on upwind elements
+        Eigen::VectorXd dPhiU = eU.computeGrad(phi, 0);
+        double rhoU = fluid.rho.eval(eU, phi, 0);
+        double dRhoU = fluid.rho.evalD(eU, phi, 0);
+        Eigen::MatrixXd const &dSfU = eU.getVMem().getJinv(0) * eU.getVCache().getDsf(0);
+
+        // upwinding
+        A1 *= 1 - mu;
+        A2 = Eigen::MatrixXd::Zero(e.nodes.size(), e.nodes.size());
+        for (size_t k = 0; k < gauss.getN(); ++k)
+        {
+            // Gauss point and determinant
+            double wdj = gauss.getW(k) * mem.getDetJ(k);
+            // Shape functions and solution gradient
+            Eigen::MatrixXd const &dSf = mem.getJinv(k) * cache.getDsf(k);
+            Eigen::VectorXd dPhi = e.computeGrad(phi, k);
+
+            // mu*drhoU*grad_phi*grad_psi
+            A2 += mu * dRhoU * dSf.transpose() * dPhi * dPhiU.transpose() * dSfU * wdj;
+            // mu*rhoU*grad_phi*grad_psi + dmu*(rhoU-rho)*grad_phi*grad_psi
+            A1 += mu * rhoU * dSf.transpose() * dSf * wdj;
+            A1 += dmu * (rhoU - fluid.rho.eval(e, phi, k)) * fluid.mach.evalD(e, phi, k) * dSf.transpose() * dPhi * dPhi.transpose() * dSf * wdj;
+        }
+    }
+    return std::make_tuple(A1, A2);
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the nodes, on one element
+ *
+ * A = d( \int ( (1-mu)*rho + mu*rhoU ) * grad_phi * grad_psi dV )
+ *   = \int (1-mu)*drho * grad_phi * grad_psi dV
+ *   + \int mu*drhoU * grad_phi * grad_psi dV
+ *   + \int ( (1-mu)*rho + mu*rhoU ) * dgrad_phi * grad_psi dV
+ *   + \int ( (1-mu)*rho + mu*rhoU ) * grad_phi * dgrad_psi dV
+ *   + \int (rhoU - rho)*dmu * grad_phi * grad_psi dV
+ *   + \int ( (1-mu)*rho + mu*rhoU ) * grad_phi * grad_psi ddV
+ * @todo remove muC, mCO + mu as function?
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> PotentialResidual::buildGradientMesh(tbox::Element const &e, tbox::Element const &eU, std::vector<double> const &phi, Medium const &fluid, int nDim, double muC, double mCO)
+{
+    // Get pre-computed values
+    Cache &cache = e.getVCache();
+    Gauss &gauss = cache.getVGauss();
+    Mem &mem = e.getVMem();
+
+    // Subsonic contribution
+    Eigen::MatrixXd A1 = Eigen::MatrixXd::Zero(e.nodes.size(), nDim * e.nodes.size());
+    for (size_t k = 0; k < gauss.getN(); ++k)
+    {
+        // Density and gradients
+        Eigen::VectorXd dPhi = e.computeGrad(phi, k);
+        double rho = fluid.rho.eval(e, phi, k);
+        double dRho = fluid.rho.evalD(e, phi, k);
+        Eigen::MatrixXd const &dSf = cache.getDsf(k);
+        Eigen::MatrixXd const &iJ = mem.getJinv(k);
+        // Jacobian gradients
+        std::vector<Eigen::MatrixXd> const &dJ = mem.getGradJ(k);
+        std::vector<double> const &dDetJ = mem.getGradDetJ(k);
+        // Gauss points and determinant
+        double w = gauss.getW(k);
+        double detJ = mem.getDetJ(k);
+        for (size_t i = 0; i < e.nodes.size(); ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                size_t idx = i * nDim + m;
+                A1.col(idx) += w * dRho * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * (iJ * dSf).transpose() * dPhi * detJ; // drho * grad_phi*grad_psi * detj
+                A1.col(idx) += w * rho * (iJ * dSf).transpose() * (-iJ * dJ[idx] * dPhi) * detJ;                            // rho * dgrad_phi * grad_psi * detj
+                A1.col(idx) += w * rho * (-iJ * dJ[idx] * iJ * dSf).transpose() * dPhi * detJ;                              // rho * grad_phi * dgrad_psi * detj
+                A1.col(idx) += w * rho * (iJ * dSf).transpose() * dPhi * dDetJ[idx];                                        // rho * grad_phi * grad_psi * ddetj
+            }
+        }
+    }
+    // Supersonic contribution
+    Eigen::MatrixXd A2;
+    double mach = fluid.mach.eval(e, phi, 0);
+    if (mach > mCO)
+    {
+        // switching function and gradient
+        double mu = muC * (1 - (mCO * mCO) / (mach * mach));
+        double dmu = 2 * muC * mCO * mCO / (mach * mach * mach);
+        // upwind density and gradient
+        Eigen::VectorXd dPhiU = eU.computeGrad(phi, 0);
+        double rhoU = fluid.rho.eval(eU, phi, 0);
+        double dRhoU = fluid.rho.evalD(eU, phi, 0);
+        Eigen::MatrixXd const &iJU = eU.getVMem().getJinv(0);
+        std::vector<Eigen::MatrixXd> const &dJU = eU.getVMem().getGradJ(0);
+
+        // upwinding
+        A1 *= 1 - mu;
+        A2 = Eigen::MatrixXd::Zero(e.nodes.size(), nDim * eU.nodes.size());
+        for (size_t k = 0; k < gauss.getN(); ++k)
+        {
+            // Density, Mach and Gradients
+            Eigen::VectorXd dPhi = e.computeGrad(phi, k);
+            double rho = fluid.rho.eval(e, phi, k);
+            double dM = fluid.mach.evalD(e, phi, k);
+            Eigen::MatrixXd const &dSf = cache.getDsf(k);
+            Eigen::MatrixXd const &iJ = mem.getJinv(k);
+            // Jacobian gradients
+            std::vector<Eigen::MatrixXd> const &dJ = mem.getGradJ(k);
+            std::vector<double> const &dDetJ = mem.getGradDetJ(k);
+            // Gauss points and determinant
+            double w = gauss.getW(k);
+            double detJ = mem.getDetJ(k);
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                for (size_t i = 0; i < e.nodes.size(); ++i)
+                {
+                    size_t idx = i * nDim + m;
+                    A1.col(idx) += w * mu * rhoU * (iJ * dSf).transpose() * (-iJ * dJ[idx] * dPhi) * detJ;                                          // mu * rhoU * dgrad_phi * grad_psi * detj
+                    A1.col(idx) += w * mu * rhoU * (-iJ * dJ[idx] * iJ * dSf).transpose() * dPhi * detJ;                                            // mu * rhoU * grad_phi * dgrad_psi * detj
+                    A1.col(idx) += w * mu * rhoU * (iJ * dSf).transpose() * dPhi * dDetJ[idx];                                                      // mu * rhoU * grad_phi * grad_psi * ddetj
+                    A1.col(idx) += w * (-rho + rhoU) * dmu * dM * dPhi.transpose() * (-iJ * dJ[idx] * dPhi) * (iJ * dSf).transpose() * dPhi * detJ; // (rhoU-rho) * dmu * grad_phi * grad_psi * detj
+                }
+                for (size_t i = 0; i < eU.nodes.size(); ++i)
+                {
+                    size_t idx = i * nDim + m;
+                    A2.col(idx) += w * mu * dRhoU * dPhiU.transpose() * (-iJU * dJU[idx] * dPhiU) * (iJ * dSf).transpose() * dPhi * detJ; // mu * drhoU * grad_phi * grad_psi * detj
+                }
+            }
+        }
+    }
+    return std::make_tuple(A1, A2);
+}
diff --git a/flow/src/wPotentialResidual.h b/flow/src/wPotentialResidual.h
new file mode 100644
index 0000000000000000000000000000000000000000..62dde67b704aab847e97578315053d03b97c432e
--- /dev/null
+++ b/flow/src/wPotentialResidual.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WPOTENTIALRESIDUAL_H
+#define WPOTENTIALRESIDUAL_H
+
+#include "flow.h"
+
+#include <vector>
+#include <Eigen/Dense>
+
+namespace flow
+{
+
+/**
+ * @brief Formulation of nonlinear potential equation residuals
+ * @todo split stabilization terms from subsonic terms?
+ */
+class FLOW_API PotentialResidual
+{
+public:
+    // Picard
+    static Eigen::MatrixXd buildFixed(tbox::Element const &e, std::vector<double> const &phi, Medium const &fluid);
+    // Newton
+    static Eigen::VectorXd build(tbox::Element const &e, tbox::Element const &eU, std::vector<double> const &phi, Medium const &fluid, double muC, double mCO);
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> buildGradientFlow(tbox::Element const &e, tbox::Element const &eU, std::vector<double> const &phi, Medium const &fluid, double muC, double mCO);
+    // Adjoint
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> buildGradientMesh(tbox::Element const &e, tbox::Element const &eU, std::vector<double> const &phi, Medium const &fluid, int nDim, double muC, double mCO);
+};
+
+} // namespace flow
+#endif //WPOTENTIALRESIDUAL_H
diff --git a/flow/src/wProblem.cpp b/flow/src/wProblem.cpp
index 2babefbcd7f985470acc98912a0869a1902bc65b..966064cab05ee1e7b50f696c12745d3d5448bd0d 100644
--- a/flow/src/wProblem.cpp
+++ b/flow/src/wProblem.cpp
@@ -38,6 +38,55 @@ Problem::Problem(std::shared_ptr<MshData> _msh, int dim, double aoa, double aos,
     x_ref(2) = zref;
 }
 
+/**
+ * @brief Compute drag, sideforce and lift normalized directions
+ * @todo use F1ElVi
+ */
+std::tuple<Eigen::Vector3d, Eigen::Vector3d, Eigen::Vector3d> Problem::computeDirections()
+{
+    Eigen::Vector3d dirD, dirS, dirL;
+    if (nDim == 2)
+    {
+        dirD = Eigen::Vector3d(cos(alpha), sin(alpha), 0) / S_ref;
+        dirS = Eigen::Vector3d(0, 0, 0);
+        dirL = Eigen::Vector3d(-sin(alpha), cos(alpha), 0) / S_ref;
+    }
+    else
+    {
+        dirD = Eigen::Vector3d(cos(alpha) * cos(beta), sin(beta), sin(alpha) * cos(beta)) / S_ref;
+        dirS = Eigen::Vector3d(-cos(alpha) * sin(beta), cos(beta), -sin(alpha) * sin(beta)) / S_ref;
+        dirL = Eigen::Vector3d(-sin(alpha), 0, cos(alpha)) / S_ref;
+    }
+    return std::make_tuple(dirD, dirS, dirL);
+}
+
+/**
+ * @brief Compute gradient of drag, sideforce and lift normalized directions
+ * @todo use F1ElVi
+ * @todo only wrt alpha
+ */
+std::tuple<Eigen::Vector3d, Eigen::Vector3d, Eigen::Vector3d> Problem::computeGradientDirections()
+{
+    Eigen::Vector3d dirD, dirS, dirL;
+    if (nDim == 2)
+    {
+        dirD = Eigen::Vector3d(-sin(alpha), cos(alpha), 0) / S_ref;
+        dirS = Eigen::Vector3d(0, 0, 0);
+        dirL = Eigen::Vector3d(-cos(alpha), -sin(alpha), 0) / S_ref;
+    }
+    else
+    {
+        dirD = Eigen::Vector3d(-sin(alpha) * cos(beta), sin(beta), cos(alpha) * cos(beta)) / S_ref;
+        dirS = Eigen::Vector3d(sin(alpha) * sin(beta), cos(beta), -cos(alpha) * sin(beta)) / S_ref;
+        dirL = Eigen::Vector3d(-cos(alpha), 0, -sin(alpha)) / S_ref;
+    }
+    return std::make_tuple(dirD, dirS, dirL);
+}
+
+/**
+ * @brief Compute freestream velocity
+ * @todo use F1ElVi
+ */
 Eigen::VectorXd Problem::computeVi()
 {
     if (nDim == 2)
@@ -46,6 +95,19 @@ Eigen::VectorXd Problem::computeVi()
         return Eigen::Vector3d(cos(alpha) * cos(beta), sin(beta), sin(alpha) * cos(beta));
 }
 
+/**
+ * @brief Compute freestream velocity
+ * @todo use F1ElVi
+ * @todo only wrt alpha
+ */
+Eigen::VectorXd Problem::computeGradientVi()
+{
+    if (nDim == 2)
+        return Eigen::Vector2d(-sin(alpha), cos(alpha));
+    else
+        return Eigen::Vector3d(-sin(alpha) * cos(beta), 0, cos(alpha) * cos(beta));
+}
+
 /**
  * @brief Set the fluid medium
  * @authors Adrien Crovato
diff --git a/flow/src/wProblem.h b/flow/src/wProblem.h
index 008db76ea8f94b6bab13179db8e02cca48c7fdba..3f4b9b630d5f130dda2744ebc9b0f8079b54596d 100644
--- a/flow/src/wProblem.h
+++ b/flow/src/wProblem.h
@@ -62,7 +62,10 @@ public:
             double zref);
     virtual ~Problem() { std::cout << "~Problem()\n"; }
 
+    std::tuple<Eigen::Vector3d, Eigen::Vector3d, Eigen::Vector3d> computeDirections();
+    std::tuple<Eigen::Vector3d, Eigen::Vector3d, Eigen::Vector3d> computeGradientDirections();
     Eigen::VectorXd computeVi();
+    Eigen::VectorXd computeGradientVi();
     void set(std::shared_ptr<Medium> m);
     void add(std::shared_ptr<Boundary> b);
     void set(std::shared_ptr<Initial> i);
diff --git a/flow/src/wSolver.cpp b/flow/src/wSolver.cpp
index 6f7d494e6caa88c883e64685c993dd27857c75c3..c90dda9f650ce5df742dbd6e46e7b580b62e7aa1 100644
--- a/flow/src/wSolver.cpp
+++ b/flow/src/wSolver.cpp
@@ -20,11 +20,11 @@
 #include "wAssign.h"
 #include "wBoundary.h"
 #include "wWake.h"
+#include "wLoadFunctional.h"
 
 #include "wMshData.h"
 #include "wNode.h"
 #include "wElement.h"
-#include "wMem.h"
 #include "wTag.h"
 #include "wResults.h"
 #include "wMshExport.h"
@@ -160,55 +160,50 @@ void Solver::save(MshExport *mshWriter, int n)
  */
 void Solver::computeLoad()
 {
+    // Reset coefficients
     Cl = 0;
     Cd = 0;
     Cs = 0;
     Cm = 0;
+
+    // Lift, drag and sideforce normalized directions
+    Eigen::Vector3d dirL, dirD, dirS;
+    std::tie(dirD, dirS, dirL) = pbl->computeDirections();
+
+    // Compute loads
     for (auto sur : pbl->bnds)
     {
-        // Map each element to its value
-        std::map<Element *, double> euMap;
-        for (auto e : sur->groups[0]->tag->elems)
-            euMap[e] = sur->integrate(e, phi, pbl->medium->cP);
+        // Reset
+        std::fill(sur->nLoads.begin(), sur->nLoads.end(), Eigen::Vector3d::Zero());
 
-        // Compute normalized load (i.e. load / pressure)
-        for (size_t i = 0; i < sur->nodes.size(); ++i)
+        // Compute pressure loads coefficient (normalized by freestream dynamic pressure) on each element
+        for (auto e : sur->groups[0]->tag->elems)
         {
-            sur->cLoadX[i] = 0;
-            sur->cLoadY[i] = 0;
-            sur->cLoadZ[i] = 0;
-            for (auto e : sur->neMap.at(sur->nodes[i]))
+            // Build nodal pressure load
+            Eigen::VectorXd be = LoadFunctional::build(*e, *sur->svMap.at(e), phi, pbl->medium->cP);
+
+            // Assembly
+            for (size_t i = 0; i < e->nodes.size(); ++i)
             {
-                Eigen::Vector3d cLoad = euMap.at(e) * e->normal() / e->nodes.size();
-                sur->cLoadX[i] += cLoad(0);
-                sur->cLoadY[i] += cLoad(1);
-                sur->cLoadZ[i] += cLoad(2);
+                size_t idx = sur->nMap.at(e->nodes[i]);
+                sur->nLoads[idx] += be.block<3, 1>(i * 3, 0);
             }
         }
-
-        // Compute aerodynamic load coefficients (i.e. Load / pressure / surface)
-        // compute coefficients along x and vertical (y in 2D, z in 3D) directions and pitching moment (along -z in 2D, y in 3D)
+        // Compute integrated aerodynamic load coefficients (normalized by freestream dynamic pressure and reference area)
+        // force coefficients along x and vertical (y in 2D, z in 3D) directions and pitching moment (along -z in 2D, y in 3D)
         sur->Cm = 0;
         Eigen::Vector3d Cf(0, 0, 0);
-        for (auto p : euMap)
+        for (size_t i = 0; i < sur->nodes.size(); ++i)
         {
-            Eigen::Vector3d l = p.first->getVMem().getCg() - pbl->x_ref; // lever arm
-            Eigen::Vector3d cf = p.second * p.first->normal();           // force coefficient
+            Eigen::Vector3d l = sur->nodes[i]->pos - pbl->x_ref; // lever arm
+            Eigen::Vector3d cf = sur->nLoads[i];                 // normalized force
             Cf -= cf;
             sur->Cm -= (l(pbl->nDim - 1) * cf(0) - l(0) * cf(pbl->nDim - 1)) / (pbl->S_ref * pbl->c_ref); // moment is positive along y (3D) and negative along z (2D) => positive nose-up
         }
         // rotate to flow direction
-        if (pbl->nDim == 2)
-        {
-            sur->Cd = (Cf(0) * cos(pbl->alpha) + Cf(1) * sin(pbl->alpha)) / pbl->S_ref;
-            sur->Cl = (-Cf(0) * sin(pbl->alpha) + Cf(1) * cos(pbl->alpha)) / pbl->S_ref;
-        }
-        else
-        {
-            sur->Cd = (Cf(0) * cos(pbl->alpha) * cos(pbl->beta) + Cf(1) * sin(pbl->beta) + Cf(2) * sin(pbl->alpha) * cos(pbl->beta)) / pbl->S_ref;
-            sur->Cs = (-Cf(0) * cos(pbl->alpha) * sin(pbl->beta) + Cf(1) * cos(pbl->beta) - Cf(2) * sin(pbl->alpha) * sin(pbl->beta)) / pbl->S_ref;
-            sur->Cl = (-Cf(0) * sin(pbl->alpha) + Cf(2) * cos(pbl->alpha)) / pbl->S_ref;
-        }
+        sur->Cd = Cf.dot(dirD);
+        sur->Cs = Cf.dot(dirS);
+        sur->Cl = Cf.dot(dirL);
         // compute total
         Cl += sur->Cl;
         Cd += sur->Cd;
diff --git a/flow/src/wWakeElement.cpp b/flow/src/wWakeElement.cpp
index 6f946f9c3e4cb7966ff7fb11dbaa5b3a0bc076fa..a75ccaa0e91a73580e0dc1c3d8e2dcffc159bf38 100644
--- a/flow/src/wWakeElement.cpp
+++ b/flow/src/wWakeElement.cpp
@@ -24,9 +24,9 @@ using namespace flow;
 
 WakeElement::WakeElement(size_t _no, Element *&_surUpE, Element *&_surLwE,
                          Element *&_volUpE, Element *&_volLwE,
-                         std::vector<std::pair<size_t, Node *>> &_wkN) : wObject(), no(_no),
-                                                                         surUpE(_surUpE), surLwE(_surLwE), volUpE(_volUpE), volLwE(_volLwE),
-                                                                         wkN(_wkN)
+                         std::vector<std::pair<size_t, Node *>> &_wkN)
+    : wObject(), no(_no), surUpE(_surUpE), surLwE(_surLwE),
+      volUpE(_volUpE), volLwE(_volLwE), wkN(_wkN)
 {
     // Sanity checks
     if (surUpE->type() != surLwE->type())
@@ -70,7 +70,6 @@ WakeElement::WakeElement(size_t _no, Element *&_surUpE, Element *&_surLwE,
 
 /**
  * @brief Map the wake nodes to their local volume node indices
- * @authors Adrien Crovato
  */
 void WakeElement::mapNodes()
 {
@@ -95,292 +94,6 @@ void WakeElement::mapNodes()
     }
 }
 
-/**
- * @brief Compute stiffness matrices associated to Wake
- *
- * K_ij = int V*dN_j * V_inf*dN_i dS
- *
- *              3
- *              o
- *             / \
- *            /   \
- *           /     \
- *     1,1' o-x---x-o 2,2'
- *            A   B
- *
- *     I = sum_j w'_j psi_j V_inf dN'_j V_j invJ_j dN_j phi_j dtm(J')_j
- *       = psi sum_j (w'_j V_inf dN'_j V_j invJ_j dN_j dtm(J')_j) phi
- *       = (psi)|1X2 (sum_j K_j)|2X3 (phi)|3X1
- *
- *     K = w'[dxiN'1 detN'1]T(inv([dx/dxi dy/dxi]))[Vinf,x][Vx Vy]inv([dx/dxi dy/dxi])[dxiN1 dxiN2 dxiN3] dtm([dx/dxi' dy/dxi']^2)
- *           [dxiN'1 detN'1] (   ([dx/det dy/det]))[Vinf,y]          ([dx/det dy/det])[detN1 detN2 detN3]
- *
- *             phi1  phi2  phi3
- *     K = 1' [ .     .     .  ]
- *         2' [ .     .     .  ]
- *
- *     NB: K should be eval at GP A and B, for upper and lower element
- *     NB: V = grad(phi)
- * @authors Adrien Crovato
- */
-void WakeElement::buildK(Eigen::MatrixXd &Kup, Eigen::MatrixXd &Klw, std::vector<double> const &u, Eigen::VectorXd const &Vi)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surUpE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    Eigen::MatrixXd const &volUpDff = volUpE->getVCache().getDsf(0);
-    Eigen::MatrixXd const &volLwDff = volLwE->getVCache().getDsf(0);
-    // Get Jacobian
-    Mem &surMem = surUpE->getVMem();
-    Eigen::MatrixXd const &volUpJ = volUpE->getVMem().getJinv(0);
-    Eigen::MatrixXd const &volLwJ = volLwE->getVMem().getJinv(0);
-    // Reset matrices
-    Kup = Eigen::MatrixXd::Zero(nRow, nColUp);
-    Klw = Eigen::MatrixXd::Zero(nRow, nColLw);
-
-    // Volume contribution
-    // dN' (stabilization)
-    Eigen::MatrixXd dN(nRow, nDim);
-    for (size_t i = 0; i < nRow; ++i)
-        for (size_t j = 0; j < nDim; ++j)
-            dN(i, j) = volUpDff(j, vsMap.at(wkN[i].first));
-    Eigen::VectorXd JdNV(nRow);
-    JdNV = dN * volUpJ.transpose() * Vi;
-
-    // A = V*[inv(J)*dN]
-    Eigen::RowVectorXd Aup(nColUp), Alw(nColLw);
-    Aup = volUpE->computeGrad(u, 0).transpose() * volUpJ * volUpDff;
-    Alw = volLwE->computeGrad(u, 0).transpose() * volLwJ * volLwDff;
-
-    // Build
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // N
-        Eigen::VectorXd const &ff = surCache.getSf(k);
-        Eigen::VectorXd N(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            N(i) = 0.5 * sqrt(surMem.getVol()) * JdNV(i) + ff(wkN[i].first);
-
-        // K = K + N*A*w*dtm
-        Kup += N * Aup * surGauss.getW(k) * surMem.getDetJ(k);
-        Klw += N * Alw * surGauss.getW(k) * surMem.getDetJ(k);
-    }
-}
-
-/**
- * @brief Compute stiffness matrices associated to Kutta for Newton-Raphson
- *
- * K_ij = int 2*V*dN_j * V_inf*dN_i dS
- * @authors Adrien Crovato
- */
-void WakeElement::buildNK(Eigen::MatrixXd &Kup, Eigen::MatrixXd &Klw, std::vector<double> const &u, Eigen::VectorXd const &Vi)
-{
-    // Build
-    buildK(Kup, Klw, u, Vi);
-    Kup *= 2.;
-    Klw *= 2.;
-}
-
-/**
- * @brief Compute flux vectors associated to Kutta for Newton-Raphson
- *
- * s_i = int V*V * V_inf*dN_i dS
- * @authors Adrien Crovato
- */
-void WakeElement::buildNs(Eigen::VectorXd &bUp, Eigen::VectorXd &bLw, std::vector<double> const &u, Eigen::VectorXd const &Vi)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surUpE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    Eigen::MatrixXd const &volUpDff = volUpE->getVCache().getDsf(0);
-    // Get Jacobian
-    Mem &surMem = surUpE->getVMem();
-    Eigen::MatrixXd const &volUpJ = volUpE->getVMem().getJinv(0);
-    // Reset matrices
-    bUp = Eigen::VectorXd::Zero(nRow);
-    bLw = Eigen::VectorXd::Zero(nRow);
-
-    // Volume contribution
-    // V_inf & dN' (stabilization)
-    Eigen::MatrixXd dN(nRow, nDim);
-    for (size_t i = 0; i < nRow; ++i)
-        for (size_t j = 0; j < nDim; ++j)
-            dN(i, j) = volUpDff(j, vsMap.at(wkN[i].first));
-    Eigen::VectorXd JdNV(nRow);
-    JdNV = dN * volUpJ.transpose() * Vi;
-
-    // V2
-    double grad2Up = volUpE->computeGrad2(u, 0);
-    double grad2Lw = volLwE->computeGrad2(u, 0);
-
-    // Build
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // N
-        Eigen::VectorXd const &ff = surCache.getSf(k);
-        Eigen::VectorXd N(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            N(i) = 0.5 * sqrt(surMem.getVol()) * JdNV(i) + ff(wkN[i].first);
-
-        // Compute flux vector
-        bUp += N * grad2Up * surGauss.getW(k) * surMem.getDetJ(k);
-        bLw += N * grad2Lw * surGauss.getW(k) * surMem.getDetJ(k);
-    }
-}
-
-/**
- * @brief Compute gradient of residual with respect to angle of attack
- *
- * s_i = int V*V * dV_inf*dN_i dS
- */
-void WakeElement::buildGradientResidualAoA(Eigen::VectorXd &bUp, Eigen::VectorXd &bLw, std::vector<double> const &u, Eigen::VectorXd const &dVi)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surUpE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    Eigen::MatrixXd const &volUpDff = volUpE->getVCache().getDsf(0);
-    // Get Jacobian
-    Mem &surMem = surUpE->getVMem();
-    Eigen::MatrixXd const &volUpJ = volUpE->getVMem().getJinv(0);
-    // Reset matrices
-    bUp = Eigen::VectorXd::Zero(nRow);
-    bLw = Eigen::VectorXd::Zero(nRow);
-
-    // Volume contribution
-    // V_inf & dN' (stabilization)
-    Eigen::MatrixXd dN(nRow, nDim);
-    for (size_t i = 0; i < nRow; ++i)
-        for (size_t j = 0; j < nDim; ++j)
-            dN(i, j) = volUpDff(j, vsMap.at(wkN[i].first));
-    Eigen::VectorXd JdNV(nRow);
-    JdNV = dN * volUpJ.transpose() * dVi;
-    // V2
-    double grad2Up = volUpE->computeGrad2(u, 0);
-    double grad2Lw = volLwE->computeGrad2(u, 0);
-
-    // Build
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // N
-        Eigen::VectorXd N(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            N(i) = 0.5 * sqrt(surMem.getVol()) * JdNV(i);
-
-        // Compute flux vector
-        bUp += N * grad2Up * surGauss.getW(k) * surMem.getDetJ(k);
-        bLw += N * grad2Lw * surGauss.getW(k) * surMem.getDetJ(k);
-    }
-}
-
-/**
- * @brief Compute gradient of residual with respect to angle of attack
- *
- * K_ij = int d(2*V*dN_j * V_inf*dN_i) dS
- *      = sum 2*V*dgrad_phi * V_inf*gradN_i * detJ
- *      + sum grad_phi^2 * dgradN_i detJ
- *      + sum grad_phi^2 * V_inf*gradN_i ddetJ
- */
-void WakeElement::buildGradientResidualMesh(Eigen::MatrixXd &Kup, Eigen::MatrixXd &Klw, Eigen::MatrixXd &Ks, std::vector<double> const &u, Eigen::VectorXd const &vi)
-{
-    // Get shape functions and Gauss points
-    Cache &surCache = surUpE->getVCache();
-    Gauss &surGauss = surCache.getVGauss();
-    Eigen::MatrixXd const &dSfUp = volUpE->getVCache().getDsf(0);
-    // Get Jacobian
-    Mem &surMem = surUpE->getVMem();
-    Mem &volUpMem = volUpE->getVMem();
-    Mem &volLwMem = volLwE->getVMem();
-    Eigen::MatrixXd const &iJUp = volUpMem.getJinv(0);
-    Eigen::MatrixXd const &iJLw = volLwMem.getJinv(0);
-    std::vector<Eigen::MatrixXd> const &dJUp = volUpMem.getGradJ(0);
-    std::vector<Eigen::MatrixXd> const &dJLw = volLwMem.getGradJ(0);
-    // Get surface gradient
-    std::vector<double> const &dSurf = surMem.getGradVol();
-    // Reset matrices
-    Kup = Eigen::MatrixXd::Zero(nRow, nDim * volUpE->nodes.size());
-    Klw = Eigen::MatrixXd::Zero(nRow, nDim * volLwE->nodes.size());
-    Ks = Eigen::MatrixXd::Zero(nRow, nDim * surUpE->nodes.size());
-
-    // Stabilization term
-    double sqSurf = sqrt(surMem.getVol());
-    Eigen::MatrixXd gradN(nRow, nDim);
-    for (size_t i = 0; i < nRow; ++i)
-        for (size_t m = 0; m < nDim; ++m)
-            gradN(i, m) = dSfUp(m, vsMap.at(wkN[i].first));
-    Eigen::VectorXd JgradNV = 0.5 * sqSurf * gradN * iJUp.transpose() * vi;
-    // Gradient of stabilization term
-    std::vector<Eigen::VectorXd> dGradSfV(nDim * volUpE->nodes.size());
-    std::vector<Eigen::VectorXd> dGradSfS(nDim * surUpE->nodes.size());
-    for (size_t i = 0; i < volUpE->nodes.size(); ++i)
-    {
-        for (size_t m = 0; m < nDim; ++m)
-        {
-            size_t idx = i * nDim + m;
-            dGradSfV[idx] = 0.5 * sqSurf * gradN * (-iJUp * dJUp[idx] * iJUp).transpose() * vi; // S * dgradN
-        }
-    }
-    for (size_t i = 0; i < surUpE->nodes.size(); ++i)
-    {
-        for (size_t m = 0; m < nDim; ++m)
-        {
-            size_t idx = i * nDim + m;
-            dGradSfS[idx] = 0.5 * 0.5 / sqSurf * dSurf[idx] * gradN * iJUp.transpose() * vi; // dS * gradN
-        }
-    }
-    // Gradients of potential
-    Eigen::VectorXd dPhiUp = volUpE->computeGrad(u, 0);
-    Eigen::VectorXd dPhiLw = volLwE->computeGrad(u, 0);
-
-    for (size_t k = 0; k < surGauss.getN(); ++k)
-    {
-        // weight and Jacobian determinant
-        double w = surGauss.getW(k);
-        double detJ = surMem.getDetJ(k);
-        std::vector<double> dDetJ;
-        surUpE->buildGradientJ(k, dDetJ);
-        // shape functions and stabilization
-        Eigen::VectorXd const &sf = surCache.getSf(k);
-        Eigen::VectorXd sfp(nRow);
-        for (size_t i = 0; i < nRow; ++i)
-            sfp(i) = sf(wkN[i].first) + JgradNV(i);
-
-        // Build gradient wrt to upper volume nodes
-        for (size_t i = 0; i < volUpE->nodes.size(); ++i)
-        {
-            for (size_t m = 0; m < nDim; ++m)
-            {
-                size_t idx = i * nDim + m;
-                // derivative of velocity jump
-                Kup.col(idx) += w * sfp * 2 * dPhiUp.transpose() * (-iJUp * dJUp[idx] * dPhiUp) * detJ; // (N + gradN') * dgrad_phi^2 * detJ
-                // derivative of stabilization term (volume)
-                Kup.col(idx) += w * dGradSfV[idx] * (dPhiUp.dot(dPhiUp) - dPhiLw.dot(dPhiLw)) * detJ; // dgradN' * grad_phi^2 * detJ
-            }
-        }
-        // Build gradient wrt to lower volume nodes
-        for (size_t i = 0; i < volLwE->nodes.size(); ++i)
-        {
-            for (size_t m = 0; m < nDim; ++m)
-            {
-                size_t idx = i * nDim + m;
-                // derivative of velocity jump
-                Klw.col(idx) += w * sfp * 2 * dPhiLw.transpose() * (-iJLw * dJLw[idx] * dPhiLw) * detJ; // (N + gradN') * dgrad_phi^2 * detJ
-            }
-        }
-        // Build gradient wrt to (upper) surface nodes
-        for (size_t i = 0; i < surUpE->nodes.size(); ++i)
-        {
-            for (size_t m = 0; m < nDim; ++m)
-            {
-                size_t idx = i * nDim + m;
-                // derivative of stabilization term (surface)
-                Ks.col(idx) += w * dGradSfS[idx] * (dPhiUp.dot(dPhiUp) - dPhiLw.dot(dPhiLw)) * detJ; // dgradN' * grad_phi^2 * detJ
-                // derivative of jacobian determinant
-                Ks.col(idx) += w * sfp * (dPhiUp.dot(dPhiUp) - dPhiLw.dot(dPhiLw)) * dDetJ[idx]; // (N + gradN') * grad_phi^2 * ddetJ
-            }
-        }
-    }
-}
-
 void WakeElement::write(std::ostream &out) const
 {
     out << "WakeElement #" << no
diff --git a/flow/src/wWakeElement.h b/flow/src/wWakeElement.h
index 28ba1347aef6a5455c30cac47fa4a8ac71f93ebe..cf531650f1c4e44d57fb33c8c943836901ab6195 100644
--- a/flow/src/wWakeElement.h
+++ b/flow/src/wWakeElement.h
@@ -47,26 +47,21 @@ public:
     tbox::Element *volUpE; ///< Connected volume elements "+" side
     tbox::Element *volLwE; ///< Connected volume elements "-" side
 
-    size_t nRow;                                      ///< number of rows in stiffness matrix
-    size_t nColUp;                                    ///< number of columns in in stiffness matrix "+" side
-    size_t nColLw;                                    ///< number of columns in in stiffness matrix "-" side
+    size_t nRow;   ///< number of rows in stiffness matrix
+    size_t nColUp; ///< number of columns in in stiffness matrix "+" side
+    size_t nColLw; ///< number of columns in in stiffness matrix "-" side
+    size_t nDim;   ///< dimension of volume element
+
     std::vector<std::pair<size_t, tbox::Node *>> wkN; ///< map between local index and contributing wake global nodes
+    std::map<size_t, size_t> vsMap;                   ///< map between surface node and volume node indices
 
     WakeElement(size_t _no, tbox::Element *&_surUpE, tbox::Element *&_surLwE, tbox::Element *&_volUpE, tbox::Element *&_volLwE, std::vector<std::pair<size_t, tbox::Node *>> &_wkN);
 
 #ifndef SWIG
-    void buildK(Eigen::MatrixXd &Kup, Eigen::MatrixXd &Klw, std::vector<double> const &u, Eigen::VectorXd const &Vi);
-    void buildNK(Eigen::MatrixXd &Kup, Eigen::MatrixXd &Klw, std::vector<double> const &u, Eigen::VectorXd const &Vi);
-    void buildNs(Eigen::VectorXd &bUp, Eigen::VectorXd &bLw, std::vector<double> const &u, Eigen::VectorXd const &Vi);
-    void buildGradientResidualAoA(Eigen::VectorXd &bUp, Eigen::VectorXd &bLw, std::vector<double> const &u, Eigen::VectorXd const &dVi);
-    void buildGradientResidualMesh(Eigen::MatrixXd &Kup, Eigen::MatrixXd &Klw, Eigen::MatrixXd &Ks, std::vector<double> const &u, Eigen::VectorXd const &vi);
     virtual void write(std::ostream &out) const override;
 #endif
 
 private:
-    size_t nDim;                    ///< dimension of volume element
-    std::map<size_t, size_t> vsMap; ///< map between surface node and volume node indices
-
     void mapNodes();
 };
 
diff --git a/flow/src/wWakeResidual.cpp b/flow/src/wWakeResidual.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..376db08fa30d573caddfc7a0975a2fbe1b2dee8a
--- /dev/null
+++ b/flow/src/wWakeResidual.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wWakeResidual.h"
+#include "wWakeElement.h"
+
+#include "wElement.h"
+#include "wCache.h"
+#include "wGauss.h"
+#include "wMem.h"
+
+using namespace tbox;
+using namespace flow;
+
+/**
+ * @brief Build the residual matrix for a fixed-point iteration, on one wake element
+ *
+ * A_u = \int (psi + grad_psi*v_inf) * v_u*grad_phi_u dS
+ * A_l = \int (psi + grad_psi*v_inf) * v_l*grad_phi_l dS
+ * NB: A_u and A_l will be assembled on same row DOF, but on different columns
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> WakeResidual::buildFixed(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi)
+{
+    // Get shape functions and Gauss points
+    Cache &sCache = we.surUpE->getVCache();
+    Gauss &sGauss = sCache.getVGauss();
+    Eigen::MatrixXd const &vUpDsf = we.volUpE->getVCache().getDsf(0);
+    Eigen::MatrixXd const &vLwDsf = we.volLwE->getVCache().getDsf(0);
+    // Get Jacobian
+    Mem &sMem = we.surUpE->getVMem();
+    Eigen::MatrixXd const &vUpJ = we.volUpE->getVMem().getJinv(0);
+    Eigen::MatrixXd const &vLwJ = we.volLwE->getVMem().getJinv(0);
+    // Get size
+    size_t nRow = we.nRow;
+    size_t nDim = we.nDim;
+
+    // Stabilization
+    Eigen::MatrixXd dSf(nRow, nDim);
+    for (size_t i = 0; i < nRow; ++i)
+        for (size_t j = 0; j < nDim; ++j)
+            dSf(i, j) = vUpDsf(j, we.vsMap.at(we.wkN[i].first));
+    Eigen::VectorXd dSfp(nRow);
+    dSfp = 0.5 * sqrt(sMem.getVol()) * dSf * vUpJ.transpose() * vi;
+    // Velocity
+    Eigen::RowVectorXd Aup = we.volUpE->computeGrad(phi, 0).transpose() * vUpJ * vUpDsf;
+    Eigen::RowVectorXd Alw = we.volLwE->computeGrad(phi, 0).transpose() * vLwJ * vLwDsf;
+
+    // Build
+    Eigen::MatrixXd Au = Eigen::MatrixXd::Zero(nRow, we.nColUp);
+    Eigen::MatrixXd Al = Eigen::MatrixXd::Zero(nRow, we.nColLw);
+    for (size_t k = 0; k < sGauss.getN(); ++k)
+    {
+        // stabilized shape functions
+        Eigen::VectorXd const &sf = sCache.getSf(k);
+        Eigen::VectorXd sfp(nRow);
+        for (size_t i = 0; i < nRow; ++i)
+            sfp(i) = dSfp(i) + sf(we.wkN[i].first);
+        // wake contribution
+        Au += sfp * Aup * sGauss.getW(k) * sMem.getDetJ(k);
+        Al += sfp * Alw * sGauss.getW(k) * sMem.getDetJ(k);
+    }
+    return std::make_tuple(Au, Al);
+}
+
+/**
+ * @brief Build the residual vector, on one wake element
+ *
+ * b_u = \int (psi + grad_psi*v_inf) * (grad_phi_u)^2 dS
+ * b_l = \int (psi + grad_psi*v_inf) * (grad_phi_l)^2 dS
+ */
+std::tuple<Eigen::VectorXd, Eigen::VectorXd> WakeResidual::build(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi)
+{
+    // Get shape functions and Gauss points
+    Cache &sCache = we.surUpE->getVCache();
+    Gauss &sGauss = sCache.getVGauss();
+    Eigen::MatrixXd const &vUpDsf = we.volUpE->getVCache().getDsf(0);
+    // Get Jacobian
+    Mem &sMem = we.surUpE->getVMem();
+    // Get size
+    size_t nRow = we.nRow;
+    size_t nDim = we.nDim;
+
+    // Stabilization
+    Eigen::MatrixXd dSf(nRow, nDim);
+    for (size_t i = 0; i < nRow; ++i)
+        for (size_t j = 0; j < nDim; ++j)
+            dSf(i, j) = vUpDsf(j, we.vsMap.at(we.wkN[i].first));
+    Eigen::VectorXd dSfp(nRow);
+    dSfp = 0.5 * sqrt(sMem.getVol()) * dSf * we.volUpE->getVMem().getJinv(0).transpose() * vi;
+    // Velocity squared
+    double dPhi2u = we.volUpE->computeGrad2(phi, 0);
+    double dPhi2l = we.volLwE->computeGrad2(phi, 0);
+
+    // Build
+    Eigen::VectorXd bu = Eigen::VectorXd::Zero(nRow);
+    Eigen::VectorXd bl = Eigen::VectorXd::Zero(nRow);
+    for (size_t k = 0; k < sGauss.getN(); ++k)
+    {
+        // stabilized shape functions
+        Eigen::VectorXd const &sf = sCache.getSf(k);
+        Eigen::VectorXd sfp(nRow);
+        for (size_t i = 0; i < nRow; ++i)
+            sfp(i) = dSfp(i) + sf(we.wkN[i].first);
+        // wake contribution
+        bu += sfp * dPhi2u * sGauss.getW(k) * sMem.getDetJ(k);
+        bl += sfp * dPhi2l * sGauss.getW(k) * sMem.getDetJ(k);
+    }
+    return std::make_tuple(bu, bl);
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the flow variable (jacobian), on one wake element
+ *
+ * A_u = \int (psi + grad_psi*v_inf) * 2*(grad_phi_u)*dgrad_phi_u dS
+ * A_l = \int (psi + grad_psi*v_inf) * 2*(grad_phi_l)*dgrad_phi_l dS
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> WakeResidual::buildGradientFlow(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi)
+{
+    // Build
+    Eigen::MatrixXd Au, Al;
+    std::tie(Au, Al) = WakeResidual::buildFixed(we, phi, vi);
+    return std::make_tuple(2* Au, 2* Al);
+}
+
+/**
+ * @brief Build the gradient of the residual vector with respect to the nodes, on one wake element
+ *
+ * A = d( \int (psi + grad_psi*v_inf) * (grad_phi)^2 dS )
+ *   = \int dgrad_psi*v_inf * (grad_phi)^2 dS
+ *   + \int (psi + grad_psi*v_inf) * d(grad_phi)^2 dS
+ *   + \int (psi + grad_psi*v_inf) * (grad_phi)^2 ddS
+ */
+std::tuple<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::MatrixXd> WakeResidual::buildGradientMesh(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi)
+{
+    // Get shape functions and Gauss points
+    Cache &sCache = we.surUpE->getVCache();
+    Gauss &surGauss = sCache.getVGauss();
+    Eigen::MatrixXd const &dSfUp = we.volUpE->getVCache().getDsf(0);
+    // Get Jacobian
+    Mem &sMem = we.surUpE->getVMem();
+    Mem &vUpMem = we.volUpE->getVMem();
+    Mem &vLwMem = we.volLwE->getVMem();
+    Eigen::MatrixXd const &iJUp = vUpMem.getJinv(0);
+    Eigen::MatrixXd const &iJLw = vLwMem.getJinv(0);
+    std::vector<Eigen::MatrixXd> const &dJUp = vUpMem.getGradJ(0);
+    std::vector<Eigen::MatrixXd> const &dJLw = vLwMem.getGradJ(0);
+    // Get surface gradient
+    std::vector<double> const &dSurf = sMem.getGradVol();
+    // Get size
+    size_t nRow = we.nRow;
+    size_t nDim = we.nDim;
+    size_t nColUp = we.nColUp;
+    size_t nColLw = we.nColLw;
+    size_t nSur = we.surUpE->nodes.size();
+
+    // Stabilization term
+    double sqSurf = sqrt(sMem.getVol());
+    Eigen::MatrixXd dSf(nRow, nDim);
+    for (size_t i = 0; i < nRow; ++i)
+        for (size_t m = 0; m < nDim; ++m)
+            dSf(i, m) = dSfUp(m, we.vsMap.at(we.wkN[i].first));
+    Eigen::VectorXd gradSfp = 0.5 * sqSurf * dSf * iJUp.transpose() * vi;
+    // Gradient of stabilization term (volume and surface)
+    std::vector<Eigen::VectorXd> dGradSfV(nDim * nColUp);
+    for (size_t i = 0; i < nColUp; ++i)
+    {
+        for (size_t m = 0; m < nDim; ++m)
+        {
+            size_t idx = i * nDim + m;
+            dGradSfV[idx] = 0.5 * sqSurf * dSf * (-iJUp * dJUp[idx] * iJUp).transpose() * vi; // S * dgrad_psi
+        }
+    }
+    std::vector<Eigen::VectorXd> dGradSfS(nDim * nSur);
+    for (size_t i = 0; i < nSur; ++i)
+    {
+        for (size_t m = 0; m < nDim; ++m)
+        {
+            size_t idx = i * nDim + m;
+            dGradSfS[idx] = 0.5 * 0.5 / sqSurf * dSurf[idx] * dSf * iJUp.transpose() * vi; // dS * grad_psi
+        }
+    }
+    // Gradients of potential
+    Eigen::VectorXd dPhiUp = we.volUpE->computeGrad(phi, 0);
+    Eigen::VectorXd dPhiLw = we.volLwE->computeGrad(phi, 0);
+
+    // Build
+    Eigen::MatrixXd Au = Eigen::MatrixXd::Zero(nRow, nDim * nColUp);
+    Eigen::MatrixXd Al = Eigen::MatrixXd::Zero(nRow, nDim * nColLw);
+    Eigen::MatrixXd As = Eigen::MatrixXd::Zero(nRow, nDim * nSur);
+    for (size_t k = 0; k < surGauss.getN(); ++k)
+    {
+        // weight and Jacobian determinant
+        double w = surGauss.getW(k);
+        double detJ = sMem.getDetJ(k);
+        std::vector<double> dDetJ = sMem.getGradDetJ(k);
+        // shape functions and stabilization
+        Eigen::VectorXd const &sf = sCache.getSf(k);
+        Eigen::VectorXd sfp(nRow);
+        for (size_t i = 0; i < nRow; ++i)
+            sfp(i) = sf(we.wkN[i].first) + gradSfp(i);
+
+        // Build gradient wrt to upper volume nodes
+        for (size_t i = 0; i < nColUp; ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                size_t idx = i * nDim + m;
+                // gradient of velocity jump
+                Au.col(idx) += w * sfp * 2 * dPhiUp.transpose() * (-iJUp * dJUp[idx] * dPhiUp) * detJ; // (psi + grad_psi) * dgrad_phi^2 * detJ
+                // gradient of stabilization term (volume)
+                Au.col(idx) += w * dGradSfV[idx] * (dPhiUp.dot(dPhiUp) - dPhiLw.dot(dPhiLw)) * detJ; // dgrad_psi' * grad_phi^2 * detJ
+            }
+        }
+        // Build gradient wrt to lower volume nodes
+        for (size_t i = 0; i < nColLw; ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                size_t idx = i * nDim + m;
+                // gradient of velocity jump
+                Al.col(idx) += w * sfp * 2 * dPhiLw.transpose() * (-iJLw * dJLw[idx] * dPhiLw) * detJ; // (psi + grad_psi) * dgrad_phi^2 * detJ
+            }
+        }
+        // Build gradient wrt to (upper) surface nodes
+        for (size_t i = 0; i < nSur; ++i)
+        {
+            for (size_t m = 0; m < nDim; ++m)
+            {
+                size_t idx = i * nDim + m;
+                // gradient of stabilization term (surface)
+                As.col(idx) += w * dGradSfS[idx] * (dPhiUp.dot(dPhiUp) - dPhiLw.dot(dPhiLw)) * detJ; // dgrad_psi * grad_phi^2 * detJ
+                // gradient of jacobian determinant
+                As.col(idx) += w * sfp * (dPhiUp.dot(dPhiUp) - dPhiLw.dot(dPhiLw)) * dDetJ[idx]; // (N + grad_psi) * grad_phi^2 * ddetJ
+            }
+        }
+    }
+    return std::make_tuple(Au, Al, As);
+}
diff --git a/flow/src/wWakeResidual.h b/flow/src/wWakeResidual.h
new file mode 100644
index 0000000000000000000000000000000000000000..fc536849ef6c9931c1494d9ba7b937dfbe057c26
--- /dev/null
+++ b/flow/src/wWakeResidual.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2020 University of Liège
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WWAKERESIDUAL_H
+#define WWAKERESIDUAL_H
+
+#include "flow.h"
+
+#include <vector>
+#include <Eigen/Dense>
+
+namespace flow
+{
+
+/**
+ * @brief Formulation of wake boundary residuals
+ */
+class FLOW_API WakeResidual
+{
+public:
+    // Picard
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> buildFixed(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi);
+    // Newton
+    static std::tuple<Eigen::VectorXd, Eigen::VectorXd> build(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi);
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> buildGradientFlow(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi);
+    // Adjoint
+    static std::tuple<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::MatrixXd> buildGradientMesh(WakeElement const &we, std::vector<double> const &phi, Eigen::VectorXd const &vi);
+};
+
+} // namespace flow
+#endif //WWAKERESIDUAL_H
diff --git a/flow/tests/lift.py b/flow/tests/lift.py
index 5a9f4959c5a7ed682612ce5182871df3ad2df8c0..a5757ec99e360e07d019d7bec543e87e9b521490 100644
--- a/flow/tests/lift.py
+++ b/flow/tests/lift.py
@@ -89,13 +89,16 @@ def main():
     if M_inf == 0 and alpha == 3*math.pi/180:
         tests.add(CTest('min(Cp)', min(Cp[:,3]), -1.1, 1.5e-1))
         tests.add(CTest('Cl', solver.Cl, 0.36, 5e-2))
+        tests.add(CTest('Cm', solver.Cm, 0.0, 1e-2))
     elif M_inf == 0.6 and alpha == 2*math.pi/180:
         tests.add(CTest('min(Cp)', min(Cp[:,3]), -1.03, 1e-1))
         tests.add(CTest('Cl', solver.Cl, 0.315, 5e-2))
+        tests.add(CTest('Cm', solver.Cm, 0.0, 1e-2))
     elif M_inf == 0.7 and alpha == 2*math.pi/180:
         tests.add(CTest('iteration count', solver.nIt, 12, 3, forceabs=True))
         tests.add(CTest('min(Cp)', min(Cp[:,3]), -1.28, 5e-2))
         tests.add(CTest('Cl', solver.Cl, 0.388, 5e-2))
+        tests.add(CTest('Cm', solver.Cm, 0.0, 1e-2))
     else:
         raise Exception('Test not defined for this flow')
     tests.run()
diff --git a/flow/tests/lift3.py b/flow/tests/lift3.py
index cc0bd6319838d39063c301c6e0c2f156b6c65740..da872d0f38d0ff14e575d9d0e6b214c3c25ff771 100644
--- a/flow/tests/lift3.py
+++ b/flow/tests/lift3.py
@@ -89,6 +89,8 @@ def main():
         tests.add(CTest('iteration count', solver.nIt, 3, 1, forceabs=True))
         tests.add(CTest('CL', solver.Cl, 0.135, 5e-2))
         tests.add(CTest('CD', solver.Cd, 0.0062, 1e-2)) # Tranair (NF=0.0062, FF=0.0030), Panair 0.0035
+        tests.add(CTest('CS', solver.Cs, 0.012, 1e-2))
+        tests.add(CTest('CM', solver.Cm, -0.0278, 1e-2))
     else:
         raise Exception('Test not defined for this flow')
     tests.run()
diff --git a/flow/tests/meshDef.py b/flow/tests/meshDef.py
index 411b0c6ba522e69a702e8f6d7a99499c0c9542a8..d258e5a6d57463e480382e8ff2ac17601d16879d 100644
--- a/flow/tests/meshDef.py
+++ b/flow/tests/meshDef.py
@@ -64,7 +64,7 @@ def main():
 
     # set the problem and add medium, initial/boundary conditions
     tms['pre'].start()
-    pbl, _, _, bnd, _ = floD.problem(msh, dim, alpha0, 0., M_inf, M_crit, c_ref, c_ref, xc, 0., 0., 'airfoil', te = 'te')
+    pbl, _, _, bnd, _ = floD.problem(msh, dim, alpha0, 0., M_inf, M_crit, c_ref, c_ref, xc+dx, dy, 0., 'airfoil', te = 'te')
     # set the refrence problem and add medium, initial/boundary conditions
     pbl_ref, _, _, bnd_ref, _ = floD.problem(msh_ref, dim, alpha0, 0., M_inf, M_crit, c_ref, c_ref, xc, 0., 0., 'airfoil', te = 'te')
     tms['pre'].stop()
@@ -121,7 +121,6 @@ def main():
     print('  case        M    alpha       Cl       Cd       Cm')
     print('  true {0:8.2f} {1:8.1f} {2:8.4f} {3:8.4f} {4:8.4f}'.format(M_inf, alfa*180/np.pi, solver.Cl, solver.Cd, solver.Cm))
     print('   ref {0:8.2f} {1:8.1f} {2:8.4f} {3:8.4f} {4:8.4f}'.format(M_inf, alfa*180/np.pi, solver_ref.Cl, solver_ref.Cd, solver_ref.Cm))
-    print('Cm will differ unless dx and dy are set to 0...')
 
     # display timers
     tms["total"].stop()
@@ -140,6 +139,7 @@ def main():
     tests.add(CTest('solver_ref: iteration count', solver_ref.nIt, 5, 1, forceabs=True))
     tests.add(CTest('min(Cp)', min(Cp[:,3]), min(Cp_ref[:,3]), 5e-2))
     tests.add(CTest('Cl', solver.Cl, solver_ref.Cl, 5e-2))
+    tests.add(CTest('Cm', solver.Cm, solver_ref.Cm, 5e-2))
     tests.run()
 
     # eof
diff --git a/flow/tests/meshDef3.py b/flow/tests/meshDef3.py
index 1b2d95024a01d3923cebbd576d4890cd9817bd2f..ab3fd22a414d8e73068057038add0778bfe531e5 100644
--- a/flow/tests/meshDef3.py
+++ b/flow/tests/meshDef3.py
@@ -124,6 +124,8 @@ def main():
         tests.add(CTest('iteration count', solver.nIt, 3, 1, forceabs=True))
         tests.add(CTest('CL', solver.Cl, 0.135, 5e-1))
         tests.add(CTest('CD', solver.Cd, 0.0062, 1e-2)) # Tranair (NF=0.0062, FF=0.0030), Panair 0.0035
+        tests.add(CTest('CS', solver.Cs, -0.011, 1e-2))
+        tests.add(CTest('CM', solver.Cm, 0.0061, 1e-2))
     tests.run()
 
     # eof
diff --git a/flow/tests/nonlift.py b/flow/tests/nonlift.py
index 450008c84c13c75cd36004ce8de9aea6c0a2d44c..ef6c22cc69e265943b129c1c30cb17aff91ba821 100644
--- a/flow/tests/nonlift.py
+++ b/flow/tests/nonlift.py
@@ -96,7 +96,8 @@ def main():
         tests.add(CTest('min(Cp)', min(Cp[:,3]), -0.89, 5e-2))
     else:
         raise Exception('Test not defined for this flow')
-    tests.add(CTest('Cl', solver.Cl, 0., 5e-2))
+    tests.add(CTest('Cl', solver.Cl, 0., 1e-2))
+    tests.add(CTest('Cm', solver.Cm, 0., 1e-2))
     tests.run()
 
     # eof
diff --git a/tbox/src/wElement.cpp b/tbox/src/wElement.cpp
index a1b3d5615ba43ad9df9d2dec4ed60ac5a0e8adfb..303f6efad44a566e5b885c2d1c6da4d4627f5e52 100644
--- a/tbox/src/wElement.cpp
+++ b/tbox/src/wElement.cpp
@@ -205,10 +205,6 @@ Eigen::MatrixXd Element::buildK(std::vector<double> const &u, Fct2 const &fct, b
 {
     throw std::runtime_error("Element::buildK not implemented\n");
 }
-Eigen::MatrixXd Element::buildDK(std::vector<double> const &u, Fct0 const &fct)
-{
-    throw std::runtime_error("Element::buildNK not implemented\n");
-}
 //---------------------
 Eigen::MatrixXd Element::buildK_mechanical(Eigen::MatrixXd const &H)
 {
@@ -265,19 +261,15 @@ Eigen::VectorXd Element::builds(std::vector<double> const &u, Fct1 const &f)
 {
     throw std::runtime_error("Element::builds not implemented\n");
 }
-Eigen::VectorXd Element::buildDs(std::vector<double> const &u, Fct0 const &f)
-{
-    throw std::runtime_error("Element::buildNs not implemented\n");
-}
 Eigen::MatrixXd Element::build(MATTYPE type)
 {
     throw std::runtime_error("Element::build not implemented\n");
 }
-Eigen::VectorXd Element::computeGrad(std::vector<double> const &u, size_t k)
+Eigen::VectorXd Element::computeGrad(std::vector<double> const &u, size_t k) const
 {
     throw std::runtime_error("Element::computeGrad not implemented\n");
 }
-double Element::computeGrad2(std::vector<double> const &u, size_t k)
+double Element::computeGrad2(std::vector<double> const &u, size_t k) const
 {
     throw std::runtime_error("Element::computeGrad2 not implemented\n");
 }
diff --git a/tbox/src/wElement.h b/tbox/src/wElement.h
index 48d60c9252ac06f959b6ff80de8d3220c3b876e5..068f1bece6d8864928828750312b1d058d4bd37d 100644
--- a/tbox/src/wElement.h
+++ b/tbox/src/wElement.h
@@ -94,7 +94,6 @@ public:
     virtual void write(std::ostream &out) const override;
     virtual Eigen::MatrixXd buildK(std::vector<double> const &u, Fct0 const &fct);
     virtual Eigen::MatrixXd buildK(std::vector<double> const &u, Fct2 const &fct, bool fake);
-    virtual Eigen::MatrixXd buildDK(std::vector<double> const &u, Fct0 const &fct);
     //---------------------
     virtual Eigen::MatrixXd buildK_mechanical(Eigen::MatrixXd const &H);
     virtual Eigen::MatrixXd buildS_thermomechanical(Eigen::MatrixXd const &D);
@@ -109,10 +108,9 @@ public:
     virtual Eigen::MatrixXd buildS();
     virtual Eigen::VectorXd builds(std::vector<double> const &u, Fct0 const &f);
     virtual Eigen::VectorXd builds(std::vector<double> const &u, Fct1 const &f);
-    virtual Eigen::VectorXd buildDs(std::vector<double> const &u, Fct0 const &f);
     virtual Eigen::MatrixXd build(MATTYPE type);
-    virtual Eigen::VectorXd computeGrad(std::vector<double> const &u, size_t k);
-    virtual double computeGrad2(std::vector<double> const &u, size_t k);
+    virtual Eigen::VectorXd computeGrad(std::vector<double> const &u, size_t k) const;
+    virtual double computeGrad2(std::vector<double> const &u, size_t k) const;
     virtual Eigen::VectorXd computeqV(std::vector<double> const &u, Fct2 const &fct);
     virtual Eigen::VectorXd computeqint(std::vector<double> const &u, Fct2 const &fct);
     virtual double computeV(std::vector<double> const &u, Fct0 const &fct);
diff --git a/tbox/src/wFct0.cpp b/tbox/src/wFct0.cpp
index 4b3baad31b7754d8e74a73477a616f489218d040..b0a7b0dc334be0a68182e6ff2885ea34927e82b3 100644
--- a/tbox/src/wFct0.cpp
+++ b/tbox/src/wFct0.cpp
@@ -66,7 +66,7 @@ void PwLf::write(std::ostream &out) const
 
 //-------------------------------------------------------------------------
 
-double Fct0::evalD(Element &e, std::vector<double> const &u, size_t k) const
+double Fct0::evalD(Element const &e, std::vector<double> const &u, size_t k) const
 {
     throw std::runtime_error("Fct0::evalD not implemented\n");
 }
@@ -81,7 +81,7 @@ void Fct0C::write(std::ostream &out) const
 //-------------------------------------------------------------------------
 
 double
-Fct0XYZ::eval(Element &e, std::vector<double> const &u, size_t k) const
+Fct0XYZ::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     Cache &cache = e.getVCache();
     Eigen::VectorXd const &ff = cache.getSf(k);
@@ -102,7 +102,7 @@ void Fct0XYZ::write(std::ostream &out) const
 //-------------------------------------------------------------------------
 
 double
-Fct0U::eval(Element &e, std::vector<double> const &u, size_t k) const
+Fct0U::eval(Element const &e, std::vector<double> const &u, size_t k) const
 {
     Cache &cache = e.getVCache();
     Eigen::VectorXd const &ff = cache.getSf(k);
diff --git a/tbox/src/wFct0.h b/tbox/src/wFct0.h
index a46d59cb24f504314bccb68273dcc3ade6d166d1..81da6cd4156c4ce04b8660aa55f1c087d675a06b 100644
--- a/tbox/src/wFct0.h
+++ b/tbox/src/wFct0.h
@@ -59,10 +59,10 @@ public:
     }
 
 #ifndef SWIG
-    virtual double eval(Element &e,
+    virtual double eval(Element const &e,
                         std::vector<double> const &u,
                         size_t k) const = 0;
-    virtual double evalD(Element &e,
+    virtual double evalD(Element const &e,
                          std::vector<double> const &u,
                          size_t k) const;
 #endif
@@ -81,13 +81,13 @@ public:
     void update(double _v) { v = _v; }
 
 #ifndef SWIG
-    virtual double eval(Element &e,
+    virtual double eval(Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override
     {
         return v;
     }
-    virtual double evalD(Element &e,
+    virtual double evalD(Element const &e,
                          std::vector<double> const &u,
                          size_t k) const override
     {
@@ -107,7 +107,7 @@ public:
     virtual double eval(Eigen::Vector3d const &pos) const = 0;
 
 #ifndef SWIG
-    virtual double eval(Element &e,
+    virtual double eval(Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
     virtual void write(std::ostream &out) const override;
@@ -124,7 +124,7 @@ public:
     virtual double eval(double u) const = 0;
 
 #ifndef SWIG
-    virtual double eval(Element &e,
+    virtual double eval(Element const &e,
                         std::vector<double> const &u,
                         size_t k) const override;
     virtual void write(std::ostream &out) const override;
diff --git a/tbox/src/wFct1.h b/tbox/src/wFct1.h
index 0edd85849a14bdd95a99518ef7a38f33b9e38505..439747f2bf3e4dc23a3e381466009067f20aa230 100644
--- a/tbox/src/wFct1.h
+++ b/tbox/src/wFct1.h
@@ -44,9 +44,12 @@ public:
     }
 
 #ifndef SWIG
-    virtual Eigen::Vector3d eval(Element &e,
+    virtual Eigen::Vector3d eval(Element const &e,
                                  std::vector<double> const &u,
                                  size_t k) const = 0;
+    virtual Eigen::Vector3d evalD(tbox::Element const &e,
+                                  std::vector<double> const &u,
+                                  size_t k) const = 0;
 #endif
 };
 
@@ -67,12 +70,18 @@ public:
     }
 
 #ifndef SWIG
-    virtual Eigen::Vector3d eval(Element &e,
+    virtual Eigen::Vector3d eval(Element const &e,
                                  std::vector<double> const &u,
                                  size_t k) const override
     {
         return v;
     }
+    virtual Eigen::Vector3d evalD(tbox::Element const &e,
+                                  std::vector<double> const &u,
+                                  size_t k) const override
+    {
+        return Eigen::Vector3d::Zero();
+    }
     virtual void write(std::ostream &out) const override;
 #endif
 };
diff --git a/tbox/src/wFct2.cpp b/tbox/src/wFct2.cpp
index b510d11475606713a843e6a86dd203982f9250ec..4b83977ab0b2b536087ca74f0f56d235a52e08d1 100644
--- a/tbox/src/wFct2.cpp
+++ b/tbox/src/wFct2.cpp
@@ -35,7 +35,7 @@ Fct2C::Fct2C(double _v11, double _v22, double _v12) : Fct2()
         throw std::runtime_error("det(Fct2C)<0!");
 }
 
-void Fct2C::eval(Element &e,
+void Fct2C::eval(Element const &e,
                  std::vector<double> const &u,
                  size_t k, Eigen::MatrixXd &out, bool fake) const
 {
@@ -49,7 +49,7 @@ void Fct2C::write(std::ostream &out) const
 
 //-------------------------------------------------------------------------
 
-void Fct2XYZ::eval(Element &e,
+void Fct2XYZ::eval(Element const &e,
                    std::vector<double> const &u,
                    size_t k, Eigen::MatrixXd &out, bool fake) const
 {
@@ -71,7 +71,7 @@ void Fct2XYZ::write(std::ostream &out) const
 
 //-------------------------------------------------------------------------
 
-void Fct2U::eval(Element &e,
+void Fct2U::eval(Element const &e,
                  std::vector<double> const &u,
                  size_t k, Eigen::MatrixXd &out, bool fake) const
 {
@@ -93,7 +93,7 @@ void Fct2U::write(std::ostream &out) const
 
 //-------------------------------------------------------------------------
 
-void Fct2UdU::eval(Element &e,
+void Fct2UdU::eval(Element const &e,
                    std::vector<double> const &u,
                    size_t k, Eigen::MatrixXd &out, bool fake) const
 {
diff --git a/tbox/src/wFct2.h b/tbox/src/wFct2.h
index 002a2188ca7686c3f9eaa2dc78dcfea589dc1a2d..4b902552555a0b2e521b3ddbae3f8ccd44afa526 100644
--- a/tbox/src/wFct2.h
+++ b/tbox/src/wFct2.h
@@ -45,7 +45,7 @@ public:
 
     virtual void evalall() {}
 #ifndef SWIG
-    virtual void eval(Element &e,
+    virtual void eval(Element const &e,
                       std::vector<double> const &u,
                       size_t k, Eigen::MatrixXd &out, bool fake) const = 0;
 #endif
@@ -62,7 +62,7 @@ public:
     Fct2C(double _v11, double _v22, double _v12);
 
 #ifndef SWIG
-    virtual void eval(Element &e,
+    virtual void eval(Element const &e,
                       std::vector<double> const &u,
                       size_t k, Eigen::MatrixXd &out, bool fake) const override;
     virtual void write(std::ostream &out) const override;
@@ -78,7 +78,7 @@ public:
     virtual void eval(Eigen::Vector3d const &pos, Eigen::MatrixXd &v, bool fake) const = 0;
 
 #ifndef SWIG
-    virtual void eval(Element &e,
+    virtual void eval(Element const &e,
                       std::vector<double> const &u,
                       size_t k, Eigen::MatrixXd &out, bool fake) const override;
     virtual void write(std::ostream &out) const override;
@@ -94,7 +94,7 @@ public:
     virtual void eval(double u, Eigen::MatrixXd &v, bool fake) const = 0;
 
 #ifndef SWIG
-    virtual void eval(Element &e,
+    virtual void eval(Element const &e,
                       std::vector<double> const &u,
                       size_t k, Eigen::MatrixXd &out, bool fake) const override;
     virtual void write(std::ostream &out) const override;
@@ -106,10 +106,10 @@ public:
 class TBOX_API Fct2UdU : public Fct2
 {
 public:
-    virtual void eval(Element &e, size_t k, double u, Eigen::VectorXd const &gradu,
+    virtual void eval(Element const &e, size_t k, double u, Eigen::VectorXd const &gradu,
                       Eigen::MatrixXd &v, bool fake) const = 0;
 #ifndef SWIG
-    virtual void eval(Element &e,
+    virtual void eval(Element const &e,
                       std::vector<double> const &u,
                       size_t k, Eigen::MatrixXd &out, bool fake) const override;
     virtual void write(std::ostream &out) const override;
diff --git a/tbox/src/wLine2.cpp b/tbox/src/wLine2.cpp
index 986904ef6cb4d0cf3584eac14d14173d87bc4ebe..9e34ca2267eda0fc1777103aa84a6d80b1f8db93 100644
--- a/tbox/src/wLine2.cpp
+++ b/tbox/src/wLine2.cpp
@@ -203,9 +203,9 @@ Eigen::VectorXd Line2::builds(std::vector<double> const &u, Fct1 const &f)
  *
  * grad = (Delta_U / Delta_L)^2
  */
-double Line2::computeGrad2(std::vector<double> const &u, size_t k)
+double Line2::computeGrad2(std::vector<double> const &u, size_t k) const
 {
-    double dUdL = (u[nodes[1]->row] - u[nodes[0]->row]) / computeV(); // V = length of the element
+    double dUdL = (u[nodes[1]->row] - u[nodes[0]->row]) / getMem().getVol(); // V = length of the element
     return dUdL * dUdL;
 }
 
diff --git a/tbox/src/wLine2.h b/tbox/src/wLine2.h
index 400d87dac3f26eecdccf2fd9ff100c36a0e1d0cb..4e033f3e3e902153a0012ea4098c4a47f1a2a9cf 100644
--- a/tbox/src/wLine2.h
+++ b/tbox/src/wLine2.h
@@ -43,7 +43,7 @@ class TBOX_API Line2 : public Element
     virtual Eigen::VectorXd builds(std::vector<double> const &u, Fct0 const &f) override;
     virtual Eigen::VectorXd builds(std::vector<double> const &u, Fct1 const &f) override;
 
-    virtual double computeGrad2(std::vector<double> const &u, size_t k) override;
+    virtual double computeGrad2(std::vector<double> const &u, size_t k) const override;
     virtual double computeV(std::vector<double> const &u, Fct0 const &fct) override;
     virtual double computeV() override;
 
diff --git a/tbox/src/wMem.inl b/tbox/src/wMem.inl
index 17aa9331098e95c83b3022aee80aea98ab84bf6d..217af82ab85bb06859d1e84cf863141e94d4580b 100644
--- a/tbox/src/wMem.inl
+++ b/tbox/src/wMem.inl
@@ -97,7 +97,7 @@ inline std::vector<double> const &Mem::getGradDetJ(size_t k)
 }
 
 /**
- * @brief Return the gradient of determinant of the Jacobian with respect to node positions at Gauss point k
+ * @brief Return the gradient of the Jacobian matrix with respect to node positions at Gauss point k
  */
 inline std::vector<Eigen::MatrixXd> const &Mem::getGradJ(size_t k)
 {
diff --git a/tbox/src/wTetra4.cpp b/tbox/src/wTetra4.cpp
index fe2e1fca0fc17a338aedcf13089dd2ace39c5c0c..e488be5814bee278b29ed023bd3b1f6afbd2e811 100644
--- a/tbox/src/wTetra4.cpp
+++ b/tbox/src/wTetra4.cpp
@@ -165,50 +165,6 @@ Eigen::MatrixXd Tetra4::buildK(std::vector<double> const &u, Fct0 const &fct)
     return K;
 }
 
-/**
- * @brief Compute (first part of) derivative stiffness matrix for scalar field (Newton method)
- *
- * K_ij(4,4) = int (df du*dN_j du*dN_i) dV
- */
-Eigen::MatrixXd Tetra4::buildDK(std::vector<double> const &u, Fct0 const &fct)
-{
-    CacheTetra4 &cache = getCache(order);
-    GaussTetra4 &gauss = cache.gauss;
-    MemTetra4 &mem = getMem();
-
-    // Gauss integration
-    Eigen::MatrixXd K = Eigen::Matrix4d::Zero();
-    for (size_t k = 0; k < gauss.getN(); ++k)
-    {
-        // Jacobian inverse, shape functions and gradient
-        Eigen::Matrix3d const &J = mem.getJinv(k);
-        Eigen::MatrixXd const &dff = cache.getDsf(k);
-        Eigen::VectorXd gradu = computeGrad(u, k);
-
-        // Elementary stiffness matrix
-        K += (gradu.transpose() * J * dff).transpose() * gradu.transpose() * J * dff * fct.evalD(*this, u, k) * gauss.getW(k) * mem.getDetJ(k);
-    }
-    return K;
-}
-
-/**
- * @brief Compute derivative of flux vector for a scalar field (Newton method)
- *
- * s_i(4) = int f*du*dN_i dV
- */
-Eigen::VectorXd Tetra4::buildDs(std::vector<double> const &u, Fct0 const &f)
-{
-    CacheTetra4 &cache = getCache(order);
-    GaussTetra4 &gauss = cache.gauss;
-    MemTetra4 &mem = getMem();
-
-    // Gauss integration
-    Eigen::VectorXd s = Eigen::Vector4d::Zero();
-    for (size_t k = 0; k < gauss.getN(); ++k)
-        s += computeGrad(u, k).transpose() * mem.getJinv(k) * cache.getDsf(k) * f.eval(*this, u, k) * gauss.getW(k) * mem.getDetJ(k); // elementary flux vector
-    return s;
-}
-
 /**
  * @brief Compute linear elasticity stiffness matrix
  *
@@ -337,7 +293,7 @@ void Tetra4::strain_stress(Eigen::MatrixXd &Strain, Eigen::MatrixXd &Stress, Eig
 /**
  * @brief Compute gradient of a scalar field at Gauss point k
  */
-Eigen::VectorXd Tetra4::computeGrad(std::vector<double> const &u, size_t k)
+Eigen::VectorXd Tetra4::computeGrad(std::vector<double> const &u, size_t k) const
 {
     // Solution vector
     Eigen::Vector4d ue;
@@ -350,7 +306,7 @@ Eigen::VectorXd Tetra4::computeGrad(std::vector<double> const &u, size_t k)
 /**
  * @brief Compute square of gradient of a scalar field at Gauss point k
  */
-double Tetra4::computeGrad2(std::vector<double> const &u, size_t k)
+double Tetra4::computeGrad2(std::vector<double> const &u, size_t k) const
 {
     return computeGrad(u, k).squaredNorm();
 }
diff --git a/tbox/src/wTetra4.h b/tbox/src/wTetra4.h
index 76108f233aac893885e83cccb554629d0fccd189..f524f9c9243994d196274a59768b400e3fb0a6a0 100644
--- a/tbox/src/wTetra4.h
+++ b/tbox/src/wTetra4.h
@@ -42,16 +42,14 @@ class TBOX_API Tetra4 : public Element
 
     virtual Eigen::MatrixXd buildM() override;
     virtual Eigen::MatrixXd buildK(std::vector<double> const &u, Fct0 const &fct) override;
-    virtual Eigen::MatrixXd buildDK(std::vector<double> const &u, Fct0 const &fct) override;
-    virtual Eigen::VectorXd buildDs(std::vector<double> const &u, Fct0 const &f) override;
     virtual Eigen::MatrixXd buildK_mechanical(Eigen::MatrixXd const &H) override;
 
     virtual Eigen::MatrixXd buildS_thermomechanical(Eigen::MatrixXd const &D) override;
     virtual Eigen::MatrixXd buildL_thermal(Eigen::MatrixXd const &C) override;
     virtual void strain_stress(Eigen::MatrixXd &Strain, Eigen::MatrixXd &Stress, Eigen::MatrixXd const &H, std::vector<double> const &u) override;
 
-    virtual Eigen::VectorXd computeGrad(std::vector<double> const &u, size_t k) override;
-    virtual double computeGrad2(std::vector<double> const &u, size_t k) override;
+    virtual Eigen::VectorXd computeGrad(std::vector<double> const &u, size_t k) const override;
+    virtual double computeGrad2(std::vector<double> const &u, size_t k) const override;
     virtual double computeV(std::vector<double> const &u, Fct0 const &fct) override;
     virtual double computeV() override;
 #endif
diff --git a/tbox/src/wTri3.cpp b/tbox/src/wTri3.cpp
index b5a107db15fac483cf2a0e8697d036345533fa8f..2be7fa463b9b24f7b95fa9024294bcf767a2a7c4 100644
--- a/tbox/src/wTri3.cpp
+++ b/tbox/src/wTri3.cpp
@@ -318,32 +318,6 @@ Eigen::MatrixXd Tri3::buildK_mechanical(Eigen::MatrixXd const &H)
     return K;
 }
 
-/**
- * @brief Compute (first part of) derivative stiffness matrix for scalar field (Newton method)
- *
- * K_ij(3,3) = int (df du*dN_j du*dN_i) dV
- */
-Eigen::MatrixXd Tri3::buildDK(std::vector<double> const &u, Fct0 const &fct)
-{
-    CacheTri3 &cache = getCache(order);
-    GaussTri3 &gauss = cache.gauss;
-    MemTri3 &mem = getMem();
-
-    // Gauss integration
-    Eigen::MatrixXd K = Eigen::Matrix3d::Zero();
-    for (size_t k = 0; k < gauss.getN(); ++k)
-    {
-        // Jacobian inverse, shape functions and gradient
-        Eigen::Matrix2d const &J = mem.getJinv(k);
-        Eigen::MatrixXd const &dff = cache.getDsf(k);
-        Eigen::VectorXd gradu = computeGrad(u, k);
-
-        // Elementary stiffness matrix
-        K += (gradu.transpose() * J * dff).transpose() * gradu.transpose() * J * dff * fct.evalD(*this, u, k) * gauss.getW(k) * mem.getDetJ(k);
-    }
-    return K;
-}
-
 /**
  * @brief Compute flux vector for a scalar field
  *
@@ -380,24 +354,6 @@ Eigen::VectorXd Tri3::builds(std::vector<double> const &u, Fct1 const &f)
     return s;
 }
 
-/**
- * @brief Compute derivative of flux vector for a scalar field (Newton method)
- *
- * s_i(3) = int f*du*dN_i dV
- */
-Eigen::VectorXd Tri3::buildDs(std::vector<double> const &u, Fct0 const &f)
-{
-    CacheTri3 &cache = getCache(order);
-    GaussTri3 &gauss = cache.gauss;
-    MemTri3 &mem = getMem();
-
-    // Gauss integration
-    Eigen::VectorXd s = Eigen::Vector3d::Zero();
-    for (size_t k = 0; k < gauss.getN(); ++k)
-        s += computeGrad(u, k).transpose() * mem.getJinv(k) * cache.getDsf(k) * f.eval(*this, u, k) * gauss.getW(k) * mem.getDetJ(k); // elementary flux vector
-    return s;
-}
-
 /**
  * @brief Compute mass matrix
  *
@@ -425,7 +381,7 @@ Eigen::MatrixXd Tri3::buildS()
 /**
  * @brief Compute gradient of a scalar field at Gauss point k
  */
-Eigen::VectorXd Tri3::computeGrad(std::vector<double> const &u, size_t k)
+Eigen::VectorXd Tri3::computeGrad(std::vector<double> const &u, size_t k) const
 {
     // Solution vector
     Eigen::Vector3d ue;
@@ -438,7 +394,7 @@ Eigen::VectorXd Tri3::computeGrad(std::vector<double> const &u, size_t k)
 /**
  * @brief Compute square of gradient of a scalar field at Gauss point k
  */
-double Tri3::computeGrad2(std::vector<double> const &u, size_t k)
+double Tri3::computeGrad2(std::vector<double> const &u, size_t k) const
 {
     return computeGrad(u, k).squaredNorm();
 }
diff --git a/tbox/src/wTri3.h b/tbox/src/wTri3.h
index 7255a33cd1bc7cf21fca3329cf5e71d359ac4de9..bea667503ed240fc0e2ba09a4d214813d8057ccb 100644
--- a/tbox/src/wTri3.h
+++ b/tbox/src/wTri3.h
@@ -47,12 +47,10 @@ class TBOX_API Tri3 : public Element
     virtual Eigen::MatrixXd buildK_mechanical(Eigen::MatrixXd const &H) override;
     virtual Eigen::VectorXd builds(std::vector<double> const &u, Fct0 const &f) override;
     virtual Eigen::VectorXd builds(std::vector<double> const &u, Fct1 const &f) override;
-    virtual Eigen::MatrixXd buildDK(std::vector<double> const &u, Fct0 const &fct) override;
-    virtual Eigen::VectorXd buildDs(std::vector<double> const &u, Fct0 const &f) override;
     virtual Eigen::MatrixXd buildS() override;
 
-    virtual Eigen::VectorXd computeGrad(std::vector<double> const &u, size_t k) override;
-    virtual double computeGrad2(std::vector<double> const &u, size_t k) override;
+    virtual Eigen::VectorXd computeGrad(std::vector<double> const &u, size_t k) const override;
+    virtual double computeGrad2(std::vector<double> const &u, size_t k) const override;
     virtual Eigen::VectorXd computeqV(std::vector<double> const &u, Fct2 const &fct) override;
     virtual Eigen::VectorXd computeqint(std::vector<double> const &u, Fct2 const &fct) override;
     virtual double computeV(std::vector<double> const &u, Fct0 const &fct) override;