diff --git a/dart/interfaces/mphys.py b/dart/interfaces/mphys.py
index 9c6333ced756ec35cca90c717e4f6c70f1c93c2f..8de8b09b753ff25fc10473a4553659d70f4eb121 100644
--- a/dart/interfaces/mphys.py
+++ b/dart/interfaces/mphys.py
@@ -25,6 +25,16 @@
 #           - Solver
 #           - Loads
 #       - AeroCoefficients
+#
+# Remarks:
+# - for each component that uses the openMDAO matrix-free API, the partial
+#   gradients are first computed internally in dart::Adjoint, through
+#   om.ImplicitComponent.linearize() or om.ExplicitComponent.compute_partials(),
+#   and the matrix-vector product is then computed in dart:: Adjoint through
+#   om.ImplicitComponent.apply_linear() or om.ExplicitComponent.compute_jacvec_product().
+#   This allows faster gradient evaluations, at the cost of a reasonable
+#   increase in memory storage, without interfacing any sparse matrices through
+#   python.
 
 import numpy as np
 import openmdao.api as om
@@ -139,7 +149,7 @@ class DartMorpher(om.ImplicitComponent):
         self.adj.linearizeMesh()
 
     def apply_linear(self, inputs, outputs, d_inputs, d_outputs, d_residuals, mode):
-        """Compute the partials derivatives and perform the matrix-vector product
+        """Perform the matrix-vector product using the mesh Jacobian
         """
         if mode == 'rev':
             if 'xv' in d_residuals:
@@ -153,7 +163,7 @@ class DartMorpher(om.ImplicitComponent):
             raise NotImplementedError('DartMorpher - forward mode not implemented!\n')
 
     def solve_linear(self, d_outputs, d_residuals, mode):
-        """Solve for the partials gradients of the mesh residuals
+        """Solve for the mesh residuals using the mesh Jacobian
         """
         if mode == 'rev':
             d_residuals['xv'] = self.adj.solveMesh(d_outputs['xv']) # dRx = dRx_dX^-T * dX
@@ -239,7 +249,7 @@ class DartSolver(om.ImplicitComponent):
         self.adj.linearizeFlow()
 
     def apply_linear(self, inputs, outputs, d_inputs, d_outputs, d_residuals, mode):
-        """Compute the partials derivatives and perform the matrix-vector product
+        """Perform the matrix-vector product using the partial gradients of the flow residuals
         """
         if mode == 'rev':
             if 'phi' in d_residuals:
@@ -253,7 +263,7 @@ class DartSolver(om.ImplicitComponent):
             raise NotImplementedError('DartSolver - forward mode not implemented!\n')
 
     def solve_linear(self, d_outputs, d_residuals, mode):
-        """Solve for the partials gradients of the flow residuals
+        """Solve for the flow residuals using the flow Jacobian
         """
         if mode == 'rev':
             d_residuals['phi'] = self.adj.solveFlow(d_outputs['phi']) # dRu = dRu_dU^-T * dU
@@ -298,15 +308,20 @@ class DartLoads(om.ExplicitComponent):
             for j in range(3):
                 outputs['f_aero'][3 * i + j] = self.qinf * self.bnd.nLoads[i][j]
 
+    def compute_partials(self, inputs, partials):
+        """Compute the partials gradients of the loads
+        """
+        self.adj.linearizeLoads(self.bnd)
+
     def compute_jacvec_product(self, inputs, d_inputs, d_outputs, mode):
-        """Compute the partials derivatives and perform the matrix-vector product
+        """Perform the matrix-vector product using the partial gradients of the loads
         """
         if mode == 'rev':
             if 'f_aero' in d_outputs:
                 if 'xv' in d_inputs:
-                    d_inputs['xv'] += self.qinf * np.asarray(self.adj.computeJacVecLoadsMesh(d_outputs['f_aero'], self.bnd)) # dX = dL_dX^T * dL
+                    d_inputs['xv'] += self.qinf * np.asarray(self.adj.computeJacVecLoadsMesh(d_outputs['f_aero'])) # dX = dL_dX^T * dL
                 if 'phi' in d_inputs:
-                    d_inputs['phi'] += self.qinf * np.asarray(self.adj.computeJacVecLoadsFlow(d_outputs['f_aero'], self.bnd)) # dU = dL_dU^T * dL
+                    d_inputs['phi'] += self.qinf * np.asarray(self.adj.computeJacVecLoadsFlow(d_outputs['f_aero'])) # dU = dL_dU^T * dL
         elif mode == 'fwd':
             raise NotImplementedError('DartLoads - forward mode not implemented!\n')
 
@@ -365,7 +380,7 @@ class DartCoefficients(om.ExplicitComponent):
         outputs['cd'] = self.sol.Cd
 
     def compute_partials(self, inputs, partials):
-        """Compute the partials derivatives
+        """Compute the partials gradients of the load coefficients
         """
         # Gradients wrt to angle of attack
         dCfAoa = self.adj.computeGradientCoefficientsAoa()
diff --git a/dart/src/wAdjoint.cpp b/dart/src/wAdjoint.cpp
index f2dbabaf8fad976591c6093000172ef0804490c5..0750bc08ab009142758781f26385ec47145ed88c 100644
--- a/dart/src/wAdjoint.cpp
+++ b/dart/src/wAdjoint.cpp
@@ -74,7 +74,7 @@ Adjoint::Adjoint(std::shared_ptr<Newton> _sol, std::shared_ptr<tbox::MshDeform>
  *
  * Solve the adjoint steady transonic Full Potential Equation
  * and compute lift and drag sensitivites with respect to angle of attack and
- * mesh coordinates (only for pure aerodynamic problem)
+ * mesh coordinates for aerodynamic optimization (single-discipline)
  */
 void Adjoint::run()
 {
@@ -174,7 +174,7 @@ void Adjoint::run()
 /**
  * @brief Compute the mesh Jacobian
  *
- * J = dRx/dX
+ * J = dRx/dX (x is the vector of mesh coordinates)
  */
 void Adjoint::linearizeMesh()
 {
@@ -202,7 +202,7 @@ std::vector<double> Adjoint::solveMesh(std::vector<double> const &dX)
  * @brief Compute the matrix-vector product of the mesh Jacobian with the mesh residual
  *
  * dX = dRx/dX^T * dRx
- * @todo not tested
+ * Note: only for multi-disciplinary, not tested by run()
  */
 std::vector<double> Adjoint::computeJacVecMesh(std::vector<double> const &dR)
 {
@@ -213,9 +213,9 @@ std::vector<double> Adjoint::computeJacVecMesh(std::vector<double> const &dR)
 /**
  * @brief Compute the partial gradients of the flow residuals
  *
- * dRu_U = dRu / dU (u is the vector of flow variables, i.e. the potential "phi")
- * dRu_A = dRu / dA
- * dRu_X = dRu / dX
+ * dRu_U = dRu/dU (u is the vector of flow variables, i.e. the potential "phi")
+ * dRu_A = dRu/dA (a is the aoa)
+ * dRu_X = dRu/dX (x is the vector of mesh coordinates)
  */
 void Adjoint::linearizeFlow()
 {
@@ -250,7 +250,7 @@ std::vector<double> Adjoint::solveFlow(std::vector<double> const &dU)
  * @brief Compute the matrix-vector product of the flow Jacobian with the flow residuals
  *
  * dU = dRu/dU^T * dRu
- * @todo not tested
+ * Note: only for multi-disciplinary, not tested by run()
  */
 std::vector<double> Adjoint::computeJacVecFlow(std::vector<double> const &dR)
 {
@@ -280,36 +280,45 @@ std::vector<double> Adjoint::computeJacVecFlowMesh(std::vector<double> const &dR
 }
 
 /**
- * @brief Compute the matrix-vector product of the gradients of the loads (wrt flow variables) with the loads, on a given body
+ * @brief Compute the partial gradients of the loads, on a given body
  *
- * dU = dL/dU^T * dU
- * Note: only for FSI, not tested by run()
- * @todo linearize outside (check om)
+ * dL_U = dL/dU (u is the vector of flow variables, ie the potential "phi")
+ * dL_X = dL/dX (x is the vector of mesh coordinates)
+ * Note: only for multi-disciplinary, not tested by run()
  */
-std::vector<double> Adjoint::computeJacVecLoadsFlow(std::vector<double> const &dL, Body const &bnd)
+void Adjoint::linearizeLoads(Body const &bnd)
 {
-    // Build gradients
-    Eigen::SparseMatrix<double, Eigen::RowMajor> dL_U = Eigen::SparseMatrix<double, Eigen::RowMajor>(3 * bnd.nodes.size(), sol->pbl->msh->nodes.size());
+    // Build gradients wrt flow variables
+    dL_U = Eigen::SparseMatrix<double, Eigen::RowMajor>(3 * bnd.nodes.size(), sol->pbl->msh->nodes.size());
     this->buildGradientLoadsFlow(dL_U, bnd);
-    // Compute product
+    // Build gradients wrt to mesh coordinates
+    dL_X = Eigen::SparseMatrix<double, Eigen::RowMajor>(3 * bnd.nodes.size(), sol->pbl->nDim * sol->pbl->msh->nodes.size());
+    this->buildGradientLoadsMesh(dL_X, bnd);
+}
+
+/**
+ * @brief Compute the matrix-vector product of the gradients of the loads (wrt flow variables) with the loads
+ *
+ * dU = dL/dU^T * dU
+ * Note: only for multi-disciplinary, not tested by run()
+ * Note: only for body passed to linearizeLoads()
+ */
+std::vector<double> Adjoint::computeJacVecLoadsFlow(std::vector<double> const &dL)
+{
     Eigen::VectorXd dU = dL_U.transpose() * Eigen::Map<const Eigen::VectorXd>(dL.data(), dL.size());
     return std::vector<double>(dU.data(), dU.data() + dU.size());
 }
 
 /**
- * @brief Compute the matrix-vector product of the gradients of the loads (wrt the mesh coordinates) with the loads, on a given body
+ * @brief Compute the matrix-vector product of the gradients of the loads (wrt the mesh coordinates) with the loads
  *
  * dX = dL/dX^T * dL
- * Note: only for FSI, not tested by run()
- * @todo linearize outside (check om)
+ * Note: only for multi-disciplinary, not tested by run()
+ * Note: only for body passed to linearizeLoads()
  */
-std::vector<double> Adjoint::computeJacVecLoadsMesh(std::vector<double> const &dL, Body const &bnd)
+std::vector<double> Adjoint::computeJacVecLoadsMesh(std::vector<double> const &dL)
 {
-    // Build gradients
-    Eigen::SparseMatrix<double, Eigen::RowMajor> dL_X = Eigen::SparseMatrix<double, Eigen::RowMajor>(3 * bnd.nodes.size(), sol->pbl->nDim * sol->pbl->msh->nodes.size());
-    this->buildGradientLoadsMesh(dL_X, bnd);
-    // Compute product
-    Eigen::VectorXd dX = dL_X.transpose() * Eigen::Map<const Eigen::VectorXd>(dL.data(), dL.size()); // dMsh = dLoads_dMsh^T * dLoads
+    Eigen::VectorXd dX = dL_X.transpose() * Eigen::Map<const Eigen::VectorXd>(dL.data(), dL.size());
     return std::vector<double>(dX.data(), dX.data() + dX.size());
 }
 
diff --git a/dart/src/wAdjoint.h b/dart/src/wAdjoint.h
index dd00b057f290c43e50677b8ce878bbfb77ece6a8..a38c2706e9cac8a4a7af8acb20eda63c095f78c2 100644
--- a/dart/src/wAdjoint.h
+++ b/dart/src/wAdjoint.h
@@ -56,6 +56,8 @@ private:
     Eigen::SparseMatrix<double, Eigen::RowMajor> dRu_U; ///< flow Jacobian
     Eigen::VectorXd dRu_A;                              ///< partial gradients of flow residual wrt aoa
     Eigen::SparseMatrix<double, Eigen::RowMajor> dRu_X; ///< partial gradients of flow residual wrt mesh
+    Eigen::SparseMatrix<double, Eigen::RowMajor> dL_U;  ///< partial gradients of loads wrt flow
+    Eigen::SparseMatrix<double, Eigen::RowMajor> dL_X;  ///< partial gradients of loads wrt mesh
     fwk::Timers tms;                                    ///< internal timers
 
 public:
@@ -76,8 +78,9 @@ public:
     double computeJacVecFlowAoa(std::vector<double> const &dR);
     std::vector<double> computeJacVecFlowMesh(std::vector<double> const &dR);
     // Partial gradients of the loads
-    std::vector<double> computeJacVecLoadsFlow(std::vector<double> const &dLoads, Body const &bnd);
-    std::vector<double> computeJacVecLoadsMesh(std::vector<double> const &dLoads, Body const &bnd);
+    void linearizeLoads(Body const &bnd);
+    std::vector<double> computeJacVecLoadsFlow(std::vector<double> const &dL);
+    std::vector<double> computeJacVecLoadsMesh(std::vector<double> const &dL);
     // Partial gradients of the coefficients
     std::vector<std::vector<double>> computeGradientCoefficientsFlow();
     std::vector<double> computeGradientCoefficientsAoa();
diff --git a/dart/src/wLoadFunctional.cpp b/dart/src/wLoadFunctional.cpp
index a48e3f6d5b6cfbb17d796851ef62e3c3b98db31f..cc1f1107a39f28bb2e96a937c91eafcf514ae81f 100644
--- a/dart/src/wLoadFunctional.cpp
+++ b/dart/src/wLoadFunctional.cpp
@@ -71,7 +71,7 @@ Eigen::MatrixXd LoadFunctional::buildGradientFlow(Element const &e, Element cons
         Eigen::VectorXd sf = cache.getSf(k);
         double wdj = gauss.getW(k) * e.getDetJ(k);
         for (size_t i = 0; i < e.nodes.size(); ++i)
-            A.block(i * 3, 0, 3, eV.nodes.size()) += sf(i) * dfcp * wdj;
+            A.middleRows(i * 3, 3) += sf(i) * dfcp * wdj;
     }
     return A;
 }
diff --git a/dart/src/wNewton.cpp b/dart/src/wNewton.cpp
index 83d4cb2cce04d55d283c1150fdbd4c728b2d4398..72da1a640d43d3f0cf83d8590d4bd88f8c1694fb 100644
--- a/dart/src/wNewton.cpp
+++ b/dart/src/wNewton.cpp
@@ -266,7 +266,7 @@ bool Newton::run()
 
 /**
  * @brief Build the Jacobian (tangent) matrix
- * 
+ *
  * J = dR/dPhi = \int d( (1-mu)*rho + mu*rhoU * grad_phi ) / dPhi * grad_psi dV
  */
 void Newton::buildJac(Eigen::SparseMatrix<double, Eigen::RowMajor> &J)
@@ -398,7 +398,7 @@ void Newton::buildJac(Eigen::SparseMatrix<double, Eigen::RowMajor> &J)
 
 /**
  * @brief Build the residual vector
- * 
+ *
  * R = \int ( (1-mu)*rho + mu*rhoU ) * grad_phi * grad_psi dV
  */
 void Newton::buildRes(Eigen::Map<Eigen::VectorXd> &R)
@@ -519,10 +519,10 @@ void Newton::findUpwd()
         // -> find (cgU-cgE) \dot vel_ closest to one
         double SPbest = -1;
         size_t idx = 0;
-        Eigen::VectorXd cgE = (p.first)->getCg().block(0, 0, pbl->nDim, 1);
+        Eigen::VectorXd cgE = (p.first)->getCg().middleRows(0, pbl->nDim);
         for (size_t i = 0; i < p.second.size(); ++i)
         {
-            Eigen::VectorXd cgU = (p.second[i])->getCg().block(0, 0, pbl->nDim, 1);
+            Eigen::VectorXd cgU = (p.second[i])->getCg().middleRows(0, pbl->nDim);
             double SP = (cgU - cgE).normalized().dot(vel_);
             if (fabs(SP - 1) < fabs(SPbest - 1))
             {
diff --git a/dart/src/wWakeResidual.cpp b/dart/src/wWakeResidual.cpp
index 9316ce39ef92f8de7c9871ce545249ed04fbc736..671eb02674d119592d428eb2959ba23f74edd740 100644
--- a/dart/src/wWakeResidual.cpp
+++ b/dart/src/wWakeResidual.cpp
@@ -52,7 +52,7 @@ std::tuple<Eigen::MatrixXd, Eigen::MatrixXd> WakeResidual::buildFixed(WakeElemen
         for (size_t j = 0; j < nDim; ++j)
             dSf(i, j) = vUpDsf(j, we.vsMap.at(we.wkN[i].first));
     Eigen::VectorXd dSfp(nRow);
-    dSfp = 0.5 * sqrt(we.surUpE->getVol()) * dSf * vUpJ.transpose() * Eigen::Vector3d(1, 0, 0).block(0, 0, nDim, 1);
+    dSfp = 0.5 * sqrt(we.surUpE->getVol()) * dSf * vUpJ.transpose() * Eigen::Vector3d(1, 0, 0).middleRows(0, nDim);
     // Velocity
     Eigen::RowVectorXd Aup = we.volUpE->computeGradient(phi, 0).transpose() * vUpJ * vUpDsf;
     Eigen::RowVectorXd Alw = we.volLwE->computeGradient(phi, 0).transpose() * vLwJ * vLwDsf;
@@ -97,7 +97,7 @@ std::tuple<Eigen::VectorXd, Eigen::VectorXd> WakeResidual::build(WakeElement con
         for (size_t j = 0; j < nDim; ++j)
             dSf(i, j) = vUpDsf(j, we.vsMap.at(we.wkN[i].first));
     Eigen::VectorXd dSfp(nRow);
-    dSfp = 0.5 * sqrt(we.surUpE->getVol()) * dSf * we.volUpE->getJinv(0).transpose() * Eigen::Vector3d(1, 0, 0).block(0, 0, nDim, 1);
+    dSfp = 0.5 * sqrt(we.surUpE->getVol()) * dSf * we.volUpE->getJinv(0).transpose() * Eigen::Vector3d(1, 0, 0).middleRows(0, nDim);
     // Velocity squared
     Eigen::VectorXd dPhiU = we.volUpE->computeGradient(phi, 0);
     Eigen::VectorXd dPhiL = we.volLwE->computeGradient(phi, 0);
@@ -166,7 +166,7 @@ std::tuple<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::MatrixXd> WakeResidual::buil
     size_t nSur = we.surUpE->nodes.size();
 
     // Stabilization term
-    Eigen::VectorXd vi = Eigen::Vector3d(1, 0, 0).block(0, 0, nDim, 1);
+    Eigen::VectorXd vi = Eigen::Vector3d(1, 0, 0).middleRows(0, nDim);
     double sqSurf = sqrt(we.surUpE->getVol());
     Eigen::MatrixXd dSf(nRow, nDim);
     for (size_t i = 0; i < nRow; ++i)