Commit b55caa6a authored by Carlos Mastalli

Merge branch 'topic/fddp' into 'devel'

Added the FDDP solver

See merge request loco-3d/crocoddyl!215
parents b5070f55 46713980
......@@ -63,7 +63,7 @@ Finally you will need to configure your environment variables, e.g.:
* [pinocchio](https://github.com/stack-of-tasks/pinocchio)
* [quadprog](https://pypi.org/project/quadprog/)
* [multicontact-api](https://gepgitlab.laas.fr/loco-3d/multicontact-api)
* [example-robot-data](https://gepgitlab.laas.fr/gepetto/example-robot-data) (optional for running examples)
* [example-robot-data](https://gepgitlab.laas.fr/gepetto/example-robot-data) (optional for running examples, install Python loaders)
* [gepetto-viewer-corba](https://github.com/Gepetto/gepetto-viewer-corba) (optional for running examples and notebooks)
* [jupyter](https://jupyter.org/) (optional for running notebooks)
* [matplotlib](https://matplotlib.org/) (optional for running examples)
......
......@@ -26,6 +26,7 @@
#include "python/crocoddyl/core/activations/quadratic.hpp"
#include "python/crocoddyl/core/activations/weighted-quadratic.hpp"
#include "python/crocoddyl/core/solvers/ddp.hpp"
#include "python/crocoddyl/core/solvers/fddp.hpp"
#include "python/crocoddyl/core/utils/callbacks.hpp"
namespace crocoddyl {
......@@ -49,6 +50,7 @@ void exposeCore() {
exposeActivationQuad();
exposeActivationWeightedQuad();
exposeSolverDDP();
exposeSolverFDDP();
exposeCallbacks();
}
......
///////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (C) 2018-2019, LAAS-CNRS
// Copyright note valid unless otherwise stated in individual files.
// All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#ifndef BINDINGS_PYTHON_CROCODDYL_CORE_SOLVERS_FDDP_HPP_
#define BINDINGS_PYTHON_CROCODDYL_CORE_SOLVERS_FDDP_HPP_
#include "crocoddyl/core/solvers/fddp.hpp"
namespace crocoddyl {
namespace python {
namespace bp = boost::python;
BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolverFDDP_solves, SolverFDDP::solve, 0, 5)
BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolverFDDP_computeDirections, SolverFDDP::computeDirection, 0, 1)
BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolverFDDP_trySteps, SolverFDDP::tryStep, 0, 1)
void exposeSolverFDDP() {
bp::class_<SolverFDDP, bp::bases<SolverDDP> >(
"SolverFDDP",
"FDDP solver.\n\n"
"The FDDP solver computes an optimal trajectory and control commands by iterates\n"
"running backward and forward passes. The backward-pass updates locally the\n"
"quadratic approximation of the problem and computes descent direction,\n"
"and the forward-pass rollouts this new policy by integrating the system dynamics\n"
"along a tuple of optimized control commands U*.\n"
":param shootingProblem: shooting problem (list of action models along trajectory.)",
bp::init<ShootingProblem&>(bp::args(" self", " problem"),
"Initialize the vector dimension.\n\n"
":param problem: shooting problem.")[bp::with_custodian_and_ward<1, 2>()])
.def("solve", &SolverFDDP::solve,
SolverFDDP_solves(
bp::args(" self", " init_xs=[]", " init_us=[]", " maxiter=100", " isFeasible=False", " regInit=None"),
"Compute the optimal trajectory xopt, uopt as lists of T+1 and T terms.\n\n"
"From an initial guess init_xs,init_us (feasible or not), iterate\n"
"over computeDirection and tryStep until stoppingCriteria is below\n"
"threshold. It also describes the globalization strategy used\n"
"during the numerical optimization.\n"
":param init_xs: initial guess for state trajectory with T+1 elements.\n"
":param init_us: initial guess for control trajectory with T elements.\n"
":param maxiter: maximun allowed number of iterations.\n"
":param isFeasible: true if the init_xs are obtained from integrating the init_us (rollout).\n"
":param regInit: initial guess for the regularization value. Very low values are typical\n"
" used with very good guess points (init_xs, init_us).\n"
":returns the optimal trajectory xopt, uopt and a boolean that describes if convergence was reached."))
.def("expectedImprovement", &SolverFDDP::expectedImprovement,
bp::return_value_policy<bp::copy_const_reference>(), bp::args(" self"),
"Return two scalars denoting the quadratic improvement model\n\n"
"For computing the expected improvement, you need to compute first\n"
"the search direction by running computeDirection. The quadratic\n"
"improvement model is described as dV = f_0 - f_+ = d1*a + d2*a**2/2.")
.def("calc", &SolverFDDP::calc, bp::args(" self"),
"Update the Jacobian and Hessian of the optimal control problem\n\n"
"These derivatives are computed around the guess state and control\n"
"trajectory. These trajectory can be set by using setCandidate.\n"
":return the total cost around the guess trajectory.")
.def("backwardPass", &SolverFDDP::backwardPass, bp::args(" self"),
"Run the backward pass (Riccati sweep)\n\n"
"It assumes that the Jacobian and Hessians of the optimal control problem have been\n"
"compute. These terms are computed by running calc.")
.def("forwardPass", &SolverFDDP::forwardPass, bp::args(" self", " stepLength=1"),
"Run the forward pass or rollout\n\n"
"It rollouts the action model give the computed policy (feedfoward terns and feedback\n"
"gains) by the backwardPass. We can define different step lengths\n"
":param stepLength: applied step length (<= 1. and >= 0.)");
}
} // namespace python
} // namespace crocoddyl
#endif // BINDINGS_PYTHON_CROCODDYL_CORE_SOLVERS_FDDP_HPP_
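As a rough illustration of how the class exposed above can be used from Python, here is a minimal sketch. The unicycle action model and ShootingProblem come from other parts of this library, and the exact constructor conventions of this era's bindings (np.matrix vectors, problem built as x0 plus running and terminal models) are assumptions, not part of this merge:

import numpy as np
import crocoddyl

# Toy shooting problem built from the unicycle action model exposed elsewhere
# in this library; the initial state and horizon below are arbitrary.
model = crocoddyl.ActionModelUnicycle()
T = 20
x0 = np.matrix([[1.], [0.], [0.]])  # (x, y, heading)
problem = crocoddyl.ShootingProblem(x0, [model] * T, model)

# Solve with the newly exposed FDDP solver; solve() falls back to its defaults
# (empty warm start, maxiter=100, isFeasible=False).
solver = crocoddyl.SolverFDDP(problem)
converged = solver.solve()
print(converged)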
......@@ -44,7 +44,6 @@ class SolverDDP : public SolverAbstract {
const std::vector<Eigen::VectorXd>& get_k() const;
const std::vector<Eigen::VectorXd>& get_gaps() const;
private:
void computeGains(unsigned int const& t);
void increaseRegularization();
void decreaseRegularization();
......@@ -72,7 +71,6 @@ class SolverDDP : public SolverAbstract {
std::vector<Eigen::VectorXd> k_;
std::vector<Eigen::VectorXd> gaps_;
private:
Eigen::VectorXd xnext_;
Eigen::VectorXd x_reg_;
Eigen::MatrixXd FxTVxx_p_;
......
......@@ -9,6 +9,36 @@
#ifndef CROCODDYL_CORE_SOLVERS_FDDP_HPP_
#define CROCODDYL_CORE_SOLVERS_FDDP_HPP_
// TODO: SolverFDDP
#include <Eigen/Cholesky>
#include <vector>
#include "crocoddyl/core/solvers/ddp.hpp"
namespace crocoddyl {
class SolverFDDP : public SolverDDP {
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
explicit SolverFDDP(ShootingProblem& problem);
~SolverFDDP();
double calc();
void backwardPass();
void forwardPass(const double& stepLength);
const Eigen::Vector2d& expectedImprovement();
void updateExpectedImprovement();
bool solve(const std::vector<Eigen::VectorXd>& init_xs = DEFAULT_VECTOR,
const std::vector<Eigen::VectorXd>& init_us = DEFAULT_VECTOR, unsigned int const& maxiter = 100,
const bool& is_feasible = false, const double& regInit = 1e-9);
protected:
double dg_;
double dq_;
double dv_;
private:
double th_acceptNegStep_;
};
} // namespace crocoddyl
#endif // CROCODDYL_CORE_SOLVERS_FDDP_HPP_
......@@ -9,9 +9,9 @@
#ifndef CROCODDYL_CORE_UTILS_VERSION_HPP_
#define CROCODDYL_CORE_UTILS_VERSION_HPP_
#include "crocoddyl/config.hh"
#include <string>
#include <sstream>
#include "crocoddyl/config.hh"
namespace crocoddyl {
......
......@@ -12,6 +12,7 @@ SET(${PROJECT_NAME}_SOURCES
core/utils/callbacks.cpp
core/optctrl/shooting.cpp
core/solvers/ddp.cpp
core/solvers/fddp.cpp
core/states/euclidean.cpp
core/actions/unicycle.cpp
core/actions/lqr.cpp
......
......@@ -93,7 +93,7 @@ bool SolverDDP::solve(const std::vector<Eigen::VectorXd>& init_xs, const std::ve
}
stoppingCriteria();
const unsigned int& n_callbacks = static_cast<unsigned int>(callbacks_.size());
unsigned int const& n_callbacks = static_cast<unsigned int>(callbacks_.size());
for (unsigned int c = 0; c < n_callbacks; ++c) {
CallbackAbstract& callback = *callbacks_[c];
callback(*this);
......@@ -120,7 +120,7 @@ double SolverDDP::tryStep(const double& steplength) {
double SolverDDP::stoppingCriteria() {
stop_ = 0.;
const unsigned int& T = this->problem_.get_T();
unsigned int const& T = this->problem_.get_T();
for (unsigned int t = 0; t < T; ++t) {
stop_ += Qu_[t].squaredNorm();
}
......@@ -129,7 +129,7 @@ double SolverDDP::stoppingCriteria() {
const Eigen::Vector2d& SolverDDP::expectedImprovement() {
d_.fill(0);
const unsigned int& T = this->problem_.get_T();
unsigned int const& T = this->problem_.get_T();
for (unsigned int t = 0; t < T; ++t) {
d_[0] += Qu_[t].dot(k_[t]);
d_[1] -= k_[t].dot(Quuk_[t]);
......@@ -143,7 +143,7 @@ double SolverDDP::calc() {
const Eigen::VectorXd& x0 = problem_.get_x0();
problem_.running_models_[0]->get_state().diff(xs_[0], x0, gaps_[0]);
const unsigned int& T = problem_.get_T();
unsigned int const& T = problem_.get_T();
for (unsigned int t = 0; t < T; ++t) {
ActionModelAbstract* model = problem_.running_models_[t];
boost::shared_ptr<ActionDataAbstract>& d = problem_.running_datas_[t];
......@@ -218,13 +218,13 @@ void SolverDDP::forwardPass(const double& steplength) {
assert(steplength <= 1. && "Step length has to be <= 1.");
assert(steplength >= 0. && "Step length has to be >= 0.");
cost_try_ = 0.;
const unsigned int& T = problem_.get_T();
unsigned int const& T = problem_.get_T();
for (unsigned int t = 0; t < T; ++t) {
ActionModelAbstract* m = problem_.running_models_[t];
boost::shared_ptr<ActionDataAbstract>& d = problem_.running_datas_[t];
m->get_state().diff(xs_[t], xs_try_[t], dx_[t]);
us_try_[t] = us_[t] - k_[t] * steplength - K_[t] * dx_[t];
us_try_[t].noalias() = us_[t] - k_[t] * steplength - K_[t] * dx_[t];
m->calc(d, xs_try_[t], us_try_[t]);
xs_try_[t + 1] = d->get_xnext();
cost_try_ += d->cost;
......
///////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (C) 2018-2019, LAAS-CNRS
// Copyright note valid unless otherwise stated in individual files.
// All rights reserved.
///////////////////////////////////////////////////////////////////////////////
#include "crocoddyl/core/solvers/fddp.hpp"
namespace crocoddyl {
SolverFDDP::SolverFDDP(ShootingProblem& problem) : SolverDDP(problem), dg_(0), dq_(0), dv_(0), th_acceptNegStep_(2) {}
SolverFDDP::~SolverFDDP() {}
bool SolverFDDP::solve(const std::vector<Eigen::VectorXd>& init_xs, const std::vector<Eigen::VectorXd>& init_us,
const unsigned int& maxiter, const bool& is_feasible, const double& reginit) {
setCandidate(init_xs, init_us, is_feasible);
if (std::isnan(reginit)) {
xreg_ = regmin_;
ureg_ = regmin_;
} else {
xreg_ = reginit;
ureg_ = reginit;
}
was_feasible_ = false;
bool recalc = true;
for (iter_ = 0; iter_ < maxiter; ++iter_) {
while (true) {
try {
computeDirection(recalc);
} catch (const char* msg) {
recalc = false;
increaseRegularization();
if (xreg_ == regmax_) {
return false;
} else {
continue;
}
}
break;
}
updateExpectedImprovement();
// We need to recalculate the derivatives only when a step is accepted
recalc = false;
for (std::vector<double>::const_iterator it = alphas_.begin(); it != alphas_.end(); ++it) {
steplength_ = *it;
try {
dV_ = tryStep(steplength_);
} catch (const char* msg) {
continue;
}
expectedImprovement();
dVexp_ = steplength_ * (d_[0] + 0.5 * steplength_ * d_[1]);
if (dVexp_ > 0) { // descent direction
if (d_[0] < th_grad_ || dV_ > th_acceptstep_ * dVexp_) {
// Accept step
was_feasible_ = is_feasible_;
setCandidate(xs_try_, us_try_, (was_feasible_) || (steplength_ == 1));
cost_ = cost_try_;
recalc = true;
break;
}
} else {
if (d_[0] < th_grad_ || dV_ < th_acceptNegStep_ * dVexp_) {
// accept step
was_feasible_ = is_feasible_;
setCandidate(xs_try_, us_try_, (was_feasible_) || (steplength_ == 1));
cost_ = cost_try_;
recalc = true;
break;
}
}
}
if (steplength_ > th_step_) {
decreaseRegularization();
}
if (steplength_ == alphas_.back()) {
increaseRegularization();
if (xreg_ == regmax_) {
return false;
}
}
stoppingCriteria();
unsigned int const& n_callbacks = static_cast<unsigned int>(callbacks_.size());
for (unsigned int c = 0; c < n_callbacks; ++c) {
CallbackAbstract& callback = *callbacks_[c];
callback(*this);
}
if (was_feasible_ && stop_ < th_stop_) {
return true;
}
}
return false;
}
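Read as a standalone sketch, the acceptance rule inside the line search above amounts to the following plain-Python function. All names are illustrative; the threshold defaults are assumptions for the sketch, except th_acceptNegStep, which the constructor above sets to 2:

def accept_step(dV, d1, d2, a, th_grad=1e-12, th_acceptstep=0.1, th_acceptNegStep=2.0):
    """Decide whether a trial step of length a is accepted.

    dV is the measured cost decrease, d1 and d2 the expected-improvement
    coefficients, so the model predicts dVexp = a*(d1 + a*d2/2).
    """
    dVexp = a * (d1 + 0.5 * a * d2)
    if dVexp > 0:  # the model predicts a descent direction
        return d1 < th_grad or dV > th_acceptstep * dVexp
    # when the model predicts no descent, apply the looser th_acceptNegStep test
    return d1 < th_grad or dV < th_acceptNegStep * dVexp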
void SolverFDDP::updateExpectedImprovement() {
dg_ = 0;
dq_ = 0;
unsigned int const& T = this->problem_.get_T();
if (!is_feasible_) {
dg_ -= Vx_.back().transpose() * gaps_.back();
dq_ += gaps_.back().transpose() * Vxx_.back() * gaps_.back();
}
for (unsigned int t = 0; t < T; ++t) {
dg_ += Qu_[t].transpose() * k_[t];
dq_ -= k_[t].transpose() * Quuk_[t];
if (!is_feasible_) {
dg_ -= Vx_[t].transpose() * gaps_[t];
fTVxx_p_.noalias() = Vxx_[t] * gaps_[t];
dq_ += gaps_[t].transpose() * fTVxx_p_;
}
}
}
const Eigen::Vector2d& SolverFDDP::expectedImprovement() {
dv_ = 0;
d_.fill(0);
unsigned int const& T = this->problem_.get_T();
if (!is_feasible_) {
problem_.running_models_.back()->get_state().diff(xs_try_.back(), xs_.back(), dx_.back());
fTVxx_p_.noalias() = Vxx_.back() * dx_.back();
dv_ -= gaps_.back().transpose() * fTVxx_p_;
for (unsigned int t = 0; t < T; ++t) {
problem_.running_models_[t]->get_state().diff(xs_try_[t], xs_[t], dx_[t]);
fTVxx_p_.noalias() = Vxx_[t] * dx_[t];
dv_ -= gaps_[t].transpose() * fTVxx_p_;
}
}
d_[0] = dg_ + dv_;
d_[1] = dq_ - 2 * dv_;
return d_;
}
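Putting updateExpectedImprovement and expectedImprovement together, the two coefficients can be written as the numpy sketch below. The per-node lists of 1-D/2-D numpy arrays mirror the solver members, the state is assumed Euclidean so diff reduces to a subtraction, and all names are illustrative:

import numpy as np

def expected_improvement(Qu, k, Quuk, Vx, Vxx, gaps, dx, feasible):
    """Return [d1, d2] with d1 = dg + dv and d2 = dq - 2*dv, as above."""
    dg = sum(Qu[t].dot(k[t]) for t in range(len(Qu)))
    dq = -sum(k[t].dot(Quuk[t]) for t in range(len(k)))
    dv = 0.0
    if not feasible:
        # gap contributions accumulated in updateExpectedImprovement()
        dg -= sum(Vx[t].dot(gaps[t]) for t in range(len(Vx)))
        dq += sum(gaps[t].dot(Vxx[t].dot(gaps[t])) for t in range(len(Vxx)))
        # correction computed in expectedImprovement(), with dx[t] = xs[t] - xs_try[t]
        dv = -sum(gaps[t].dot(Vxx[t].dot(dx[t])) for t in range(len(Vxx)))
    return np.array([dg + dv, dq - 2.0 * dv])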
double SolverFDDP::calc() {
cost_ = problem_.calcDiff(xs_, us_);
if (!is_feasible_) {
const Eigen::VectorXd& x0 = problem_.get_x0();
problem_.running_models_[0]->get_state().diff(xs_[0], x0, gaps_[0]);
unsigned int const& T = problem_.get_T();
for (unsigned int t = 0; t < T; ++t) {
ActionModelAbstract* model = problem_.running_models_[t];
boost::shared_ptr<ActionDataAbstract>& d = problem_.running_datas_[t];
model->get_state().diff(xs_[t + 1], d->get_xnext(), gaps_[t + 1]);
}
} else if (!was_feasible_) {
for (std::vector<Eigen::VectorXd>::iterator it = gaps_.begin(); it != gaps_.end(); ++it) {
it->setZero();
}
}
return cost_;
}
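In words, the defects ("gaps") maintained by calc above are the mismatches between the candidate trajectory and a one-step rollout of each node; they vanish once the iterate becomes feasible. A sketch for a Euclidean state, where diff is a plain subtraction and f stands for the system dynamics:

def compute_gaps(f, xs, us, x0):
    """gaps[0] = x0 - xs[0]; gaps[t+1] = f(xs[t], us[t]) - xs[t+1]."""
    gaps = [x0 - xs[0]]
    for t in range(len(us)):
        gaps.append(f(xs[t], us[t]) - xs[t + 1])
    return gaps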
void SolverFDDP::backwardPass() {
boost::shared_ptr<ActionDataAbstract>& d_T = problem_.terminal_data_;
Vxx_.back() = d_T->get_Lxx();
Vx_.back() = d_T->get_Lx();
x_reg_.fill(xreg_);
if (!std::isnan(xreg_)) {
Vxx_.back().diagonal() += x_reg_;
}
if (!is_feasible_) {
Vx_.back() += Vxx_.back() * gaps_.back();
}
for (int t = static_cast<int>(problem_.get_T()) - 1; t >= 0; --t) {
ActionModelAbstract* m = problem_.running_models_[t];
boost::shared_ptr<ActionDataAbstract>& d = problem_.running_datas_[t];
const Eigen::MatrixXd& Vxx_p = Vxx_[t + 1];
const Eigen::VectorXd& Vx_p = Vx_[t + 1];
FxTVxx_p_.noalias() = d->get_Fx().transpose() * Vxx_p;
FuTVxx_p_[t].noalias() = d->get_Fu().transpose() * Vxx_p;
Qxx_[t].noalias() = d->get_Lxx() + FxTVxx_p_ * d->get_Fx();
Qxu_[t].noalias() = d->get_Lxu() + FxTVxx_p_ * d->get_Fu();
Quu_[t].noalias() = d->get_Luu() + FuTVxx_p_[t] * d->get_Fu();
Qx_[t].noalias() = d->get_Lx() + d->get_Fx().transpose() * Vx_p;
Qu_[t].noalias() = d->get_Lu() + d->get_Fu().transpose() * Vx_p;
if (!std::isnan(ureg_)) {
unsigned int const& nu = m->get_nu();
Quu_[t].diagonal() += Eigen::VectorXd::Constant(nu, ureg_);
}
computeGains(t);
if (std::isnan(ureg_)) {
Vx_[t].noalias() = Qx_[t] - K_[t].transpose() * Qu_[t];
} else {
Quuk_[t].noalias() = Quu_[t] * k_[t];
Vx_[t].noalias() = Qx_[t] + K_[t].transpose() * Quuk_[t] - 2 * K_[t].transpose() * Qu_[t];
}
Vxx_[t].noalias() = Qxx_[t] - Qxu_[t] * K_[t];
Vxx_[t] = 0.5 * (Vxx_[t] + Vxx_[t].transpose()).eval();
// TODO(cmastalli): as suggested by Nicolas
if (!std::isnan(xreg_)) {
Vxx_[t].diagonal() += x_reg_;
}
// Compute and store the Vx gradient at end of the interval (rollout state)
if (!is_feasible_) {
Vx_[t].noalias() += Vxx_[t] * gaps_[t];
}
if (raiseIfNaN(Vx_[t].lpNorm<Eigen::Infinity>())) {
throw "backward_error";
}
if (raiseIfNaN(Vxx_[t].lpNorm<Eigen::Infinity>())) {
throw "backward_error";
}
}
}
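For reference, one Riccati step of the sweep above can be written compactly in numpy as the sketch below. It drops the regularization terms and the gap correction, and replaces the Cholesky-based gain computation of computeGains with a generic linear solve:

import numpy as np

def riccati_step(Fx, Fu, Lx, Lu, Lxx, Lxu, Luu, Vx_p, Vxx_p):
    """One backward-pass node: action-model derivatives in, value function and gains out."""
    FxTVxx = Fx.T.dot(Vxx_p)
    FuTVxx = Fu.T.dot(Vxx_p)
    Qx = Lx + Fx.T.dot(Vx_p)
    Qu = Lu + Fu.T.dot(Vx_p)
    Qxx = Lxx + FxTVxx.dot(Fx)
    Qxu = Lxu + FxTVxx.dot(Fu)
    Quu = Luu + FuTVxx.dot(Fu)
    K = np.linalg.solve(Quu, Qxu.T)      # feedback gains
    k = np.linalg.solve(Quu, Qu)         # feedforward term
    Vx = Qx - K.T.dot(Qu)                # unregularized update
    Vxx = Qxx - Qxu.dot(K)
    Vxx = 0.5 * (Vxx + Vxx.T)            # enforce symmetry, as above
    return Vx, Vxx, k, K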
void SolverFDDP::forwardPass(const double& steplength) {
assert(steplength <= 1. && "Step length has to be <= 1.");
assert(steplength >= 0. && "Step length has to be >= 0.");
cost_try_ = 0.;
xnext_ = problem_.get_x0();
unsigned int const& T = problem_.get_T();
for (unsigned int t = 0; t < T; ++t) {
ActionModelAbstract* m = problem_.running_models_[t];
boost::shared_ptr<ActionDataAbstract>& d = problem_.running_datas_[t];
if ((is_feasible_) || (steplength == 1)) {
xs_try_[t] = xnext_;
} else {
m->get_state().integrate(xnext_, gaps_[t] * (steplength - 1), xs_try_[t]);
}
m->get_state().diff(xs_[t], xs_try_[t], dx_[t]);
us_try_[t].noalias() = us_[t] - k_[t] * steplength - K_[t] * dx_[t];
m->calc(d, xs_try_[t], us_try_[t]);
cost_try_ += d->cost;
if (raiseIfNaN(cost_try_)) {
throw "forward_error";
}
if (raiseIfNaN(d->get_xnext().lpNorm<Eigen::Infinity>())) {
throw "forward_error";
}
}
ActionModelAbstract* m = problem_.terminal_model_;
boost::shared_ptr<ActionDataAbstract>& d = problem_.terminal_data_;
if ((is_feasible_) || (steplength == 1)) {
xs_try_.back() = problem_.running_datas_.back()->get_xnext();
} else {
m->get_state().integrate(problem_.running_datas_.back()->get_xnext(), gaps_.back() * (steplength - 1),
xs_try_.back());
}
m->calc(d, xs_try_.back());
cost_try_ += d->cost;
if (raiseIfNaN(cost_try_)) {
throw "forward_error";
}
}
} // namespace crocoddyl
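As a plain-Python restatement of the rollout above, the sketch below assumes a Euclidean state, so integrate and diff reduce to vector addition and subtraction; f stands in for the system dynamics and all names are illustrative:

import numpy as np

def fddp_rollout(f, xs, us, k, K, gaps, x0, a, feasible):
    """Gap-contracting forward pass: for a < 1 the reference is shifted by gaps*(a-1)."""
    xs_try, us_try = [], []
    xnext = np.array(x0, dtype=float)
    for t in range(len(us)):
        x_t = xnext if (feasible or a == 1.) else xnext + gaps[t] * (a - 1.)
        dx = x_t - xs[t]
        u_t = us[t] - a * k[t] - K[t].dot(dx)
        xs_try.append(x_t)
        us_try.append(u_t)
        xnext = f(x_t, u_t)  # system dynamics
    # terminal node: same gap contraction applied to the last predicted state
    xs_try.append(xnext if (feasible or a == 1.) else xnext + gaps[-1] * (a - 1.))
    return xs_try, us_try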