diff --git a/CMakeLists.txt b/CMakeLists.txt index e89f3c0660572df22e2c646200137db1b3469b9a..a8a58375d9ef7e0efda18bd9edf4fd9d727d87a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ SET(CXX_DISABLE_WERROR True) OPTION(ENABLE_VECTORIZATION "Enable vectorization and futhers processor-related optimizations" OFF) OPTION(BUILD_PYTHON_INTERFACE "Build the python binding" ON) OPTION(BUILD_UNIT_TESTS "Build the unitary tests" ON) -OPTION(BUILD_BENCHMARK "Build the benchmark" OFF) +OPTION(BUILD_BENCHMARK "Build the benchmark" ON) IF(ENABLE_VECTORIZATION) @@ -57,6 +57,29 @@ ADD_OPTIONAL_DEPENDENCY("multicontact-api >= 1.1.0") ADD_OPTIONAL_DEPENDENCY("quadprog") ADD_OPTIONAL_DEPENDENCY("scipy") + +OPTION(BUILD_WITH_MULTITHREADS "Build the library with the OpenMP support (requires OpenMP)" OFF) +IF(BUILD_WITH_MULTITHREADS) + SET(BUILD_WITH_NTHREADS "4" CACHE STRING "Number of threads") + string(REGEX MATCH "^[0-9]+$" BUILD_WITH_NTHREADS "${BUILD_WITH_NTHREADS}") + IF(NOT BUILD_WITH_NTHREADS MATCHES "^[0-9]+$") + SET(BUILD_WITH_NTHREADS 4) + MESSAGE("Warning: the number of threads has to be an integer value, set to ${BUILD_WITH_NTHREADS}") + ENDIF() +ENDIF() + +# Add OpenMP +if(BUILD_WITH_MULTITHREADS) + find_package(OpenMP) +ENDIF() +if(OPENMP_FOUND AND BUILD_WITH_MULTITHREADS) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + ADD_DEFINITIONS(-DWITH_MULTITHREADING) + ADD_DEFINITIONS(-DWITH_NTHREADS=${BUILD_WITH_NTHREADS}) + LIST(APPEND CFLAGS_DEPENDENCIES "-DWITH_MULTITHREADING" "-DWITH_NTHREADS=${BUILD_WITH_NTHREADS}") +ENDIF() + + SET(BOOST_REQUIERED_COMPONENTS filesystem serialization system) SET(BOOST_BUILD_COMPONENTS unit_test_framework) SET(BOOST_OPTIONAL_COMPONENTS "") @@ -97,4 +120,4 @@ IF(BUILD_BENCHMARK) ENDIF(BUILD_BENCHMARK) -SETUP_PROJECT_FINALIZE() \ No newline at end of file +SETUP_PROJECT_FINALIZE() diff --git a/benchmark/lqr.cpp b/benchmark/lqr.cpp index 43b4d995ce174e45a9fe92b942ebc4654c43922b..19d9f6104f733999f02872b2936b57aace639cc6 100644 --- a/benchmark/lqr.cpp +++ 
b/benchmark/lqr.cpp @@ -40,17 +40,52 @@ int main() { } // Solving the optimal control problem - std::clock_t c_start, c_end; + struct timespec start, finish; + double elapsed; Eigen::ArrayXd duration(T); for (unsigned int i = 0; i < T; ++i) { - c_start = std::clock(); + clock_gettime(CLOCK_MONOTONIC, &start); ddp.solve(xs, us, MAXITER); - c_end = std::clock(); - duration[i] = 1e3 * (double)(c_end - c_start) / CLOCKS_PER_SEC; + clock_gettime(CLOCK_MONOTONIC, &finish); + elapsed = (finish.tv_sec - start.tv_sec) * 1000000.0; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1000.0; + duration[i] = elapsed / 1000.; } double avrg_duration = duration.sum() / T; double min_duration = duration.minCoeff(); double max_duration = duration.maxCoeff(); - std::cout << "CPU time [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" << std::endl; + std::cout << "Wall time [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" << std::endl; + + // Running calc + for (unsigned int i = 0; i < T; ++i) { + clock_gettime(CLOCK_MONOTONIC, &start); + problem.calc(xs, us); + clock_gettime(CLOCK_MONOTONIC, &finish); + elapsed = (finish.tv_sec - start.tv_sec) * 1000000.0; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1000.0; + duration[i] = elapsed / 1000.; + } + + avrg_duration = duration.sum() / T; + min_duration = duration.minCoeff(); + max_duration = duration.maxCoeff(); + std::cout << "Wall time calc [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" + << std::endl; + + // Running calcDiff + for (unsigned int i = 0; i < T; ++i) { + clock_gettime(CLOCK_MONOTONIC, &start); + problem.calcDiff(xs, us); + clock_gettime(CLOCK_MONOTONIC, &finish); + elapsed = (finish.tv_sec - start.tv_sec) * 1000000.0; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1000.0; + duration[i] = elapsed / 1000.; + } + + avrg_duration = duration.sum() / T; + min_duration = duration.minCoeff(); + max_duration = duration.maxCoeff(); + 
std::cout << "Wall time calcDiff [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" + << std::endl; } \ No newline at end of file diff --git a/benchmark/unicycle.cpp b/benchmark/unicycle.cpp index 07516cf466c7a103d3aec03adc2bd6410874e7d8..92a726560e29b55110a93b8d98ea513d86d88117 100644 --- a/benchmark/unicycle.cpp +++ b/benchmark/unicycle.cpp @@ -1,7 +1,11 @@ #include "crocoddyl/core/actions/unicycle.hpp" #include "crocoddyl/core/utils/callbacks.hpp" #include "crocoddyl/core/solvers/ddp.hpp" -#include <ctime> +#include <time.h> + +#ifdef WITH_MULTITHREADING +#include <omp.h> +#endif // WITH_MULTITHREADING int main() { bool CALLBACKS = false; @@ -37,17 +41,53 @@ int main() { } // Solving the optimal control problem - std::clock_t c_start, c_end; + struct timespec start, finish; + double elapsed; Eigen::ArrayXd duration(T); for (unsigned int i = 0; i < T; ++i) { - c_start = std::clock(); + clock_gettime(CLOCK_MONOTONIC, &start); ddp.solve(xs, us, MAXITER); - c_end = std::clock(); - duration[i] = 1e3 * (double)(c_end - c_start) / CLOCKS_PER_SEC; + clock_gettime(CLOCK_MONOTONIC, &finish); + elapsed = (finish.tv_sec - start.tv_sec) * 1000000.0; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1000.0; + duration[i] = elapsed / 1000.; } double avrg_duration = duration.sum() / T; double min_duration = duration.minCoeff(); double max_duration = duration.maxCoeff(); - std::cout << "CPU time [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" << std::endl; + std::cout << "Wall time solve [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" + << std::endl; + + // Running calc + for (unsigned int i = 0; i < T; ++i) { + clock_gettime(CLOCK_MONOTONIC, &start); + problem.calc(xs, us); + clock_gettime(CLOCK_MONOTONIC, &finish); + elapsed = (finish.tv_sec - start.tv_sec) * 1000000.0; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1000.0; + duration[i] = elapsed / 1000.; + } + + avrg_duration = 
duration.sum() / T; + min_duration = duration.minCoeff(); + max_duration = duration.maxCoeff(); + std::cout << "Wall time calc [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" + << std::endl; + + // Running calcDiff + for (unsigned int i = 0; i < T; ++i) { + clock_gettime(CLOCK_MONOTONIC, &start); + problem.calcDiff(xs, us); + clock_gettime(CLOCK_MONOTONIC, &finish); + elapsed = (finish.tv_sec - start.tv_sec) * 1000000.0; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1000.0; + duration[i] = elapsed / 1000.; + } + + avrg_duration = duration.sum() / T; + min_duration = duration.minCoeff(); + max_duration = duration.maxCoeff(); + std::cout << "Wall time calcDiff [ms]: " << avrg_duration << " (" << min_duration << "-" << max_duration << ")" + << std::endl; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b906f4ef5f914b703a6688f6d5a379f175f009aa..4f49c86a7a6212967c26dad1bc72b6fa7731030d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,6 +49,10 @@ IF(UNIX) PKG_CONFIG_USE_DEPENDENCY(${PROJECT_NAME} pinocchio) TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${Boost_FILESYSTEM_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_SERIALIZATION_LIBRARY}) + if(OPENMP_FOUND) + TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${OpenMP_CXX_LIBRARIES}) + ENDIF() + INSTALL(TARGETS ${PROJECT_NAME} DESTINATION lib) INSTALL(DIRECTORY ${CMAKE_SOURCE_DIR}/include/ DESTINATION include diff --git a/src/core/optctrl/shooting.cpp b/src/core/optctrl/shooting.cpp index c85e4c65624a5dcc96548c1de1f248f57623b07f..b3f185433e3f8ba32bbdba0ec290e15127341bf9 100644 --- a/src/core/optctrl/shooting.cpp +++ b/src/core/optctrl/shooting.cpp @@ -7,6 +7,11 @@ /////////////////////////////////////////////////////////////////////////////// #include "crocoddyl/core/optctrl/shooting.hpp" +#include <iostream> +#ifdef WITH_MULTITHREADING +#include <omp.h> +#define NUM_THREADS WITH_NTHREADS +#endif // WITH_MULTITHREADING namespace crocoddyl { @@ -47,15 +52,20 @@ double 
ShootingProblem::calcDiff(const std::vector<Eigen::VectorXd>& xs, const s assert(us.size() == T_ && "Wrong dimension of the control trajectory, it should be T."); cost_ = 0; - for (unsigned int i = 0; i < T_; ++i) { - ActionModelAbstract* model = running_models_[i]; - boost::shared_ptr<ActionDataAbstract>& data = running_datas_[i]; - const Eigen::VectorXd& x = xs[i]; - const Eigen::VectorXd& u = us[i]; + unsigned int i; + +#ifdef WITH_MULTITHREADING + // Size the team for this region only, leaving the caller's OpenMP thread setting untouched +#pragma omp parallel for num_threads(NUM_THREADS) +#endif + for (i = 0; i < T_; ++i) { + running_models_[i]->calcDiff(running_datas_[i], xs[i], us[i]); + } - model->calcDiff(data, x, u); - cost_ += data->cost; + for (unsigned int i = 0; i < T_; ++i) { + cost_ += running_datas_[i]->cost; } + terminal_model_->calcDiff(terminal_data_, xs.back()); cost_ += terminal_data_->cost; return cost_;