Change the C++ compiler in ROS to use OpenACC and CUDA
I'm trying to compile a rospackage with the PGI compiler that uses openACC. I want to parallelize some code.
This works with standard c++ code and uses the pgcc / pgc++ compiler. So I tried to compile a simple ros package with this compiler. Here is the source code:
#include <ros/ros.h>
#include <iostream>
#include "std_msgs/String.h"
#include <sstream>
int main(int argc, char **argv)
{
ros::init(argc, argv, "pgi_test_node");
ros::NodeHandle n;
ros::Publisher chatter_pub = n.advertise<std_msgs::String>("chatter", 1000);
ros::Rate loop_rate(10);
int cout = 0;
while(ros::ok()) {
std_msgs::String msg;
ss << heelo world" << count;
ROS_INFO("%s", msg.data.c_str());
chatter_pub.publish(msg);
ros::spinOnce();
loop_rate.sleep();
++count;
}
return 0;
}
And here is my CMakeLists.txt. I've tried a lot of things, but the main changes are the compiler and its flags at the beginning.
cmake_minimum_required(VERSION 2.8.3)

# Select the PGI compilers BEFORE project(): project() is what enables the
# C/C++ languages and detects the compiler, so CMAKE_<LANG>_COMPILER has to be
# set ahead of it to be honoured reliably.
# NOTE(review): when building with catkin_make the workspace-level
# CMakeLists.txt is processed first; passing
#   -DCMAKE_C_COMPILER=... -DCMAKE_CXX_COMPILER=...
# on the catkin_make command line is the more robust way to switch compilers.
SET(CMAKE_C_COMPILER /opt/pgi/linux86-64/18.4/bin/pgcc)
SET(CMAKE_CXX_COMPILER /opt/pgi/linux86-64/18.4/bin/pgc++)

project(pgi_test)

## Compile as C++11, supported in ROS Kinetic and newer
add_compile_options(-std=c++11)

# flags
add_definitions("-DENABLE_SSE")
# Optimisation plus OpenACC options for pgc++ (-acc, -ta=..., -Minfo=accel).
# NOTE(review): SSE_FLAGS is not defined anywhere in this file, so it expands
# to an empty string unless it is supplied from outside.
SET(CMAKE_CXX_FLAGS
  "${SSE_FLAGS} -O3 -std=c++11 -ta=tesla:cuda9.1 -acc -Minfo=accel"
)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
# Locate catkin together with the catkin packages this node builds against.
find_package(catkin REQUIRED COMPONENTS
  roscpp
  rospy
  std_msgs
  genmsg
)
## System dependencies are found with CMake's conventions
#find_package(Boost REQUIRED COMPONENTS system)
# Locate Boost (system, thread and filesystem components) and add its headers
# to the include path.
find_package(Boost REQUIRED COMPONENTS system thread filesystem)
INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR} )
# Locate the CUDA toolkit through CMake's FindCUDA module.
find_package( CUDA REQUIRED )
# Header search paths for the catkin dependencies and for CUDA.
include_directories(
${catkin_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
# NOTE(review): set() with no value does not create a preprocessor definition;
# it only affects a CMake variable of that name, and nothing else in this file
# reads BOOST_HAS_FLOAT128. It therefore has no effect on the float128 errors.
SET(BOOST_HAS_FLOAT128)
## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
# catkin_python_setup()
################################################
## Declare ROS messages, services and actions ##
################################################
## To declare and build messages, services or actions from within this
## package, follow these steps:
## * Let MSG_DEP_SET be the set of packages whose message types you use in
## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
## * In the file package.xml:
## * add a build_depend tag for "message_generation"
## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
## but can be declared for certainty nonetheless:
## * add a exec_depend tag for "message_runtime"
## * In this file (CMakeLists.txt):
## * add "message_generation" and every package in MSG_DEP_SET to
## find_package(catkin REQUIRED COMPONENTS ...)
## * add "message_runtime" and every package in MSG_DEP_SET to
## catkin_package(CATKIN_DEPENDS ...)
## * uncomment the add_*_files sections below as needed
## and list every .msg/.srv/.action file to be processed
## * uncomment the generate_messages entry below
## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
## Generate messages in the 'msg' folder
# add_message_files(
# FILES
# Message1.msg
# Message2.msg
# )
## Generate services in the 'srv' folder
# add_service_files(
# FILES
# Service1.srv
# Service2.srv
# )
## Generate actions in the 'action' folder
# add_action_files(
# FILES
# Action1.action
# Action2.action
# )
## Generate added messages and services with any dependencies listed here
# generate_messages(
# DEPENDENCIES
# std_msgs # Or other packages containing msgs
# )
################################################
## Declare ROS dynamic reconfigure parameters ##
################################################
## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
## * add "dynamic_reconfigure" to
## find_package(catkin REQUIRED COMPONENTS ...)
## * uncomment the "generate_dynamic_reconfigure_options" section below
## and list every .cfg file to be processed
## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
# cfg/DynReconf1.cfg
# cfg/DynReconf2.cfg
# )
###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
# Every export option below is commented out, so catkin_package() is called
# with no arguments here.
catkin_package(
# INCLUDE_DIRS include
# LIBRARIES pgi_test
# CATKIN_DEPENDS other_catkin_pkg
# DEPENDS system_lib
)
###########
## Build ##
###########
# nvcc configuration: target architecture sm_61 and verbose nvcc output, both
# forced into the CMake cache.
SET(CUDA_NVCC_FLAGS "-arch=sm_61" CACHE STRING "nvcc flags" FORCE)
SET (CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
# LIB_TYPE is only referenced by the commented-out CUDA_ADD_LIBRARY call below.
SET(LIB_TYPE STATIC)
# NOTE(review): CUDACC_DEFINE is never referenced in this file, so this line
# does not define __CUDACC__ for the compiler.
SET(CUDACC_DEFINE D__CUDACC__)
#CUDA_ADD_LIBRARY(TestLib ${LIB_TYPE} src/Novo_Teste.cu)
# Append -DMY_DEF=1 to the nvcc, C++ and C flag strings.
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DMY_DEF=1")
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMY_DEF=1" )
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMY_DEF=1" )
# NOTE(review): confirm that find_package(CUDA) actually sets
# CUDA_LIBRARY_DIRS; if it does not, this call adds no directories.
link_directories(${CUDA_LIBRARY_DIRS})
## Specify additional locations of header files
## Your package locations should be listed before other locations
# ${catkin_INCLUDE_DIRS} is already passed to an earlier include_directories()
# call in this file; it is repeated here.
include_directories(
# include
${catkin_INCLUDE_DIRS}
)
## Declare a C++ library
# add_library(${PROJECT_NAME}
# src/${PROJECT_NAME}/pgi_test.cpp
# )
## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# Builds the executable target pgi_test_node from src/pgi_test_node.cpp.
add_executable(${PROJECT_NAME}_node src/pgi_test_node.cpp)
## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
## target back to the shorter version for ease of user use
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
## Specify libraries to link a library or executable target against
# Link the node against the catkin, CUDA and Boost libraries.
# CMake variable names are case-sensitive: find_package(CUDA) provides
# CUDA_LIBRARIES, whereas the lowercase spelling expands to nothing.
target_link_libraries(${PROJECT_NAME}_node
  ${catkin_LIBRARIES}
  ${CUDA_LIBRARIES}
  ${Boost_LIBRARIES}
)
#############
## Install ##
#############
# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# install(PROGRAMS
# scripts/my_python_script
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark executables and/or libraries for installation
# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
# FILES_MATCHING PATTERN "*.h"
# PATTERN ".svn" EXCLUDE
# )
## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
# # myfile1
# # myfile2
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )
#############
## Testing ##
#############
## Add gtest based cpp test target and link libraries
# catkin_add_gtest(${PROJECT_NAME}-test test/test_pgi_test.cpp)
# if(TARGET ${PROJECT_NAME}-test)
# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
# endif()
## Add folders to be run by python nosetests
# catkin_add_nosetests(test)
With this configuration I get errors like these:
catkin_ws$ catkin_make
Base path: /home/catkin_ws
Source space: /home/catkin_ws/src
Build space: /home/catkin_ws/build
Devel space: /home/catkin_ws/devel
Install space: /home/catkin_ws/install
####
#### Running command: "make cmake_check_build_system" in "/home/catkin_ws/build"
####
####
#### Running command: "make -j8 -l8" in "/home/catkin_ws/build"
####
[ 50%] Building CXX object pgi_test/CMakeFiles/pgi_test_node.dir/src/pgi_test_node.cpp.o
"/usr/include/x86_64-linux-gnu/bits/floatn.h", line 75: error: 128-bit
floating-point types are not supported in this configuration
typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
^
"/usr/include/x86_64-linux-gnu/bits/iscanonical.h", line 51: error: function
"iscanonical(long double)" has already been defined
inline int iscanonical (_Float128 __val) { return __iscanonicalf128 (__val); }
^
"/usr/include/math.h", line 709: error: function "issignaling(long double)" has
already been defined
inline int issignaling (_Float128 __val) { return __issignalingf128 (__val); }
^
"/usr/include/math.h", line 1237: error: class "__iseqsig_type<long double>"
has already been defined
template<> struct __iseqsig_type<_Float128>
^
"/usr/include/boost/math/policies/error_handling.hpp", line 117: error:
function "boost::math::policies::detail::name_of<T>() [with T=long
double]" has already been defined
inline const char* name_of<BOOST_MATH_FLOAT128_TYPE>()
^
"/usr/include/boost/type_traits/is_floating_point.hpp", line 25: error: class
"boost::is_floating_point<long double>" has already been defined
template<> struct is_floating_point<__float128> : public true_type{};
^
"/usr/lib/gcc/x86_64-linux-gnu/6/include/quadmath.h", line 32: error: 128-bit
floating-point types are not supported in this configuration
typedef _Complex float __attribute__((mode(TC))) __complex128;
I've been searching and found a solution for these errors that works fine with a plain C++ program. It consists of adding this line at the beginning of the code:
#define __CUDACC__
#include .....
When I try to apply that solution to a ROS package, I obtain an error in the boost library:
/catkin_ws$ catkin_make
Base path: /home/catkin_ws
Source space: /home/catkin_ws/src
Build space: /home/catkin_ws/build
Devel space: /home/catkin_ws/devel
Install space: /home/catkin_ws/install
####
#### Running command: "make cmake_check_build_system" in "/home/catkin_ws/build"
####
####
#### Running command: "make -j8 -l8" in "/home/catkin_ws/build"
####
Scanning dependencies of target pgi_test_node
[ 50%] Building CXX object pgi_test/CMakeFiles/pgi_test_node.dir/src/pgi_test_node.cpp.o
"/usr/include/boost/type_traits/is_floating_point.hpp", line 25: error: class
"boost::is_floating_point<long double>" has already been defined
template<> struct is_floating_point<__float128> : public true_type{};
^
"/usr/include/boost/core/swap.hpp", line 32: error: identifier "__host__" is
undefined
BOOST_GPU_ENABLED
^
"/usr/include/boost/core/swap.hpp", line 32: error: "__device__" is not a
function or static data member
BOOST_GPU_ENABLED
^
"/usr/include/boost/core/swap.hpp", line 48: error: expected a ";"
}
^
"/usr/include/boost/core/swap.hpp", line 53: error: identifier "__host__" is
undefined
BOOST_GPU_ENABLED
^
"/usr/include/boost/core/swap.hpp", line 53: error: "__device__" is not a
function or static data member
BOOST_GPU_ENABLED
^
"/usr/include/boost/core/swap.hpp", line 58: error: expected a ";"
}
Last but not least, I'm using ROS Melodic on Ubuntu 18.04, I have an Nvidia GTX 1050 Ti (CUDA code works fine) and I'm using CUDA 9.1. The compiler is PGI 18.4 (pgc++).
Does someone know how to handle this?
Asked by billyDong on 2018-11-14 13:28:14 UTC
Answers
This doesn't directly answer your question, but in the past I have made my CUDA-specific code a custom external C++ library, which you then link to from catkin. I don't know how different pgcc is from gcc, so you will possibly run into ABI issues, but according to a quick search it seems like it might work okay.
Asked by john.j.oneill on 2018-11-14 13:52:14 UTC
Comments
I managed to compile and run successfully, but I discourage this approach, so I will not close the question until I find a better way to solve the problem. A normal system update will undo these changes, and I'm not sure whether this will have consequences in the future when I need to use the Boost library. Anyway, here's my approach:
As I said, in the first place you need to add this line in the beginning of your code:
#define __CUDACC__
#include <ros/ros.h>
A normal catkin_make would give two errors with this approach. For the first one:
sudo nano /usr/include/boost/type_traits/is_floating_point.hpp
Replace this line:
#if defined(BOOST_HAS_FLOAT128)
with:
#if defined(BOOST_HAS_FLOAT128) && !defined(__PGI)
For the following errors:
sudo nano /usr/include/boost/core/swap.hpp
Comment out every line containing:
BOOST_GPU_ENABLED
There should be 3 lines.
With this I compiled and ran the code. I added a pi-approximation example to compare the speed on CPU and GPU, in case someone wants to test it:
#define __CUDACC__
#include <ros/ros.h>
#include <iostream>
#include "std_msgs/String.h"
#include <sstream>
#define N 2000000000
#define vl 1024
/// Entry point of the OpenACC test node.
///
/// Publishes "hello world <count>" on the "chatter" topic at 10 Hz. On every
/// cycle it also sums 4/(1+t*t) over N midpoints t = (i+0.5)/N inside an
/// OpenACC parallel region and prints the sum divided by N as "pi".
///
/// @param argc  argument count forwarded to ros::init
/// @param argv  argument vector forwarded to ros::init
/// @return 0 once ros::ok() reports false
int main(int argc, char **argv)
{
    ros::init(argc, argv, "pgi_test_node");
    ros::NodeHandle n;
    ros::Publisher chatter_pub = n.advertise<std_msgs::String>("chatter", 1000);
    ros::Rate loop_rate(10);

    for (int count = 0; ros::ok(); ++count)
    {
        // Compose the outgoing text, then log it.
        std::stringstream text;
        text << "hello world " << count;

        std_msgs::String msg;
        msg.data = text.str();
        ROS_INFO("%s", msg.data.c_str());

        // Accumulator for the midpoint sum; named in the reduction clause.
        double sum = 0.0;
        long long i;
        #pragma acc parallel vector_length(vl)
        #pragma acc loop reduction(+:sum)
        for (i = 0; i < N; i++) {
            double x = (i + 0.5) / N;
            sum += 4.0 / (1.0 + x * x);
        }
        printf("pi=%11.10f\n", sum / N);

        chatter_pub.publish(msg);
        ros::spinOnce();
        loop_rate.sleep();
    }
    return 0;
}
Timers are not even necessary: if you just comment out the pragmas, the loop will run on the CPU and you can clearly see the difference.
Asked by billyDong on 2018-11-15 06:14:55 UTC
Comments