Robotics StackExchange | Archived questions

CUDA project is built, but CUDA isn't working

I tried to make a ROS package that uses CUDA. Everything builds and runs, but CUDA doesn't work. I took an NVIDIA sample to test it:

#include <cstdlib>
#include <iostream>

// CUDA headers
#include <cuda.h>
#include <cuda_runtime.h>

using namespace std;

// Element-wise integer addition: c[i] = a[i] + b[i].
//
// Each thread handles the one element at its flat global index
// (blockIdx.x * blockDim.x + threadIdx.x). There is no bounds check, so the
// launch must not supply more threads than the arrays have elements.
__global__ void add(int * a, int * b, int *c)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    const int sum = a[gid] + b[gid];
    c[gid] = sum;
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Adds two N-element integer vectors (N = length * length) on the GPU and
// prints one "a + b = c" line per element.
//
// Every CUDA runtime call and the kernel launch are checked; on the first
// failure the reason is written to stderr, the results are not printed, and
// the process exits with status 1. Returns 0 on success.
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    const int length = 4;               // used as both grid and block size
    const int N = length * length;      // total threads == total elements
    const size_t size = N * sizeof(int);

    int *a = (int *)malloc(size);
    int *b = (int *)malloc(size);
    int *c = (int *)malloc(size);
    int *d_a = NULL;
    int *d_b = NULL;
    int *d_c = NULL;

    bool ok = (a != NULL) && (b != NULL) && (c != NULL);
    if (!ok)
    {
        std::cerr << "Host allocation of " << size << " bytes failed" << std::endl;
    }
    else
    {
        for (int i = 0; i < N; ++i)
        {
            a[i] = i;
            b[i] = i;
        }
    }

    // && short-circuits, so nothing further is attempted after a failure.
    ok = ok
        && cudaOk(cudaMalloc((void **)&d_a, size), "cudaMalloc(d_a)")
        && cudaOk(cudaMalloc((void **)&d_b, size), "cudaMalloc(d_b)")
        && cudaOk(cudaMalloc((void **)&d_c, size), "cudaMalloc(d_c)")
        && cudaOk(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "cudaMemcpy(d_a <- a)")
        && cudaOk(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice), "cudaMemcpy(d_b <- b)");

    if (ok)
    {
        add<<<length, length>>>(d_a, d_b, d_c);

        // A kernel launch returns nothing: launch errors are fetched with
        // cudaGetLastError(), execution errors show up at the next sync.
        ok = cudaOk(cudaGetLastError(), "add kernel launch")
            && cudaOk(cudaDeviceSynchronize(), "add kernel execution")
            && cudaOk(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost), "cudaMemcpy(c <- d_c)");
    }

    // cudaFree() on a null pointer performs no operation, so this is safe
    // even when an allocation above was skipped or failed.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    if (ok)
    {
        for (int i = 0; i < N; ++i)
        {
            std::cout << a[i] << " + " << b[i] << " = " << c[i] << std::endl;
        }
    }

    // Pass the heap pointers themselves; the original passed &a, &b, &c
    // (addresses of stack variables), which is undefined behaviour.
    free(a);
    free(b);
    free(c);
    return ok ? 0 : 1;
}

If I build it without ROS, it works (CMakeLists.txt):

# Standalone (non-ROS) build of the CUDA vector-add sample.
cmake_minimum_required(VERSION 2.8.3)
project(add_exec)

# Compile as C++11.
add_compile_options(-std=c++11)

# Locate the CUDA toolkit; this provides cuda_add_executable().
find_package(CUDA REQUIRED)

# Append -DMY_DEF=1 to the C, C++ and nvcc flag strings.
set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} -DMY_DEF=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMY_DEF=1")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DMY_DEF=1")

# Build main.cu into the add_exec executable.
cuda_add_executable(add_exec main.cu)

# NOTE(review): catkin is never searched for in this file, so
# ${catkin_LIBRARIES} presumably expands to nothing here.
target_link_libraries(add_exec ${catkin_LIBRARIES})

but with ROS ...

# catkin (ROS) build of the CUDA vector-add sample.
cmake_minimum_required(VERSION 2.8.3)
project(ocv_gpu_test)

## Compile as C++11, supported in ROS Kinetic and newer
add_compile_options(-std=c++11)

## Find catkin and any catkin packages
find_package(catkin REQUIRED COMPONENTS roscpp rospy std_msgs genmsg)

## Generate added messages and services
generate_messages(DEPENDENCIES std_msgs)

## Declare a catkin package
catkin_package()

## CUDA toolkit (provides cuda_add_executable) and the optional VisionWorks packages
find_package(CUDA REQUIRED)
find_package(VisionWorks NO_MODULE)
find_package(VisionWorks-NVXIO NO_MODULE)

# Compile-time definitions.
# -std=c++11 is a compiler option, not a definition, and is already set by
# add_compile_options() above, so only the real definition stays here.
add_definitions(-DNVX)

# Append -DMY_DEF=1 to the nvcc, C++ and C flag strings.
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DMY_DEF=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMY_DEF=1")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMY_DEF=1")

# Header search path; each directory is listed once.
include_directories(
  include
  ${catkin_INCLUDE_DIRS}
  ${CUDA_INCLUDE_DIRS}
  ${VisionWorks_INCLUDE_DIRS}
  ${VisionWorks-NVXIO_INCLUDE_DIRS}
)

# Build src/main.cu into the add_exec executable.
cuda_add_executable(add_exec src/main.cu)

# FindCUDA exports the runtime libraries as CUDA_LIBRARIES; the previously
# used CUDA_LIBS is not set by the module and expanded to nothing.
target_link_libraries(add_exec
    ${catkin_LIBRARIES}
    ${CUDA_LIBRARIES}
    ${VisionWorks_LIBRARIES})

the output looks like this:

0 + 0 = 2566178
1 + 1 = 2566178
2 + 2 = 2566178
3 + 3 = 2566178
4 + 4 = 2566178
5 + 5 = 2566178
6 + 6 = 2566178
7 + 7 = 2566178
8 + 8 = 2566178
9 + 9 = 2566178
10 + 10 = 2566178
11 + 11 = 2566178
12 + 12 = 2566178
13 + 13 = 2566178
14 + 14 = 2566178
15 + 15 = 2566178
16 + 16 = 2566178
17 + 17 = 2566178
18 + 18 = 2566178
19 + 19 = 2566178
20 + 20 = 2566178
21 + 21 = 2566178
22 + 22 = 2566178
23 + 23 = 2566178
24 + 24 = 2566178
25 + 25 = 2566178
26 + 26 = 2566178
....

or

0 + 0 = 0
1 + 1 = 0
2 + 2 = 0
3 + 3 = 0
4 + 4 = 0
5 + 5 = 0
6 + 6 = 0
7 + 7 = 0
8 + 8 = 0
9 + 9 = 0
10 + 10 = 0
11 + 11 = 0
12 + 12 = 0
13 + 13 = 0
14 + 14 = 0
15 + 15 = 0
16 + 16 = 0

Asked by DEngine on 2017-09-22 08:38:43 UTC

Comments

It would be nice if you could tell us where you found your answer. Right now this is not very useful for future readers that happen to come across your question.

Please post an answer yourself and then accept that.

Asked by gvdhoorn on 2017-09-23 16:50:18 UTC

Answers