TransformListener x10 higher CPU load in rclpy than rclpp
System info
- Operating System:
- Docker + Ubuntu 20.04, 10th gen i7
- Docker + Ubuntu 20.04, jetson AGX Xavier
- ROS2 Version:
- Foxy binaries
Steps to reproduce issue
Make sure the system is publishing transforms to /tf
at 30Hz. Run a simple node with a TransformListener
in python and in C++.
Python
import rclpy
from rclpy.node import Node
from tf2_ros.buffer import Buffer
from tf2_ros.transform_listener import TransformListener
class MyNode(Node):
def __init__(
self,
) -> None:
super().__init__("navigation_god_node")
self.buffer = Buffer()
self.listener = TransformListener(
buffer=self.buffer, node=self, spin_thread=False
)
self.listener.buffer = self.buffer
self.tm_main = self.create_timer(
timer_period_sec=0.5,
callback=self.timer_callback,
)
def timer_callback(self):
if self.buffer.can_transform(
"camera_color_optical_frame",
"map",
rclpy.time.Time(seconds=0, nanoseconds=0).to_msg(),
):
self.get_logger().info("Can transform")
def main(args=None) -> None:
rclpy.init(args=args)
node = MyNode()
rclpy.spin(node)
node.destroy_node()
rclpy.shutdown()
if __name__ == "__main__":
main()
C++
#include <string>
#include "rclcpp/rclcpp.hpp"
#include "rclcpp/time.hpp"
#include <tf2_ros/buffer.h>
#include <tf2_ros/transform_listener.h>
#include "tf2_geometry_msgs/tf2_geometry_msgs.h"
using namespace std::chrono_literals;
using namespace std;
class MyNode : public rclcpp::Node
{
public:
MyNode(const string& node_name, const string& nm)
: Node(node_name, nm)
{
_buffer_tf2 = std::make_unique<tf2_ros::Buffer>(this->get_clock());
_listener_tf2 = std::make_shared<tf2_ros::TransformListener>(*_buffer_tf2);
timer_ = this->create_wall_timer(
500ms, std::bind(&TFListenerNode::timer_callback, this));
}
private:
void timer_callback()
{
if (_buffer_tf2->canTransform("camera_color_optical_frame", "map", tf2::TimePointZero))
RCLCPP_INFO(this->get_logger(), "Can transform");
}
rclcpp::TimerBase::SharedPtr timer_;
std::unique_ptr<tf2_ros::Buffer> _buffer_tf2;
std::shared_ptr<tf2_ros::TransformListener> _listener_tf2;
};
int main(int argc, char* argv[])
{
rclcpp::init(argc, argv);
auto tf_transforms_node = make_shared<MyNode>("listener_transforms_node", "tf_transforms");
rclcpp::spin(tf_transforms_node);
rclcpp::shutdown();
return 0;
}
Expected behavior
Roughly the CPU load should be the same, at least not that big difference.
Actual behavior
The python implementation loads the CPU almost 10x. Tested in my beefy laptop and in a Jetson.
My laptop
Python node: ~8% CPU one core C++ node: ~1% CPU one core
Jetson
Python node: ~20% CPU one core C++ node: ~2% CPU one core
In this case, it makes the python API unusable in a constraint environment like a Jetson.
Does anyone know any known issue with tf2_ros in python? Does this implementation look bad?