Commit df056634 authored by Fernando Herrero

Fix depth roi computation using camera_info. Assuming same rgb and depth FOV

parent 6ea1eab9
Branch: master
@@ -3,7 +3,8 @@
 <launch>
   <arg name="camera_in_topic" default="/camera/color/image_raw"/>
   <arg name="depth_image_in_topic" default="/camera/aligned_depth_to_color/image_raw"/>
+  <arg name="depth_info_in_topic" default="/camera/aligned_depth_to_color/camera_info"/>
   <arg name="node_name" default="human_tensorflow_detection_3d"/>
   <arg name="bbox_in_topic" default="/human_tensorflow_detection_2d/bounding_box"/>
@@ -16,7 +17,7 @@
   <arg name="output" default="screen"/>
-  <arg name="required" default="true"/>
+  <arg name="required" default="false"/>
   <node pkg="get_obj_dist"
         name="$(arg node_name)"
@@ -29,6 +30,7 @@
     <remap from="~bounding_box_in" to="$(arg bbox_in_topic)"/>
     <remap from="~detection2d_array" to="$(arg detection2d_array_in)"/>
     <remap from="~depth_image_in" to="$(arg depth_image_in_topic)"/>
+    <remap from="~depth_info_in" to="$(arg depth_info_in_topic)"/>
     <remap from="~detection_distance_image_out" to="$(arg detection_distance_image_out)"/>
     <remap from="~people_marker_array_out" to="$(arg people_marker_array_out)"/>
     <remap from="~detected_persons" to="$(arg detected_persons_out)"/>
@@ -4,9 +4,10 @@ import sys
 import cv2
 import rospy
 import message_filters
+import copy
 from visualization_msgs.msg import Marker, MarkerArray
-from sensor_msgs.msg import Image, PointCloud2
+from sensor_msgs.msg import Image, PointCloud2, CameraInfo
 from cv_bridge import CvBridge, CvBridgeError
 from human_detection.msg import box_list
 from utils import *
@@ -22,9 +23,6 @@ line_type = 2
 marker_size = 5
 size_of_moving_avg = 15
-#MESH_PATH = 'package://get_obj_dist/human_model.STL'
 class obj_dist:
     def __init__(self):
         # Declare functional variables
@@ -36,10 +34,10 @@ class obj_dist:
         self.run_once = False
         self.moving_average = [[3] * size_of_moving_avg] * marker_size
-        #self.set_marker_array(marker_size, 'camera_color_optical_frame', MESH_PATH)
         # Initiale publishers
         self.image_pub = rospy.Publisher('~detection_distance_image_out', Image, queue_size=1)
+        self.roi_image_pub = rospy.Publisher('~roi_image_out', Image, queue_size=1)
         self.marker_pub = rospy.Publisher('~people_marker_array_out', MarkerArray, queue_size=1)
         self.detections_pub = rospy.Publisher('~detected_persons', DetectedPersons, queue_size=1)
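A caveat in the context lines above (untouched by this commit): the expression [[3] * size_of_moving_avg] * marker_size builds five references to one and the same inner list, so the per-person pop()/insert() update used by get_human_distance shifts every person's history at once. A minimal demonstration of the aliasing, with the usual per-row fix (a sketch, not part of the commit):

    size_of_moving_avg = 15
    marker_size = 5

    aliased = [[3] * size_of_moving_avg] * marker_size
    aliased[0].insert(0, 9.9)
    print(len(aliased[4]))        # 16: all five "rows" are the same list object

    independent = [[3] * size_of_moving_avg for _ in range(marker_size)]
    independent[0].insert(0, 9.9)
    print(len(independent[1]))    # 15: rows no longer share state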
@@ -48,60 +46,15 @@ class obj_dist:
         self.bbx_sub = message_filters.Subscriber('~bounding_box_in', box_list)
         self.human_image_sub = message_filters.Subscriber('~detection_image_in', Image)
         self.depth_image_sub = message_filters.Subscriber('~depth_image_in', Image)
-        # self.depth_image_sub = message_filters.Subscriber('/camera/depth/image_rect_raw', Image)
-        """ from previous subscription to bounding_box, missing confidence, replaced with callback2
-        ts = message_filters.ApproximateTimeSynchronizer([self.bbx_sub, self.human_image_sub,
-                                                          self.depth_image_sub],
-                                                         queue_size=10,
-                                                         slop=1)
-        ts.registerCallback(self.callback)
-        """
+        self.depth_info_sub = message_filters.Subscriber('~depth_info_in', CameraInfo)
         ts2 = message_filters.ApproximateTimeSynchronizer([self.det2d_sub, self.human_image_sub,
-                                                           self.depth_image_sub],
+                                                           self.depth_image_sub, self.depth_info_sub],
                                                           queue_size=10,
                                                           slop=1)
-        ts2.registerCallback(self.callback2)
+        ts2.registerCallback(self.callback)
-        """ from previous subscription to bounding_box, missing confidence, replaced with callback2
-        def callback(self, bbx, image, depth):
-            detections = DetectedPersons()
-            detections.header = depth.header
-            #print('working lol')
-            if bbx.length:
-                cv_depth = self.bridge.imgmsg_to_cv2(depth, 'passthrough')
-                cv_image = self.bridge.imgmsg_to_cv2(image, 'bgr8')
-                for i in range(0, 5):
-                    if i <= bbx.length - 1:
-                        cood = self.get_human_distance(cv_depth, cv_image, bbx.people_list[i], i)
-                        self.set_model_coordinates(cood, i)
-                        det = DetectedPerson()
-                        det.confidence = 0 #TODO: subscribe to detection2d_array which has confidence (bbox doesnt)
-                        det.pose.pose.position.x = cood[0]
-                        det.pose.pose.position.y = cood[1]
-                        det.pose.pose.position.z = cood[2]
-                        det.pose.pose.orientation.w=1.0
-                        det.modality = "rgbd"
-                        detections.detections.append(det)
-                    else:
-                        self.set_model_coordinates((-1, -1, -1), i)
-                self.marker_pub.publish(self.marker_array)
-                self.detections_pub.publish(detections)
-        """
-    def callback2(self, det2d, image, depth):
+    def callback(self, det2d, image, depth, info):
         if not self.run_once:
             self.set_marker_array(marker_size, det2d.header.frame_id)
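The rewired subscriptions above synchronize the 2D detections, the RGB image, the depth image, and now the depth CameraInfo through one approximate-time policy. A minimal self-contained sketch of that pattern, using the launch-file default topics (the real node remaps private names instead):

    import rospy
    import message_filters
    from sensor_msgs.msg import Image, CameraInfo

    def synced_cb(image, depth, info):
        # Messages arrive together once their stamps agree within `slop` seconds.
        rospy.loginfo('rgb %dx%d depth %dx%d fx=%.1f',
                      image.width, image.height, depth.width, depth.height, info.K[0])

    rospy.init_node('sync_sketch')
    subs = [message_filters.Subscriber('/camera/color/image_raw', Image),
            message_filters.Subscriber('/camera/aligned_depth_to_color/image_raw', Image),
            message_filters.Subscriber('/camera/aligned_depth_to_color/camera_info', CameraInfo)]
    ts = message_filters.ApproximateTimeSynchronizer(subs, queue_size=10, slop=1.0)
    ts.registerCallback(synced_cb)
    rospy.spin()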
@@ -110,7 +63,6 @@ class obj_dist:
         detections = DetectedPersons()
         detections.header = depth.header
-        #print('working lol')
         cv_image = self.bridge.imgmsg_to_cv2(image, 'bgr8')
         if len(det2d.detections) > 0:
             cv_depth = self.bridge.imgmsg_to_cv2(depth, 'passthrough')
@@ -118,13 +70,16 @@ class obj_dist:
             for i in range(0, 5):
                 if i <= len(det2d.detections) - 1:
-                    cood = self.get_human_distance2(cv_depth, cv_image, det2d.detections[i], i)
+                    det2d_rgb_i = det2d.detections[i]
+                    det2d_depth_i = self.convert_det2d_from_rgb_to_depth(det2d_rgb_i, image.width, image.height, depth.width, depth.height)
+                    cood = self.get_human_distance(cv_depth, cv_image, det2d_depth_i, i, depth.width, info)
                     self.set_model_coordinates(cood, i)
                     if cood[0]!=-1 and cood[1]!=-1 and cood[2]!=-1:
                         det = DetectedPerson()
                         det.detection_id = i #TODO: used?
-                        det.confidence = det2d.detections[i].results[0].score
+                        det.confidence = det2d_depth_i.results[0].score
                         det.pose.pose.position.x = cood[0]
                         det.pose.pose.position.y = cood[1]
                         det.pose.pose.position.z = cood[2]
@@ -134,6 +89,16 @@ class obj_dist:
                         detections.detections.append(det)
+                        det2d_rgb_i_xmin= int(det2d_rgb_i.bbox.center.x - det2d_rgb_i.bbox.size_x/2)
+                        det2d_rgb_i_ymax= int(det2d_rgb_i.bbox.center.y + det2d_rgb_i.bbox.size_y/2)
+                        cv2.putText(cv_image,
+                                    'z={}m/id={}'.format(round(cood[2], 2), i),
+                                    (int(det2d_rgb_i_xmin+(det2d_rgb_i.bbox.size_x/2)-50), det2d_rgb_i_ymax - 100),
+                                    font,
+                                    font_scale,
+                                    font_color,
+                                    line_type)
                 else:
                     self.set_model_coordinates((-1, -1, -1), i)
@@ -143,38 +108,28 @@ class obj_dist:
         self.marker_pub.publish(self.marker_array)
         self.detections_pub.publish(detections)
-        """ from previous subscription to bounding_box, missing confidence, replaced with callback2
-        def get_human_distance(self, cv_depth, cv_image, box, person_id):
-            roi_depth = cv_depth[box.ymin:box.ymax, box.xmin:box.xmax]
-            # filtered_depth, _size = filter_background(roi_depth)
-            filtered_depth, _size = dynamic_background(roi_depth)
-            if _size:
-                self.moving_average[person_id].pop()
-                self.moving_average[person_id].insert(0, filtered_depth.sum() / _size / 1000.0)
-                current_avg = sum(self.moving_average[person_id]) / size_of_moving_avg
-                x_meter = get_x_in_meters(box.xmin, box.xmax, current_avg)
-                cv2.putText(cv_image, '{} meters / Person {}'.format(round(current_avg, 2), person_id),
-                            (box.xmin, box.ymax - 100),
-                            font,
-                            font_scale,
-                            font_color,
-                            line_type)
-                self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image))
-                # TODO: Add dynamic find floor
-                return x_meter, 0.0, current_avg
-            else:
-                return -1, -1, -1
-        """
+    def convert_det2d_from_rgb_to_depth(self, det2d_i, rgb_w, rgb_h, depth_w, depth_h):
+        kx = depth_w/float(rgb_w)
+        ky = depth_h/float(rgb_h)
+        det2d_depth_i = copy.deepcopy(det2d_i)
+        det2d_depth_i.bbox.size_x *= kx
+        det2d_depth_i.bbox.size_y *= ky
+        det2d_depth_i.bbox.center.x *= kx
+        det2d_depth_i.bbox.center.y *= ky
+        return det2d_depth_i
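Because the commit assumes the RGB and depth images share the same field of view, mapping a detection between them reduces to the pure resolution rescale above. Worked example with illustrative sizes: a 1280x720 RGB detection centered at (640, 360) with size 200x400, carried to an 848x480 depth image, uses kx = 848/1280 = 0.6625 and ky = 480/720 = 0.667, giving center (424, 240) and size (132.5, 266.7). For a non-aligned depth stream with a different FOV this uniform scaling would not hold, which is what the commit message flags.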
-    def get_human_distance2(self, cv_depth, cv_image, det, person_id):
+    def get_human_distance(self, cv_depth, cv_image, det, person_id, depth_width, info):
         ymin = int(det.bbox.center.y - det.bbox.size_y/2)
         ymax = int(det.bbox.center.y + det.bbox.size_y/2)
         xmin = int(det.bbox.center.x - det.bbox.size_x/2)
         xmax = int(det.bbox.center.x + det.bbox.size_x/2)
         roi_depth = cv_depth[ymin:ymax, xmin:xmax]
+        self.roi_image_pub.publish(self.bridge.cv2_to_imgmsg(roi_depth))
         # filtered_depth, _size = filter_background(roi_depth)
         filtered_depth, _size = dynamic_background(roi_depth)
         if _size:
@@ -183,14 +138,7 @@ class obj_dist:
             current_avg = sum(self.moving_average[person_id]) / size_of_moving_avg
-            x_meter = get_x_in_meters(xmin, xmax, current_avg)
-            cv2.putText(cv_image, 'z={}m/id={}'.format(round(current_avg, 2), person_id),
-                        (int(xmin+(det.bbox.size_x/2)-50), ymax - 100),
-                        font,
-                        font_scale,
-                        font_color,
-                        line_type)
-            #self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image))
+            x_meter = get_x_in_meters(xmin, xmax, current_avg, info.K[0], depth_width)
             # TODO: Add dynamic find floor
             return x_meter, 0.0, current_avg
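The distance estimate above crops the detection out of the depth image, masks the background, and averages what survives, in millimetres, into a per-person moving average in metres. A condensed standalone sketch of that pipeline, assuming a uint16 depth image in millimetres and a fixed cutoff where the repository's dynamic_background() picks one from a histogram:

    import numpy as np

    def estimate_z(cv_depth, xmin, xmax, ymin, ymax, history):
        roi = cv_depth[ymin:ymax, xmin:xmax]
        valid = roi[(roi > 100) & (roi < 8000)]   # drop near noise and far background
        if valid.size == 0:
            return -1.0
        history.pop()                             # fixed-length history, newest first
        history.insert(0, valid.sum() / float(valid.size) / 1000.0)  # mm -> m
        return sum(history) / len(history)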
@@ -204,7 +152,6 @@ class obj_dist:
             self.marker_array.markers[counter].lifetime = rospy.Duration()
             self.marker_array.markers[counter].id = counter
             self.marker_array.markers[counter].type = Marker.CYLINDER
-            #self.marker_array.markers[counter].mesh_resource = package
     def set_model_coordinates(self, cood, person_id):
         self.marker_array.markers[person_id].header.stamp = rospy.Time.now()
@@ -219,7 +166,6 @@ class obj_dist:
             self.marker_array.markers[person_id].color.b = 1.0
             self.marker_array.markers[person_id].color.a = 0.5
-            # TODO: Change the scale of the DAE model so these numbers make more sense
             self.marker_array.markers[person_id].scale.x = 0.5
             self.marker_array.markers[person_id].scale.y = 0.5
             self.marker_array.markers[person_id].scale.z = 1.0
@@ -233,7 +179,6 @@ class obj_dist:
             self.marker_array.markers[person_id].pose.position.y = cood[1]
             self.marker_array.markers[person_id].pose.position.z = cood[2]

 def main(args):
     rospy.init_node('obj_dist', anonymous=True)
-    #print("object_to_distance.py is running")
@@ -8,6 +8,7 @@ _bin = [i * _diff for i in range(0, 11)]
 # Really basic way of separating foreground and background.
 def filter_background(roi, max_depth=8000):
     # Anything further than 8000mm, we consider it as background
+    # Anything less than 100mm is consider noise
     ret_val = np.ma.masked_greater(roi, max_depth)
@@ -24,18 +25,20 @@ def dynamic_background(roi):
     # Anything less than 100mm is sure noise
     # roi = np.ma.masked_less(roi, 100)
     roi_1d = roi.flatten()
     hist, bins = np.histogram(roi_1d, bins=_bin, density=True)
     max_bin = hist.argmax() + 1
-    # plt.hist(roi_1d, bins=_bin, density=True)
-    # plt.title('Hello')
-    # plt.show()
+    #plt.hist(roi_1d, bins=_bin, density=True)
+    #plt.title('depth histogram')
+    #plt.show()
     return filter_background(roi, max_bin * _diff)
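The cutoff chosen above is the upper edge of the densest depth bin: the ROI is histogrammed into ten bins, hist.argmax() + 1 indexes that edge, and everything beyond it is masked as background. A tiny numeric demo (_diff is defined outside this hunk; 800 mm is assumed here so the bins span 0-8000 mm):

    import numpy as np

    _diff = 800
    _bin = [i * _diff for i in range(0, 11)]               # [0, 800, ..., 8000]

    roi = np.array([1700, 1750, 1800, 1850, 7200, 7300])   # person ~1.8 m, wall ~7 m
    hist, _ = np.histogram(roi, bins=_bin, density=True)
    cutoff = (hist.argmax() + 1) * _diff
    print(cutoff)                                          # 2400: wall pixels get masked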
-def get_x_in_meters(xmin, xmax, z_i):
-    # Tune z_c to get better value lol.
-    # 500 is literally randomly chosen lol
-    z_c = 500
-    ret_val = (z_i * (xmax + xmin - 600.0)) / (2 * z_c)
+def get_x_in_meters(xmin, xmax, z_i, fx, width):
+    #z_c = 696
+    ret_val = (z_i * (xmax + xmin - width)) / (2 * fx)
     return ret_val
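The new get_x_in_meters is a pinhole-camera deprojection rather than the old magic numbers (image width hard-coded as 600.0, focal length guessed as z_c = 500). From the pinhole model u = fx * X / Z + cx, taking u as the box centre (xmin + xmax) / 2 and approximating the principal point as cx = width / 2:

    X = (u - cx) * Z / fx = Z * (xmax + xmin - width) / (2 * fx)

which is exactly the returned expression, with fx = K[0] taken from the depth CameraInfo. Numeric check: fx = 615, width = 848, box centred at u = 524, Z = 2.0 m gives X = 2.0 * (1048 - 848) / (2 * 615), about 0.33 m left of the optical axis. A slightly more exact variant would use the calibrated principal point K[2] instead of width / 2.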
@@ -3,13 +3,14 @@
 <launch>
   <arg name="camera_in_topic" default="/camera/color/image_raw"/>
+  <arg name="info_in_topic" default="/camera/color/camera_info"/>
   <arg name="node_name" default="human_tensorflow_detection_2d"/>
   <arg name="image_out_topic" default="/$(arg node_name)/image"/>
   <arg name="bbox_out_topic" default="/$(arg node_name)/bounding_box"/>
   <arg name="virtual_env_path" default="$(env HOME)/iri-lab/virtualenv/raadical_venv"/>
   <arg name="output" default="screen"/>
-  <arg name="required" default="true"/>
+  <arg name="required" default="false"/>
   <node pkg="human_detection"
         name="$(arg node_name)"
@@ -18,6 +19,7 @@
         output="$(arg output)"
         args="--venv $(arg virtual_env_path)">
     <remap from="~camera/color/image_raw" to="$(arg camera_in_topic)"/>
+    <remap from="~camera/color/camera_info" to="$(arg info_in_topic)"/>
     <remap from="~detection_image" to="$(arg image_out_topic)"/>
     <remap from="~bounding_box" to="$(arg bbox_out_topic)"/>
   </node>
@@ -2,7 +2,9 @@
 <launch>
   <arg name="camera_in_topic" default="/camera/color/image_raw"/>
+  <arg name="info_in_topic" default="/camera/color/camera_info"/>
   <arg name="depth_image_in_topic" default="/camera/aligned_depth_to_color/image_raw"/>
+  <arg name="depth_info_in_topic" default="/camera/aligned_depth_to_color/camera_info"/>
   <arg name="detected_persons_out" default="/human_tensorflow_detection_3d/detected_persons"/>
   <arg name="virtual_env_path" default="$(env HOME)/iri-lab/virtualenv/raadical_venv"/>
@@ -11,6 +13,7 @@
   <include file="$(find human_detection)/launch/node.launch">
     <arg name="camera_in_topic" value="$(arg camera_in_topic)"/>
+    <arg name="info_in_topic" value="$(arg info_in_topic)"/>
     <arg name="virtual_env_path" value="$(arg virtual_env_path)"/>
     <arg name="output" value="$(arg output)"/>
   </include>
@@ -18,6 +21,7 @@
   <include file="$(find get_obj_dist)/launch/node.launch">
     <arg name="camera_in_topic" value="$(arg camera_in_topic)"/>
     <arg name="depth_image_in_topic" value="$(arg depth_image_in_topic)"/>
+    <arg name="depth_info_in_topic" value="$(arg depth_info_in_topic)"/>
     <arg name="detected_persons_out" value="$(arg detected_persons_out)"/>
     <arg name="output" value="$(arg output)"/>
   </include>
@@ -12,5 +12,5 @@ font_color = (255, 255, 255)
 line_type = 2
 run_time_list = []
 prediction_level_list = []
-screen_width = 600
-screen_height = 480
+#screen_width = 640
+#screen_height = 480
@@ -9,6 +9,9 @@ import os
 import rospy
 import numpy as np
 import tensorflow as tf
+import message_filters
 from std_srvs.srv import Trigger, TriggerResponse
 sys.path.append(os.getcwd())
@@ -17,7 +20,7 @@ sys.path.append(os.getcwd())
 # from analysis_tools.data_grapher import *
 from analysis_tools.define import *
 from human_detection.msg import bounding_box, box_list
-from sensor_msgs.msg import Image
+from sensor_msgs.msg import Image, CameraInfo
 from cv_bridge import CvBridge, CvBridgeError
 from vision_msgs.msg import Detection2DArray, Detection2D, ObjectHypothesisWithPose
@@ -76,13 +79,23 @@ class human_detector:
         rospy.loginfo(rospy.get_name() + ": " + text)
         self.bridge = CvBridge()
-        self.image_sub = rospy.Subscriber("~camera/color/image_raw", Image, self.callback, queue_size=1, buff_size=6291456)
+        #self.image_sub = rospy.Subscriber("~camera/color/image_raw", Image, self.callback, queue_size=1, buff_size=6291456)
+        self.image_sub = message_filters.Subscriber("~camera/color/image_raw", Image)
+        self.info_sub = message_filters.Subscriber("~camera/color/camera_info", CameraInfo)
+        ts = message_filters.ApproximateTimeSynchronizer([self.image_sub, self.info_sub],queue_size=10,slop=1)
+        ts.registerCallback(self.callback)
         self.image_pub = rospy.Publisher("~detection_image", Image, queue_size=1)
         self.bbx_pub = rospy.Publisher("~bounding_box", box_list, queue_size=1)
         self.detection2d_array_pub = rospy.Publisher("~detection2d_array", Detection2DArray, queue_size=1)

-    def callback(self, data):
+    #def callback(self, data):
+    def callback(self, data, info):
+        screen_width = data.width
+        screen_height = data.height
         t0 = time.time()
         try:
             cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
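With the image and CameraInfo synchronized, the detector callback now reads the frame size from the message itself (replacing the screen_width/screen_height constants commented out in define.py above), and the calibration is available in the callback should it be needed. A sketch of what the new signature exposes (field usage illustrative, per sensor_msgs/CameraInfo):

    def callback(self, data, info):
        screen_width, screen_height = data.width, data.height
        fx, fy = info.K[0], info.K[4]    # focal lengths in pixels
        cx, cy = info.K[2], info.K[5]    # principal point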