# python – Finding a point in 3D point cloud, given a pixel in 2D image

I have a set of .laz files recorded from a moving vehicle with an MMS (mobile mapping system), with points all located in a global coordinate system. Along with these point cloud files, RGB images are recorded at set intervals. The location, heading, and orientation of the vehicle is stored, for each image taken, so origin of the images are known.

I then perform object detection on the RGB images, and try to locate the detected objects within the point cloud. The approach being to project a line through the point cloud, and return the first points that intersect the line.

I have found a number of projects that theoretically offer methods to perform this action (often referred to as “unproject()”, but havent had any success with any yet. This is my latest effort with code from the Pylot library (https:/ /github.com/erdos-project/pylot):

The problem seems to be in the call to ‘get_pixel_location()’. I tried removing the call to fwd_points, but the result is still the same “Location(x=0.4049829018075233, y=-0.04347826086956519, z=inf)” which is just the transposed pixel matrix

Does this method in reality demand a point cloud that treats the origin of each axis, as the location of our lidar? IE: the lidar sensor being in X = 0, Y = 0, Z = 0

``````Utils.py
class Rotation(object):
"""Used to represent the rotation of an actor or obstacle.

Rotations are applied in the order: Roll (X), Pitch (Y), Yaw (Z).
A 90-degree "Roll" maps the positive Z-axis to the positive Y-axis.
A 90-degree "Pitch" maps the positive X-axis to the positive Z-axis.
A 90-degree "Yaw" maps the positive X-axis to the positive Y-axis.

"""
def __init__(self, pitch=0, yaw=0, roll=0):
self.pitch = pitch
self.yaw = yaw
self.roll = roll

def as_numpy_array(self):
"""Retrieves the Rotation as a numpy array."""
return np.array([self.pitch, self.yaw, self.roll])

def __repr__(self):
return self.__str__()

def __str__(self):
return 'Rotation(pitch={}, yaw={}, roll={})'.format(
self.pitch, self.yaw, self.roll)

class Vector3D(object):
"""Represents a 3D vector and provides useful helper functions.

Args:
x: The value of the first axis.
y: The value of the second axis.
z: The value of the third axis.

Attributes:
x: The value of the first axis.
y: The value of the second axis.
z: The value of the third axis.
"""
def __init__(self, x=0, y=0, z=0):
self.x, self.y, self.z = float(x), float(y), float(z)

def as_numpy_array(self):
"""Retrieves the 3D vector as a numpy array."""
return np.array([self.x, self.y, self.z])

def as_numpy_array_2D(self):
"""Drops the 3rd dimension."""
return np.array([self.x, self.y])

def l1_distance(self, other):
"""Calculates the L1 distance between the point and another point.

Args:
other (:py:class:`~.Vector3D`): The other vector used to
calculate the L1 distance to.

Returns:
:obj:`float`: The L1 distance between the two points.
"""
return abs(self.x - other.x) + abs(self.y - other.y) + abs(self.z -
other.z)

def l2_distance(self, other):
"""Calculates the L2 distance between the point and another point.

Args:
other (:py:class:`~.Vector3D`): The other vector used to
calculate the L2 distance to.

Returns:
:obj:`float`: The L2 distance between the two points.
"""
vec = np.array([self.x - other.x, self.y - other.y, self.z - other.z])
return np.linalg.norm(vec)

def magnitude(self):
"""Returns the magnitude of the 3D vector."""
return np.linalg.norm(self.as_numpy_array())

def to_camera_view(self, extrinsic_matrix, intrinsic_matrix):
"""Converts the given 3D vector to the view of the camera using
the extrinsic and the intrinsic matrix.

Args:
extrinsic_matrix: The extrinsic matrix of the camera.
intrinsic_matrix: The intrinsic matrix of the camera.

Returns:
:py:class:`.Vector3D`: An instance with the coordinates converted
to the camera view.
"""
position_vector = np.array([[self.x], [self.y], [self.z], [1.0]])

# Transform the points to the camera in 3D.
transformed_3D_pos = np.dot(np.linalg.inv(extrinsic_matrix),
position_vector)

# Transform the points to 2D.
position_2D = np.dot(intrinsic_matrix, transformed_3D_pos[:3])

# Normalize the 2D points.
location_2D = type(self)(float(position_2D / position_2D),
float(position_2D / position_2D),
float(position_2D))
return location_2D

def rotate(self, angle):
"""Rotate the vector by a given angle.

Args:
angle (float): The angle to rotate the Vector by (in degrees).

Returns:
:py:class:`.Vector3D`: An instance with the coordinates of the
rotated vector.
"""
x_ = math.cos(math.radians(angle)) * self.x - math.sin(
y_ = math.sin(math.radians(angle)) * self.x - math.cos(
return type(self)(x_, y_, self.z)

"""Adds the two vectors together and returns the result."""
return type(self)(x=self.x + other.x,
y=self.y + other.y,
z=self.z + other.z)

def __sub__(self, other):
"""Subtracts the other vector from self and returns the result."""
return type(self)(x=self.x - other.x,
y=self.y - other.y,
z=self.z - other.z)

def __repr__(self):
return self.__str__()

def __str__(self):
return 'Vector3D(x={}, y={}, z={})'.format(self.x, self.y, self.z)

class Vector2D(object):
"""Represents a 2D vector and provides helper functions."""
def __init__(self, x, y):
self.x = x
self.y = y

def as_numpy_array(self):
"""Retrieves the 2D vector as a numpy array."""
return np.array([self.x, self.y])

def get_angle(self, other):
"""Computes the angle between the vector and another vector."""
angle = math.atan2(self.y, self.x) - math.atan2(other.y, other.x)
if angle > math.pi:
angle -= 2 * math.pi
elif angle < -math.pi:
angle += 2 * math.pi
return angle

def l1_distance(self, other):
"""Calculates the L1 distance between the point and another point.

Args:
other (:py:class:`~.Vector2D`): The other vector used to
calculate the L1 distance to.

Returns:
:obj:`float`: The L1 distance between the two points.
"""
return abs(self.x - other.x) + abs(self.y - other.y)

def l2_distance(self, other):
"""Calculates the L2 distance between the point and another point.

Args:
other (:py:class:`~.Vector2D`): The other vector used to
calculate the L2 distance to.

Returns:
:obj:`float`: The L2 distance between the two points.
"""
vec = np.array([self.x - other.x, self.y - other.y])
return np.linalg.norm(vec)

def magnitude(self):
"""Returns the magnitude of the 2D vector."""
return np.linalg.norm(self.as_numpy_array())

"""Adds the two vectors together and returns the result. """
return type(self)(x=self.x + other.x, y=self.y + other.y)

def __sub__(self, other):
"""Subtracts the other vector from self and returns the result. """
return type(self)(x=self.x - other.x, y=self.y - other.y)

def __repr__(self):
return self.__str__()

def __str__(self):
return 'Vector2D(x={}, y={})'.format(self.x, self.y)

class Location(Vector3D):
"""Stores a 3D location, and provides useful helper methods.

Args:
x: The value of the x-axis.
y: The value of the y-axis.
z: The value of the z-axis.

Attributes:
x: The value of the x-axis.
y: The value of the y-axis.
z: The value of the z-axis.
"""
def __init__(self, x=0, y=0, z=0):
super(Location, self).__init__(x, y, z)

@classmethod
def from_gps(cls, latitude, longitude, altitude):
"""Creates Location from GPS (latitude, longitude, altitude).

This is the inverse of the _location_to_gps method found in
https://github.com/carla-simulator/scenario_runner/blob/master/srunner/tools/route_manipulation.py
"""
# The following reference values are applicable for towns 1 through 7,
# and are taken from the corresponding CARLA OpenDrive map files.
LAT_REF = 49.0
LON_REF = 8.0

scale = math.cos(LAT_REF * math.pi / 180.0)
basex = scale * math.pi * EARTH_RADIUS_EQUA / 180.0 * LON_REF
basey = scale * EARTH_RADIUS_EQUA * math.log(
math.tan((90.0 + LAT_REF) * math.pi / 360.0))

x = scale * math.pi * EARTH_RADIUS_EQUA / 180.0 * longitude - basex
y = scale * EARTH_RADIUS_EQUA * math.log(
math.tan((90.0 + latitude) * math.pi / 360.0)) - basey

# This wasn't in the original carla method, but seems to be necessary.
y *= -1

return cls(x, y, altitude)

def distance(self, other):
"""Calculates the Euclidean distance between the given point and the
other point.

Args:
other (:py:class:`~.Location`): The other location used to
calculate the Euclidean distance to.

Returns:
:obj:`float`: The Euclidean distance between the two points.
"""
return (self - other).magnitude()

def as_vector_2D(self):
"""Transforms the Location into a Vector2D.

Note:
The method just drops the z-axis.

Returns:
:py:class:`.Vector2D`: A 2D vector.
"""
return Vector2D(self.x, self.y)

def __repr__(self):
return self.__str__()

def __str__(self):
return 'Location(x={}, y={}, z={})'.format(self.x, self.y, self.z)

class Transform(object):
"""A class that stores the location and rotation of an obstacle.

It can be created from a carla.Transform, defines helper functions needed
in Pylot, and makes the carla.Transform serializable.

A transform object is instantiated with either a location and a rotation,
or using a matrix.

Args:
location (:py:class:`.Location`, optional): The location of the object
represented by the transform.
rotation (:py:class:`.Rotation`, optional): The rotation  (in degrees)
of the object represented by the transform.
matrix: The transformation matrix used to convert points in the 3D
coordinate space with respect to the location and rotation of the
given object.

Attributes:
location (:py:class:`.Location`): The location of the object
represented by the transform.
rotation (:py:class:`.Rotation`): The rotation (in degrees) of the
object represented by the transform.
forward_vector (:py:class:`.Vector3D`): The forward vector of the
object represented by the transform.
matrix: The transformation matrix used to convert points in the 3D
coordinate space with respect to the location and rotation of the
given object.
"""
def __init__(self, location=None, rotation=None, matrix=None):
if matrix is not None:
self.matrix = matrix
self.location = Location(matrix[0, 3], matrix[1, 3], matrix[2, 3])

self.forward_vector = Vector3D(self.matrix[0, 0],
self.matrix[1, 0], self.matrix[2,
0])
pitch_r = math.asin(self.forward_vector.z)
yaw_r = math.acos(
np.clip(self.forward_vector.x / math.cos(pitch_r), -1, 1))
roll_r = math.asin(matrix[2, 1] / (-1 * math.cos(pitch_r)))
self.rotation = Rotation(math.degrees(pitch_r),
math.degrees(yaw_r), math.degrees(roll_r))
else:
self.location, self.rotation = location, rotation
self.matrix = Transform._create_matrix(self.location,
self.rotation)

self.forward_vector = Vector3D(self.matrix[0, 0],
self.matrix[1, 0], self.matrix[2,
0])

@staticmethod
def _create_matrix(location, rotation):
"""Creates a transformation matrix to convert points in the 3D world
coordinate space with respect to the object.

Use the transform_points function to transpose a given set of points
with respect to the object.

Args:
location (:py:class:`.Location`): The location of the object
represented by the transform.
rotation (:py:class:`.Rotation`): The rotation of the object
represented by the transform.

Returns:
A 4x4 numpy matrix which represents the transformation matrix.
"""
matrix = np.identity(4)
matrix[0, 3] = location.x
matrix[1, 3] = location.y
matrix[2, 3] = location.z
matrix[0, 0] = (cp * cy)
matrix[0, 1] = (cy * sp * sr - sy * cr)
matrix[0, 2] = -1 * (cy * sp * cr + sy * sr)
matrix[1, 0] = (sy * cp)
matrix[1, 1] = (sy * sp * sr + cy * cr)
matrix[1, 2] = (cy * sr - sy * sp * cr)
matrix[2, 0] = (sp)
matrix[2, 1] = -1 * (cp * sr)
matrix[2, 2] = (cp * cr)
return matrix

def __transform(self, points, matrix):
"""Internal function to transform the points according to the
given matrix. This function either converts the points from
coordinate space relative to the transform to the world coordinate
space (using self.matrix), or from world coordinate space to the
space relative to the transform (using inv(self.matrix))

Args:
points: An n by 3 numpy array, where each row is the
(x, y, z) coordinates of a point.
matrix: The matrix of the transformation to apply.

Returns:
An n by 3 numpy array of transformed points.
"""
# Needed format: [[X0,..Xn],[Y0,..Yn],[Z0,..Zn]].
# So let's transpose the point matrix.
points = points.transpose()

points = np.append(points, np.ones((1, points.shape)), axis=0)

# Point transformation (depends on the given matrix)
points = np.dot(matrix, points)

# Get all but the last row in array form.
points = np.asarray(points[0:3].transpose()).astype(np.float16)

return points

def transform_points(self, points):
"""Transforms the given set of points (specified in the coordinate
space of the current transform) to be in the world coordinate space.

For example, if the transform is at location (3, 0, 0) and the
location passed to the argument is (10, 0, 0), this function will
return (13, 0, 0) i.e. the location of the argument in the world
coordinate space.

Args:
points: A (number of points) by 3 numpy array, where each row is
the (x, y, z) coordinates of a point.

Returns:
An n by 3 numpy array of transformed points.
"""
return self.__transform(points, self.matrix)

def inverse_transform_points(self, points):
"""Transforms the given set of points (specified in world coordinate
space) to be relative to the given transform.

For example, if the transform is at location (3, 0, 0) and the location
passed to the argument is (10, 0, 0), this function will return
(7, 0, 0) i.e. the location of the argument relative to the given
transform.

Args:
points: A (number of points) by 3 numpy array, where each row is
the (x, y, z) coordinates of a point.

Returns:
An n by 3 numpy array of transformed points.
"""
return self.__transform(points, np.linalg.inv(self.matrix))

def transform_locations(self, locations):
"""Transforms the given set of locations (specified in the coordinate
space of the current transform) to be in the world coordinate space.

This method has the same functionality as transform_points, and
is provided for convenience; when dealing with a large number of
points, it is advised to use transform_points to avoid the slow
conversion between a numpy array and list of locations.

Args:
points (list(:py:class:`.Location`)): List of locations.

Returns:
list(:py:class:`.Location`): List of transformed points.
"""
points = np.array([loc.as_numpy_array() for loc in locations])
transformed_points = self.__transform(points, self.matrix)
return [Location(x, y, z) for x, y, z in transformed_points]

def inverse_transform_locations(self, locations):
"""Transforms the given set of locations (specified in world coordinate
space) to be relative to the given transform.

This method has the same functionality as inverse_transform_points,
and is provided for convenience; when dealing with a large number of
points, it is advised to use inverse_transform_points to avoid the slow
conversion between a numpy array and list of locations.

Args:
points (list(:py:class:`.Location`)): List of locations.

Returns:
list(:py:class:`.Location`): List of transformed points.
"""

points = np.array([loc.as_numpy_array() for loc in locations])
transformed_points = self.__transform(points,
np.linalg.inv(self.matrix))
return [Location(x, y, z) for x, y, z in transformed_points]

def get_angle_and_magnitude(self, target_loc):
"""Computes relative angle between the transform and a target location.

Args:
target_loc (:py:class:`.Location`): Location of the target.

Returns:
Angle in radians and vector magnitude.
"""
target_vec = target_loc.as_vector_2D() - self.location.as_vector_2D()
magnitude = target_vec.magnitude()
if magnitude > 0:
forward_vector = Vector2D(
angle = target_vec.get_angle(forward_vector)
else:
angle = 0
return angle, magnitude

"""Checks if a location is within a distance.

Args:
dst_loc (:py:class:`.Location`): Location to compute distance to.
max_distance (:obj:`float`): Maximum allowed distance.

Returns:
bool: True if other location is within max_distance.
"""
d_angle, norm_dst = self.get_angle_and_magnitude(dst_loc)
# Return if the vector is too small.
if norm_dst < 0.001:
return True
# Return if the vector is greater than the distance.
if norm_dst > max_distance:
return False
return d_angle < 90.0

def inverse_transform(self):
"""Returns the inverse transform of this transform."""
new_matrix = np.linalg.inv(self.matrix)
return Transform(matrix=new_matrix)

def __mul__(self, other):
new_matrix = np.dot(self.matrix, other.matrix)
return Transform(matrix=new_matrix)

``````
``````point_cloud.py

class PointCloud(object):
"""Class that stores points clouds.
Args:
points: A (number of points) by 3 numpy array, where each row is
the (x, y, z) coordinates of a point.
transform (:py:class:`~pylot.utils.Transform`): Transform of the
point cloud, relative to the ego-vehicle.
Attributes:
points: A (number of points) by 3 numpy array, where each row is
the (x, y, z) coordinates of a point.
transform (:py:class:`~pylot.utils.Transform`): Transform of the
point cloud, relative to the ego-vehicle.
"""
def __init__(self, points, lidar_setup: LidarSetup):
# Transform point cloud from lidar to camera coordinates.
self._lidar_setup = lidar_setup
self.global_points = copy.deepcopy(points)
self.points = self._to_camera_coordinates(points)
self.transform = lidar_setup.get_transform()

def _to_camera_coordinates(self, points):
# Converts points in lidar coordinates to points in camera coordinates.
# See CameraSetup in pylot/drivers/sensor_setup.py for coordinate
# axis orientations.
#
# The Velodyne coordinate space is defined as:
# +x into the screen, +y to the left, and +z up.
#
# Note: We're using the ROS velodyne driver coordinate
# system, not the one specified in the Velodyne manual.
# Link to the ROS coordinate system:
# https://www.ros.org/reps/rep-0103.html#axis-orientation
if self._lidar_setup.lidar_type == 'sensor.lidar.ray_cast':
if self._lidar_setup.legacy:
# The legacy CARLA Lidar coordinate space is defined as:
# +x to right, +y out of the screen, +z down.
to_camera_transform = Transform(matrix=np.array(
[[1, 0, 0, 0], [0, 0, 1, 0], [0, -1, 0, 0], [0, 0, 0, 1]]))
else:
# The latest coordiante space is the unreal space.
to_camera_transform = Transform(matrix=np.array(
[[0, 1, 0, 0], [0, 0, -1, 0], [1, 0, 0, 0], [0, 0, 0, 1]]))
elif self._lidar_setup.lidar_type == 'velodyne':
to_camera_transform = Transform(matrix=np.array(
[[0, -1, 0, 0], [0, 0, -1, 0], [1, 0, 0, 0], [0, 0, 0, 1]]))
else:
raise ValueError('Unexpected lidar type {}'.format(
self._lidar_setup.lidar_type))
transformed_points = to_camera_transform.transform_points(points)
return transformed_points

def get_pixel_location(self, pixel, camera_setup: CameraSetup):
""" Gets the 3D world location from pixel coordinates.
Args:
pixel (:py:class:`~pylot.utils.Vector2D`): Pixel coordinates.
camera_setup (:py:class:`~pylot.drivers.sensor_setup.CameraSetup`):
The setup of the camera with its transform in the world frame
of reference.
Returns:
:py:class:`~pylot.utils.Location`: The 3D world location, or None
if all the point cloud points are behind.
"""
# Select only points that are in front.
# Setting the threshold to 0.1 because super close points cause
# floating point errors.
fwd_points = self.points[np.where(self.points[:, 2] > 0.1)]
if len(fwd_points) == 0:
return None
intrinsic_mat = camera_setup.get_intrinsic_matrix()
# Project our 2D pixel location into 3D space, onto the z=1 plane.
p3d = np.dot(inv(intrinsic_mat), np.array([[pixel.x], [pixel.y],
[1.0]]))

if self._lidar_setup.lidar_type == 'sensor.lidar.ray_cast':
location = PointCloud.get_closest_point_in_point_cloud(
fwd_points, Vector2D(p3d, p3d), normalized=True)
# Use the depth from the retrieved location.
p3d *= np.array([location.z])
p3d = p3d.transpose()
# Convert from camera to unreal coordinates if the lidar type is
# sensor.lidar.ray_cast
to_world_transform = camera_setup.get_unreal_transform()
camera_point_cloud = to_world_transform.transform_points(p3d)
pixel_location = Location(camera_point_cloud,
camera_point_cloud,
camera_point_cloud)
elif self._lidar_setup.lidar_type == 'velodyne':
location = PointCloud.get_closest_point_in_point_cloud(
self.fwd_points, Vector2D(p3d, p3d), normalized=False)
# Use the depth from the retrieved location.

p3d = location.z
p3d = p3d.transpose()

pixel_location = Location(p3d[0, 0], p3d[0, 1], p3d[0, 2])
return pixel_location

@staticmethod
def get_closest_point_in_point_cloud(fwd_points,
pixel,
normalized: bool = False):
"""Finds the closest point in the point cloud to the given point.
Args:
pixel (:py:class:`~pylot.utils.Vector2D`): Camera coordinates.
Returns:
:py:class:`~pylot.utils.Location`: Closest point cloud point.
"""
# Select x and y.
pc_xy = fwd_points[:, 0:2]

# Create an array from the x, y coordinates of the point.
xy = np.array([pixel.x, pixel.y]).transpose()

# Compute distance
if normalized:
# Select z
pc_z = fwd_points[:, 2]
# Divize x, y by z
normalized_pc = pc_xy / pc_z[:, None]
dist = np.sum((normalized_pc - xy)**2, axis=1)
else:
dist = np.sum((pc_xy - xy)**2, axis=1)

# Select index of the closest point.
closest_index = np.argmin(dist)

# Return the closest point.
return Location(fwd_points[closest_index],
fwd_points[closest_index],
fwd_points[closest_index])

``````
``````notebook.ipynb

import utils
import point_cloud
import laspy

pointcloud.xyz.shape # Returns '(13072210, 3)' for the testing file

pointcloud.xyz # Returns 'array([2.55824278e+05, 6.65766286e+06, 1.95586000e+02])',
# which corresponds with the coordinate system used (ETRS89 / UTM zone 33N)

pixels = utils.Vector2D(1438,979) # Pixel location from the image where an object is detected. Image resolution is 2048, 2048

location = utils.Location(x=255792.510, y=6657642.340, z=195.8919) # Data from mobile mapping system

rotation = utils.Rotation(pitch=0.303, yaw=59.16, roll= 1.032) # Data from mobile mapping system

transform = utils.Transform(location=location, rotation= rotation)

lidarsetup = point_cloud.LidarSetup(name="lidar", lidar_type="velodyne", transform=transform)

cloud = point_cloud.PointCloud(pointcloud.xyz, lidarsetup)

# Location and rotation of camera for extrinsic camera matrix, my assumption here is that this can be equal to the values from the point cloud.
# Potentially Yaw should be 0 for the point cloud, and only have the heading value for the camera?
rotation2 = utils.Rotation(pitch=0.303, yaw=59.16, roll= 1.032)
location2 = utils.Location(x=255792.510, y=6657642.340, z=195.8919)
transform2 = utils.Transform(location=location2, rotation = rotation2)

# Inputing variables for the intrinsic camera matrix, FOV and resolution.
camera_setup = point_cloud.CameraSetup(name="cam",camera_type="sensor.camera.rgb", width=2048, height=2048, transform=transform2, fov=90)

# This line should ideally give the answer, but returns: 'Location(x=0.4049829018075233, y=-0.04347826086956519, z=inf)', which is the transposed pixel matrice.
cloud.get_pixel_location(pixels, camera_setup=camera_setup)

``````

Any help or feedback would be greatly appreciated