2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.24.13"
version = "0.24.16"

# Description
title = "Isaac Lab framework for Robot Learning"
24 changes: 18 additions & 6 deletions source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,24 +1,36 @@
Changelog
---------

0.22.15 (2024-09-20)
0.24.16 (2024-10-03)
~~~~~~~~~~~~~~~~~~~~

Changed
^^^^^^^

* Renamed the observation function :meth:`grab_images` to :meth:`image` to follow the convention of noun-based naming.
* Renamed the function :meth:`convert_perspective_depth_to_orthogonal_depth` to the shorter name
:meth:`omni.isaac.lab.utils.math.orthogonalize_perspective_depth`.


0.24.15 (2024-09-20)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added :meth:`grab_images` to be able to use images for an observation term in manager based environments
* Added :meth:`grab_images` to be able to use images for an observation term in manager-based environments.


0.24.14 (2024-09-20)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added :meth:`convert_perspective_depth_to_orthogonal_depth`. :meth:`unproject_depth` assumes
that the input depth image is orthogonal. The new :meth:`convert_perspective_depth_to_orthogonal_depth`
can be used to convert a perspective depth image into an orthogonal depth image, so that the point cloud
can be unprojected correctly with :meth:`unproject_depth`.
* Added the method :meth:`convert_perspective_depth_to_orthogonal_depth` to convert perspective depth
images to orthogonal depth images. This is useful for :meth:`~omni.isaac.lab.utils.math.unproject_depth`,
since it expects orthogonal depth images as inputs.


0.24.13 (2024-09-08)
~~~~~~~~~~~~~~~~~~~~
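
As a concrete illustration of the rename noted in 0.24.16, the sketch below shows how an image observation term might be declared after the change. It is a minimal, hypothetical example: the import paths and config classes follow common Isaac Lab conventions and are not taken from this diff.

# Hypothetical sketch of a manager-based observation term using the renamed function.
from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
from omni.isaac.lab.managers import SceneEntityCfg
import omni.isaac.lab.envs.mdp as mdp

# Before 0.24.16 this would have referenced mdp.grab_images.
camera_rgb = ObsTerm(
    func=mdp.image,
    params={
        "sensor_cfg": SceneEntityCfg("tiled_camera"),
        "data_type": "rgb",
        "normalize": True,
    },
)
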
@@ -182,38 +182,52 @@ def body_incoming_wrench(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg) -> tor
return link_incoming_forces.view(env.num_envs, -1)


def grab_images(
def image(
env: ManagerBasedEnv,
sensor_cfg: SceneEntityCfg = SceneEntityCfg("tiled_camera"),
data_type: str = "rgb",
convert_perspective_to_orthogonal: bool = False,
normalize: bool = True,
) -> torch.Tensor:
"""Grab all of the latest images of a specific datatype produced by a specific camera.
"""Images of a specific datatype from the camera sensor.

If the flag :attr:`normalize` is True, post-processing of the images is performed based on their
data types:

- "rgb": Scales the image to (0, 1) and subtracts with the mean of the current image batch.
- "depth" or "distance_to_camera" or "distance_to_plane": Replaces infinity values with zero.

Args:
env: The environment the cameras are placed within.
sensor_cfg: The desired sensor to read from. Defaults to SceneEntityCfg("tiled_camera").
data_type: The data type to pull from the desired camera. Defaults to "rgb".
convert_perspective_to_orthogonal: Whether to convert perspective
depth images to orthogonal depth images. Defaults to False.
normalize: Set to True to normalize images. Defaults to True.
convert_perspective_to_orthogonal: Whether to orthogonalize perspective depth images.
This is used only when the data type is "distance_to_camera". Defaults to False.
normalize: Whether to normalize the images. This depends on the selected data type.
Defaults to True.

Returns:
The images produced at the last timestep
The images produced at the last time-step
"""
# extract the used quantities (to enable type-hinting)
sensor: TiledCamera | Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name]

# obtain the input image
images = sensor.data.output[data_type]

# depth image conversion
if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal:
images = math_utils.convert_perspective_depth_to_orthogonal_depth(images, sensor.data.intrinsic_matrices)
images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices)

# rgb/depth image normalization
if normalize:
if data_type == "rgb":
images = images / 255
images = images.float() / 255.0
mean_tensor = torch.mean(images, dim=(1, 2), keepdim=True)
images -= mean_tensor
elif "distance_to" in data_type or "depth" in data_type:
images[images == float("inf")] = 0

return images.clone()
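
As a standalone sketch of the "rgb" normalization branch above, with made-up tensor sizes (everything here is illustrative; only torch is required):

import torch

# Fake batch of RGB images from a tiled camera: (num_envs, height, width, channels).
rgb = torch.randint(0, 256, (4, 84, 84, 3), dtype=torch.uint8)

images = rgb.float() / 255.0                          # scale to (0, 1)
mean = torch.mean(images, dim=(1, 2), keepdim=True)   # per-image, per-channel mean
images = images - mean                                # zero-center each image
print(images.shape)  # torch.Size([4, 84, 84, 3])
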


212 changes: 109 additions & 103 deletions source/extensions/omni.isaac.lab/omni/isaac/lab/utils/math.py
@@ -987,115 +987,30 @@ def transform_points(


@torch.jit.script
def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
r"""Unproject depth image into a pointcloud. This method assumes that depth
is provided orthogonally relative to the image plane, as opposed to absolutely relative to the camera's
principal point (perspective depth). To unproject a perspective depth image, use
:meth:`convert_perspective_depth_to_orthogonal_depth` to convert
to an orthogonal depth image prior to calling this method. Otherwise, the
created point cloud will be distorted, especially around the edges.
def orthogonalize_perspective_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
"""Converts perspective depth image to orthogonal depth image.

This function converts depth images into points given the calibration matrix of the camera.

.. math::
p_{3D} = K^{-1} \times [u, v, 1]^T \times d

where :math:`p_{3D}` is the 3D point, :math:`d` is the depth value, :math:`u` and :math:`v` are
the pixel coordinates and :math:`K` is the intrinsic matrix.

If `depth` is a batch of depth images and `intrinsics` is a single intrinsic matrix, the same
calibration matrix is applied to all depth images in the batch.

The function assumes that the width and height are both greater than 1. This makes the function
deal with many possible shapes of depth images and intrinsics matrices.

Args:
depth: The depth measurement. Shape is (H, W) or or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: A tensor providing camera's calibration matrix. Shape is (3, 3) or (N, 3, 3).

Returns:
The 3D coordinates of points. Shape is (P, 3) or (N, P, 3).

Raises:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""
depth_batch = depth.clone()
intrinsics_batch = intrinsics.clone()
# check if inputs are batched
is_batched = depth_batch.dim() == 4 or (depth_batch.dim() == 3 and depth_batch.shape[-1] != 1)
# make sure inputs are batched
if depth_batch.dim() == 3 and depth_batch.shape[-1] == 1:
depth_batch = depth_batch.squeeze(dim=2) # (H, W, 1) -> (H, W)
if depth_batch.dim() == 2:
depth_batch = depth_batch[None] # (H, W) -> (1, H, W)
if depth_batch.dim() == 4 and depth_batch.shape[-1] == 1:
depth_batch = depth_batch.squeeze(dim=3) # (N, H, W, 1) -> (N, H, W)
if intrinsics_batch.dim() == 2:
intrinsics_batch = intrinsics_batch[None] # (3, 3) -> (1, 3, 3)
# check shape of inputs
if depth_batch.dim() != 3:
raise ValueError(f"Expected depth images to have dim = 2 or 3 or 4: got shape {depth.shape}")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3): got shape {intrinsics.shape}")

# get image height and width
im_height, im_width = depth_batch.shape[1:]
# create image points in homogeneous coordinates (3, H x W)
indices_u = torch.arange(im_width, device=depth.device, dtype=depth.dtype)
indices_v = torch.arange(im_height, device=depth.device, dtype=depth.dtype)
img_indices = torch.stack(torch.meshgrid([indices_u, indices_v], indexing="ij"), dim=0).reshape(2, -1)
pixels = torch.nn.functional.pad(img_indices, (0, 0, 0, 1), mode="constant", value=1.0)
pixels = pixels.unsqueeze(0) # (3, H x W) -> (1, 3, H x W)

# unproject points into 3D space
points = torch.matmul(torch.inverse(intrinsics_batch), pixels) # (N, 3, H x W)
points = points / points[:, -1, :].unsqueeze(1) # normalize by last coordinate
# flatten depth image (N, H, W) -> (N, H x W)
depth_batch = depth_batch.transpose_(1, 2).reshape(depth_batch.shape[0], -1).unsqueeze(2)
depth_batch = depth_batch.expand(-1, -1, 3)
# scale points by depth
points_xyz = points.transpose_(1, 2) * depth_batch # (N, H x W, 3)

# return points in same shape as input
if not is_batched:
points_xyz = points_xyz.squeeze(0)

return points_xyz


@torch.jit.script
def convert_perspective_depth_to_orthogonal_depth(
perspective_depth: torch.Tensor, intrinsics: torch.Tensor
) -> torch.Tensor:
r"""Provided depth image(s) where depth is provided as the distance to the principal
point of the camera (perspective depth), this function converts it so that depth
is provided as the distance to the camera's image plane (orthogonal depth).

This is helpful because `unproject_depth` assumes that depth is expressed in
the orthogonal depth format.

If `perspective_depth` is a batch of depth images and `intrinsics` is a single intrinsic matrix,
the same calibration matrix is applied to all depth images in the batch.
Perspective depth images contain distances measured from the camera's optical center, while
orthogonal depth images contain distances measured from the camera's image plane. This function
uses the camera geometry to convert a perspective depth image into an orthogonal depth image.

The function assumes that the width and height are both greater than 1.

Args:
perspective_depth: The depth measurement obtained with the distance_to_camera replicator.
Shape is (H, W) or or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: A tensor providing camera's calibration matrix. Shape is (3, 3) or (N, 3, 3).
depth: The perspective depth images. Shape is (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: The camera's calibration matrix. If a single matrix is provided, the same
calibration matrix is used across all the depth images in the batch.
Shape is (3, 3) or (N, 3, 3).

Returns:
The depth image as if obtained by the distance_to_image_plane replicator. Shape
matches the input shape of depth
The orthogonal depth images. Shape matches the input shape of depth images.

Raises:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""

# Clone inputs to avoid in-place modifications
perspective_depth_batch = perspective_depth.clone()
perspective_depth_batch = depth.clone()
intrinsics_batch = intrinsics.clone()

# Check if inputs are batched
@@ -1123,7 +1038,7 @@ def convert_perspective_depth_to_orthogonal_depth(

# Validate input shapes
if perspective_depth_batch.dim() != 3:
raise ValueError(f"Expected perspective_depth to have 2, 3, or 4 dimensions; got {perspective_depth.shape}.")
raise ValueError(f"Expected depth images to have 2, 3, or 4 dimensions; got {depth.shape}.")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3); got {intrinsics.shape}.")

@@ -1137,8 +1052,8 @@ def convert_perspective_depth_to_orthogonal_depth(
cy = intrinsics_batch[:, 1, 2].view(-1, 1, 1)

# Create meshgrid of pixel coordinates
u_grid = torch.arange(im_width, device=perspective_depth.device, dtype=perspective_depth.dtype)
v_grid = torch.arange(im_height, device=perspective_depth.device, dtype=perspective_depth.dtype)
u_grid = torch.arange(im_width, device=depth.device, dtype=depth.dtype)
v_grid = torch.arange(im_height, device=depth.device, dtype=depth.dtype)
u_grid, v_grid = torch.meshgrid(u_grid, v_grid, indexing="xy")

# Expand the grids for batch processing
@@ -1150,17 +1065,104 @@ def convert_perspective_depth_to_orthogonal_depth(
y_term = ((v_grid - cy) / fy) ** 2

# Calculate the orthogonal (normal) depth
normal_depth = perspective_depth_batch / torch.sqrt(1 + x_term + y_term)
orthogonal_depth = perspective_depth_batch / torch.sqrt(1 + x_term + y_term)

# Restore the last dimension if it was present in the input
if add_last_dim:
normal_depth = normal_depth.unsqueeze(-1)
orthogonal_depth = orthogonal_depth.unsqueeze(-1)

# Return to original shape if input was not batched
if not is_batched:
normal_depth = normal_depth.squeeze(0)
orthogonal_depth = orthogonal_depth.squeeze(0)

return normal_depth
return orthogonal_depth
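
A short usage sketch for this function, with illustrative intrinsics and depth values (assuming the omni.isaac.lab extension is importable):

import torch
import omni.isaac.lab.utils.math as math_utils

# Illustrative pinhole intrinsics for a 320 x 240 image.
intrinsics = torch.tensor([[320.0, 0.0, 160.0],
                           [0.0, 320.0, 120.0],
                           [0.0, 0.0, 1.0]])
# Perspective depth: every pixel is 2 m from the optical center.
perspective_depth = torch.full((240, 320), 2.0)

ortho_depth = math_utils.orthogonalize_perspective_depth(perspective_depth, intrinsics)
# Off-axis pixels map to smaller plane distances: d / sqrt(1 + x_term + y_term).
print(ortho_depth.shape)  # torch.Size([240, 320])
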


@torch.jit.script
def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor, is_ortho: bool = True) -> torch.Tensor:
r"""Un-project depth image into a pointcloud.

This function converts orthogonal or perspective depth images into points given the calibration matrix
of the camera. It uses the following transformation based on camera geometry:

.. math::
p_{3D} = K^{-1} \times [u, v, 1]^T \times d

where :math:`p_{3D}` is the 3D point, :math:`d` is the depth value (measured from the image plane),
:math:`u` and :math:`v` are the pixel coordinates and :math:`K` is the intrinsic matrix.

The function assumes that the width and height are both greater than 1. This allows the function
to handle many possible shapes of depth images and intrinsics matrices.

.. note::
If :attr:`is_ortho` is False, the input depth images are first converted to orthogonal depth images
using :meth:`orthogonalize_perspective_depth`.

Args:
depth: The depth measurement. Shape is (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: The camera's calibration matrix. If a single matrix is provided, the same
calibration matrix is used across all the depth images in the batch.
Shape is (3, 3) or (N, 3, 3).
is_ortho: Whether the input depth image is an orthogonal or a perspective depth image. If True, the
input is treated as *orthogonal* depth, where measurements are taken from the camera's image
plane. If False, it is treated as *perspective* depth, where measurements are taken from the
camera's optical center. Defaults to True.

Returns:
The 3D coordinates of points. Shape is (P, 3) or (N, P, 3).

Raises:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""
# clone inputs to avoid in-place modifications
intrinsics_batch = intrinsics.clone()
# convert depth image to orthogonal if needed
if not is_ortho:
depth_batch = orthogonalize_perspective_depth(depth, intrinsics)
else:
depth_batch = depth.clone()

# check if inputs are batched
is_batched = depth_batch.dim() == 4 or (depth_batch.dim() == 3 and depth_batch.shape[-1] != 1)
# make sure inputs are batched
if depth_batch.dim() == 3 and depth_batch.shape[-1] == 1:
depth_batch = depth_batch.squeeze(dim=2) # (H, W, 1) -> (H, W)
if depth_batch.dim() == 2:
depth_batch = depth_batch[None] # (H, W) -> (1, H, W)
if depth_batch.dim() == 4 and depth_batch.shape[-1] == 1:
depth_batch = depth_batch.squeeze(dim=3) # (N, H, W, 1) -> (N, H, W)
if intrinsics_batch.dim() == 2:
intrinsics_batch = intrinsics_batch[None] # (3, 3) -> (1, 3, 3)
# check shape of inputs
if depth_batch.dim() != 3:
raise ValueError(f"Expected depth images to have dim = 2 or 3 or 4: got shape {depth.shape}")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3): got shape {intrinsics.shape}")

# get image height and width
im_height, im_width = depth_batch.shape[1:]
# create image points in homogeneous coordinates (3, H x W)
indices_u = torch.arange(im_width, device=depth.device, dtype=depth.dtype)
indices_v = torch.arange(im_height, device=depth.device, dtype=depth.dtype)
img_indices = torch.stack(torch.meshgrid([indices_u, indices_v], indexing="ij"), dim=0).reshape(2, -1)
pixels = torch.nn.functional.pad(img_indices, (0, 0, 0, 1), mode="constant", value=1.0)
pixels = pixels.unsqueeze(0) # (3, H x W) -> (1, 3, H x W)

# unproject points into 3D space
points = torch.matmul(torch.inverse(intrinsics_batch), pixels) # (N, 3, H x W)
points = points / points[:, -1, :].unsqueeze(1) # normalize by last coordinate
# flatten depth image (N, H, W) -> (N, H x W)
depth_batch = depth_batch.transpose_(1, 2).reshape(depth_batch.shape[0], -1).unsqueeze(2)
depth_batch = depth_batch.expand(-1, -1, 3)
# scale points by depth
points_xyz = points.transpose_(1, 2) * depth_batch # (N, H x W, 3)

# return points in same shape as input
if not is_batched:
points_xyz = points_xyz.squeeze(0)

return points_xyz
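
A usage sketch for the updated unproject_depth, again with illustrative values:

import torch
import omni.isaac.lab.utils.math as math_utils

intrinsics = torch.tensor([[320.0, 0.0, 160.0],
                           [0.0, 320.0, 120.0],
                           [0.0, 0.0, 1.0]])
depth = torch.ones(240, 320)  # orthogonal depth image, 1 m everywhere

# Orthogonal depth can be un-projected directly.
points = math_utils.unproject_depth(depth, intrinsics)
print(points.shape)  # torch.Size([76800, 3]), i.e. (H * W, 3)

# Perspective depth (e.g. "distance_to_camera" outputs) is orthogonalized first.
points_persp = math_utils.unproject_depth(depth, intrinsics, is_ortho=False)
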


@torch.jit.script
@@ -1191,8 +1193,10 @@ def project_points(points: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tens
Returns:
Projected 3D coordinates of points. Shape is (P, 3) or (N, P, 3).
"""
# clone the inputs to avoid in-place operations modifying the original data
points_batch = points.clone()
intrinsics_batch = intrinsics.clone()

# check if inputs are batched
is_batched = points_batch.dim() == 2
# make sure inputs are batched
@@ -1205,12 +1209,14 @@ def project_points(points: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tens
raise ValueError(f"Expected points to have dim = 3: got shape {points.shape}.")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3): got shape {intrinsics.shape}.")

# project points into 2D image plane
points_2d = torch.matmul(intrinsics_batch, points_batch.transpose(1, 2))
points_2d = points_2d / points_2d[:, -1, :].unsqueeze(1) # normalize by last coordinate
points_2d = points_2d.transpose_(1, 2) # (N, 3, P) -> (N, P, 3)
# replace last coordinate with depth
points_2d[:, :, -1] = points_batch[:, :, -1]

# return points in same shape as input
if not is_batched:
points_2d = points_2d.squeeze(0) # (1, P, 3) -> (P, 3)
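
Finally, a round-trip sketch combining the two functions (same illustrative values as above):

import torch
import omni.isaac.lab.utils.math as math_utils

intrinsics = torch.tensor([[320.0, 0.0, 160.0],
                           [0.0, 320.0, 120.0],
                           [0.0, 0.0, 1.0]])
depth = torch.ones(240, 320)

points_3d = math_utils.unproject_depth(depth, intrinsics)      # (H * W, 3)
points_2d = math_utils.project_points(points_3d, intrinsics)   # (H * W, 3) as (u, v, d)
# project_points keeps the depth of each point in the last column.
print(torch.allclose(points_2d[:, -1], points_3d[:, -1]))  # True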