2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.24.13"
version = "0.24.15"

# Description
title = "Isaac Lab framework for Robot Learning"
24 changes: 18 additions & 6 deletions source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,24 +1,36 @@
Changelog
---------

0.22.15 (2024-09-20)
0.24.16 (2024-10-03)
~~~~~~~~~~~~~~~~~~~~

Changed
^^^^^^^

* Renamed the observation function :meth:`grab_images` to :meth:`image` to follow the convention of noun-based naming.
* Renamed the function :meth:`convert_perspective_depth_to_orthogonal_depth` to the shorter name
  :meth:`omni.isaac.lab.utils.math.orthogonalize_perspective_depth`.


0.24.15 (2024-09-20)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added :meth:`grab_images` to be able to use images for an observation term in manager based environments
* Added :meth:`grab_images` to be able to use images for an observation term in manager-based environments.


0.24.14 (2024-09-20)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added :meth:`convert_perspective_depth_to_orthogonal_depth`. :meth:`unproject_depth` assumes
that the input depth image is orthogonal. The new :meth:`convert_perspective_depth_to_orthogonal_depth`
can be used to convert a perspective depth image into an orthogonal depth image, so that the point cloud
can be unprojected correctly with :meth:`unproject_depth`.
* Added the method :meth:`convert_perspective_depth_to_orthogonal_depth` to convert perspective depth
images to orthogonal depth images. This is useful for :meth:`~omni.isaac.lab.utils.math.unproject_depth`,
since it expects orthogonal depth images as input.


0.24.13 (2024-09-08)
~~~~~~~~~~~~~~~~~~~~
source/extensions/omni.isaac.lab/omni/isaac/lab/envs/mdp/observations.py
@@ -182,38 +182,52 @@ def body_incoming_wrench(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
return link_incoming_forces.view(env.num_envs, -1)


def grab_images(
def image(
env: ManagerBasedEnv,
sensor_cfg: SceneEntityCfg = SceneEntityCfg("tiled_camera"),
data_type: str = "rgb",
convert_perspective_to_orthogonal: bool = False,
normalize: bool = True,
) -> torch.Tensor:
"""Grab all of the latest images of a specific datatype produced by a specific camera.
"""Images of a specific datatype from the camera sensor.

If the flag :attr:`normalize` is True, post-processing of the images is performed based on their
data types:

- "rgb": Scales the image to (0, 1) and subtracts with the mean of the current image batch.
- "depth" or "distance_to_camera" or "distance_to_plane": Replaces infinity values with zero.

Args:
env: The environment the cameras are placed within.
sensor_cfg: The desired sensor to read from. Defaults to SceneEntityCfg("tiled_camera").
data_type: The data type to pull from the desired camera. Defaults to "rgb".
convert_perspective_to_orthogonal: Whether to convert perspective
depth images to orthogonal depth images. Defaults to False.
normalize: Set to True to normalize images. Defaults to True.
convert_perspective_to_orthogonal: Whether to orthogonalize perspective depth images.
This is used only when the data type is "distance_to_camera". Defaults to False.
normalize: Whether to normalize the images. This depends on the selected data type.
Defaults to True.

Returns:
The images produced at the last timestep
The images produced at the last time-step.
"""
# extract the used quantities (to enable type-hinting)
sensor: TiledCamera | Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name]

# obtain the input image
images = sensor.data.output[data_type]

# depth image conversion
if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal:
images = math_utils.convert_perspective_depth_to_orthogonal_depth(images, sensor.data.intrinsic_matrices)
images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices)

# rgb/depth image normalization
if normalize:
if data_type == "rgb":
images = images / 255
images = images.float() / 255.0
mean_tensor = torch.mean(images, dim=(1, 2), keepdim=True)
images -= mean_tensor
elif "distance_to" in data_type or "depth" in data_type:
images[images == float("inf")] = 0

return images.clone()
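
As a standalone illustration of the normalization described in the docstring above, here is a minimal sketch that applies the same post-processing to a dummy RGB batch; the tensor shape and values are assumptions for the example, not taken from this diff:

import torch

# dummy batch of 4 RGB images in (N, H, W, C) layout, uint8 as a camera would produce
images = torch.randint(0, 256, (4, 32, 32, 3), dtype=torch.uint8)

# scale to [0, 1] and subtract the per-image mean over the spatial dimensions,
# mirroring the "rgb" branch of the normalization above
images = images.float() / 255.0
mean_tensor = torch.mean(images, dim=(1, 2), keepdim=True)
images -= mean_tensor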


68 changes: 33 additions & 35 deletions source/extensions/omni.isaac.lab/omni/isaac/lab/utils/math.py
@@ -988,12 +988,7 @@ def transform_points(

@torch.jit.script
def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
Review comment — @glvov-bdai (Collaborator, Oct 3, 2024):

Although the docstring mentions that the depth here should be orthogonal, it may be good to also reflect that in the method name, e.g. unproject_orthogonal_depth, since the docstring note could easily be missed.

Reply — @Mayankm96 (Contributor Author, Oct 4, 2024):

I added a parameter to the function which the user can set to True/False for orthogonal/perspective depth images, respectively. This is probably less breaking.
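
To illustrate the reply, here is a hedged sketch of how that flag is used through the renamed observation term; the environment instance and sensor name are assumptions for the example:

from omni.isaac.lab.envs.mdp.observations import image
from omni.isaac.lab.managers import SceneEntityCfg

# perspective depth from the camera is orthogonalized before being returned, so the
# resulting observation can later be unprojected correctly with unproject_depth
depth_obs = image(
    env,  # an existing ManagerBasedEnv instance (assumed to be available)
    sensor_cfg=SceneEntityCfg("tiled_camera"),
    data_type="distance_to_camera",
    convert_perspective_to_orthogonal=True,
)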

r"""Unproject depth image into a pointcloud. This method assumes that depth
is provided orthogonally relative to the image plane, as opposed to absolutely relative to the camera's
principal point (perspective depth). To unproject a perspective depth image, use
:meth:`convert_perspective_depth_to_orthogonal_depth` to convert
to an orthogonal depth image prior to calling this method. Otherwise, the
created point cloud will be distorted, especially around the edges.
r"""Un-project orthogonal depth image into a pointcloud.

This function converts depth images into points given the calibration matrix of the camera.

@@ -1003,15 +998,19 @@ def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
where :math:`p_{3D}` is the 3D point, :math:`d` is the depth value, :math:`u` and :math:`v` are
the pixel coordinates and :math:`K` is the intrinsic matrix.

If `depth` is a batch of depth images and `intrinsics` is a single intrinsic matrix, the same
calibration matrix is applied to all depth images in the batch.

The function assumes that the width and height are both greater than 1. This allows it to handle
many possible shapes of depth images and intrinsics matrices.

Additionally, the provided depth images need to contain the orthogonal distance to the camera's image plane.
For perspective depth images (i.e. where depth is measured from the camera's optical center), please use
:meth:`orthogonalize_perspective_depth` to convert them to orthogonal depth images before calling this
method. Otherwise, the generated point cloud will be distorted, especially around the edges.

Args:
depth: The depth measurement. Shape is (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: A tensor providing camera's calibration matrix. Shape is (3, 3) or (N, 3, 3).
intrinsics: The camera's calibration matrix. If a single matrix is provided, the same
calibration matrix is used across all the depth images in the batch.
Shape is (3, 3) or (N, 3, 3).

Returns:
The 3D coordinates of points. Shape is (P, 3) or (N, P, 3).
@@ -1020,8 +1019,10 @@ def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""
# clone inputs to avoid in-place modifications
depth_batch = depth.clone()
intrinsics_batch = intrinsics.clone()

# check if inputs are batched
is_batched = depth_batch.dim() == 4 or (depth_batch.dim() == 3 and depth_batch.shape[-1] != 1)
# make sure inputs are batched
@@ -1065,37 +1066,30 @@ def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:


@torch.jit.script
def convert_perspective_depth_to_orthogonal_depth(
perspective_depth: torch.Tensor, intrinsics: torch.Tensor
) -> torch.Tensor:
r"""Provided depth image(s) where depth is provided as the distance to the principal
point of the camera (perspective depth), this function converts it so that depth
is provided as the distance to the camera's image plane (orthogonal depth).

This is helpful because `unproject_depth` assumes that depth is expressed in
the orthogonal depth format.
def orthogonalize_perspective_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
"""Converts perspective depth image to orthogonal depth image.

If `perspective_depth` is a batch of depth images and `intrinsics` is a single intrinsic matrix,
the same calibration matrix is applied to all depth images in the batch.
Perspective depth images contain distances measured from the camera's optical center.
In contrast, orthogonal depth images provide the distance from the camera's image plane.
This function uses the camera geometry to convert a perspective depth image into an orthogonal one.

The function assumes that the width and height are both greater than 1.

Args:
perspective_depth: The depth measurement obtained with the distance_to_camera replicator.
Shape is (H, W) or or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: A tensor providing camera's calibration matrix. Shape is (3, 3) or (N, 3, 3).
depth: The perspective depth images. Shape is (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: The camera's calibration matrix. If a single matrix is provided, the same
calibration matrix is used across all the depth images in the batch.
Shape is (3, 3) or (N, 3, 3).

Returns:
The depth image as if obtained by the distance_to_image_plane replicator. Shape
matches the input shape of depth
The orthogonal depth images. Shape matches the input shape of depth images.

Raises:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""

# Clone inputs to avoid in-place modifications
perspective_depth_batch = perspective_depth.clone()
perspective_depth_batch = depth.clone()
intrinsics_batch = intrinsics.clone()

# Check if inputs are batched
@@ -1123,7 +1117,7 @@ def convert_perspective_depth_to_orthogonal_depth(

# Validate input shapes
if perspective_depth_batch.dim() != 3:
raise ValueError(f"Expected perspective_depth to have 2, 3, or 4 dimensions; got {perspective_depth.shape}.")
raise ValueError(f"Expected depth images to have 2, 3, or 4 dimensions; got {depth.shape}.")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3); got {intrinsics.shape}.")

@@ -1137,8 +1131,8 @@ def convert_perspective_depth_to_orthogonal_depth(
cy = intrinsics_batch[:, 1, 2].view(-1, 1, 1)

# Create meshgrid of pixel coordinates
u_grid = torch.arange(im_width, device=perspective_depth.device, dtype=perspective_depth.dtype)
v_grid = torch.arange(im_height, device=perspective_depth.device, dtype=perspective_depth.dtype)
u_grid = torch.arange(im_width, device=depth.device, dtype=depth.dtype)
v_grid = torch.arange(im_height, device=depth.device, dtype=depth.dtype)
u_grid, v_grid = torch.meshgrid(u_grid, v_grid, indexing="xy")

# Expand the grids for batch processing
@@ -1150,17 +1144,17 @@ def convert_perspective_depth_to_orthogonal_depth(
y_term = ((v_grid - cy) / fy) ** 2

# Calculate the orthogonal (normal) depth
normal_depth = perspective_depth_batch / torch.sqrt(1 + x_term + y_term)
orthogonal_depth = perspective_depth_batch / torch.sqrt(1 + x_term + y_term)

# Restore the last dimension if it was present in the input
if add_last_dim:
normal_depth = normal_depth.unsqueeze(-1)
orthogonal_depth = orthogonal_depth.unsqueeze(-1)

# Return to original shape if input was not batched
if not is_batched:
normal_depth = normal_depth.squeeze(0)
orthogonal_depth = orthogonal_depth.squeeze(0)

return normal_depth
return orthogonal_depth
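
Taken together, the two functions form the intended depth-to-pointcloud pipeline. Below is a minimal end-to-end sketch; the image size and intrinsics are illustrative assumptions, not values from this diff:

import torch

import omni.isaac.lab.utils.math as math_utils

# a fake perspective depth image (as a "distance_to_camera" annotator would produce)
perspective_depth = torch.rand(480, 640) + 1.0
intrinsics = torch.tensor([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]])

# orthogonalize first: per pixel, depth / sqrt(1 + ((u - cx)/fx)^2 + ((v - cy)/fy)^2)
orthogonal_depth = math_utils.orthogonalize_perspective_depth(perspective_depth, intrinsics)

# then unproject into a point cloud of shape (P, 3)
points = math_utils.unproject_depth(orthogonal_depth, intrinsics)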


@torch.jit.script
@@ -1191,8 +1185,10 @@ def project_points(points: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
Returns:
Projected 3D coordinates of points. Shape is (P, 3) or (N, P, 3).
"""
# clone the inputs to avoid in-place operations modifying the original data
points_batch = points.clone()
intrinsics_batch = intrinsics.clone()

# check if inputs are batched
is_batched = points_batch.dim() == 2
# make sure inputs are batched
@@ -1205,12 +1201,14 @@ def project_points(points: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
raise ValueError(f"Expected points to have dim = 3: got shape {points.shape}.")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3): got shape {intrinsics.shape}.")

# project points into 2D image plane
points_2d = torch.matmul(intrinsics_batch, points_batch.transpose(1, 2))
points_2d = points_2d / points_2d[:, -1, :].unsqueeze(1) # normalize by last coordinate
points_2d = points_2d.transpose_(1, 2) # (N, 3, P) -> (N, P, 3)
# replace last coordinate with depth
points_2d[:, :, -1] = points_batch[:, :, -1]

# return points in same shape as input
if not is_batched:
points_2d = points_2d.squeeze(0) # (1, 3, P) -> (3, P)
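For completeness, a short hedged sketch of using project_points to go back from camera-frame points to pixel coordinates; the point values are assumptions:

import torch

import omni.isaac.lab.utils.math as math_utils

# three points in the camera frame (assumed values) and a toy calibration matrix
points = torch.tensor([[0.1, 0.2, 2.0], [0.0, 0.0, 1.5], [-0.3, 0.1, 3.0]])
intrinsics = torch.tensor([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]])

# pixel coordinates (u, v) with the original depth kept in the last column
points_2d = math_utils.project_points(points, intrinsics)
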
2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.lab/test/utils/test_math.py
@@ -384,7 +384,7 @@ def test_depth_perspective_conversion(self):
intrinsics = torch.tensor([[500.0, 0.0, 5.0], [0.0, 500.0, 5.0], [0.0, 0.0, 1.0]])

# Convert perspective depth to orthogonal depth
orthogonal_depth = math_utils.convert_perspective_depth_to_orthogonal_depth(perspective_depth, intrinsics)
orthogonal_depth = math_utils.orthogonalize_perspective_depth(perspective_depth, intrinsics)

# Manually compute expected orthogonal depth based on the formula for comparison
expected_orthogonal_depth = torch.tensor(
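For reference, the manual computation in this test follows the same per-pixel geometry as orthogonalize_perspective_depth. A hedged sketch for a single pixel, reusing the intrinsics from the test above (the depth value is an assumed example, since the expected tensor is truncated here):

import math

# intrinsics from the test: fx = fy = 500.0, cx = cy = 5.0
fx, fy, cx, cy = 500.0, 500.0, 5.0, 5.0

# assumed perspective depth at pixel (u, v) = (0, 0)
u, v, perspective = 0, 0, 10.0

x_term = ((u - cx) / fx) ** 2
y_term = ((v - cy) / fy) ** 2
expected_orthogonal = perspective / math.sqrt(1 + x_term + y_term)
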
source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/cartpole/cartpole_camera_env_cfg.py
@@ -4,14 +4,15 @@
# SPDX-License-Identifier: BSD-3-Clause

import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.envs.mdp.observations import grab_images
from omni.isaac.lab.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.sensors import TiledCameraCfg
from omni.isaac.lab.utils import configclass

from omni.isaac.lab_tasks.manager_based.classic.cartpole.cartpole_env_cfg import CartpoleEnvCfg, CartpoleSceneCfg
import omni.isaac.lab_tasks.manager_based.classic.cartpole.mdp as mdp

from .cartpole_env_cfg import CartpoleEnvCfg, CartpoleSceneCfg

##
# Scene definition
@@ -20,6 +21,8 @@

@configclass
class CartpoleRGBCameraSceneCfg(CartpoleSceneCfg):

# add camera to the scene
tiled_camera: TiledCameraCfg = TiledCameraCfg(
prim_path="{ENV_REGEX_NS}/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.9945, 0.0, 0.1045, 0.0), convention="world"),
@@ -34,6 +37,8 @@ class CartpoleRGBCameraSceneCfg(CartpoleSceneCfg):

@configclass
class CartpoleDepthCameraSceneCfg(CartpoleSceneCfg):

# add camera to the scene
tiled_camera: TiledCameraCfg = TiledCameraCfg(
prim_path="{ENV_REGEX_NS}/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.9945, 0.0, 0.1045, 0.0), convention="world"),
@@ -46,17 +51,22 @@ class CartpoleDepthCameraSceneCfg(CartpoleSceneCfg):
)


##
# MDP settings
##


@configclass
class RGBObservationsCfg:
"""Observation specifications for the MDP."""

@configclass
class RGBCameraPolicyCfg(ObsGroup):
"""Observations for policy group."""
"""Observations for policy group with RGB images."""

image = ObsTerm(func=grab_images, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "rgb"})
image = ObsTerm(func=mdp.image, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "rgb"})

def __post_init__(self) -> None:
def __post_init__(self):
self.enable_corruption = False
self.concatenate_terms = True

@@ -65,22 +75,35 @@ def __post_init__(self) -> None:

@configclass
class DepthObservationsCfg:
"""Observation specifications for the MDP."""

@configclass
class DepthCameraPolicyCfg(RGBObservationsCfg.RGBCameraPolicyCfg):
"""Observations for policy group with depth images."""

image = ObsTerm(
func=grab_images, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "distance_to_camera"}
func=mdp.image, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "distance_to_camera"}
)

policy: ObsGroup = DepthCameraPolicyCfg()


##
# Environment configuration
##


@configclass
class CartpoleRGBCameraEnvCfg(CartpoleEnvCfg):
"""Configuration for the cartpole environment with RGB camera."""

scene: CartpoleSceneCfg = CartpoleRGBCameraSceneCfg(num_envs=1024, env_spacing=20)
observations = RGBObservationsCfg()
observations: RGBObservationsCfg = RGBObservationsCfg()


@configclass
class CartpoleDepthCameraEnvCfg(CartpoleEnvCfg):
"""Configuration for the cartpole environment with depth camera."""

scene: CartpoleSceneCfg = CartpoleDepthCameraSceneCfg(num_envs=1024, env_spacing=20)
observations = DepthObservationsCfg()
observations: DepthObservationsCfg = DepthObservationsCfg()
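
As a usage note, here is a hedged sketch of instantiating one of these configurations directly; it assumes an Isaac Sim app has already been launched via AppLauncher, which is not shown in this diff:

from omni.isaac.lab.envs import ManagerBasedRLEnv

# build the RGB-camera cartpole environment with a smaller number of envs
env_cfg = CartpoleRGBCameraEnvCfg()
env_cfg.scene.num_envs = 16
env = ManagerBasedRLEnv(cfg=env_cfg)

# the "policy" observation group now yields the image term defined above
obs, _ = env.reset()
print(obs["policy"].shape)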
source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/cartpole/cartpole_env_cfg.py
@@ -176,7 +176,7 @@ class CurriculumCfg:

@configclass
class CartpoleEnvCfg(ManagerBasedRLEnvCfg):
"""Configuration for the locomotion velocity-tracking environment."""
"""Configuration for the cartpole environment."""

# Scene settings
scene: CartpoleSceneCfg = CartpoleSceneCfg(num_envs=4096, env_spacing=4.0)