2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.24.13"
version = "0.24.15"

# Description
title = "Isaac Lab framework for Robot Learning"
24 changes: 18 additions & 6 deletions source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,24 +1,36 @@
Changelog
---------

0.22.15 (2024-09-20)
0.24.16 (2024-10-03)
~~~~~~~~~~~~~~~~~~~~

Changed
^^^^^^^

* Renamed the observation function :meth:`grab_images` to :meth:`image` to follow the convention of noun-based naming.
* Renamed the function :meth:`convert_perspective_depth_to_orthogonal_depth` to the shorter name
  :meth:`omni.isaac.lab.utils.math.orthogonalize_perspective_depth`.


0.24.15 (2024-09-20)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added :meth:`grab_images` to be able to use images for an observation term in manager based environments
* Added :meth:`grab_images` to be able to use images for an observation term in manager-based environments.


0.24.14 (2024-09-20)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added :meth:`convert_perspective_depth_to_orthogonal_depth`. :meth:`unproject_depth` assumes
that the input depth image is orthogonal. The new :meth:`convert_perspective_depth_to_orthogonal_depth`
can be used to convert a perspective depth image into an orthogonal depth image, so that the point cloud
can be unprojected correctly with :meth:`unproject_depth`.
* Added the method :meth:`convert_perspective_depth_to_orthogonal_depth` to convert perspective depth
images to orthogonal depth images. This is useful for :meth:`~omni.isaac.lab.utils.math.unproject_depth`,
since it expects orthogonal depth images as input.


0.24.13 (2024-09-08)
~~~~~~~~~~~~~~~~~~~~
source/extensions/omni.isaac.lab/omni/isaac/lab/envs/mdp/observations.py
@@ -182,38 +182,52 @@ def body_incoming_wrench(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
return link_incoming_forces.view(env.num_envs, -1)


def grab_images(
def image(
env: ManagerBasedEnv,
sensor_cfg: SceneEntityCfg = SceneEntityCfg("tiled_camera"),
data_type: str = "rgb",
convert_perspective_to_orthogonal: bool = False,
normalize: bool = True,
) -> torch.Tensor:
"""Grab all of the latest images of a specific datatype produced by a specific camera.
"""Images of a specific datatype from the camera sensor.

If the flag :attr:`normalize` is True, post-processing of the images is performed based on their
data types:

- "rgb": Scales the image to (0, 1) and subtracts with the mean of the current image batch.
- "depth" or "distance_to_camera" or "distance_to_plane": Replaces infinity values with zero.

Args:
env: The environment the cameras are placed within.
sensor_cfg: The desired sensor to read from. Defaults to SceneEntityCfg("tiled_camera").
data_type: The data type to pull from the desired camera. Defaults to "rgb".
convert_perspective_to_orthogonal: Whether to convert perspective
depth images to orthogonal depth images. Defaults to False.
normalize: Set to True to normalize images. Defaults to True.
convert_perspective_to_orthogonal: Whether to orthogonalize perspective depth images.
This is used only when the data type is "distance_to_camera". Defaults to False.
normalize: Whether to normalize the images. This depends on the selected data type.
Defaults to True.

Returns:
The images produced at the last timestep
The images produced at the last time-step.
"""
# extract the used quantities (to enable type-hinting)
sensor: TiledCamera | Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name]

# obtain the input image
images = sensor.data.output[data_type]

# depth image conversion
if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal:
images = math_utils.convert_perspective_depth_to_orthogonal_depth(images, sensor.data.intrinsic_matrices)
images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices)

# rgb/depth image normalization
if normalize:
if data_type == "rgb":
images = images / 255
images = images.float() / 255.0
mean_tensor = torch.mean(images, dim=(1, 2), keepdim=True)
images -= mean_tensor
elif "distance_to" in data_type or "depth" in data_type:
images[images == float("inf")] = 0

return images.clone()
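
As a standalone illustration of the normalization described in the docstring above, here is a minimal sketch that applies the same post-processing to a dummy RGB batch; the tensor shape and values are assumptions for the example, not taken from this diff:

import torch

# dummy batch of 4 RGB images in (N, H, W, C) layout, uint8 as a camera would produce
images = torch.randint(0, 256, (4, 32, 32, 3), dtype=torch.uint8)

# scale to [0, 1] and subtract the per-image mean over the spatial dimensions,
# mirroring the "rgb" branch of the normalization above
images = images.float() / 255.0
mean_tensor = torch.mean(images, dim=(1, 2), keepdim=True)
images -= mean_tensor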


68 changes: 33 additions & 35 deletions source/extensions/omni.isaac.lab/omni/isaac/lab/utils/math.py
@@ -988,12 +988,7 @@ def transform_points(

@torch.jit.script
def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
Review comment — @glvov-bdai (Collaborator, Oct 3, 2024):

Although the docstring mentions that the depth here should be orthogonal, it may be good to also reflect that in the method name, e.g. unproject_orthogonal_depth, since the docstring note could easily be missed.

Reply — @Mayankm96 (Contributor Author, Oct 4, 2024):

I added a parameter to the function which the user can set to True/False for orthogonal/perspective depth images, respectively. This is probably less breaking.
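
To illustrate the reply, here is a hedged sketch of how that flag is used through the renamed observation term; the environment instance and sensor name are assumptions for the example:

from omni.isaac.lab.envs.mdp.observations import image
from omni.isaac.lab.managers import SceneEntityCfg

# perspective depth from the camera is orthogonalized before being returned, so the
# resulting observation can later be unprojected correctly with unproject_depth
depth_obs = image(
    env,  # an existing ManagerBasedEnv instance (assumed to be available)
    sensor_cfg=SceneEntityCfg("tiled_camera"),
    data_type="distance_to_camera",
    convert_perspective_to_orthogonal=True,
)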

r"""Unproject depth image into a pointcloud. This method assumes that depth
is provided orthogonally relative to the image plane, as opposed to absolutely relative to the camera's
principal point (perspective depth). To unproject a perspective depth image, use
:meth:`convert_perspective_depth_to_orthogonal_depth` to convert
to an orthogonal depth image prior to calling this method. Otherwise, the
created point cloud will be distorted, especially around the edges.
r"""Un-project orthogonal depth image into a pointcloud.

This function converts depth images into points given the calibration matrix of the camera.

@@ -1003,15 +998,19 @@ def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
where :math:`p_{3D}` is the 3D point, :math:`d` is the depth value, :math:`u` and :math:`v` are
the pixel coordinates and :math:`K` is the intrinsic matrix.

If `depth` is a batch of depth images and `intrinsics` is a single intrinsic matrix, the same
calibration matrix is applied to all depth images in the batch.

The function assumes that the width and height are both greater than 1. This allows it to handle
many possible shapes of depth images and intrinsics matrices.

Additionally, the provided depth images need to contain the orthogonal distance to the camera's image plane.
For perspective depth images (i.e. where depth is measured from the camera's optical center), please use
:meth:`orthogonalize_perspective_depth` to convert them to orthogonal depth images before calling this
method. Otherwise, the generated point cloud will be distorted, especially around the edges.

Args:
depth: The depth measurement. Shape is (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: A tensor providing camera's calibration matrix. Shape is (3, 3) or (N, 3, 3).
intrinsics: The camera's calibration matrix. If a single matrix is provided, the same
calibration matrix is used across all the depth images in the batch.
Shape is (3, 3) or (N, 3, 3).

Returns:
The 3D coordinates of points. Shape is (P, 3) or (N, P, 3).
@@ -1020,8 +1019,10 @@ def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""
# clone inputs to avoid in-place modifications
depth_batch = depth.clone()
intrinsics_batch = intrinsics.clone()

# check if inputs are batched
is_batched = depth_batch.dim() == 4 or (depth_batch.dim() == 3 and depth_batch.shape[-1] != 1)
# make sure inputs are batched
@@ -1065,37 +1066,30 @@ def unproject_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:


@torch.jit.script
def convert_perspective_depth_to_orthogonal_depth(
perspective_depth: torch.Tensor, intrinsics: torch.Tensor
) -> torch.Tensor:
r"""Provided depth image(s) where depth is provided as the distance to the principal
point of the camera (perspective depth), this function converts it so that depth
is provided as the distance to the camera's image plane (orthogonal depth).

This is helpful because `unproject_depth` assumes that depth is expressed in
the orthogonal depth format.
def orthogonalize_perspective_depth(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
"""Converts perspective depth image to orthogonal depth image.

If `perspective_depth` is a batch of depth images and `intrinsics` is a single intrinsic matrix,
the same calibration matrix is applied to all depth images in the batch.
Perspective depth images contain distances measured from the camera's optical center.
In contrast, orthogonal depth images provide the distance from the camera's image plane.
This function uses the camera geometry to convert a perspective depth image into an orthogonal one.

The function assumes that the width and height are both greater than 1.

Args:
perspective_depth: The depth measurement obtained with the distance_to_camera replicator.
Shape is (H, W) or or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: A tensor providing camera's calibration matrix. Shape is (3, 3) or (N, 3, 3).
depth: The perspective depth images. Shape is (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
intrinsics: The camera's calibration matrix. If a single matrix is provided, the same
calibration matrix is used across all the depth images in the batch.
Shape is (3, 3) or (N, 3, 3).

Returns:
The depth image as if obtained by the distance_to_image_plane replicator. Shape
matches the input shape of depth
The orthogonal depth images. Shape matches the input shape of depth images.

Raises:
ValueError: When depth is not of shape (H, W) or (H, W, 1) or (N, H, W) or (N, H, W, 1).
ValueError: When intrinsics is not of shape (3, 3) or (N, 3, 3).
"""

# Clone inputs to avoid in-place modifications
perspective_depth_batch = perspective_depth.clone()
perspective_depth_batch = depth.clone()
intrinsics_batch = intrinsics.clone()

# Check if inputs are batched
@@ -1123,7 +1117,7 @@ def convert_perspective_depth_to_orthogonal_depth(

# Validate input shapes
if perspective_depth_batch.dim() != 3:
raise ValueError(f"Expected perspective_depth to have 2, 3, or 4 dimensions; got {perspective_depth.shape}.")
raise ValueError(f"Expected depth images to have 2, 3, or 4 dimensions; got {depth.shape}.")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3); got {intrinsics.shape}.")

@@ -1137,8 +1131,8 @@ def convert_perspective_depth_to_orthogonal_depth(
cy = intrinsics_batch[:, 1, 2].view(-1, 1, 1)

# Create meshgrid of pixel coordinates
u_grid = torch.arange(im_width, device=perspective_depth.device, dtype=perspective_depth.dtype)
v_grid = torch.arange(im_height, device=perspective_depth.device, dtype=perspective_depth.dtype)
u_grid = torch.arange(im_width, device=depth.device, dtype=depth.dtype)
v_grid = torch.arange(im_height, device=depth.device, dtype=depth.dtype)
u_grid, v_grid = torch.meshgrid(u_grid, v_grid, indexing="xy")

# Expand the grids for batch processing
@@ -1150,17 +1144,17 @@ def convert_perspective_depth_to_orthogonal_depth(
y_term = ((v_grid - cy) / fy) ** 2

# Calculate the orthogonal (normal) depth
normal_depth = perspective_depth_batch / torch.sqrt(1 + x_term + y_term)
orthogonal_depth = perspective_depth_batch / torch.sqrt(1 + x_term + y_term)

# Restore the last dimension if it was present in the input
if add_last_dim:
normal_depth = normal_depth.unsqueeze(-1)
orthogonal_depth = orthogonal_depth.unsqueeze(-1)

# Return to original shape if input was not batched
if not is_batched:
normal_depth = normal_depth.squeeze(0)
orthogonal_depth = orthogonal_depth.squeeze(0)

return normal_depth
return orthogonal_depth
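
Taken together, the two functions form the intended depth-to-pointcloud pipeline. Below is a minimal end-to-end sketch; the image size and intrinsics are illustrative assumptions, not values from this diff:

import torch

import omni.isaac.lab.utils.math as math_utils

# a fake perspective depth image (as a "distance_to_camera" annotator would produce)
perspective_depth = torch.rand(480, 640) + 1.0
intrinsics = torch.tensor([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]])

# orthogonalize first: per pixel, depth / sqrt(1 + ((u - cx)/fx)^2 + ((v - cy)/fy)^2)
orthogonal_depth = math_utils.orthogonalize_perspective_depth(perspective_depth, intrinsics)

# then unproject into a point cloud of shape (P, 3)
points = math_utils.unproject_depth(orthogonal_depth, intrinsics)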


@torch.jit.script
@@ -1191,8 +1185,10 @@ def project_points(points: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
Returns:
Projected 3D coordinates of points. Shape is (P, 3) or (N, P, 3).
"""
# clone the inputs to avoid in-place operations modifying the original data
points_batch = points.clone()
intrinsics_batch = intrinsics.clone()

# check if inputs are batched
is_batched = points_batch.dim() == 2
# make sure inputs are batched
@@ -1205,12 +1201,14 @@ def project_points(points: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor:
raise ValueError(f"Expected points to have dim = 3: got shape {points.shape}.")
if intrinsics_batch.dim() != 3:
raise ValueError(f"Expected intrinsics to have shape (3, 3) or (N, 3, 3): got shape {intrinsics.shape}.")

# project points into 2D image plane
points_2d = torch.matmul(intrinsics_batch, points_batch.transpose(1, 2))
points_2d = points_2d / points_2d[:, -1, :].unsqueeze(1) # normalize by last coordinate
points_2d = points_2d.transpose_(1, 2) # (N, 3, P) -> (N, P, 3)
# replace last coordinate with depth
points_2d[:, :, -1] = points_batch[:, :, -1]

# return points in same shape as input
if not is_batched:
points_2d = points_2d.squeeze(0) # (1, 3, P) -> (3, P)
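For completeness, a short hedged sketch of using project_points to go back from camera-frame points to pixel coordinates; the point values are assumptions:

import torch

import omni.isaac.lab.utils.math as math_utils

# three points in the camera frame (assumed values) and a toy calibration matrix
points = torch.tensor([[0.1, 0.2, 2.0], [0.0, 0.0, 1.5], [-0.3, 0.1, 3.0]])
intrinsics = torch.tensor([[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]])

# pixel coordinates (u, v) with the original depth kept in the last column
points_2d = math_utils.project_points(points, intrinsics)
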
2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.lab/test/utils/test_math.py
@@ -384,7 +384,7 @@ def test_depth_perspective_conversion(self):
intrinsics = torch.tensor([[500.0, 0.0, 5.0], [0.0, 500.0, 5.0], [0.0, 0.0, 1.0]])

# Convert perspective depth to orthogonal depth
orthogonal_depth = math_utils.convert_perspective_depth_to_orthogonal_depth(perspective_depth, intrinsics)
orthogonal_depth = math_utils.orthogonalize_perspective_depth(perspective_depth, intrinsics)

# Manually compute expected orthogonal depth based on the formula for comparison
expected_orthogonal_depth = torch.tensor(
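For reference, the manual computation in this test follows the same per-pixel geometry as orthogonalize_perspective_depth. A hedged sketch for a single pixel, reusing the intrinsics from the test above (the depth value is an assumed example, since the expected tensor is truncated here):

import math

# intrinsics from the test: fx = fy = 500.0, cx = cy = 5.0
fx, fy, cx, cy = 500.0, 500.0, 5.0, 5.0

# assumed perspective depth at pixel (u, v) = (0, 0)
u, v, perspective = 0, 0, 10.0

x_term = ((u - cx) / fx) ** 2
y_term = ((v - cy) / fy) ** 2
expected_orthogonal = perspective / math.sqrt(1 + x_term + y_term)
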
source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/cartpole/cartpole_camera_env_cfg.py
@@ -4,14 +4,15 @@
# SPDX-License-Identifier: BSD-3-Clause

import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.envs.mdp.observations import grab_images
from omni.isaac.lab.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.sensors import TiledCameraCfg
from omni.isaac.lab.utils import configclass

from omni.isaac.lab_tasks.manager_based.classic.cartpole.cartpole_env_cfg import CartpoleEnvCfg, CartpoleSceneCfg
import omni.isaac.lab_tasks.manager_based.classic.cartpole.mdp as mdp

from .cartpole_env_cfg import CartpoleEnvCfg, CartpoleSceneCfg

##
# Scene definition
@@ -20,6 +21,8 @@

@configclass
class CartpoleRGBCameraSceneCfg(CartpoleSceneCfg):

# add camera to the scene
tiled_camera: TiledCameraCfg = TiledCameraCfg(
prim_path="{ENV_REGEX_NS}/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.9945, 0.0, 0.1045, 0.0), convention="world"),
@@ -34,6 +37,8 @@ class CartpoleRGBCameraSceneCfg(CartpoleSceneCfg):

@configclass
class CartpoleDepthCameraSceneCfg(CartpoleSceneCfg):

# add camera to the scene
tiled_camera: TiledCameraCfg = TiledCameraCfg(
prim_path="{ENV_REGEX_NS}/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(-7.0, 0.0, 3.0), rot=(0.9945, 0.0, 0.1045, 0.0), convention="world"),
@@ -46,17 +51,22 @@ class CartpoleDepthCameraSceneCfg(CartpoleSceneCfg):
)


##
# MDP settings
##


@configclass
class RGBObservationsCfg:
"""Observation specifications for the MDP."""

@configclass
class RGBCameraPolicyCfg(ObsGroup):
"""Observations for policy group."""
"""Observations for policy group with RGB images."""

image = ObsTerm(func=grab_images, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "rgb"})
image = ObsTerm(func=mdp.image, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "rgb"})

def __post_init__(self) -> None:
def __post_init__(self):
self.enable_corruption = False
self.concatenate_terms = True

@@ -65,22 +75,35 @@ def __post_init__(self) -> None:

@configclass
class DepthObservationsCfg:
"""Observation specifications for the MDP."""

@configclass
class DepthCameraPolicyCfg(RGBObservationsCfg.RGBCameraPolicyCfg):
"""Observations for policy group with depth images."""

image = ObsTerm(
func=grab_images, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "distance_to_camera"}
func=mdp.image, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "distance_to_camera"}
)

policy: ObsGroup = DepthCameraPolicyCfg()


##
# Environment configuration
##


@configclass
class CartpoleRGBCameraEnvCfg(CartpoleEnvCfg):
"""Configuration for the cartpole environment with RGB camera."""

scene: CartpoleSceneCfg = CartpoleRGBCameraSceneCfg(num_envs=1024, env_spacing=20)
observations = RGBObservationsCfg()
observations: RGBObservationsCfg = RGBObservationsCfg()


@configclass
class CartpoleDepthCameraEnvCfg(CartpoleEnvCfg):
"""Configuration for the cartpole environment with depth camera."""

scene: CartpoleSceneCfg = CartpoleDepthCameraSceneCfg(num_envs=1024, env_spacing=20)
observations = DepthObservationsCfg()
observations: DepthObservationsCfg = DepthObservationsCfg()
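
As a usage note, here is a hedged sketch of instantiating one of these configurations directly; it assumes an Isaac Sim app has already been launched via AppLauncher, which is not shown in this diff:

from omni.isaac.lab.envs import ManagerBasedRLEnv

# build the RGB-camera cartpole environment with a smaller number of envs
env_cfg = CartpoleRGBCameraEnvCfg()
env_cfg.scene.num_envs = 16
env = ManagerBasedRLEnv(cfg=env_cfg)

# the "policy" observation group now yields the image term defined above
obs, _ = env.reset()
print(obs["policy"].shape)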
source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/cartpole/cartpole_env_cfg.py
@@ -176,7 +176,7 @@ class CurriculumCfg:

@configclass
class CartpoleEnvCfg(ManagerBasedRLEnvCfg):
"""Configuration for the locomotion velocity-tracking environment."""
"""Configuration for the cartpole environment."""

# Scene settings
scene: CartpoleSceneCfg = CartpoleSceneCfg(num_envs=4096, env_spacing=4.0)