
[Question] My attempt to make a direct environment for Franka lifting with 2 robots failed; the robots shake a lot #1935

@celestialdr4g0n

Description


I made a direct environment for Franka lifting with two robots. Below are my config and some code snippets.
I am using MAPPO from skrl.

MAPPO parameters

seed: 42
models:
  separate: True
  policy:  # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: STATES
        layers: [512, 512, 256, 128]
        activations: elu
    output: ACTIONS
  value:  # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: STATES
        layers: [512, 512, 256, 128]
        activations: elu
    output: ONE

memory:
  class: RandomMemory
  memory_size: -1  # automatically determined (same as agent:rollouts)

agent:
  class: MAPPO
  rollouts: 24
  learning_epochs: 5
  mini_batches: 2
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 1.0e-04
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.008
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  shared_state_preprocessor: RunningStandardScaler
  shared_state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.01
  value_loss_scale: 1.0
  kl_threshold: 0.0
  rewards_shaper_scale: 0.01
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "two_arms_lift_coll_mappo_cfg_2_test"
    experiment_name: "test_configuration"
    write_interval: auto
    checkpoint_interval: auto


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 36000
  # timesteps: 72000
  environment_info: log
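
For reference, a minimal sketch of how a config like this is consumed with skrl's `Runner` (assuming skrl >= 1.1; the YAML path, the wrapper name, and the pre-built Isaac Lab env are placeholders, not taken from this issue):

```python
# Sketch: wiring the YAML above into skrl (path and wrapper name are assumptions)
import yaml

from skrl.envs.wrappers.torch import wrap_env
from skrl.utils.runner.torch import Runner


def train(env, cfg_path: str = "mappo_cfg.yaml") -> None:
    with open(cfg_path) as f:
        cfg = yaml.safe_load(f)
    # wrap the Isaac Lab direct MARL env for skrl's multi-agent interface
    env = wrap_env(env, wrapper="isaaclab-multi-agent")
    # the Runner builds the models, memory, MAPPO agent, and SequentialTrainer from cfg
    runner = Runner(env, cfg)
    runner.run()
```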

Action pre-processing and action application

	#######################
	# Pre physics step
	#######################
	def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
		self.actions = actions
		# accumulate the action deltas onto the current joint targets
		robot_1_targets = self.robot_1_dof_targets + self.actions["rb_1"].clone()
		robot_2_targets = self.robot_2_dof_targets + self.actions["rb_2"].clone()
		# make gripper actions binary (index the joint dimension, not the env dimension)
		robot_1_targets[:, self.robot_1_left_finger_link_idx] = torch.where(robot_1_targets[:, self.robot_1_left_finger_link_idx] > 0, self.open_translation, self.close_translation)
		robot_1_targets[:, self.robot_1_right_finger_link_idx] = torch.where(robot_1_targets[:, self.robot_1_right_finger_link_idx] > 0, self.open_translation, self.close_translation)
		robot_2_targets[:, self.robot_2_left_finger_link_idx] = torch.where(robot_2_targets[:, self.robot_2_left_finger_link_idx] > 0, self.open_translation, self.close_translation)
		robot_2_targets[:, self.robot_2_right_finger_link_idx] = torch.where(robot_2_targets[:, self.robot_2_right_finger_link_idx] > 0, self.open_translation, self.close_translation)
		# clamp to the joint limits and store the updated targets (after binarization, so the gripper values are kept)
		self.robot_1_dof_targets[:] = torch.clamp(robot_1_targets, self.rb_1_dof_low_limits, self.rb_1_dof_up_limits)
		self.robot_2_dof_targets[:] = torch.clamp(robot_2_targets, self.rb_2_dof_low_limits, self.rb_2_dof_up_limits)

	def _apply_action(self) -> None:
		self.robot_1.set_joint_position_target(self.robot_1_dof_targets)
		self.robot_2.set_joint_position_target(self.robot_2_dof_targets)
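
As a standalone sanity check on the delta-then-binarize update above, here it is on dummy tensors (the shapes, finger indices, and limits are made up for the example):

```python
import torch

# hypothetical shapes: 4 envs, 9 joints; finger joints at indices 7 and 8
num_envs, num_dofs = 4, 9
finger_idx = [7, 8]
open_translation, close_translation = 0.04, 0.0

dof_targets = torch.zeros(num_envs, num_dofs)          # current targets
action_delta = 0.1 * torch.randn(num_envs, num_dofs)   # policy output

targets = dof_targets + action_delta
# binarize only the finger joints, per environment ([:, idx] keeps the env dim)
targets[:, finger_idx] = torch.where(
    targets[:, finger_idx] > 0, open_translation, close_translation
)
dof_targets[:] = torch.clamp(targets, -2.8, 2.8)       # hypothetical joint limits
```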

Observations and states

	def _joint_pos_rel(self, robot: Articulation, joint_ids: list[int]) -> torch.Tensor:
		"""Difference between the current and the default joint positions."""
		return robot.data.joint_pos[:, joint_ids] - robot.data.default_joint_pos[:, joint_ids]

	def _joint_vel_rel(self, robot: Articulation, joint_ids: list[int]) -> torch.Tensor:
		"""Difference between the current and the default joint velocities."""
		return robot.data.joint_vel[:, joint_ids] - robot.data.default_joint_vel[:, joint_ids]
	
	def _get_observations(self) -> dict[str, torch.Tensor]:
		self.robot_1_prev_actions = self.actions["rb_1"].clone()
		self.robot_2_prev_actions = self.actions["rb_2"].clone()

		observation = dict()

		self.joint_pos_rel_1 = self._joint_pos_rel(robot=self.robot_1, joint_ids=self.joint_ids)
		self.joint_pos_rel_2 = self._joint_pos_rel(robot=self.robot_2, joint_ids=self.joint_ids)
		self.joint_vel_rel_1 = self._joint_vel_rel(robot=self.robot_1, joint_ids=self.joint_ids)
		self.joint_vel_rel_2 = self._joint_vel_rel(robot=self.robot_2, joint_ids=self.joint_ids)

		self.object_A_pos_b = self._object_position_in_robot_root_frame(
			self._object_A.data.root_pos_w, self.robot_1.data.root_link_state_w)
		self.object_B_pos_b = self._object_position_in_robot_root_frame(
			self._object_B.data.root_pos_w, self.robot_2.data.root_link_state_w)

		""" For lifting environment """
		obs_robot_1 = torch.cat(
			(
				self.joint_pos_rel_1, # 9
				self.joint_vel_rel_1, # 9
				self.object_A_pos_b, # 3; 21
				self.target_1_pose_b[:, :3], # 3; 24
				self.actions["rb_1"], # 9; 33
			),

			dim=-1,
		)

		obs_robot_2 = torch.cat(
			(
				self.joint_pos_rel_2, #  for joint positions reward
				self.joint_vel_rel_2, # for joint velocities reward
				self.object_B_pos_b, # target object position
				self.target_2_pose_b[:, :3], # target object position
				self.actions["rb_2"], # actions
			),
			dim=-1,
		)

		observation["rb_1"] = obs_robot_1
		observation["rb_2"] = obs_robot_2
		
		return observation
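
As a quick consistency check on the returned dict (a standalone helper, not part of the env; `env` is any built instance of this environment):

```python
def check_obs_shapes(env) -> None:
    """Smoke test (hypothetical helper): each agent's observation is (num_envs, obs_dim)."""
    expected = {"rb_1": 33, "rb_2": 33}  # 9 + 9 + 3 + 3 + 9 per agent
    obs = env.unwrapped._get_observations()
    for agent, dim in expected.items():
        assert obs[agent].shape == (env.unwrapped.num_envs, dim), f"bad shape for {agent}"
```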

	def _get_states(self) -> torch.Tensor:
		states = torch.cat(
			(
				# --- robot 1 ---
				self.joint_pos_rel_1,          # DOF positions (9); 9
				self.joint_vel_rel_1,          # DOF velocities (9); 18
				self.ee_1_pos_w,               # ee position in world frame (3); 21
				self.ee_1_rot_w,               # ee rotation in world frame (4); 25
				self.robot_1_dof_targets,      # applied joint targets (9); 34
				# --- robot 2 ---
				self.joint_pos_rel_2,          # DOF positions (9); 43
				self.joint_vel_rel_2,          # DOF velocities (9); 52
				self.ee_2_pos_w,               # ee position in world frame (3); 55
				self.ee_2_rot_w,               # ee rotation in world frame (4); 59
				self.robot_2_dof_targets,      # applied joint targets (9); 68
				# --- object 1 ---
				self.object_A_pos_b,           # object A position (3); 71
				self.object_A_rot_b,           # object A rotation (4); 75
				# --- object 2 ---
				self.object_B_pos_b,           # object B position (3); 78
				self.object_B_rot_b,           # object B rotation (4); 82
				# --- target 1 ---
				self.target_1_pose_b[:, :3],   # target 1 position (3); 85
				self.target_1_pose_b[:, 3:7],  # target 1 rotation (4); 89
				# --- target 2 ---
				self.target_2_pose_b[:, :3],   # target 2 position (3); 92
				self.target_2_pose_b[:, 3:7],  # target 2 rotation (4); 96
			),
			dim=-1,
		)
		return states
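
Summing the comments above: each agent observes 33 values and acts with 9, and the shared state has 96 values (2 × 34 robot blocks + 2 × 7 object poses + 2 × 7 target poses). A sketch of how that maps onto the space declarations, assuming Isaac Lab's DirectMARLEnvCfg (the class name is hypothetical, and on older Isaac Lab versions the import path is `omni.isaac.lab.envs` instead):

```python
from isaaclab.envs import DirectMARLEnvCfg
from isaaclab.utils import configclass


@configclass
class TwoArmsLiftEnvCfg(DirectMARLEnvCfg):  # hypothetical name; other required fields omitted
    possible_agents = ["rb_1", "rb_2"]
    action_spaces = {"rb_1": 9, "rb_2": 9}
    # 9 joint pos + 9 joint vel + 3 object pos + 3 target pos + 9 last actions
    observation_spaces = {"rb_1": 33, "rb_2": 33}
    # 2 x (9 + 9 + 3 + 4 + 9) robot blocks + 2 x 7 object poses + 2 x 7 target poses
    state_space = 96
```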

The reward functions are the same as in the manager-based lifting environment.
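
For reference, the manager-based lift task's reward terms re-expressed in direct-env style (a sketch only: the stds, weights, and minimal height below are illustrative values in the spirit of the standard lift task, not necessarily the exact ones used here):

```python
import torch


def lift_reward(ee_pos, object_pos, target_pos, actions, prev_actions, joint_vel,
                minimal_height: float = 0.04) -> torch.Tensor:
    # reaching: dense shaping that pulls the end effector toward the object
    reaching = 1.0 - torch.tanh(torch.norm(object_pos - ee_pos, dim=-1) / 0.1)
    # lifting: bonus once the object clears a minimal height
    lifted = (object_pos[:, 2] > minimal_height).float()
    # tracking: move the lifted object toward the target position
    tracking = lifted * (1.0 - torch.tanh(torch.norm(target_pos - object_pos, dim=-1) / 0.3))
    # smoothness penalties on action rate and joint velocities
    action_rate = torch.sum(torch.square(actions - prev_actions), dim=-1)
    joint_vel_l2 = torch.sum(torch.square(joint_vel), dim=-1)
    return reaching + 15.0 * lifted + 16.0 * tracking - 1e-4 * action_rate - 1e-4 * joint_vel_l2
```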

Video

shake.shake.mp4
