
[Question] My attempt to make a direct environment for Franka lifting with 2 robots failed; the robots shake a lot #1935

@celestialdr4g0n

Description


I made a direct environment for Franka lifting with two robots. Below are my config and some code snippets.
I am using MAPPO from skrl.

MAPPO parameters

seed: 42
models:
  separate: True
  policy:  # see gaussian_model parameters
    class: GaussianMixin
    clip_actions: False
    clip_log_std: True
    min_log_std: -20.0
    max_log_std: 2.0
    initial_log_std: 0.0
    network:
      - name: net
        input: STATES
        layers: [512, 512, 256, 128]
        activations: elu
    output: ACTIONS
  value:  # see deterministic_model parameters
    class: DeterministicMixin
    clip_actions: False
    network:
      - name: net
        input: STATES
        layers: [512, 512, 256, 128]
        activations: elu
    output: ONE

memory:
  class: RandomMemory
  memory_size: -1  # automatically determined (same as agent:rollouts)

agent:
  class: MAPPO
  rollouts: 24
  learning_epochs: 5
  mini_batches: 2
  discount_factor: 0.99
  lambda: 0.95
  learning_rate: 1.0e-04
  learning_rate_scheduler: KLAdaptiveLR
  learning_rate_scheduler_kwargs:
    kl_threshold: 0.008
  state_preprocessor: RunningStandardScaler
  state_preprocessor_kwargs: null
  shared_state_preprocessor: RunningStandardScaler
  shared_state_preprocessor_kwargs: null
  value_preprocessor: RunningStandardScaler
  value_preprocessor_kwargs: null
  random_timesteps: 0
  learning_starts: 0
  grad_norm_clip: 1.0
  ratio_clip: 0.2
  value_clip: 0.2
  clip_predicted_values: True
  entropy_loss_scale: 0.01
  value_loss_scale: 1.0
  kl_threshold: 0.0
  rewards_shaper_scale: 0.01
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "two_arms_lift_coll_mappo_cfg_2_test"
    experiment_name: "test_configuration"
    write_interval: auto
    checkpoint_interval: auto


# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
  class: SequentialTrainer
  timesteps: 36000
  # timesteps: 72000
  environment_info: log
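
For reference, a minimal sketch of how a config like this is consumed with skrl's `Runner` (assuming skrl >= 1.1; the YAML path, the wrapper name, and the pre-built Isaac Lab env are placeholders, not taken from this issue):

```python
# Sketch: wiring the YAML above into skrl (path and wrapper name are assumptions)
import yaml

from skrl.envs.wrappers.torch import wrap_env
from skrl.utils.runner.torch import Runner


def train(env, cfg_path: str = "mappo_cfg.yaml") -> None:
    with open(cfg_path) as f:
        cfg = yaml.safe_load(f)
    # wrap the Isaac Lab direct MARL env for skrl's multi-agent interface
    env = wrap_env(env, wrapper="isaaclab-multi-agent")
    # the Runner builds the models, memory, MAPPO agent, and SequentialTrainer from cfg
    runner = Runner(env, cfg)
    runner.run()
```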

Action pre-processing and action application

	#######################
	# Pre physics step
	#######################
	def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
		self.actions = actions
		# accumulate the action deltas onto the current joint targets
		robot_1_targets = self.robot_1_dof_targets + self.actions["rb_1"].clone()
		robot_2_targets = self.robot_2_dof_targets + self.actions["rb_2"].clone()
		# make gripper actions binary (index the joint dimension, not the env dimension)
		robot_1_targets[:, self.robot_1_left_finger_link_idx] = torch.where(robot_1_targets[:, self.robot_1_left_finger_link_idx] > 0, self.open_translation, self.close_translation)
		robot_1_targets[:, self.robot_1_right_finger_link_idx] = torch.where(robot_1_targets[:, self.robot_1_right_finger_link_idx] > 0, self.open_translation, self.close_translation)
		robot_2_targets[:, self.robot_2_left_finger_link_idx] = torch.where(robot_2_targets[:, self.robot_2_left_finger_link_idx] > 0, self.open_translation, self.close_translation)
		robot_2_targets[:, self.robot_2_right_finger_link_idx] = torch.where(robot_2_targets[:, self.robot_2_right_finger_link_idx] > 0, self.open_translation, self.close_translation)
		# clamp to the joint limits and store the updated targets (after binarization, so the gripper values are kept)
		self.robot_1_dof_targets[:] = torch.clamp(robot_1_targets, self.rb_1_dof_low_limits, self.rb_1_dof_up_limits)
		self.robot_2_dof_targets[:] = torch.clamp(robot_2_targets, self.rb_2_dof_low_limits, self.rb_2_dof_up_limits)

	def _apply_action(self) -> None:
		self.robot_1.set_joint_position_target(self.robot_1_dof_targets)
		self.robot_2.set_joint_position_target(self.robot_2_dof_targets)
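
As a standalone sanity check on the delta-then-binarize update above, here it is on dummy tensors (the shapes, finger indices, and limits are made up for the example):

```python
import torch

# hypothetical shapes: 4 envs, 9 joints; finger joints at indices 7 and 8
num_envs, num_dofs = 4, 9
finger_idx = [7, 8]
open_translation, close_translation = 0.04, 0.0

dof_targets = torch.zeros(num_envs, num_dofs)          # current targets
action_delta = 0.1 * torch.randn(num_envs, num_dofs)   # policy output

targets = dof_targets + action_delta
# binarize only the finger joints, per environment ([:, idx] keeps the env dim)
targets[:, finger_idx] = torch.where(
    targets[:, finger_idx] > 0, open_translation, close_translation
)
dof_targets[:] = torch.clamp(targets, -2.8, 2.8)       # hypothetical joint limits
```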

Observations and states

	def _joint_pos_rel(self, robot: Articulation, joint_ids: list[int]) -> torch.Tensor:
		"""Difference between the current and the default joint positions."""
		return robot.data.joint_pos[:, joint_ids] - robot.data.default_joint_pos[:, joint_ids]

	def _joint_vel_rel(self, robot: Articulation, joint_ids: list[int]) -> torch.Tensor:
		"""Difference between the current and the default joint velocities."""
		return robot.data.joint_vel[:, joint_ids] - robot.data.default_joint_vel[:, joint_ids]
	
	def _get_observations(self) -> dict[str, torch.Tensor]:
		self.robot_1_prev_actions = self.actions["rb_1"].clone()
		self.robot_2_prev_actions = self.actions["rb_2"].clone()

		observation = dict()

		self.joint_pos_rel_1 = self._joint_pos_rel(robot=self.robot_1, joint_ids=self.joint_ids)
		self.joint_pos_rel_2 = self._joint_pos_rel(robot=self.robot_2, joint_ids=self.joint_ids)
		self.joint_vel_rel_1 = self._joint_vel_rel(robot=self.robot_1, joint_ids=self.joint_ids)
		self.joint_vel_rel_2 = self._joint_vel_rel(robot=self.robot_2, joint_ids=self.joint_ids)

		self.object_A_pos_b = self._object_position_in_robot_root_frame(
			self._object_A.data.root_pos_w, self.robot_1.data.root_link_state_w)
		self.object_B_pos_b = self._object_position_in_robot_root_frame(
			self._object_B.data.root_pos_w, self.robot_2.data.root_link_state_w)

		""" For lifting environment """
		obs_robot_1 = torch.cat(
			(
				self.joint_pos_rel_1, # 9
				self.joint_vel_rel_1, # 9
				self.object_A_pos_b, # 3; 21
				self.target_1_pose_b[:, :3], # 3; 24
				self.actions["rb_1"], # 9; 33
			),

			dim=-1,
		)

		obs_robot_2 = torch.cat(
			(
				self.joint_pos_rel_2, #  for joint positions reward
				self.joint_vel_rel_2, # for joint velocities reward
				self.object_B_pos_b, # target object position
				self.target_2_pose_b[:, :3], # target object position
				self.actions["rb_2"], # actions
			),
			dim=-1,
		)

		observation["rb_1"] = obs_robot_1
		observation["rb_2"] = obs_robot_2
		
		return observation
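
As a quick consistency check on the returned dict (a standalone helper, not part of the env; `env` is any built instance of this environment):

```python
def check_obs_shapes(env) -> None:
    """Smoke test (hypothetical helper): each agent's observation is (num_envs, obs_dim)."""
    expected = {"rb_1": 33, "rb_2": 33}  # 9 + 9 + 3 + 3 + 9 per agent
    obs = env.unwrapped._get_observations()
    for agent, dim in expected.items():
        assert obs[agent].shape == (env.unwrapped.num_envs, dim), f"bad shape for {agent}"
```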

	def _get_states(self) -> torch.Tensor:
		states = torch.cat(
			(
				# --- robot 1 ---
				self.joint_pos_rel_1,          # DOF positions (9); 9
				self.joint_vel_rel_1,          # DOF velocities (9); 18
				self.ee_1_pos_w,               # ee position in world frame (3); 21
				self.ee_1_rot_w,               # ee rotation in world frame (4); 25
				self.robot_1_dof_targets,      # applied joint targets (9); 34
				# --- robot 2 ---
				self.joint_pos_rel_2,          # DOF positions (9); 43
				self.joint_vel_rel_2,          # DOF velocities (9); 52
				self.ee_2_pos_w,               # ee position in world frame (3); 55
				self.ee_2_rot_w,               # ee rotation in world frame (4); 59
				self.robot_2_dof_targets,      # applied joint targets (9); 68
				# --- object 1 ---
				self.object_A_pos_b,           # object A position (3); 71
				self.object_A_rot_b,           # object A rotation (4); 75
				# --- object 2 ---
				self.object_B_pos_b,           # object B position (3); 78
				self.object_B_rot_b,           # object B rotation (4); 82
				# --- target 1 ---
				self.target_1_pose_b[:, :3],   # target 1 position (3); 85
				self.target_1_pose_b[:, 3:7],  # target 1 rotation (4); 89
				# --- target 2 ---
				self.target_2_pose_b[:, :3],   # target 2 position (3); 92
				self.target_2_pose_b[:, 3:7],  # target 2 rotation (4); 96
			),
			dim=-1,
		)
		return states
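
Summing the comments above: each agent observes 33 values and acts with 9, and the shared state has 96 values (2 × 34 robot blocks + 2 × 7 object poses + 2 × 7 target poses). A sketch of how that maps onto the space declarations, assuming Isaac Lab's DirectMARLEnvCfg (the class name is hypothetical, and on older Isaac Lab versions the import path is `omni.isaac.lab.envs` instead):

```python
from isaaclab.envs import DirectMARLEnvCfg
from isaaclab.utils import configclass


@configclass
class TwoArmsLiftEnvCfg(DirectMARLEnvCfg):  # hypothetical name; other required fields omitted
    possible_agents = ["rb_1", "rb_2"]
    action_spaces = {"rb_1": 9, "rb_2": 9}
    # 9 joint pos + 9 joint vel + 3 object pos + 3 target pos + 9 last actions
    observation_spaces = {"rb_1": 33, "rb_2": 33}
    # 2 x (9 + 9 + 3 + 4 + 9) robot blocks + 2 x 7 object poses + 2 x 7 target poses
    state_space = 96
```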

The reward functions are the same as in the manager-based lifting environment.
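
For reference, the manager-based lift task's reward terms re-expressed in direct-env style (a sketch only: the stds, weights, and minimal height below are illustrative values in the spirit of the standard lift task, not necessarily the exact ones used here):

```python
import torch


def lift_reward(ee_pos, object_pos, target_pos, actions, prev_actions, joint_vel,
                minimal_height: float = 0.04) -> torch.Tensor:
    # reaching: dense shaping that pulls the end effector toward the object
    reaching = 1.0 - torch.tanh(torch.norm(object_pos - ee_pos, dim=-1) / 0.1)
    # lifting: bonus once the object clears a minimal height
    lifted = (object_pos[:, 2] > minimal_height).float()
    # tracking: move the lifted object toward the target position
    tracking = lifted * (1.0 - torch.tanh(torch.norm(target_pos - object_pos, dim=-1) / 0.3))
    # smoothness penalties on action rate and joint velocities
    action_rate = torch.sum(torch.square(actions - prev_actions), dim=-1)
    joint_vel_l2 = torch.sum(torch.square(joint_vel), dim=-1)
    return reaching + 15.0 * lifted + 16.0 * tracking - 1e-4 * action_rate - 1e-4 * joint_vel_l2
```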

Video

shake.shake.mp4
