Merged (changes from 4 commits)
6 changes: 5 additions & 1 deletion config/imitation/CrawlerStatic.yaml
@@ -19,7 +19,11 @@ behaviors:
gail:
gamma: 0.99
strength: 1.0
encoding_size: 128
network_settings:
normalize: true
hidden_units: 128
num_layers: 2
vis_encode_type: simple
learning_rate: 0.0003
use_actions: false
use_vail: false
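The same migration repeats across the imitation, PPO, and SAC configs below: the single encoding_size value under each reward signal is replaced by a full network_settings block, or simply dropped where the defaults suffice. As a rough sketch, assuming the NetworkSettings and EncoderType classes from mlagents.trainers.settings as changed later in this PR, the new gail block above corresponds to:

from mlagents.trainers.settings import EncoderType, NetworkSettings

# The gail.network_settings block from CrawlerStatic.yaml above, as a settings object.
# The old encoding_size: 128 only controlled hidden_units; normalize, num_layers and
# vis_encode_type were previously hard-coded inside the reward provider.
gail_encoder_settings = NetworkSettings(
    normalize=True,
    hidden_units=128,
    num_layers=2,
    vis_encode_type=EncoderType.SIMPLE,
)
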
6 changes: 5 additions & 1 deletion config/imitation/FoodCollector.yaml
@@ -19,7 +19,11 @@ behaviors:
gail:
gamma: 0.99
strength: 0.1
encoding_size: 128
network_settings:
normalize: false
hidden_units: 128
num_layers: 2
vis_encode_type: simple
learning_rate: 0.0003
use_actions: false
use_vail: false
1 change: 0 additions & 1 deletion config/imitation/Hallway.yaml
@@ -25,7 +25,6 @@ behaviors:
gail:
gamma: 0.99
strength: 0.01
encoding_size: 128
learning_rate: 0.0003
use_actions: false
use_vail: false
6 changes: 5 additions & 1 deletion config/imitation/PushBlock.yaml
@@ -22,7 +22,11 @@ behaviors:
gail:
gamma: 0.99
strength: 0.01
encoding_size: 128
network_settings:
normalize: false
hidden_units: 128
num_layers: 2
vis_encode_type: simple
learning_rate: 0.0003
use_actions: false
use_vail: false
4 changes: 2 additions & 2 deletions config/imitation/Pyramids.yaml
@@ -22,11 +22,11 @@ behaviors:
curiosity:
strength: 0.02
gamma: 0.99
encoding_size: 256
network_settings:
hidden_units: 256
gail:
strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
behavioral_cloning:
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
3 changes: 2 additions & 1 deletion config/ppo/Pyramids.yaml
@@ -22,7 +22,8 @@ behaviors:
curiosity:
gamma: 0.99
strength: 0.02
encoding_size: 256
network_settings:
hidden_units: 256
learning_rate: 0.0003
keep_checkpoints: 5
max_steps: 10000000
4 changes: 2 additions & 2 deletions config/ppo/PyramidsRND.yaml
@@ -22,11 +22,11 @@ behaviors:
rnd:
gamma: 0.99
strength: 0.01
encoding_size: 64
network_settings:
hidden_units: 64
learning_rate: 0.0001
keep_checkpoints: 5
max_steps: 3000000
time_horizon: 128
summary_freq: 30000
framework: pytorch
threaded: true
3 changes: 2 additions & 1 deletion config/ppo/VisualPyramids.yaml
@@ -22,7 +22,8 @@ behaviors:
curiosity:
gamma: 0.99
strength: 0.01
encoding_size: 256
network_settings:
hidden_units: 256
learning_rate: 0.0003
keep_checkpoints: 5
max_steps: 10000000
1 change: 0 additions & 1 deletion config/sac/Pyramids.yaml
@@ -24,7 +24,6 @@ behaviors:
gail:
gamma: 0.99
strength: 0.01
encoding_size: 128
learning_rate: 0.0003
use_actions: true
use_vail: false
1 change: 0 additions & 1 deletion config/sac/VisualPyramids.yaml
@@ -24,7 +24,6 @@ behaviors:
gail:
gamma: 0.99
strength: 0.02
encoding_size: 128
learning_rate: 0.0003
use_actions: true
use_vail: false
3 changes: 0 additions & 3 deletions ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
@@ -44,9 +44,6 @@ def create_reward_signals(self, reward_signal_configs):
:param reward_signal_configs: Reward signal config.
"""
for reward_signal, settings in reward_signal_configs.items():
# Get normalization from policy. Will be replaced by RewardSettings own
# NetworkSettings
settings.normalize = self.policy.normalize
# Name reward signals by string in case we have duplicates later
self.reward_signals[reward_signal.value] = create_reward_provider(
reward_signal, self.policy.behavior_spec, settings
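With the lines above removed, a reward signal no longer copies normalize from the policy; each signal's encoder is configured entirely by its own network_settings. A minimal sketch, assuming the updated settings classes from this PR (the demo path below is a placeholder, not a real project asset):

from mlagents.trainers.settings import GAILSettings, NetworkSettings

gail_settings = GAILSettings(
    gamma=0.99,
    strength=1.0,
    network_settings=NetworkSettings(normalize=True, hidden_units=128, num_layers=2),
    demo_path="path/to/expert.demo",  # placeholder path for illustration only
)
# The discriminator's encoder reads normalization from here, not from the policy.
assert gail_settings.network_settings.normalize is True
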
17 changes: 13 additions & 4 deletions ml-agents/mlagents/trainers/settings.py
@@ -183,7 +183,7 @@ def to_settings(self) -> type:
class RewardSignalSettings:
gamma: float = 0.99
strength: float = 1.0
normalize: bool = False
network_settings: NetworkSettings = attr.ib(factory=NetworkSettings)
Review thread on this line:

Contributor: I would make this one optional and, if it is None, use the Policy's network settings rather than our own defaults. How does that sound?

Contributor (author): I think I'd prefer to use our defaults, since it's possible the policy has significantly more capacity than is needed, i.e. the Crawler policy (3 layers / 512 hidden units) vs. the 2 layers / 128 hidden units we use for the discriminator. That said, I also realize this enables users to specify memory, which we probably want to explicitly prevent in the reward providers. cc @ervteng

Contributor: Not opposed to either route; they have their own pros and cons. Either way, as long as it's documented it should be fine. Is getting the Policy settings super ugly?

Contributor (author): I'm not sure how future-proof it is for multi-agent scenarios; we could have different policies to select from. Additionally, we currently create reward signals in optimizer/torch_optimizer.py, and in the future I think it will be necessary to remove the policy from the optimizer (also for multi-agent), in which case this would need to be addressed by either keeping the policy around or moving the creation of the reward provider. My vote is for default network settings.
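For reference, a rough sketch of the alternative discussed in this thread, i.e. making network_settings optional and falling back to the policy's network settings when it is unset. This is not what the PR implements, and the class and function names below are illustrative only:

from typing import Optional

import attr

from mlagents.trainers.settings import NetworkSettings


@attr.s(auto_attribs=True)
class RewardSignalSettingsWithFallback:  # illustrative, not part of this PR
    gamma: float = 0.99
    strength: float = 1.0
    network_settings: Optional[NetworkSettings] = None


def resolve_encoder_settings(
    signal_settings: RewardSignalSettingsWithFallback,
    policy_network_settings: NetworkSettings,
) -> NetworkSettings:
    # Use the signal's own settings when given; otherwise inherit the policy's,
    # which may be far larger than the discriminator needs (the concern raised above).
    return signal_settings.network_settings or policy_network_settings
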


@staticmethod
def structure(d: Mapping, t: type) -> Any:
@@ -199,28 +199,37 @@
enum_key = RewardSignalType(key)
t = enum_key.to_settings()
d_final[enum_key] = strict_to_cls(val, t)
if "encoding_size" in val:
Review thread on this line:

Contributor (author): Backward compatible with old configs.

Contributor: Can you add a comment around this code so we will remember it?

logger.warning(
"'encoding_size' was deprecated for RewardSignals. Please use network_settings."
)
# If network settings was not specified, use the encoding size. Otherwise, use hidden_units
if "network_settings" not in val:
d_final[enum_key].network_settings.hidden_units = val[
"encoding_size"
]
return d_final


@attr.s(auto_attribs=True)
class GAILSettings(RewardSignalSettings):
encoding_size: int = 64
learning_rate: float = 3e-4
encoding_size: int = 0
use_actions: bool = False
use_vail: bool = False
demo_path: str = attr.ib(kw_only=True)


@attr.s(auto_attribs=True)
class CuriositySettings(RewardSignalSettings):
encoding_size: int = 64
learning_rate: float = 3e-4
encoding_size: int = 0


@attr.s(auto_attribs=True)
class RNDSettings(RewardSignalSettings):
encoding_size: int = 64
learning_rate: float = 1e-4
encoding_size: int = 0


# SAMPLERS #############################################################################
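A self-contained sketch (not the ML-Agents code itself) of the backward-compatibility behaviour added to structure() above: a config that still sets encoding_size triggers a deprecation warning, and its value is carried over to network_settings.hidden_units unless a network_settings block was given explicitly:

import logging

logger = logging.getLogger(__name__)


def migrate_reward_signal_config(val: dict) -> dict:
    """Standalone illustration of the encoding_size fallback shown above."""
    if "encoding_size" in val:
        logger.warning(
            "'encoding_size' was deprecated for RewardSignals. Please use network_settings."
        )
        # Only fall back to encoding_size if network_settings was not specified.
        if "network_settings" not in val:
            val = dict(val)
            val["network_settings"] = {"hidden_units": val["encoding_size"]}
    return val


# Example: an old-style gail block from before this PR.
old_gail = {"gamma": 0.99, "strength": 0.01, "encoding_size": 128}
print(migrate_reward_signal_config(old_gail))
# {'gamma': 0.99, 'strength': 0.01, 'encoding_size': 128, 'network_settings': {'hidden_units': 128}}
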
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
@@ -14,7 +14,6 @@
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import LinearEncoder, linear_layer
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.trajectory import ObsUtil


@@ -70,21 +69,16 @@ class CuriosityNetwork(torch.nn.Module):
def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
super().__init__()
self._action_spec = specs.action_spec
state_encoder_settings = NetworkSettings(
normalize=False,
hidden_units=settings.encoding_size,
num_layers=2,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)

state_encoder_settings = settings.network_settings
self._state_encoder = NetworkBody(
specs.observation_specs, state_encoder_settings
)

self._action_flattener = ActionFlattener(self._action_spec)

self.inverse_model_action_encoding = torch.nn.Sequential(
LinearEncoder(2 * settings.encoding_size, 1, 256)
LinearEncoder(2 * state_encoder_settings.hidden_units, 1, 256)
)

if self._action_spec.continuous_size > 0:
@@ -98,9 +92,12 @@ def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:

self.forward_model_next_state_prediction = torch.nn.Sequential(
LinearEncoder(
settings.encoding_size + self._action_flattener.flattened_size, 1, 256
state_encoder_settings.hidden_units
+ self._action_flattener.flattened_size,
1,
256,
),
linear_layer(256, settings.encoding_size),
linear_layer(256, state_encoder_settings.hidden_units),
)

def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
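The curiosity module's layer sizes now follow the reward signal's own network_settings. For example, with the hidden_units: 256 used for curiosity in config/ppo/Pyramids.yaml above, the inverse-model encoder sees two encoded states; a minimal sketch assuming the settings classes from this PR:

from mlagents.trainers.settings import CuriositySettings, NetworkSettings

curiosity = CuriositySettings(
    gamma=0.99,
    strength=0.02,
    network_settings=NetworkSettings(hidden_units=256),
)

# Mirrors LinearEncoder(2 * state_encoder_settings.hidden_units, 1, 256) in CuriosityNetwork.
inverse_model_input_size = 2 * curiosity.network_settings.hidden_units  # 512
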
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
@@ -13,7 +13,6 @@
from mlagents.trainers.torch.action_flattener import ActionFlattener
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Initialization
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.trajectory import ObsUtil

@@ -75,13 +74,7 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
self._use_vail = settings.use_vail
self._settings = settings

encoder_settings = NetworkSettings(
normalize=settings.normalize,
hidden_units=settings.encoding_size,
num_layers=2,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
encoder_settings = settings.network_settings
self._action_flattener = ActionFlattener(specs.action_spec)
unencoded_size = (
self._action_flattener.flattened_size + 1 if settings.use_actions else 0
@@ -90,14 +83,14 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
specs.observation_specs, encoder_settings, unencoded_size
)

estimator_input_size = settings.encoding_size
estimator_input_size = encoder_settings.hidden_units
if settings.use_vail:
estimator_input_size = self.z_size
self._z_sigma = torch.nn.Parameter(
torch.ones((self.z_size), dtype=torch.float), requires_grad=True
)
self._z_mu_layer = linear_layer(
settings.encoding_size,
encoder_settings.hidden_units,
self.z_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py
@@ -11,7 +11,6 @@
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.settings import NetworkSettings, EncoderType
from mlagents.trainers.trajectory import ObsUtil


@@ -58,13 +57,7 @@ class RNDNetwork(torch.nn.Module):

def __init__(self, specs: BehaviorSpec, settings: RNDSettings) -> None:
super().__init__()
state_encoder_settings = NetworkSettings(
normalize=True,
hidden_units=settings.encoding_size,
num_layers=3,
vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
state_encoder_settings = settings.network_settings
self._encoder = NetworkBody(specs.observation_specs, state_encoder_settings)

def forward(self, mini_batch: AgentBuffer) -> torch.Tensor: