From 393994df99c40b0deb5ce6fca01e531e62b9ddab Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 13 Feb 2020 10:59:36 -0800 Subject: [PATCH 1/9] Moving the max step logic - Created a new Academy Event called AgentIncrementStep to be called before SetStatus - Implemented the AgentSteping logic --- com.unity.ml-agents/Runtime/Academy.cs | 5 +++++ com.unity.ml-agents/Runtime/Agent.cs | 16 ++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs index 2c3e4f75fd..c35d08df2c 100644 --- a/com.unity.ml-agents/Runtime/Academy.cs +++ b/com.unity.ml-agents/Runtime/Academy.cs @@ -113,6 +113,9 @@ public bool IsCommunicatorOn // Signals to all the listeners that the academy is being destroyed internal event Action DestroyAction; + // Signals the Agent that a new step is about to start + internal event Action AgentIncrementStep; + // Signals to all the agents at each environment step along with the // Academy's maxStepReached, done and stepCount values. The agents rely // on this event to update their own values of max step reached and done @@ -415,6 +418,8 @@ public void EnvironmentStep() ForcedFullReset(); } + AgentIncrementStep?.Invoke(); + AgentSetStatus?.Invoke(m_StepCount); diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index 81dbb9cf9b..56096d20de 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -240,6 +240,7 @@ public void LazyInitialize() m_Action = new AgentAction(); sensors = new List(); + Academy.Instance.AgentIncrementStep += AgentIncrementStep; Academy.Instance.AgentSendState += SendInfo; Academy.Instance.DecideAction += DecideAction; Academy.Instance.AgentAct += AgentStep; @@ -258,6 +259,7 @@ void OnDisable() // We don't want to even try, because this will lazily create a new Academy! if (Academy.IsInitialized) { + Academy.Instance.AgentIncrementStep -=AgentIncrementStep; Academy.Instance.AgentSendState -= SendInfo; Academy.Instance.DecideAction -= DecideAction; Academy.Instance.AgentAct -= AgentStep; @@ -688,18 +690,20 @@ void SendInfo() } } - /// Used by the brain to make the agent perform a step. - void AgentStep() + void AgentIncrementStep() { if ((m_StepCount >= maxStep) && (maxStep > 0)) { NotifyAgentDone(true); _AgentReset(); } - else - { - m_StepCount += 1; - } + m_StepCount += 1; + } + + /// Used by the brain to make the agent perform a step. + void AgentStep() + { + if ((m_RequestAction) && (m_Brain != null)) { From be331710c274046c35ad9d811742a07476f90ba8 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 14 Feb 2020 11:12:42 -0800 Subject: [PATCH 2/9] second commit : Moving the step counting at the begining. I had to edit the tests but I think they are now closer to what we want --- com.unity.ml-agents/Runtime/Academy.cs | 8 ++++---- com.unity.ml-agents/Runtime/Agent.cs | 15 ++++++++------- .../Tests/Editor/MLAgentsEditModeTest.cs | 12 +++++++----- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs index c35d08df2c..6f3abf9ca8 100644 --- a/com.unity.ml-agents/Runtime/Academy.cs +++ b/com.unity.ml-agents/Runtime/Academy.cs @@ -418,10 +418,11 @@ public void EnvironmentStep() ForcedFullReset(); } - AgentIncrementStep?.Invoke(); - AgentSetStatus?.Invoke(m_StepCount); + m_StepCount += 1; + m_TotalStepCount += 1; + AgentIncrementStep?.Invoke(); using (TimerStack.Instance.Scoped("AgentSendState")) { @@ -438,8 +439,7 @@ public void EnvironmentStep() AgentAct?.Invoke(); } - m_StepCount += 1; - m_TotalStepCount += 1; + } /// diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index 56096d20de..969fcacd9f 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -692,19 +692,14 @@ void SendInfo() void AgentIncrementStep() { - if ((m_StepCount >= maxStep) && (maxStep > 0)) - { - NotifyAgentDone(true); - _AgentReset(); - } + m_StepCount += 1; + } /// Used by the brain to make the agent perform a step. void AgentStep() { - - if ((m_RequestAction) && (m_Brain != null)) { m_RequestAction = false; @@ -713,6 +708,12 @@ void AgentStep() AgentAction(m_Action.vectorActions); } } + + if ((m_StepCount >= maxStep) && (maxStep > 0)) + { + NotifyAgentDone(true); + _AgentReset(); + } } void DecideAction() diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs index 97b4b5c8ad..5819630937 100644 --- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs +++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs @@ -489,7 +489,8 @@ public void TestCumulativeReward() var j = 0; for (var i = 0; i < 500; i++) { - if (i % 21 == 0) + Debug.Log(i); + if (i % 20 == 0) { j = 0; } @@ -524,12 +525,11 @@ public void TestMaxStepsReset() for (var i = 0; i < 15; i++) { - // We expect resets to occur when there are maxSteps actions since the last reset (and on the first step) - var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0); + Debug.Log(i); + // We expect resets to occur when there are maxSteps actions since the last reset + var expectReset = agent1.agentActionCallsSinceLastReset == maxStep; var previousNumResets = agent1.agentResetCalls; - aca.EnvironmentStep(); - if (expectReset) { Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls); @@ -538,6 +538,8 @@ public void TestMaxStepsReset() { Assert.AreEqual(previousNumResets, agent1.agentResetCalls); } + + aca.EnvironmentStep(); } } } From 0b78a16c5cbd148cd259321eaa439f1c5f032698 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 14 Feb 2020 11:14:34 -0800 Subject: [PATCH 3/9] addressing comments --- com.unity.ml-agents/Runtime/Academy.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs index 6f3abf9ca8..1309e14f31 100644 --- a/com.unity.ml-agents/Runtime/Academy.cs +++ b/com.unity.ml-agents/Runtime/Academy.cs @@ -113,7 +113,8 @@ public bool IsCommunicatorOn // Signals to all the listeners that the academy is being destroyed internal event Action DestroyAction; - // Signals the Agent that a new step is about to start + // Signals the Agent that a new step is about to start. + // This will mark the Agent as Done if it has reached its maxSteps. internal event Action AgentIncrementStep; // Signals to all the agents at each environment step along with the @@ -438,8 +439,6 @@ public void EnvironmentStep() { AgentAct?.Invoke(); } - - } /// From 66518dbb0b2a86e6e171ab9479e3c1ed76955357 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Fri, 14 Feb 2020 14:05:20 -0800 Subject: [PATCH 4/9] Update com.unity.ml-agents/Runtime/Agent.cs Co-Authored-By: Chris Goy --- com.unity.ml-agents/Runtime/Agent.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index 969fcacd9f..f2aa9a6216 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -694,7 +694,6 @@ void AgentIncrementStep() { m_StepCount += 1; - } /// Used by the brain to make the agent perform a step. From 544328092ce4844742e97fc8a53859318999f0a5 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Fri, 14 Feb 2020 14:05:33 -0800 Subject: [PATCH 5/9] Update com.unity.ml-agents/Runtime/Agent.cs Co-Authored-By: Chris Goy --- com.unity.ml-agents/Runtime/Agent.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index f2aa9a6216..c2f9b9b95e 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -692,7 +692,6 @@ void SendInfo() void AgentIncrementStep() { - m_StepCount += 1; } From b4619a04fa13f6d3c769a8d57e62c569e8927103 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Sun, 16 Feb 2020 17:13:10 -0800 Subject: [PATCH 6/9] Made the tests not be broken --- .../Tests/Editor/MLAgentsEditModeTest.cs | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs index 5819630937..0d3d1c9440 100644 --- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs +++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs @@ -68,6 +68,7 @@ public override void AgentAction(float[] vectorAction) public override void AgentReset() { + agentResetCalls += 1; collectObservationsCallsSinceLastReset = 0; agentActionCallsSinceLastReset = 0; @@ -486,20 +487,18 @@ public void TestCumulativeReward() agent1.LazyInitialize(); agent2.SetPolicy(new TestPolicy()); + int expectedAgent1Resets= 0; + int expectedAgent1ActionSinceReset = 0; + var j = 0; - for (var i = 0; i < 500; i++) + for (var i = 0; i < 50; i++) { - Debug.Log(i); - if (i % 20 == 0) - { - j = 0; - } - else - { - j++; + expectedAgent1ActionSinceReset += 1; + if (expectedAgent1ActionSinceReset == agent1.maxStep || i == 0){ + expectedAgent1ActionSinceReset = 0; } agent2.RequestAction(); - Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f); + Assert.LessOrEqual(Mathf.Abs(expectedAgent1ActionSinceReset * 10.1f - agent1.GetCumulativeReward()), 0.05f); Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f); agent1.AddReward(10f); @@ -523,23 +522,26 @@ public void TestMaxStepsReset() agent1.maxStep = maxStep; agent1.LazyInitialize(); + int expectedResets= 0; + int expectedAgentAction = 0; + int expectedAgentActionSinceReset = 0; + for (var i = 0; i < 15; i++) { - Debug.Log(i); - // We expect resets to occur when there are maxSteps actions since the last reset - var expectReset = agent1.agentActionCallsSinceLastReset == maxStep; - var previousNumResets = agent1.agentResetCalls; - - if (expectReset) - { - Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls); - } - else - { - Assert.AreEqual(previousNumResets, agent1.agentResetCalls); + expectedAgentAction += 1; + expectedAgentActionSinceReset += 1; + if (expectedAgentActionSinceReset == maxStep || (i == 0)){ + expectedResets +=1; } + if (expectedAgentActionSinceReset == maxStep){ + expectedAgentActionSinceReset = 0; + } aca.EnvironmentStep(); + + Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls); + Assert.AreEqual(expectedResets, agent1.agentResetCalls); + Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset); } } } From 5d78e2d9e8f98c23708b0dd36a2e22d532fa0748 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Sun, 16 Feb 2020 18:19:53 -0800 Subject: [PATCH 7/9] Update com.unity.ml-agents/Runtime/Agent.cs Co-Authored-By: Chris Elion --- com.unity.ml-agents/Runtime/Agent.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index c2f9b9b95e..c288e64efc 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -259,7 +259,7 @@ void OnDisable() // We don't want to even try, because this will lazily create a new Academy! if (Academy.IsInitialized) { - Academy.Instance.AgentIncrementStep -=AgentIncrementStep; + Academy.Instance.AgentIncrementStep -= AgentIncrementStep; Academy.Instance.AgentSendState -= SendInfo; Academy.Instance.DecideAction -= DecideAction; Academy.Instance.AgentAct -= AgentStep; From 3b7f1902ee90073cea036e6b491bc7f2315443cd Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Tue, 18 Feb 2020 14:55:46 -0800 Subject: [PATCH 8/9] step logic changes: unit test (#3467) --- .../Tests/Editor/MLAgentsEditModeTest.cs | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs index 0d3d1c9440..124e18244b 100644 --- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs +++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs @@ -487,10 +487,8 @@ public void TestCumulativeReward() agent1.LazyInitialize(); agent2.SetPolicy(new TestPolicy()); - int expectedAgent1Resets= 0; - int expectedAgent1ActionSinceReset = 0; + var expectedAgent1ActionSinceReset = 0; - var j = 0; for (var i = 0; i < 50; i++) { expectedAgent1ActionSinceReset += 1; @@ -518,30 +516,46 @@ public void TestMaxStepsReset() decisionRequester.DecisionPeriod = 1; decisionRequester.Awake(); - var maxStep = 6; + const int maxStep = 6; agent1.maxStep = maxStep; agent1.LazyInitialize(); - int expectedResets= 0; - int expectedAgentAction = 0; - int expectedAgentActionSinceReset = 0; + var expectedAgentStepCount = 0; + var expectedResets= 0; + var expectedAgentAction = 0; + var expectedAgentActionSinceReset = 0; + var expectedCollectObsCalls = 0; + var expectedCollectObsCallsSinceReset = 0; for (var i = 0; i < 15; i++) { + // Agent should observe and act on each Academy step expectedAgentAction += 1; expectedAgentActionSinceReset += 1; - if (expectedAgentActionSinceReset == maxStep || (i == 0)){ + expectedCollectObsCalls += 1; + expectedCollectObsCallsSinceReset += 1; + expectedAgentStepCount += 1; + + // If the next step will put the agent at maxSteps, we expect it to reset + if (agent1.GetStepCount() == maxStep - 1 || (i == 0)) + { expectedResets +=1; } - if (expectedAgentActionSinceReset == maxStep){ + if (agent1.GetStepCount() == maxStep - 1) + { expectedAgentActionSinceReset = 0; + expectedCollectObsCallsSinceReset = 0; + expectedAgentStepCount = 0; } aca.EnvironmentStep(); - Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls); + Assert.AreEqual(expectedAgentStepCount, agent1.GetStepCount()); Assert.AreEqual(expectedResets, agent1.agentResetCalls); + Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls); Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset); + Assert.AreEqual(expectedCollectObsCalls, agent1.collectObservationsCalls); + Assert.AreEqual(expectedCollectObsCallsSinceReset, agent1.collectObservationsCallsSinceLastReset); } } } From e5c443a643718269b31db5992667de6b5bd0aebd Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 18 Feb 2020 16:28:34 -0800 Subject: [PATCH 9/9] Added a line in the changelog --- com.unity.ml-agents/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 7093a2a4ef..d575d2b4d1 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Several classes were changed from public to internal visibility. (#3390) - Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391) - A tutorial on adding custom SideChannels was added (#3391) + - The stepping logic for the Agent and the Academy has been simplified (#3448) ### Bugfixes