From 393994df99c40b0deb5ce6fca01e531e62b9ddab Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 13 Feb 2020 10:59:36 -0800
Subject: [PATCH 1/9] Moving the max step logic  - Created a new Academy Event
 called AgentIncrementStep to be called before SetStatus  - Implemented the
 AgentSteping logic

---
 com.unity.ml-agents/Runtime/Academy.cs |  5 +++++
 com.unity.ml-agents/Runtime/Agent.cs   | 16 ++++++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
index 2c3e4f75fd..c35d08df2c 100644
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
@@ -113,6 +113,9 @@ public bool IsCommunicatorOn
         // Signals to all the listeners that the academy is being destroyed
         internal event Action DestroyAction;
 
+        // Signals the Agent that a new step is about to start
+        internal event Action AgentIncrementStep;
+
         // Signals to all the agents at each environment step along with the
         // Academy's maxStepReached, done and stepCount values. The agents rely
         // on this event to update their own values of max step reached and done
@@ -415,6 +418,8 @@ public void EnvironmentStep()
                 ForcedFullReset();
             }
 
+            AgentIncrementStep?.Invoke();
+
             AgentSetStatus?.Invoke(m_StepCount);
 
 
diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
index 81dbb9cf9b..56096d20de 100644
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
@@ -240,6 +240,7 @@ public void LazyInitialize()
             m_Action = new AgentAction();
             sensors = new List<ISensor>();
 
+            Academy.Instance.AgentIncrementStep += AgentIncrementStep;
             Academy.Instance.AgentSendState += SendInfo;
             Academy.Instance.DecideAction += DecideAction;
             Academy.Instance.AgentAct += AgentStep;
@@ -258,6 +259,7 @@ void OnDisable()
             // We don't want to even try, because this will lazily create a new Academy!
             if (Academy.IsInitialized)
             {
+                Academy.Instance.AgentIncrementStep -=AgentIncrementStep;
                 Academy.Instance.AgentSendState -= SendInfo;
                 Academy.Instance.DecideAction -= DecideAction;
                 Academy.Instance.AgentAct -= AgentStep;
@@ -688,18 +690,20 @@ void SendInfo()
             }
         }
 
-        /// Used by the brain to make the agent perform a step.
-        void AgentStep()
+        void AgentIncrementStep()
         {
             if ((m_StepCount >= maxStep) && (maxStep > 0))
             {
                 NotifyAgentDone(true);
                 _AgentReset();
             }
-            else
-            {
-                m_StepCount += 1;
-            }
+            m_StepCount += 1;
+        }
+
+        /// Used by the brain to make the agent perform a step.
+        void AgentStep()
+        {
+
 
             if ((m_RequestAction) && (m_Brain != null))
             {

From be331710c274046c35ad9d811742a07476f90ba8 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Fri, 14 Feb 2020 11:12:42 -0800
Subject: [PATCH 2/9] second commit : Moving the step counting at the begining.
 I had to edit the tests but I think they are now closer to what we want

---
 com.unity.ml-agents/Runtime/Academy.cs            |  8 ++++----
 com.unity.ml-agents/Runtime/Agent.cs              | 15 ++++++++-------
 .../Tests/Editor/MLAgentsEditModeTest.cs          | 12 +++++++-----
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
index c35d08df2c..6f3abf9ca8 100644
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
@@ -418,10 +418,11 @@ public void EnvironmentStep()
                 ForcedFullReset();
             }
 
-            AgentIncrementStep?.Invoke();
-
             AgentSetStatus?.Invoke(m_StepCount);
 
+            m_StepCount += 1;
+            m_TotalStepCount += 1;
+            AgentIncrementStep?.Invoke();
 
             using (TimerStack.Instance.Scoped("AgentSendState"))
             {
@@ -438,8 +439,7 @@ public void EnvironmentStep()
                 AgentAct?.Invoke();
             }
 
-            m_StepCount += 1;
-            m_TotalStepCount += 1;
+
         }
 
         /// <summary>
diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
index 56096d20de..969fcacd9f 100644
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
@@ -692,19 +692,14 @@ void SendInfo()
 
         void AgentIncrementStep()
         {
-            if ((m_StepCount >= maxStep) && (maxStep > 0))
-            {
-                NotifyAgentDone(true);
-                _AgentReset();
-            }
+
             m_StepCount += 1;
+
         }
 
         /// Used by the brain to make the agent perform a step.
         void AgentStep()
         {
-
-
             if ((m_RequestAction) && (m_Brain != null))
             {
                 m_RequestAction = false;
@@ -713,6 +708,12 @@ void AgentStep()
                     AgentAction(m_Action.vectorActions);
                 }
             }
+
+            if ((m_StepCount >= maxStep) && (maxStep > 0))
+            {
+                NotifyAgentDone(true);
+                _AgentReset();
+            }
         }
 
         void DecideAction()
diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
index 97b4b5c8ad..5819630937 100644
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -489,7 +489,8 @@ public void TestCumulativeReward()
             var j = 0;
             for (var i = 0; i < 500; i++)
             {
-                if (i % 21 == 0)
+                Debug.Log(i);
+                if (i % 20 == 0)
                 {
                     j = 0;
                 }
@@ -524,12 +525,11 @@ public void TestMaxStepsReset()
 
             for (var i = 0; i < 15; i++)
             {
-                // We expect resets to occur when there are maxSteps actions since the last reset (and on the first step)
-                var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0);
+                Debug.Log(i);
+                // We expect resets to occur when there are maxSteps actions since the last reset 
+                var expectReset = agent1.agentActionCallsSinceLastReset == maxStep;
                 var previousNumResets = agent1.agentResetCalls;
 
-                aca.EnvironmentStep();
-
                 if (expectReset)
                 {
                     Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
@@ -538,6 +538,8 @@ public void TestMaxStepsReset()
                 {
                     Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
                 }
+
+                aca.EnvironmentStep();
             }
         }
     }

From 0b78a16c5cbd148cd259321eaa439f1c5f032698 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Fri, 14 Feb 2020 11:14:34 -0800
Subject: [PATCH 3/9] addressing comments

---
 com.unity.ml-agents/Runtime/Academy.cs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
index 6f3abf9ca8..1309e14f31 100644
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
@@ -113,7 +113,8 @@ public bool IsCommunicatorOn
         // Signals to all the listeners that the academy is being destroyed
         internal event Action DestroyAction;
 
-        // Signals the Agent that a new step is about to start
+        // Signals the Agent that a new step is about to start. 
+        // This will mark the Agent as Done if it has reached its maxSteps.
         internal event Action AgentIncrementStep;
 
         // Signals to all the agents at each environment step along with the
@@ -438,8 +439,6 @@ public void EnvironmentStep()
             {
                 AgentAct?.Invoke();
             }
-
-
         }
 
         /// <summary>

From 66518dbb0b2a86e6e171ab9479e3c1ed76955357 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Date: Fri, 14 Feb 2020 14:05:20 -0800
Subject: [PATCH 4/9] Update com.unity.ml-agents/Runtime/Agent.cs

Co-Authored-By: Chris Goy <goyenator@gmail.com>
---
 com.unity.ml-agents/Runtime/Agent.cs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
index 969fcacd9f..f2aa9a6216 100644
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
@@ -694,7 +694,6 @@ void AgentIncrementStep()
         {
 
             m_StepCount += 1;
-
         }
 
         /// Used by the brain to make the agent perform a step.

From 544328092ce4844742e97fc8a53859318999f0a5 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Date: Fri, 14 Feb 2020 14:05:33 -0800
Subject: [PATCH 5/9] Update com.unity.ml-agents/Runtime/Agent.cs

Co-Authored-By: Chris Goy <goyenator@gmail.com>
---
 com.unity.ml-agents/Runtime/Agent.cs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
index f2aa9a6216..c2f9b9b95e 100644
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
@@ -692,7 +692,6 @@ void SendInfo()
 
         void AgentIncrementStep()
         {
-
             m_StepCount += 1;
         }
 

From b4619a04fa13f6d3c769a8d57e62c569e8927103 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Sun, 16 Feb 2020 17:13:10 -0800
Subject: [PATCH 6/9] Made the tests not be broken

---
 .../Tests/Editor/MLAgentsEditModeTest.cs      | 46 ++++++++++---------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
index 5819630937..0d3d1c9440 100644
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -68,6 +68,7 @@ public override void AgentAction(float[] vectorAction)
 
         public override void AgentReset()
         {
+
             agentResetCalls += 1;
             collectObservationsCallsSinceLastReset = 0;
             agentActionCallsSinceLastReset = 0;
@@ -486,20 +487,18 @@ public void TestCumulativeReward()
             agent1.LazyInitialize();
             agent2.SetPolicy(new TestPolicy());
 
+            int expectedAgent1Resets= 0;
+            int expectedAgent1ActionSinceReset = 0;
+
             var j = 0;
-            for (var i = 0; i < 500; i++)
+            for (var i = 0; i < 50; i++)
             {
-                Debug.Log(i);
-                if (i % 20 == 0)
-                {
-                    j = 0;
-                }
-                else
-                {
-                    j++;
+                expectedAgent1ActionSinceReset += 1;
+                if (expectedAgent1ActionSinceReset == agent1.maxStep || i == 0){
+                    expectedAgent1ActionSinceReset = 0;
                 }
                 agent2.RequestAction();
-                Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
+                Assert.LessOrEqual(Mathf.Abs(expectedAgent1ActionSinceReset * 10.1f - agent1.GetCumulativeReward()), 0.05f);
                 Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
 
                 agent1.AddReward(10f);
@@ -523,23 +522,26 @@ public void TestMaxStepsReset()
             agent1.maxStep = maxStep;
             agent1.LazyInitialize();
 
+            int expectedResets= 0;
+            int expectedAgentAction = 0;
+            int expectedAgentActionSinceReset = 0;
+
             for (var i = 0; i < 15; i++)
             {
-                Debug.Log(i);
-                // We expect resets to occur when there are maxSteps actions since the last reset 
-                var expectReset = agent1.agentActionCallsSinceLastReset == maxStep;
-                var previousNumResets = agent1.agentResetCalls;
-
-                if (expectReset)
-                {
-                    Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
-                }
-                else
-                {
-                    Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
+                expectedAgentAction += 1;
+                expectedAgentActionSinceReset += 1;
+                if (expectedAgentActionSinceReset == maxStep || (i == 0)){
+                    expectedResets +=1;
                 }
 
+                if (expectedAgentActionSinceReset == maxStep){
+                    expectedAgentActionSinceReset = 0;
+                }
                 aca.EnvironmentStep();
+
+                Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls);
+                Assert.AreEqual(expectedResets, agent1.agentResetCalls);
+                Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset);
             }
         }
     }

From 5d78e2d9e8f98c23708b0dd36a2e22d532fa0748 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Date: Sun, 16 Feb 2020 18:19:53 -0800
Subject: [PATCH 7/9] Update com.unity.ml-agents/Runtime/Agent.cs

Co-Authored-By: Chris Elion <chris.elion@unity3d.com>
---
 com.unity.ml-agents/Runtime/Agent.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
index c2f9b9b95e..c288e64efc 100644
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
@@ -259,7 +259,7 @@ void OnDisable()
             // We don't want to even try, because this will lazily create a new Academy!
             if (Academy.IsInitialized)
             {
-                Academy.Instance.AgentIncrementStep -=AgentIncrementStep;
+                Academy.Instance.AgentIncrementStep -= AgentIncrementStep;
                 Academy.Instance.AgentSendState -= SendInfo;
                 Academy.Instance.DecideAction -= DecideAction;
                 Academy.Instance.AgentAct -= AgentStep;

From 3b7f1902ee90073cea036e6b491bc7f2315443cd Mon Sep 17 00:00:00 2001
From: Chris Elion <chris.elion@unity3d.com>
Date: Tue, 18 Feb 2020 14:55:46 -0800
Subject: [PATCH 8/9] step logic changes: unit test (#3467)

---
 .../Tests/Editor/MLAgentsEditModeTest.cs      | 34 +++++++++++++------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
index 0d3d1c9440..124e18244b 100644
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -487,10 +487,8 @@ public void TestCumulativeReward()
             agent1.LazyInitialize();
             agent2.SetPolicy(new TestPolicy());
 
-            int expectedAgent1Resets= 0;
-            int expectedAgent1ActionSinceReset = 0;
+            var expectedAgent1ActionSinceReset = 0;
 
-            var j = 0;
             for (var i = 0; i < 50; i++)
             {
                 expectedAgent1ActionSinceReset += 1;
@@ -518,30 +516,46 @@ public void TestMaxStepsReset()
             decisionRequester.DecisionPeriod = 1;
             decisionRequester.Awake();
 
-            var maxStep = 6;
+            const int maxStep = 6;
             agent1.maxStep = maxStep;
             agent1.LazyInitialize();
 
-            int expectedResets= 0;
-            int expectedAgentAction = 0;
-            int expectedAgentActionSinceReset = 0;
+            var expectedAgentStepCount = 0;
+            var expectedResets= 0;
+            var expectedAgentAction = 0;
+            var expectedAgentActionSinceReset = 0;
+            var expectedCollectObsCalls = 0;
+            var expectedCollectObsCallsSinceReset = 0;
 
             for (var i = 0; i < 15; i++)
             {
+                // Agent should observe and act on each Academy step
                 expectedAgentAction += 1;
                 expectedAgentActionSinceReset += 1;
-                if (expectedAgentActionSinceReset == maxStep || (i == 0)){
+                expectedCollectObsCalls += 1;
+                expectedCollectObsCallsSinceReset += 1;
+                expectedAgentStepCount += 1;
+
+                // If the next step will put the agent at maxSteps, we expect it to reset
+                if (agent1.GetStepCount() == maxStep - 1 || (i == 0))
+                {
                     expectedResets +=1;
                 }
 
-                if (expectedAgentActionSinceReset == maxStep){
+                if (agent1.GetStepCount() == maxStep - 1)
+                {
                     expectedAgentActionSinceReset = 0;
+                    expectedCollectObsCallsSinceReset = 0;
+                    expectedAgentStepCount = 0;
                 }
                 aca.EnvironmentStep();
 
-                Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls);
+                Assert.AreEqual(expectedAgentStepCount, agent1.GetStepCount());
                 Assert.AreEqual(expectedResets, agent1.agentResetCalls);
+                Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls);
                 Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset);
+                Assert.AreEqual(expectedCollectObsCalls, agent1.collectObservationsCalls);
+                Assert.AreEqual(expectedCollectObsCallsSinceReset, agent1.collectObservationsCallsSinceLastReset);
             }
         }
     }

From e5c443a643718269b31db5992667de6b5bd0aebd Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 18 Feb 2020 16:28:34 -0800
Subject: [PATCH 9/9] Added a line in the changelog

---
 com.unity.ml-agents/CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 7093a2a4ef..d575d2b4d1 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
  - Several classes were changed from public to internal visibility. (#3390)
  - Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
  - A tutorial on adding custom SideChannels was added (#3391)
+ - The stepping logic for the Agent and the Academy has been simplified (#3448)
 
 ### Bugfixes