Simplifying the Agent reset logic (#3242)

vincentpierre · web-flow · commit 40d6dc964391 · 2020-01-16T15:23:40.000-08:00
* Simplifying the Agent reset logic

 - Agents will reset in ResetIfDone immediately after being marked Done
 - Agents will always request a decision right after reset
 - As a consequence, additional messages may be sent to Python, because a done Agent now sends its info right away instead of waiting for the next decision request

* Fixing the Unit Tests

* Added a note in the Migrating.md document
diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
@@ -436,7 +436,7 @@ public void TestAgent()
                     // Request an action without decision regularly
                     agent2.RequestAction();
                 }
-                if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)))
+                if (agent1.IsDone())
                 {
                     numberAgent1Reset += 1;
                 }
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -193,10 +193,6 @@ public AgentInfo Info
         /// their own experience.
         int m_StepCount;
 
-        /// Flag to signify that an agent has been reset but the fact that it is
-        /// done has not been communicated (required for On Demand Decisions).
-        bool m_HasAlreadyReset;
-
         /// Unique identifier each agent receives at initialization. It is used
         /// to separate between different agents in the environment.
         int m_Id;
@@ -757,7 +753,6 @@ public virtual void AgentReset()
         /// </summary>
         void ForceReset()
         {
-            m_HasAlreadyReset = false;
             _AgentReset();
         }
 
@@ -826,26 +821,9 @@ void SetStatus(int academyStepCounter)
         /// Signals the agent that it must reset if its done flag is set to true.
         void ResetIfDone()
         {
-            // If an agent is done, then it will also
-            // request for a decision and an action
             if (IsDone())
             {
-                if (agentParameters.onDemandDecision)
-                {
-                    if (!m_HasAlreadyReset)
-                    {
-                        // If event based, the agent can reset as soon
-                        // as it is done
-                        _AgentReset();
-                        m_HasAlreadyReset = true;
-                    }
-                }
-                else if (m_RequestDecision)
-                {
-                    // If not event based, the agent must wait to request a
-                    // decision before resetting to keep multiple agents in sync.
-                    _AgentReset();
-                }
+                _AgentReset();
             }
         }
 
@@ -854,15 +832,14 @@ void ResetIfDone()
         /// </summary>
         void SendInfo()
         {
-            if (m_RequestDecision)
+            // If the Agent is done, it has just reset and thus requires a new decision
+            if (m_RequestDecision || IsDone())
             {
                 SendInfoToBrain();
                 ResetReward();
                 m_Done = false;
                 m_MaxStepReached = false;
                 m_RequestDecision = false;
-
-                m_HasAlreadyReset = false;
             }
         }
 
diff --git a/docs/Migrating.md b/docs/Migrating.md
@@ -16,6 +16,7 @@ The versions can be found in
 * Curriculum config files are now YAML formatted and all curricula for a training run are combined into a single file.
 * The `--num-runs` command-line option has been removed.
 * The "Reset on Done" setting in AgentParameters was removed; this is now effectively always true. `AgentOnDone` virtual method on the Agent has been removed.
+* Agents will always request a decision after being marked as `Done()` and will no longer wait for the next call to `RequestDecision()`.
 
 ### Steps to Migrate
 * If you have a class that inherits from Academy:

Original file line number	Diff line number	Diff line change
`@@ -436,7 +436,7 @@ public void TestAgent()`
`436`	`436`	`// Request an action without decision regularly`
`437`	`437`	`agent2.RequestAction();`
`438`	`438`	`}`
`439`		`- if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)))`
	`439`	`+ if (agent1.IsDone())`
`440`	`440`	`{`
`441`	`441`	`numberAgent1Reset += 1;`
`442`	`442`	`}`