
Commit db8fa5c

Merge pull request #434 from muupan/use-array-instead-of-data2

Replace Variable.data with Variable.array (again)

2 parents: e69969e + a41b405

File tree: 4 files changed, +27 -27 lines

chainerrl/agents/a2c.py
chainerrl/agents/dqn.py
chainerrl/agents/iqn.py
chainerrl/agents/ppo.py
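For background: in Chainer, Variable.array and Variable.data expose the same underlying ndarray, with .array introduced (around Chainer v3) as the recommended spelling because .data is easy to confuse with numpy.ndarray.data, the raw memory buffer. A minimal sketch of that equivalence, assuming Chainer (v3 or later) and NumPy are installed:

import numpy as np
import chainer

# Build a small computation so y is a chainer.Variable with autograd history.
x = chainer.Variable(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
y = x * 2 + 1

# Both attributes return the same underlying ndarray, detached from the graph.
assert y.array is y.data
print(type(y.array))  # <class 'numpy.ndarray'>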

chainerrl/agents/a2c.py (11 additions & 11 deletions)

@@ -182,7 +182,7 @@ def _compute_returns(self, next_value):
    def update(self):
        with chainer.no_backprop_mode():
            _, next_value = self.model.pi_and_v(self.states[-1])
-           next_value = next_value.data[:, 0]
+           next_value = next_value.array[:, 0]

        self._compute_returns(next_value)
        pout, values = \
@@ -200,7 +200,7 @@ def update(self):
        advantages = self.returns[:-1] - values
        value_loss = F.mean(advantages * advantages)
        action_loss = \
-           - F.mean(advantages.data * action_log_probs)
+           - F.mean(advantages.array * action_log_probs)

        self.model.cleargrads()

@@ -216,13 +216,13 @@ def update(self):
        # Update stats
        self.average_actor_loss += (
            (1 - self.average_actor_loss_decay) *
-           (float(action_loss.data) - self.average_actor_loss))
+           (float(action_loss.array) - self.average_actor_loss))
        self.average_value += (
            (1 - self.average_value_decay) *
-           (float(value_loss.data) - self.average_value))
+           (float(value_loss.array) - self.average_value))
        self.average_entropy += (
            (1 - self.average_entropy_decay) *
-           (float(dist_entropy.data) - self.average_entropy))
+           (float(dist_entropy.array) - self.average_entropy))

    def batch_act_and_train(self, batch_obs):

@@ -231,7 +231,7 @@ def batch_act_and_train(self, batch_obs):
        if self.t == 0:
            with chainer.no_backprop_mode():
                pout, _ = self.model.pi_and_v(statevar)
-               action = pout.sample().data
+               action = pout.sample().array
            self._flush_storage(statevar.shape, action)

        self.states[self.t - self.t_start] = statevar
@@ -241,11 +241,11 @@ def batch_act_and_train(self, batch_obs):

        with chainer.no_backprop_mode():
            pout, value = self.model.pi_and_v(statevar)
-           action = pout.sample().data
+           action = pout.sample().array

        self.actions[self.t - self.t_start] \
            = action.reshape([-1] + list(self.action_shape))
-       self.value_preds[self.t - self.t_start] = value.data[:, 0]
+       self.value_preds[self.t - self.t_start] = value.array[:, 0]

        self.t += 1

@@ -255,7 +255,7 @@ def batch_act(self, batch_obs):
        statevar = self.batch_states(batch_obs, self.xp, self.phi)
        with chainer.no_backprop_mode():
            pout, _ = self.model.pi_and_v(statevar)
-           action = pout.sample().data
+           action = pout.sample().array
        return chainer.cuda.to_cpu(action)

    def batch_observe_and_train(self, batch_obs, batch_reward, batch_done,
@@ -290,9 +290,9 @@ def act(self, obs):
        statevar = self.batch_states([obs], self.xp, self.phi)
        pout, _ = self.model.pi_and_v(statevar)
        if self.act_deterministically:
-           return chainer.cuda.to_cpu(pout.most_probable.data)[0]
+           return chainer.cuda.to_cpu(pout.most_probable.array)[0]
        else:
-           return chainer.cuda.to_cpu(pout.sample().data)[0]
+           return chainer.cuda.to_cpu(pout.sample().array)[0]

    def stop_episode_and_train(self, state, reward, done=False):
        raise RuntimeError('A2C does not support non-batch training')
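To make the pattern in the a2c.py hunks concrete: reading the advantages through .array hands the actor loss a plain ndarray with no graph attached, so the policy-gradient term does not backpropagate into the value head, while the value loss still does. A small self-contained sketch with invented numbers (not ChainerRL's actual tensors):

import numpy as np
import chainer
import chainer.functions as F

# Toy returns, value predictions and log-probabilities (illustrative values only).
returns = chainer.Variable(np.array([1.0, 0.5, 0.2], dtype=np.float32))
values = chainer.Variable(np.array([0.8, 0.6, 0.1], dtype=np.float32))
action_log_probs = chainer.Variable(np.array([-0.1, -0.3, -0.2], dtype=np.float32))

advantages = returns - values
value_loss = F.mean(advantages * advantages)                  # keeps the graph through values
action_loss = - F.mean(advantages.array * action_log_probs)   # advantages treated as constants
print(float(value_loss.array), float(action_loss.array))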

chainerrl/agents/dqn.py (3 additions & 3 deletions)

@@ -429,8 +429,8 @@ def batch_act_and_train(self, batch_obs):
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            batch_xs = self.batch_states(batch_obs, self.xp, self.phi)
            batch_av = self.model(batch_xs)
-           batch_maxq = batch_av.max.data
-           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+           batch_maxq = batch_av.max.array
+           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        batch_action = [
            self.explorer.select_action(
                self.t, lambda: batch_argmax[i],
@@ -450,7 +450,7 @@ def batch_act(self, batch_obs):
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            batch_xs = self.batch_states(batch_obs, self.xp, self.phi)
            batch_av = self.model(batch_xs)
-           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        return batch_argmax

    def batch_observe_and_train(self, batch_obs, batch_reward,
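The dqn.py change (and the analogous one in iqn.py below) only affects how the greedy action indices and max Q-values are pulled out of the network output as plain arrays. A rough, standalone approximation of that conversion, using a hand-written Q-value batch and chainer.functions in place of ChainerRL's ActionValue object:

import numpy as np
import chainer
import chainer.functions as F
from chainer import cuda

# Two observations, two actions; q_values is a made-up stand-in for the model output.
q_values = chainer.Variable(np.array([[0.1, 0.9],
                                      [0.7, 0.3]], dtype=np.float32))
batch_maxq = F.max(q_values, axis=1).array                    # ndarray of max Q-values
batch_argmax = cuda.to_cpu(F.argmax(q_values, axis=1).array)  # action indices on CPU
print(batch_maxq, batch_argmax)  # [0.9 0.7] [1 0]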

chainerrl/agents/iqn.py (3 additions & 3 deletions)

@@ -340,8 +340,8 @@ def act(self, obs):

    def batch_act_and_train(self, batch_obs):
        batch_av = self._compute_action_value(batch_obs)
-       batch_maxq = batch_av.max.data
-       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+       batch_maxq = batch_av.max.array
+       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        batch_action = [
            self.explorer.select_action(
                self.t, lambda: batch_argmax[i],
@@ -359,5 +359,5 @@ def batch_act_and_train(self, batch_obs):

    def batch_act(self, batch_obs):
        batch_av = self._compute_action_value(batch_obs)
-       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        return batch_argmax

chainerrl/agents/ppo.py (10 additions & 10 deletions)

@@ -177,9 +177,9 @@ def _make_dataset(self):
        next_states = self.obs_normalizer(next_states, update=False)
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            _, vs_pred = self.model(states)
-           vs_pred = chainer.cuda.to_cpu(vs_pred.data.ravel())
+           vs_pred = chainer.cuda.to_cpu(vs_pred.array.ravel())
            _, next_vs_pred = self.model(next_states)
-           next_vs_pred = chainer.cuda.to_cpu(next_vs_pred.data.ravel())
+           next_vs_pred = chainer.cuda.to_cpu(next_vs_pred.array.ravel())
        for transition, v_pred, next_v_pred in zip(dataset,
                                                   vs_pred,
                                                   next_vs_pred):
@@ -328,9 +328,9 @@ def act_and_train(self, obs, reward):
        # action_distrib will be recomputed when computing gradients
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, value = self.model(b_state)
-           action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
-           self.entropy_record.append(float(action_distrib.entropy.data))
-           self.value_record.append(float(value.data))
+           action = chainer.cuda.to_cpu(action_distrib.sample().array)[0]
+           self.entropy_record.append(float(action_distrib.entropy.array))
+           self.value_record.append(float(value.array))

        self.last_state = obs
        self.last_action = action
@@ -346,7 +346,7 @@ def act(self, obs):

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, _ = self.model(b_state)
-           action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
+           action = chainer.cuda.to_cpu(action_distrib.sample().array)[0]

        return action

@@ -381,7 +381,7 @@ def batch_act(self, batch_obs):

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, _ = self.model(b_state)
-           action = chainer.cuda.to_cpu(action_distrib.sample().data)
+           action = chainer.cuda.to_cpu(action_distrib.sample().array)

        return action

@@ -402,10 +402,10 @@ def batch_act_and_train(self, batch_obs):
        # action_distrib will be recomputed when computing gradients
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, batch_value = self.model(b_state)
-           batch_action = chainer.cuda.to_cpu(action_distrib.sample().data)
+           batch_action = chainer.cuda.to_cpu(action_distrib.sample().array)
            self.entropy_record.extend(
-               chainer.cuda.to_cpu(action_distrib.entropy.data))
-           self.value_record.extend(chainer.cuda.to_cpu((batch_value.data)))
+               chainer.cuda.to_cpu(action_distrib.entropy.array))
+           self.value_record.extend(chainer.cuda.to_cpu((batch_value.array)))

        self.batch_last_state = list(batch_obs)
        self.batch_last_action = list(batch_action)