
Commit db8fa5c

Merge pull request #434 from muupan/use-array-instead-of-data2

Replace Variable.data with Variable.array (again)

2 parents: e69969e + a41b405

File tree: 4 files changed, +27 -27 lines

chainerrl/agents/a2c.py
chainerrl/agents/dqn.py
chainerrl/agents/iqn.py
chainerrl/agents/ppo.py
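For background: in Chainer, Variable.array and Variable.data expose the same underlying ndarray, with .array introduced (around Chainer v3) as the recommended spelling because .data is easy to confuse with numpy.ndarray.data, the raw memory buffer. A minimal sketch of that equivalence, assuming Chainer (v3 or later) and NumPy are installed:

import numpy as np
import chainer

# Build a small computation so y is a chainer.Variable with autograd history.
x = chainer.Variable(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
y = x * 2 + 1

# Both attributes return the same underlying ndarray, detached from the graph.
assert y.array is y.data
print(type(y.array))  # <class 'numpy.ndarray'>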

chainerrl/agents/a2c.py (11 additions & 11 deletions)

@@ -182,7 +182,7 @@ def _compute_returns(self, next_value):
    def update(self):
        with chainer.no_backprop_mode():
            _, next_value = self.model.pi_and_v(self.states[-1])
-           next_value = next_value.data[:, 0]
+           next_value = next_value.array[:, 0]

        self._compute_returns(next_value)
        pout, values = \
@@ -200,7 +200,7 @@ def update(self):
        advantages = self.returns[:-1] - values
        value_loss = F.mean(advantages * advantages)
        action_loss = \
-           - F.mean(advantages.data * action_log_probs)
+           - F.mean(advantages.array * action_log_probs)

        self.model.cleargrads()

@@ -216,13 +216,13 @@ def update(self):
        # Update stats
        self.average_actor_loss += (
            (1 - self.average_actor_loss_decay) *
-           (float(action_loss.data) - self.average_actor_loss))
+           (float(action_loss.array) - self.average_actor_loss))
        self.average_value += (
            (1 - self.average_value_decay) *
-           (float(value_loss.data) - self.average_value))
+           (float(value_loss.array) - self.average_value))
        self.average_entropy += (
            (1 - self.average_entropy_decay) *
-           (float(dist_entropy.data) - self.average_entropy))
+           (float(dist_entropy.array) - self.average_entropy))

    def batch_act_and_train(self, batch_obs):

@@ -231,7 +231,7 @@ def batch_act_and_train(self, batch_obs):
        if self.t == 0:
            with chainer.no_backprop_mode():
                pout, _ = self.model.pi_and_v(statevar)
-               action = pout.sample().data
+               action = pout.sample().array
            self._flush_storage(statevar.shape, action)

        self.states[self.t - self.t_start] = statevar
@@ -241,11 +241,11 @@ def batch_act_and_train(self, batch_obs):

        with chainer.no_backprop_mode():
            pout, value = self.model.pi_and_v(statevar)
-           action = pout.sample().data
+           action = pout.sample().array

        self.actions[self.t - self.t_start] \
            = action.reshape([-1] + list(self.action_shape))
-       self.value_preds[self.t - self.t_start] = value.data[:, 0]
+       self.value_preds[self.t - self.t_start] = value.array[:, 0]

        self.t += 1

@@ -255,7 +255,7 @@ def batch_act(self, batch_obs):
        statevar = self.batch_states(batch_obs, self.xp, self.phi)
        with chainer.no_backprop_mode():
            pout, _ = self.model.pi_and_v(statevar)
-           action = pout.sample().data
+           action = pout.sample().array
        return chainer.cuda.to_cpu(action)

    def batch_observe_and_train(self, batch_obs, batch_reward, batch_done,
@@ -290,9 +290,9 @@ def act(self, obs):
        statevar = self.batch_states([obs], self.xp, self.phi)
        pout, _ = self.model.pi_and_v(statevar)
        if self.act_deterministically:
-           return chainer.cuda.to_cpu(pout.most_probable.data)[0]
+           return chainer.cuda.to_cpu(pout.most_probable.array)[0]
        else:
-           return chainer.cuda.to_cpu(pout.sample().data)[0]
+           return chainer.cuda.to_cpu(pout.sample().array)[0]

    def stop_episode_and_train(self, state, reward, done=False):
        raise RuntimeError('A2C does not support non-batch training')
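To make the pattern in the a2c.py hunks concrete: reading the advantages through .array hands the actor loss a plain ndarray with no graph attached, so the policy-gradient term does not backpropagate into the value head, while the value loss still does. A small self-contained sketch with invented numbers (not ChainerRL's actual tensors):

import numpy as np
import chainer
import chainer.functions as F

# Toy returns, value predictions and log-probabilities (illustrative values only).
returns = chainer.Variable(np.array([1.0, 0.5, 0.2], dtype=np.float32))
values = chainer.Variable(np.array([0.8, 0.6, 0.1], dtype=np.float32))
action_log_probs = chainer.Variable(np.array([-0.1, -0.3, -0.2], dtype=np.float32))

advantages = returns - values
value_loss = F.mean(advantages * advantages)                  # keeps the graph through values
action_loss = - F.mean(advantages.array * action_log_probs)   # advantages treated as constants
print(float(value_loss.array), float(action_loss.array))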

chainerrl/agents/dqn.py (3 additions & 3 deletions)

@@ -429,8 +429,8 @@ def batch_act_and_train(self, batch_obs):
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            batch_xs = self.batch_states(batch_obs, self.xp, self.phi)
            batch_av = self.model(batch_xs)
-           batch_maxq = batch_av.max.data
-           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+           batch_maxq = batch_av.max.array
+           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        batch_action = [
            self.explorer.select_action(
                self.t, lambda: batch_argmax[i],
@@ -450,7 +450,7 @@ def batch_act(self, batch_obs):
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            batch_xs = self.batch_states(batch_obs, self.xp, self.phi)
            batch_av = self.model(batch_xs)
-           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+           batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        return batch_argmax

    def batch_observe_and_train(self, batch_obs, batch_reward,
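The dqn.py change (and the analogous one in iqn.py below) only affects how the greedy action indices and max Q-values are pulled out of the network output as plain arrays. A rough, standalone approximation of that conversion, using a hand-written Q-value batch and chainer.functions in place of ChainerRL's ActionValue object:

import numpy as np
import chainer
import chainer.functions as F
from chainer import cuda

# Two observations, two actions; q_values is a made-up stand-in for the model output.
q_values = chainer.Variable(np.array([[0.1, 0.9],
                                      [0.7, 0.3]], dtype=np.float32))
batch_maxq = F.max(q_values, axis=1).array                    # ndarray of max Q-values
batch_argmax = cuda.to_cpu(F.argmax(q_values, axis=1).array)  # action indices on CPU
print(batch_maxq, batch_argmax)  # [0.9 0.7] [1 0]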

chainerrl/agents/iqn.py (3 additions & 3 deletions)

@@ -340,8 +340,8 @@ def act(self, obs):

    def batch_act_and_train(self, batch_obs):
        batch_av = self._compute_action_value(batch_obs)
-       batch_maxq = batch_av.max.data
-       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+       batch_maxq = batch_av.max.array
+       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        batch_action = [
            self.explorer.select_action(
                self.t, lambda: batch_argmax[i],
@@ -359,5 +359,5 @@ def batch_act_and_train(self, batch_obs):

    def batch_act(self, batch_obs):
        batch_av = self._compute_action_value(batch_obs)
-       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+       batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
        return batch_argmax

chainerrl/agents/ppo.py (10 additions & 10 deletions)

@@ -177,9 +177,9 @@ def _make_dataset(self):
        next_states = self.obs_normalizer(next_states, update=False)
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            _, vs_pred = self.model(states)
-           vs_pred = chainer.cuda.to_cpu(vs_pred.data.ravel())
+           vs_pred = chainer.cuda.to_cpu(vs_pred.array.ravel())
            _, next_vs_pred = self.model(next_states)
-           next_vs_pred = chainer.cuda.to_cpu(next_vs_pred.data.ravel())
+           next_vs_pred = chainer.cuda.to_cpu(next_vs_pred.array.ravel())
        for transition, v_pred, next_v_pred in zip(dataset,
                                                   vs_pred,
                                                   next_vs_pred):
@@ -328,9 +328,9 @@ def act_and_train(self, obs, reward):
        # action_distrib will be recomputed when computing gradients
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, value = self.model(b_state)
-           action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
-           self.entropy_record.append(float(action_distrib.entropy.data))
-           self.value_record.append(float(value.data))
+           action = chainer.cuda.to_cpu(action_distrib.sample().array)[0]
+           self.entropy_record.append(float(action_distrib.entropy.array))
+           self.value_record.append(float(value.array))

        self.last_state = obs
        self.last_action = action
@@ -346,7 +346,7 @@ def act(self, obs):

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, _ = self.model(b_state)
-           action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
+           action = chainer.cuda.to_cpu(action_distrib.sample().array)[0]

        return action

@@ -381,7 +381,7 @@ def batch_act(self, batch_obs):

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, _ = self.model(b_state)
-           action = chainer.cuda.to_cpu(action_distrib.sample().data)
+           action = chainer.cuda.to_cpu(action_distrib.sample().array)

        return action

@@ -402,10 +402,10 @@ def batch_act_and_train(self, batch_obs):
        # action_distrib will be recomputed when computing gradients
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            action_distrib, batch_value = self.model(b_state)
-           batch_action = chainer.cuda.to_cpu(action_distrib.sample().data)
+           batch_action = chainer.cuda.to_cpu(action_distrib.sample().array)
            self.entropy_record.extend(
-               chainer.cuda.to_cpu(action_distrib.entropy.data))
-           self.value_record.extend(chainer.cuda.to_cpu((batch_value.data)))
+               chainer.cuda.to_cpu(action_distrib.entropy.array))
+           self.value_record.extend(chainer.cuda.to_cpu((batch_value.array)))

        self.batch_last_state = list(batch_obs)
        self.batch_last_action = list(batch_action)