Replace Variable.data with Variable.array (again) #434

Merged 1 commit on Apr 4, 2019
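
For context (not part of this PR's diff): in Chainer, `Variable.array` and `Variable.data` expose the same underlying ndarray, but `.array` is preferred because a bare NumPy array's `.data` attribute is a raw memory buffer, so code that accidentally receives an ndarray instead of a `Variable` misbehaves silently with `.data` but fails clearly with `.array`. A minimal sketch, assuming Chainer >= 3 (where `Variable.array` exists):

```python
import numpy as np
import chainer

v = chainer.Variable(np.arange(3, dtype=np.float32))

# Variable.array and Variable.data refer to the same underlying ndarray.
assert v.array is v.data

# On a plain ndarray, .data is a memoryview (raw buffer), not the values,
# and .array does not exist at all, so mistakes surface immediately.
x = np.arange(3, dtype=np.float32)
print(type(x.data))         # <class 'memoryview'>
print(hasattr(x, 'array'))  # False
```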
22 changes: 11 additions & 11 deletions chainerrl/agents/a2c.py
@@ -182,7 +182,7 @@ def _compute_returns(self, next_value):
def update(self):
with chainer.no_backprop_mode():
_, next_value = self.model.pi_and_v(self.states[-1])
- next_value = next_value.data[:, 0]
+ next_value = next_value.array[:, 0]

self._compute_returns(next_value)
pout, values = \
@@ -200,7 +200,7 @@ def update(self):
advantages = self.returns[:-1] - values
value_loss = F.mean(advantages * advantages)
action_loss = \
- - F.mean(advantages.data * action_log_probs)
+ - F.mean(advantages.array * action_log_probs)

self.model.cleargrads()

@@ -216,13 +216,13 @@ def update(self):
# Update stats
self.average_actor_loss += (
(1 - self.average_actor_loss_decay) *
- (float(action_loss.data) - self.average_actor_loss))
+ (float(action_loss.array) - self.average_actor_loss))
self.average_value += (
(1 - self.average_value_decay) *
- (float(value_loss.data) - self.average_value))
+ (float(value_loss.array) - self.average_value))
self.average_entropy += (
(1 - self.average_entropy_decay) *
- (float(dist_entropy.data) - self.average_entropy))
+ (float(dist_entropy.array) - self.average_entropy))

def batch_act_and_train(self, batch_obs):

@@ -231,7 +231,7 @@ def batch_act_and_train(self, batch_obs):
if self.t == 0:
with chainer.no_backprop_mode():
pout, _ = self.model.pi_and_v(statevar)
- action = pout.sample().data
+ action = pout.sample().array
self._flush_storage(statevar.shape, action)

self.states[self.t - self.t_start] = statevar
@@ -241,11 +241,11 @@ def batch_act_and_train(self, batch_obs):

with chainer.no_backprop_mode():
pout, value = self.model.pi_and_v(statevar)
- action = pout.sample().data
+ action = pout.sample().array

self.actions[self.t - self.t_start] \
= action.reshape([-1] + list(self.action_shape))
- self.value_preds[self.t - self.t_start] = value.data[:, 0]
+ self.value_preds[self.t - self.t_start] = value.array[:, 0]

self.t += 1

@@ -255,7 +255,7 @@ def batch_act(self, batch_obs):
statevar = self.batch_states(batch_obs, self.xp, self.phi)
with chainer.no_backprop_mode():
pout, _ = self.model.pi_and_v(statevar)
- action = pout.sample().data
+ action = pout.sample().array
return chainer.cuda.to_cpu(action)

def batch_observe_and_train(self, batch_obs, batch_reward, batch_done,
@@ -290,9 +290,9 @@ def act(self, obs):
statevar = self.batch_states([obs], self.xp, self.phi)
pout, _ = self.model.pi_and_v(statevar)
if self.act_deterministically:
- return chainer.cuda.to_cpu(pout.most_probable.data)[0]
+ return chainer.cuda.to_cpu(pout.most_probable.array)[0]
else:
- return chainer.cuda.to_cpu(pout.sample().data)[0]
+ return chainer.cuda.to_cpu(pout.sample().array)[0]

def stop_episode_and_train(self, state, reward, done=False):
raise RuntimeError('A2C does not support non-batch training')
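Side note on the a2c.py hunk at `@@ -200,7 +200,7 @@` above: reading `advantages.array` (previously `advantages.data`) yields a plain ndarray, so the policy-gradient term does not backpropagate into the value head, while `value_loss` still does. A toy sketch of that effect (the shapes, values, and variable contents below are made up for illustration, not taken from the PR):

```python
import numpy as np
import chainer
import chainer.functions as F

values = chainer.Variable(np.zeros((4, 1), dtype=np.float32))   # toy critic output
returns = chainer.Variable(np.ones((4, 1), dtype=np.float32))   # toy returns
action_log_probs = chainer.Variable(
    np.full((4, 1), -0.5, dtype=np.float32))                    # toy log pi(a|s)

advantages = returns - values                        # still part of the graph
value_loss = F.mean(advantages * advantages)         # would backprop into `values`
action_loss = -F.mean(advantages.array * action_log_probs)  # .array detaches the advantages

action_loss.backward()
print(values.grad)            # None: the actor loss leaves the critic untouched
print(action_log_probs.grad)  # populated by the backward pass
```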
6 changes: 3 additions & 3 deletions chainerrl/agents/dqn.py
@@ -429,8 +429,8 @@ def batch_act_and_train(self, batch_obs):
with chainer.using_config('train', False), chainer.no_backprop_mode():
batch_xs = self.batch_states(batch_obs, self.xp, self.phi)
batch_av = self.model(batch_xs)
- batch_maxq = batch_av.max.data
- batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+ batch_maxq = batch_av.max.array
+ batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
batch_action = [
self.explorer.select_action(
self.t, lambda: batch_argmax[i],
@@ -450,7 +450,7 @@ def batch_act(self, batch_obs):
with chainer.using_config('train', False), chainer.no_backprop_mode():
batch_xs = self.batch_states(batch_obs, self.xp, self.phi)
batch_av = self.model(batch_xs)
- batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+ batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
return batch_argmax

def batch_observe_and_train(self, batch_obs, batch_reward,
6 changes: 3 additions & 3 deletions chainerrl/agents/iqn.py
@@ -340,8 +340,8 @@ def act(self, obs):

def batch_act_and_train(self, batch_obs):
batch_av = self._compute_action_value(batch_obs)
- batch_maxq = batch_av.max.data
- batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+ batch_maxq = batch_av.max.array
+ batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
batch_action = [
self.explorer.select_action(
self.t, lambda: batch_argmax[i],
@@ -359,5 +359,5 @@ def batch_act_and_train(self, batch_obs):

def batch_act(self, batch_obs):
batch_av = self._compute_action_value(batch_obs)
- batch_argmax = cuda.to_cpu(batch_av.greedy_actions.data)
+ batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)
return batch_argmax
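
The dqn.py and iqn.py changes above follow the same pattern: greedy actions and max Q-values are read off an action-value object as plain arrays. A rough usage sketch, assuming chainerrl's `DiscreteActionValue` (as in releases of that era) exposes `max` and `greedy_actions` as Chainer `Variable`s; the toy Q-values are invented for illustration:

```python
import numpy as np
import chainer
from chainer import cuda
from chainerrl.action_value import DiscreteActionValue

q_values = chainer.Variable(np.array([[0.1, 0.9, 0.3],
                                      [0.7, 0.2, 0.5]], dtype=np.float32))
batch_av = DiscreteActionValue(q_values)

batch_maxq = batch_av.max.array                             # per-sample max Q as ndarray
batch_argmax = cuda.to_cpu(batch_av.greedy_actions.array)   # greedy action indices on CPU

print(batch_argmax)  # expected: [1 0]
print(batch_maxq)    # expected: [0.9 0.7]
```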
20 changes: 10 additions & 10 deletions chainerrl/agents/ppo.py
@@ -177,9 +177,9 @@ def _make_dataset(self):
next_states = self.obs_normalizer(next_states, update=False)
with chainer.using_config('train', False), chainer.no_backprop_mode():
_, vs_pred = self.model(states)
- vs_pred = chainer.cuda.to_cpu(vs_pred.data.ravel())
+ vs_pred = chainer.cuda.to_cpu(vs_pred.array.ravel())
_, next_vs_pred = self.model(next_states)
- next_vs_pred = chainer.cuda.to_cpu(next_vs_pred.data.ravel())
+ next_vs_pred = chainer.cuda.to_cpu(next_vs_pred.array.ravel())
for transition, v_pred, next_v_pred in zip(dataset,
vs_pred,
next_vs_pred):
@@ -328,9 +328,9 @@ def act_and_train(self, obs, reward):
# action_distrib will be recomputed when computing gradients
with chainer.using_config('train', False), chainer.no_backprop_mode():
action_distrib, value = self.model(b_state)
- action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
- self.entropy_record.append(float(action_distrib.entropy.data))
- self.value_record.append(float(value.data))
+ action = chainer.cuda.to_cpu(action_distrib.sample().array)[0]
+ self.entropy_record.append(float(action_distrib.entropy.array))
+ self.value_record.append(float(value.array))

self.last_state = obs
self.last_action = action
@@ -346,7 +346,7 @@ def act(self, obs):

with chainer.using_config('train', False), chainer.no_backprop_mode():
action_distrib, _ = self.model(b_state)
- action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
+ action = chainer.cuda.to_cpu(action_distrib.sample().array)[0]

return action

@@ -381,7 +381,7 @@ def batch_act(self, batch_obs):

with chainer.using_config('train', False), chainer.no_backprop_mode():
action_distrib, _ = self.model(b_state)
- action = chainer.cuda.to_cpu(action_distrib.sample().data)
+ action = chainer.cuda.to_cpu(action_distrib.sample().array)

return action

@@ -402,10 +402,10 @@ def batch_act_and_train(self, batch_obs):
# action_distrib will be recomputed when computing gradients
with chainer.using_config('train', False), chainer.no_backprop_mode():
action_distrib, batch_value = self.model(b_state)
- batch_action = chainer.cuda.to_cpu(action_distrib.sample().data)
+ batch_action = chainer.cuda.to_cpu(action_distrib.sample().array)
self.entropy_record.extend(
- chainer.cuda.to_cpu(action_distrib.entropy.data))
- self.value_record.extend(chainer.cuda.to_cpu((batch_value.data)))
+ chainer.cuda.to_cpu(action_distrib.entropy.array))
+ self.value_record.extend(chainer.cuda.to_cpu((batch_value.array)))

self.batch_last_state = list(batch_obs)
self.batch_last_action = list(batch_action)