Module pearl.user_envs.wrappers.dynamic_action_env
Expand source code
# pyre-ignore-all-errors
import torch

try:
    import gymnasium as gym
except ModuleNotFoundError:
    print("gymnasium module not found.")

from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace


class DynamicActionSpaceWrapper(gym.Wrapper):
    def __init__(self, env, reduce_action_space_cadence=4):
        super(DynamicActionSpaceWrapper, self).__init__(env)
        self.number_of_steps = 0
        self.reduce_action_space_cadence = reduce_action_space_cadence

    def reset(self, **kwargs):
        observation, info = self.env.reset(**kwargs)
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n)]
        )
        self.number_of_steps = 0
        return observation, info

    def step(self, action):
        if (
            self.number_of_steps % self.reduce_action_space_cadence == 0
            and self.number_of_steps != 0
        ):
            assert action != self.env.action_space.n - 1
        observation, reward, done, truncated, info = self.env.step(action)
        self.number_of_steps += 1
        shrink = (
            1 if self.number_of_steps % self.reduce_action_space_cadence == 0 else 0
        )
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n - shrink)]
        )
        return observation, reward, done, truncated, info
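
The wrapper reports the currently available actions through the info dict: reset exposes the full action set, and after every reduce_action_space_cadence-th step the highest-index action is temporarily removed and must not be chosen on the following step. A minimal usage sketch follows (not part of the module; "CartPole-v1" is an arbitrary discrete-action environment chosen for illustration):

# Hedged usage sketch; assumes gymnasium and Pearl are installed and that
# "CartPole-v1" (an arbitrary choice) provides a discrete action space.
import gymnasium as gym

from pearl.user_envs.wrappers.dynamic_action_env import DynamicActionSpaceWrapper

env = DynamicActionSpaceWrapper(gym.make("CartPole-v1"), reduce_action_space_cadence=4)
observation, info = env.reset()
# After reset, info["available_action_space"] covers every action of the base env.
for _ in range(8):
    # Action 0 is never removed, so it stays valid even right after a shrink step.
    observation, reward, done, truncated, info = env.step(0)
    # info["available_action_space"] now reflects the currently allowed actions.
    if done or truncated:
        observation, info = env.reset()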
Classes
class DynamicActionSpaceWrapper (env, reduce_action_space_cadence=4)
Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

This class is the base class of all wrappers to change the behavior of the underlying environment. Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`, :attr:`reward_range` and :attr:`metadata` attributes, without changing the underlying environment's attributes. Moreover, the behavior of the :meth:`step` and :meth:`reset` methods can be changed by these wrappers.

Some attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the wrapper's environment (i.e. to the corresponding attributes of :attr:`env`).

Note
If you inherit from :class:`Wrapper`, don't forget to call super().__init__(env)

Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

Args
env
- The environment to wrap
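
Following the note above, any subclass must call super().__init__(env) before touching self.env. A minimal sketch of that contract (NoOpWrapper is a hypothetical name, not part of Pearl or gymnasium):

import gymnasium as gym

class NoOpWrapper(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)  # required so the wrapper points at `env`

    def step(self, action):
        # Delegates to the wrapped environment without modification.
        return self.env.step(action)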
Expand source code
class DynamicActionSpaceWrapper(gym.Wrapper):
    def __init__(self, env, reduce_action_space_cadence=4):
        super(DynamicActionSpaceWrapper, self).__init__(env)
        self.number_of_steps = 0
        self.reduce_action_space_cadence = reduce_action_space_cadence

    def reset(self, **kwargs):
        observation, info = self.env.reset(**kwargs)
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n)]
        )
        self.number_of_steps = 0
        return observation, info

    def step(self, action):
        if (
            self.number_of_steps % self.reduce_action_space_cadence == 0
            and self.number_of_steps != 0
        ):
            assert action != self.env.action_space.n - 1
        observation, reward, done, truncated, info = self.env.step(action)
        self.number_of_steps += 1
        shrink = (
            1 if self.number_of_steps % self.reduce_action_space_cadence == 0 else 0
        )
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n - shrink)]
        )
        return observation, reward, done, truncated, info
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def reset(self, **kwargs)
Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data.

Expand source code

def reset(self, **kwargs):
    observation, info = self.env.reset(**kwargs)
    info["available_action_space"] = DiscreteActionSpace(
        [torch.tensor([i]) for i in range(self.env.action_space.n)]
    )
    self.number_of_steps = 0
    return observation, info
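
A short illustration of the reset behavior (a sketch, not part of the module; the environment name is an assumption): the wrapper resets its step counter and reports the full action set in the returned info.

# Sketch only; "CartPole-v1" is an assumed discrete-action environment.
env = DynamicActionSpaceWrapper(gym.make("CartPole-v1"))
observation, info = env.reset(seed=0)
# The full action set is always available immediately after a reset,
# regardless of how many steps were taken before.
full_actions = info["available_action_space"]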
def step(self, action)
Uses the :meth:`step` of the :attr:`env` that can be overwritten to change the returned data.

Expand source code

def step(self, action):
    if (
        self.number_of_steps % self.reduce_action_space_cadence == 0
        and self.number_of_steps != 0
    ):
        assert action != self.env.action_space.n - 1
    observation, reward, done, truncated, info = self.env.step(action)
    self.number_of_steps += 1
    shrink = (
        1 if self.number_of_steps % self.reduce_action_space_cadence == 0 else 0
    )
    info["available_action_space"] = DiscreteActionSpace(
        [torch.tensor([i]) for i in range(self.env.action_space.n - shrink)]
    )
    return observation, reward, done, truncated, info
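
The cadence logic in brief: the info returned by every reduce_action_space_cadence-th step excludes the highest-index action, and the assertion at the top of the next step call enforces that restriction. A hedged sketch, assuming a cadence of 2 and an arbitrary discrete-action environment:

# Sketch only; "CartPole-v1" is an assumed environment with 2 discrete actions.
env = DynamicActionSpaceWrapper(gym.make("CartPole-v1"), reduce_action_space_cadence=2)
observation, info = env.reset()

observation, reward, done, truncated, info = env.step(0)  # step 1: info lists all actions
observation, reward, done, truncated, info = env.step(0)  # step 2: info omits the last action
# On the call below the wrapper asserts that the last action is not chosen,
# so an agent must act from the reduced set reported after step 2.
observation, reward, done, truncated, info = env.step(0)  # step 3: valid; info lists all actions again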