Module pearl.user_envs.wrappers.dynamic_action_env
Expand source code
# pyre-ignore-all-errors
import torch

try:
    import gymnasium as gym
except ModuleNotFoundError:
    print("gymnasium module not found.")

from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace


# Gymnasium wrapper that periodically shrinks the action space reported to the
# agent: every `reduce_action_space_cadence` steps, the last action of the
# underlying discrete action space is omitted from info["available_action_space"].
class DynamicActionSpaceWrapper(gym.Wrapper):
    def __init__(self, env, reduce_action_space_cadence=4):
        super(DynamicActionSpaceWrapper, self).__init__(env)
        self.number_of_steps = 0
        self.reduce_action_space_cadence = reduce_action_space_cadence

    def reset(self, **kwargs):
        observation, info = self.env.reset(**kwargs)
        # On reset, the full action space of the underlying environment is available.
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n)]
        )
        self.number_of_steps = 0
        return observation, info

    def step(self, action):
        if (
            self.number_of_steps % self.reduce_action_space_cadence == 0
            and self.number_of_steps != 0
        ):
            # The previous step reported a reduced action space, so the last
            # action must not be chosen on this step.
            assert action != self.env.action_space.n - 1
        observation, reward, done, truncated, info = self.env.step(action)
        self.number_of_steps += 1
        # Every `reduce_action_space_cadence` steps, drop the last action from
        # the action space reported to the agent for the next step.
        shrink = (
            1 if self.number_of_steps % self.reduce_action_space_cadence == 0 else 0
        )
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n - shrink)]
        )
        return observation, reward, done, truncated, info
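A minimal usage sketch, assuming gymnasium is installed; Acrobot-v1 (three discrete actions) is chosen purely for illustration, as the wrapper works with any environment that has a discrete action space:

import gymnasium as gym
from pearl.user_envs.wrappers.dynamic_action_env import DynamicActionSpaceWrapper

# Any discrete-action environment works; Acrobot-v1 is only an example here.
env = DynamicActionSpaceWrapper(gym.make("Acrobot-v1"), reduce_action_space_cadence=4)

observation, info = env.reset()
# After reset, all actions of the underlying environment are reported as available.
print(info["available_action_space"])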
Classes
class DynamicActionSpaceWrapper (env, reduce_action_space_cadence=4)

Wraps a gymnasium.Env to allow a modular transformation of the step() and reset() methods.

This class is the base class of all wrappers that change the behavior of the underlying environment. Wrappers that inherit from this class can modify the action_space, observation_space, reward_range and metadata attributes without changing the underlying environment's attributes. Moreover, the behavior of the step() and reset() methods can be changed by these wrappers.

Some attributes (spec, render_mode, np_random) will point back to the wrapper's environment (i.e. to the corresponding attributes of env).

Note
If you inherit from Wrapper, don't forget to call super().__init__(env).

Wraps an environment to allow a modular transformation of the step() and reset() methods.

Args
env - The environment to wrap
reduce_action_space_cadence - Every this many steps, the last action is reported as unavailable for the following step (default: 4)
Expand source code

class DynamicActionSpaceWrapper(gym.Wrapper):
    def __init__(self, env, reduce_action_space_cadence=4):
        super(DynamicActionSpaceWrapper, self).__init__(env)
        self.number_of_steps = 0
        self.reduce_action_space_cadence = reduce_action_space_cadence

    def reset(self, **kwargs):
        observation, info = self.env.reset(**kwargs)
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n)]
        )
        self.number_of_steps = 0
        return observation, info

    def step(self, action):
        if (
            self.number_of_steps % self.reduce_action_space_cadence == 0
            and self.number_of_steps != 0
        ):
            assert action != self.env.action_space.n - 1
        observation, reward, done, truncated, info = self.env.step(action)
        self.number_of_steps += 1
        shrink = (
            1 if self.number_of_steps % self.reduce_action_space_cadence == 0 else 0
        )
        info["available_action_space"] = DiscreteActionSpace(
            [torch.tensor([i]) for i in range(self.env.action_space.n - shrink)]
        )
        return observation, reward, done, truncated, info

Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def reset(self, **kwargs)

Uses the reset() of the env that can be overwritten to change the returned data.

Expand source code

def reset(self, **kwargs):
    observation, info = self.env.reset(**kwargs)
    info["available_action_space"] = DiscreteActionSpace(
        [torch.tensor([i]) for i in range(self.env.action_space.n)]
    )
    self.number_of_steps = 0
    return observation, info

def step(self, action)

Uses the step() of the env that can be overwritten to change the returned data.

Expand source code

def step(self, action):
    if (
        self.number_of_steps % self.reduce_action_space_cadence == 0
        and self.number_of_steps != 0
    ):
        assert action != self.env.action_space.n - 1
    observation, reward, done, truncated, info = self.env.step(action)
    self.number_of_steps += 1
    shrink = (
        1 if self.number_of_steps % self.reduce_action_space_cadence == 0 else 0
    )
    info["available_action_space"] = DiscreteActionSpace(
        [torch.tensor([i]) for i in range(self.env.action_space.n - shrink)]
    )
    return observation, reward, done, truncated, info
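A short sketch of an interaction loop that respects the reported action space: with the default cadence of 4, the info returned on steps 4, 8, 12, … reports one fewer action, and the assert in step() requires that the last action is not taken on the following step. This assumes the DiscreteActionSpace built above exposes the number of actions as n; Acrobot-v1 is again only an illustrative choice of environment.

import random

import gymnasium as gym
from pearl.user_envs.wrappers.dynamic_action_env import DynamicActionSpaceWrapper

env = DynamicActionSpaceWrapper(gym.make("Acrobot-v1"), reduce_action_space_cadence=4)
observation, info = env.reset()

for t in range(12):
    # Sample only from the actions reported as currently available,
    # so the assert in step() is never violated.
    available = info["available_action_space"]
    action = random.randrange(available.n)  # `n` assumed to be the action count
    observation, reward, done, truncated, info = env.step(action)
    print(t + 1, available.n, "->", info["available_action_space"].n)
    if done or truncated:
        observation, info = env.reset()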