Module pearl.utils.instantiations.environments.contextual_bandit_environment
Expand source code
from abc import ABC, abstractmethod
from pearl.api.action import Action
from pearl.api.action_result import ActionResult
from pearl.api.action_space import ActionSpace
from pearl.api.environment import Environment
from pearl.api.reward import Reward
class ContextualBanditEnvironment(Environment, ABC):
    """
    Environment base class tailored to contextual bandit problems.

    Every episode of a contextual bandit consists of exactly one step, and
    the reward is the only outcome of an action that matters. `step` is
    therefore implemented here: it asks `get_reward` (supplied by concrete
    subclasses) for the reward and wraps it in an ActionResult whose
    'terminated' flag is True and whose next observation is None, because
    that observation is irrelevant.
    """

    @property
    @abstractmethod
    def action_space(self) -> ActionSpace:
        """Action space of this environment (implemented by subclasses)."""

    @abstractmethod
    def get_reward(self, action: Action) -> Reward:
        """Return the reward earned by `action` (implemented by subclasses)."""

    def step(self, action: Action) -> ActionResult:
        """
        Run the single step of the episode.

        Returns an ActionResult carrying the reward computed by `get_reward`,
        with 'terminated' set to True and 'truncated' set to False.
        """
        return ActionResult(
            # The episode ends with this step, so the observation that would
            # follow the action does not matter; report None.
            observation=None,
            reward=self.get_reward(action),
            terminated=True,
            truncated=False,
        )

    def render(self) -> None:
        """No-op implementation."""
        return None

    def close(self) -> None:
        """No-op implementation."""
        return None
Classes
class ContextualBanditEnvironment
-
A specialization of Environment for contextual bandits. In a contextual bandit environment, an episode always has a single step and the only important information the environment needs to produce as a result of an action is its reward.
This class provides an implementation of step that takes that into account, returning an ActionResult with 'terminated' equal to 'True' and with next observation equal to None (since it is irrelevant). It defers to a new method `get_reward` (to be provided by implementations) to determine the ActionResult reward.
Expand source code
class ContextualBanditEnvironment(Environment, ABC):
    """
    Environment base class tailored to contextual bandit problems.

    Every episode of a contextual bandit consists of exactly one step, and
    the reward is the only outcome of an action that matters. `step` is
    therefore implemented here: it asks `get_reward` (supplied by concrete
    subclasses) for the reward and wraps it in an ActionResult whose
    'terminated' flag is True and whose next observation is None, because
    that observation is irrelevant.
    """

    @property
    @abstractmethod
    def action_space(self) -> ActionSpace:
        """Action space of this environment (implemented by subclasses)."""

    @abstractmethod
    def get_reward(self, action: Action) -> Reward:
        """Return the reward earned by `action` (implemented by subclasses)."""

    def step(self, action: Action) -> ActionResult:
        """
        Run the single step of the episode.

        Returns an ActionResult carrying the reward computed by `get_reward`,
        with 'terminated' set to True and 'truncated' set to False.
        """
        return ActionResult(
            # The episode ends with this step, so the observation that would
            # follow the action does not matter; report None.
            observation=None,
            reward=self.get_reward(action),
            terminated=True,
            truncated=False,
        )

    def render(self) -> None:
        """No-op implementation."""
        return None

    def close(self) -> None:
        """No-op implementation."""
        return None
Ancestors
- Environment
- abc.ABC
Subclasses
Methods
def get_reward(self, action: torch.Tensor) -> object
-
Expand source code
@abstractmethod
def get_reward(self, action: Action) -> Reward:
    """Return the reward earned by `action` (implemented by subclasses)."""
Inherited members