Module pearl.policy_learners.exploration_modules.common.propensity_exploration
Expand source code
from typing import Optional
import torch
from pearl.api.action import Action
from pearl.api.action_space import ActionSpace
from pearl.api.state import SubjectiveState
from pearl.policy_learners.exploration_modules.exploration_module import (
ExplorationModule,
)
from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace
class PropensityExploration(ExplorationModule):
    """
    Propensity exploration module.

    Picks an action by sampling from a categorical distribution whose
    event probabilities are the ``values`` tensor handed to ``act``.
    """

    def __init__(self) -> None:
        super().__init__()

    # TODO: We should make discrete action space itself iterable
    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        values: Optional[torch.Tensor] = None,
        exploit_action: Optional[Action] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        """
        Sample an action index according to the propensities in ``values``.

        Args:
            subjective_state: Not used by this module.
            action_space: Not used by this module.
            values: Propensities over actions. They are passed to
                ``torch.distributions.Categorical`` as ``probs``, which
                normalizes them along the last dimension. Required, even
                though the shared signature gives it a ``None`` default.
            exploit_action: Not used by this module.
            action_availability_mask: Not used by this module.
                NOTE(review): the mask is not applied here, so an action
                with non-zero propensity can be sampled regardless of the
                mask -- confirm whether callers pre-mask ``values``.
            representation: Not used by this module.

        Returns:
            The tensor returned by ``Categorical.sample()``: integer
            indices into the last dimension of ``values``.

        Raises:
            ValueError: If ``values`` is ``None``.
        """
        if values is None:
            # Categorical(None) would also raise ValueError, but with a generic
            # "probs or logits must be specified" message that does not point
            # back at this call site.
            raise ValueError(
                "PropensityExploration.act requires `values` (action "
                "propensities), but got None."
            )
        # `probs=` spells out what the positional form already did: the first
        # positional parameter of Categorical is `probs`, not `logits`.
        return torch.distributions.Categorical(probs=values).sample()
Classes
class PropensityExploration
-
Propensity exploration module.
Expand source code
class PropensityExploration(ExplorationModule):
    """
    Propensity exploration module.

    Picks an action by sampling from a categorical distribution whose
    event probabilities are the ``values`` tensor handed to ``act``.
    """

    def __init__(self) -> None:
        super().__init__()

    # TODO: We should make discrete action space itself iterable
    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        values: Optional[torch.Tensor] = None,
        exploit_action: Optional[Action] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        """
        Sample an action index according to the propensities in ``values``.

        Only ``values`` is read; every other argument is accepted but not
        used by this module.

        Args:
            values: Propensities over actions, passed to
                ``torch.distributions.Categorical`` as ``probs`` (normalized
                along the last dimension). Required despite the ``None``
                default in the signature.

        Returns:
            The tensor returned by ``Categorical.sample()``: integer
            indices into the last dimension of ``values``.

        Raises:
            ValueError: If ``values`` is ``None``.
        """
        if values is None:
            # Same exception type Categorical(None) raises, with a message
            # that names the missing argument.
            raise ValueError(
                "PropensityExploration.act requires `values` (action "
                "propensities), but got None."
            )
        # The first positional parameter of Categorical is `probs`.
        return torch.distributions.Categorical(probs=values).sample()
Ancestors
- ExplorationModule
- abc.ABC
Methods
def act(self, subjective_state: torch.Tensor, action_space: ActionSpace, values: Optional[torch.Tensor] = None, exploit_action: Optional[torch.Tensor] = None, action_availability_mask: Optional[torch.Tensor] = None, representation: Optional[torch.nn.modules.module.Module] = None) ‑> torch.Tensor
-
Expand source code
def act(
    self,
    subjective_state: SubjectiveState,
    action_space: ActionSpace,
    values: Optional[torch.Tensor] = None,
    exploit_action: Optional[Action] = None,
    action_availability_mask: Optional[torch.Tensor] = None,
    representation: Optional[torch.nn.Module] = None,
) -> Action:
    """
    Sample an action index according to the propensities in ``values``.

    Only ``values`` is read; every other argument is accepted but not
    used by this method.

    Args:
        values: Propensities over actions, passed to
            ``torch.distributions.Categorical`` as ``probs`` (normalized
            along the last dimension). Required despite the ``None``
            default in the signature.

    Returns:
        The tensor returned by ``Categorical.sample()``: integer indices
        into the last dimension of ``values``.

    Raises:
        ValueError: If ``values`` is ``None``.
    """
    if values is None:
        # Same exception type Categorical(None) raises, with a message that
        # names the missing argument.
        raise ValueError(
            "PropensityExploration.act requires `values` (action "
            "propensities), but got None."
        )
    # The first positional parameter of Categorical is `probs`.
    return torch.distributions.Categorical(probs=values).sample()
Inherited members