Module pearl.policy_learners.exploration_modules.common.propensity_exploration

Expand source code
from typing import Optional

import torch

from pearl.api.action import Action
from pearl.api.action_space import ActionSpace
from pearl.api.state import SubjectiveState
from pearl.policy_learners.exploration_modules.exploration_module import (
    ExplorationModule,
)
from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace


class PropensityExploration(ExplorationModule):
    """
    Propensity exploration module.

    Picks an action by drawing one sample from a categorical distribution
    built from the ``values`` tensor handed to ``act``.
    """

    def __init__(self) -> None:
        super().__init__()

    # TODO: We should make discrete action space itself iterable
    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        values: Optional[torch.Tensor] = None,
        exploit_action: Optional[Action] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        """
        Sample an action from ``torch.distributions.Categorical(values)``.

        Args:
            subjective_state: Not read by this implementation.
            action_space: Not read by this implementation.
            values: Tensor passed as the first positional argument
                (``probs``) of ``torch.distributions.Categorical``.
                Required despite the ``None`` default in the signature.
                Presumably one propensity per action -- confirm with caller.
            exploit_action: Not read by this implementation.
            action_availability_mask: Not read by this implementation.
            representation: Not read by this implementation.

        Returns:
            The tensor returned by ``Categorical.sample()``.

        Raises:
            ValueError: If ``values`` is ``None``.
        """
        # Categorical(None) would also raise ValueError, but with a message
        # about `probs`/`logits` that never mentions this method's parameter.
        if values is None:
            raise ValueError(
                "PropensityExploration.act requires `values` to be provided; "
                "got None."
            )
        return torch.distributions.Categorical(values).sample()

Classes

class PropensityExploration

Propensity exploration module: selects an action by sampling from a categorical distribution over the supplied values.

Expand source code
class PropensityExploration(ExplorationModule):
    """
    Propensity exploration module.

    Picks an action by drawing one sample from a categorical distribution
    built from the ``values`` tensor handed to ``act``.
    """

    def __init__(self) -> None:
        super().__init__()

    # TODO: We should make discrete action space itself iterable
    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        values: Optional[torch.Tensor] = None,
        exploit_action: Optional[Action] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        """
        Sample an action from ``torch.distributions.Categorical(values)``.

        Args:
            subjective_state: Not read by this implementation.
            action_space: Not read by this implementation.
            values: Tensor passed as the first positional argument
                (``probs``) of ``torch.distributions.Categorical``.
                Required despite the ``None`` default in the signature.
                Presumably one propensity per action -- confirm with caller.
            exploit_action: Not read by this implementation.
            action_availability_mask: Not read by this implementation.
            representation: Not read by this implementation.

        Returns:
            The tensor returned by ``Categorical.sample()``.

        Raises:
            ValueError: If ``values`` is ``None``.
        """
        # Categorical(None) would also raise ValueError, but with a message
        # about `probs`/`logits` that never mentions this method's parameter.
        if values is None:
            raise ValueError(
                "PropensityExploration.act requires `values` to be provided; "
                "got None."
            )
        return torch.distributions.Categorical(values).sample()

Ancestors

Methods

def act(self, subjective_state: torch.Tensor, action_space: ActionSpace, values: Optional[torch.Tensor] = None, exploit_action: Optional[torch.Tensor] = None, action_availability_mask: Optional[torch.Tensor] = None, representation: Optional[torch.nn.modules.module.Module] = None) -> torch.Tensor
Expand source code
def act(
    self,
    subjective_state: SubjectiveState,
    action_space: ActionSpace,
    values: Optional[torch.Tensor] = None,
    exploit_action: Optional[Action] = None,
    action_availability_mask: Optional[torch.Tensor] = None,
    representation: Optional[torch.nn.Module] = None,
) -> Action:
    """
    Sample an action from ``torch.distributions.Categorical(values)``.

    Args:
        subjective_state: Not read by this implementation.
        action_space: Not read by this implementation.
        values: Tensor passed as the first positional argument
            (``probs``) of ``torch.distributions.Categorical``.
            Required despite the ``None`` default in the signature.
            Presumably one propensity per action -- confirm with caller.
        exploit_action: Not read by this implementation.
        action_availability_mask: Not read by this implementation.
        representation: Not read by this implementation.

    Returns:
        The tensor returned by ``Categorical.sample()``.

    Raises:
        ValueError: If ``values`` is ``None``.
    """
    # Categorical(None) would also raise ValueError, but with a message
    # about `probs`/`logits` that never mentions this method's parameter.
    if values is None:
        raise ValueError(
            "PropensityExploration.act requires `values` to be provided; "
            "got None."
        )
    return torch.distributions.Categorical(values).sample()

Inherited members