Module pearl.policy_learners.exploration_modules.exploration_module
Expand source code
from abc import ABC, abstractmethod
from enum import Enum
from typing import Optional
import torch
from pearl.api.action import Action
from pearl.api.action_space import ActionSpace
from pearl.history_summarization_modules.history_summarization_module import (
SubjectiveState,
)
from pearl.replay_buffers.replay_buffer import ReplayBuffer
class ExplorationType(Enum):
    """
    Integer-valued tags identifying a type of exploration.

    Members are numbered consecutively starting at 0, in declaration order.
    """

    UNIFORM = 0
    BOLTZMANN = 1
    REPRESENTATION = 2
    EPISTEMICNN = 3
    VALUE = 4
class ExplorationModule(ABC):
    """
    Abstract base class describing the interface of an exploration module.

    Subclasses are required to implement `act`. `reset` and `learn` are
    optional hooks: their default implementations do nothing.
    """

    def reset(self) -> None:  # noqa: B027
        """
        Reset the internal state of the exploration module.

        The default implementation is a no-op; override it in subclasses
        that keep state.
        """

    @abstractmethod
    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        values: Optional[torch.Tensor] = None,
        exploit_action: Optional[Action] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        """
        Produce an action. Abstract: every concrete subclass must override it.

        Args:
            subjective_state: the subjective state passed by the caller.
            action_space: the action space passed by the caller.
            values: optional tensor, defaults to None.
                NOTE(review): presumably value estimates for the candidate
                actions -- confirm against the implementing subclasses.
            exploit_action: optional action, defaults to None.
                NOTE(review): presumably the action chosen without
                exploration -- confirm.
            action_availability_mask: optional tensor, defaults to None.
                NOTE(review): presumably marks which actions are
                available -- confirm.
            representation: optional `torch.nn.Module`, defaults to None.

        Returns:
            The action chosen by the subclass implementation. This base
            body contains no logic of its own.
        """

    def learn(self, replay_buffer: ReplayBuffer) -> None:  # noqa: B027
        """
        Learn from the given replay buffer.

        The default implementation is a no-op and leaves `replay_buffer`
        untouched.
        """
Classes
class ExplorationModule
-
An abstract interface for exploration module.
Expand source code
class ExplorationModule(ABC):
    """
    Abstract interface that every exploration module implements.

    Only `act` is abstract; `reset` and `learn` ship with do-nothing
    defaults that subclasses may override.
    """

    def reset(self) -> None:  # noqa: B027
        """
        Reset the internal state of the exploration module.

        Does nothing by default.
        """

    @abstractmethod
    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        values: Optional[torch.Tensor] = None,
        exploit_action: Optional[Action] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        """
        Produce an action; must be implemented by concrete subclasses.

        Only `subjective_state` and `action_space` are required. The
        remaining arguments are optional and default to None.
        """

    def learn(self, replay_buffer: ReplayBuffer) -> None:  # noqa: B027
        """
        Learn from the replay buffer.

        Does nothing by default.
        """
Ancestors
- abc.ABC
Subclasses
- NormalDistributionExploration
- PropensityExploration
- ScoreExplorationBase
- UniformExplorationBase
- DeepExploration
Methods
def act(self, subjective_state: torch.Tensor, action_space: ActionSpace, values: Optional[torch.Tensor] = None, exploit_action: Optional[torch.Tensor] = None, action_availability_mask: Optional[torch.Tensor] = None, representation: Optional[torch.nn.modules.module.Module] = None) ‑> torch.Tensor
-
Expand source code
@abstractmethod
def act(
    self,
    subjective_state: SubjectiveState,
    action_space: ActionSpace,
    values: Optional[torch.Tensor] = None,
    exploit_action: Optional[Action] = None,
    action_availability_mask: Optional[torch.Tensor] = None,
    representation: Optional[torch.nn.Module] = None,
) -> Action:
    """
    Produce an action; abstract, so concrete subclasses must override it.

    `subjective_state` and `action_space` are required. `values`,
    `exploit_action`, `action_availability_mask` and `representation`
    are optional and default to None.
    """
def learn(self, replay_buffer: ReplayBuffer) ‑> None
-
Learns from the replay buffer. Default implementation does nothing.
Expand source code
def learn(self, replay_buffer: ReplayBuffer) -> None:  # noqa: B027
    """
    Learn from the replay buffer.

    This default implementation does nothing and ignores `replay_buffer`.
    """
def reset(self) ‑> None
-
Resets the internal state of the exploration module. Default implementation does nothing.
Expand source code
def reset(self) -> None:  # noqa: B027
    """
    Reset the internal state of the exploration module.

    This default implementation does nothing.
    """
class ExplorationType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Expand source code
class ExplorationType(Enum):
    """
    Integer-valued tags identifying a type of exploration.

    Values run from 0 to 4 in declaration order.
    """

    UNIFORM = 0
    BOLTZMANN = 1
    REPRESENTATION = 2
    EPISTEMICNN = 3
    VALUE = 4
Ancestors
- enum.Enum
Class variables
var BOLTZMANN
var EPISTEMICNN
var REPRESENTATION
var UNIFORM
var VALUE