Module pearl.policy_learners.exploration_modules.common.epsilon_greedy_exploration
Source code
import random
from typing import Optional

import torch

from pearl.api.action import Action
from pearl.api.action_space import ActionSpace
from pearl.api.state import SubjectiveState
from pearl.policy_learners.exploration_modules.common.uniform_exploration_base import (
    UniformExplorationBase,
)
from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace


class EGreedyExploration(UniformExplorationBase):
    """
    epsilon Greedy exploration module.
    """

    def __init__(self, epsilon: float) -> None:
        super(EGreedyExploration, self).__init__()
        self.epsilon = epsilon

    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        exploit_action: Optional[Action],
        values: Optional[torch.Tensor] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        if exploit_action is None:
            raise ValueError(
                "exploit_action cannot be None for epsilon-greedy exploration"
            )
        if not isinstance(action_space, DiscreteActionSpace):
            raise TypeError("action space must be discrete")
        # Explore with probability epsilon: return a uniformly random action
        # index from the discrete action space; otherwise exploit.
        if random.random() < self.epsilon:
            return torch.randint(action_space.n, (1,))
        return exploit_action
Classes
class EGreedyExploration (epsilon: float)
Epsilon-greedy exploration module: with probability epsilon it returns a uniformly random action index from a discrete action space, and otherwise returns the provided exploit (greedy) action. A usage sketch follows the source listing below.
Source code
class EGreedyExploration(UniformExplorationBase):
    """
    epsilon Greedy exploration module.
    """

    def __init__(self, epsilon: float) -> None:
        super(EGreedyExploration, self).__init__()
        self.epsilon = epsilon

    def act(
        self,
        subjective_state: SubjectiveState,
        action_space: ActionSpace,
        exploit_action: Optional[Action],
        values: Optional[torch.Tensor] = None,
        action_availability_mask: Optional[torch.Tensor] = None,
        representation: Optional[torch.nn.Module] = None,
    ) -> Action:
        if exploit_action is None:
            raise ValueError(
                "exploit_action cannot be None for epsilon-greedy exploration"
            )
        if not isinstance(action_space, DiscreteActionSpace):
            raise TypeError("action space must be discrete")
        if random.random() < self.epsilon:
            return torch.randint(action_space.n, (1,))
        return exploit_action
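The following is a minimal usage sketch, not part of the module source. It assumes that DiscreteActionSpace can be constructed from a list of action tensors and that the exploit (greedy) action has already been computed by a policy learner; the state tensor, action count, and epsilon value are illustrative only.

import torch

from pearl.policy_learners.exploration_modules.common.epsilon_greedy_exploration import (
    EGreedyExploration,
)
from pearl.utils.instantiations.spaces.discrete_action import DiscreteActionSpace

# Hypothetical 4-action discrete space and a greedy action picked elsewhere.
action_space = DiscreteActionSpace(actions=[torch.tensor([i]) for i in range(4)])
exploit_action = torch.tensor([2])

exploration = EGreedyExploration(epsilon=0.1)
action = exploration.act(
    subjective_state=torch.zeros(3),  # placeholder observation/state
    action_space=action_space,
    exploit_action=exploit_action,
)
# With probability 0.9, `action` is the exploit action; otherwise it is a
# uniformly random action index in [0, 4).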
Ancestors
pearl.policy_learners.exploration_modules.common.uniform_exploration_base.UniformExplorationBase
Methods
def act(self, subjective_state: torch.Tensor, action_space: ActionSpace, exploit_action: Optional[torch.Tensor], values: Optional[torch.Tensor] = None, action_availability_mask: Optional[torch.Tensor] = None, representation: Optional[torch.nn.modules.module.Module] = None) -> torch.Tensor
Returns the exploit action with probability 1 - epsilon, and a uniformly random action index from the discrete action space otherwise. Raises ValueError if exploit_action is None and TypeError if the action space is not discrete.
Source code
def act(
    self,
    subjective_state: SubjectiveState,
    action_space: ActionSpace,
    exploit_action: Optional[Action],
    values: Optional[torch.Tensor] = None,
    action_availability_mask: Optional[torch.Tensor] = None,
    representation: Optional[torch.nn.Module] = None,
) -> Action:
    if exploit_action is None:
        raise ValueError(
            "exploit_action cannot be None for epsilon-greedy exploration"
        )
    if not isinstance(action_space, DiscreteActionSpace):
        raise TypeError("action space must be discrete")
    if random.random() < self.epsilon:
        return torch.randint(action_space.n, (1,))
    return exploit_action
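Note that the random branch can itself land on the exploit action's index, so with n discrete actions the exploit action is returned with overall probability 1 - epsilon + epsilon / n. The standalone sketch below (plain PyTorch, no Pearl dependencies; names and numbers are illustrative) checks this empirically.

import random

import torch

def egreedy(exploit_index: int, n: int, epsilon: float) -> int:
    # Mirrors act(): explore with probability epsilon, otherwise exploit.
    if random.random() < epsilon:
        return int(torch.randint(n, (1,)).item())
    return exploit_index

n, epsilon, exploit_index = 4, 0.1, 2
trials = 100_000
hits = sum(egreedy(exploit_index, n, epsilon) == exploit_index for _ in range(trials))
print(hits / trials)  # approximately 1 - epsilon + epsilon / n = 0.925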
Inherited members