Module pearl.policy_learners.exploration_modules.contextual_bandits.linucb_exploration
Expand source code
from typing import List, Optional
import torch
from pearl.api.state import SubjectiveState
from pearl.policy_learners.exploration_modules.contextual_bandits.ucb_exploration import (
UCBExploration,
)
from pearl.utils.tensor_like import assert_is_tensor_like
class LinUCBExploration(UCBExploration):
"""
Exploration module for linear UCB with joint linear models
paper: https://arxiv.org/pdf/1003.0146.pdf
"""
def sigma(
self,
subjective_state: SubjectiveState,
representation: Optional[torch.nn.Module] = None,
) -> torch.Tensor:
"""
Args:
subjective_state: feature vector (either state,
or state and action features after concatenation)
Shape should be either (batch_size, action_count, feature_dim) or
(batch_size, feature_dim).
Returns:
sigma with shape (batch_size, action_count) or (batch_size, 1)
"""
assert representation is not None
sigma = representation.calculate_sigma(subjective_state)
nan_check = torch.isnan(sigma)
sigma = torch.where(nan_check, torch.zeros_like(sigma), sigma)
return sigma
class DisjointLinUCBExploration(LinUCBExploration):
"""
Same as LinUCBExploration, just that now different action has different linear regression
"""
# pyre-fixme[14]: `sigma` overrides method defined in `LinUCBExploration`
# inconsistently.
def sigma(
self,
subjective_state: SubjectiveState,
representation: Optional[List[torch.nn.Module]] = None,
) -> torch.Tensor:
"""
Args:
subjective_state: this is feature vector in shape, batch_size, action_count, feature
representation: unlike LinUCBExploration, here it is a list for different actions
"""
assert representation is not None
subjective_state = assert_is_tensor_like(subjective_state)
sigma = []
for i, linear_regression in enumerate(representation):
sigma.append(
super(DisjointLinUCBExploration, self).sigma(
subjective_state=subjective_state[
:, i, :
], # different action has different feature
representation=linear_regression,
)
)
sigma = torch.stack(sigma)
# change from shape(action_count, batch_size) to shape(batch_size, action_count)
sigma = sigma.permute(1, 0)
return sigma
Classes
class DisjointLinUCBExploration (alpha: float)
-
Same as LinUCBExploration, just that now different action has different linear regression
Expand source code
class DisjointLinUCBExploration(LinUCBExploration): """ Same as LinUCBExploration, just that now different action has different linear regression """ # pyre-fixme[14]: `sigma` overrides method defined in `LinUCBExploration` # inconsistently. def sigma( self, subjective_state: SubjectiveState, representation: Optional[List[torch.nn.Module]] = None, ) -> torch.Tensor: """ Args: subjective_state: this is feature vector in shape, batch_size, action_count, feature representation: unlike LinUCBExploration, here it is a list for different actions """ assert representation is not None subjective_state = assert_is_tensor_like(subjective_state) sigma = [] for i, linear_regression in enumerate(representation): sigma.append( super(DisjointLinUCBExploration, self).sigma( subjective_state=subjective_state[ :, i, : ], # different action has different feature representation=linear_regression, ) ) sigma = torch.stack(sigma) # change from shape(action_count, batch_size) to shape(batch_size, action_count) sigma = sigma.permute(1, 0) return sigma
Ancestors
Methods
def sigma(self, subjective_state: torch.Tensor, representation: Optional[List[torch.nn.modules.module.Module]] = None) ‑> torch.Tensor
-
Args
subjective_state
- this is feature vector in shape, batch_size, action_count, feature
representation
- unlike LinUCBExploration, here it is a list for different actions
Expand source code
def sigma( self, subjective_state: SubjectiveState, representation: Optional[List[torch.nn.Module]] = None, ) -> torch.Tensor: """ Args: subjective_state: this is feature vector in shape, batch_size, action_count, feature representation: unlike LinUCBExploration, here it is a list for different actions """ assert representation is not None subjective_state = assert_is_tensor_like(subjective_state) sigma = [] for i, linear_regression in enumerate(representation): sigma.append( super(DisjointLinUCBExploration, self).sigma( subjective_state=subjective_state[ :, i, : ], # different action has different feature representation=linear_regression, ) ) sigma = torch.stack(sigma) # change from shape(action_count, batch_size) to shape(batch_size, action_count) sigma = sigma.permute(1, 0) return sigma
Inherited members
class LinUCBExploration (alpha: float)
-
Exploration module for linear UCB with joint linear models paper: https://arxiv.org/pdf/1003.0146.pdf
Expand source code
class LinUCBExploration(UCBExploration): """ Exploration module for linear UCB with joint linear models paper: https://arxiv.org/pdf/1003.0146.pdf """ def sigma( self, subjective_state: SubjectiveState, representation: Optional[torch.nn.Module] = None, ) -> torch.Tensor: """ Args: subjective_state: feature vector (either state, or state and action features after concatenation) Shape should be either (batch_size, action_count, feature_dim) or (batch_size, feature_dim). Returns: sigma with shape (batch_size, action_count) or (batch_size, 1) """ assert representation is not None sigma = representation.calculate_sigma(subjective_state) nan_check = torch.isnan(sigma) sigma = torch.where(nan_check, torch.zeros_like(sigma), sigma) return sigma
Ancestors
Subclasses
Inherited members