Module pearl.neural_networks.sequential_decision_making.q_value_network
This file defines PEARL neural network interfaces. Users are free to define their own Q(s, a), but would need to inherit from this interface.
Expand source code
#!/usr/bin/env python3
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
"""
This file defines PEARL neural network interfaces.
Users are free to define their own Q(s, a), but would need to inherit from this interface.
"""
from __future__ import annotations
import abc
from typing import Optional
import torch
from torch import nn
class QValueNetwork(abc.ABC, nn.Module):
    """
    Abstract interface for state-action value (Q-value) estimators.

    Implementations are typically neural networks. On top of being an
    ``nn.Module``, a concrete subclass must report its state and action
    dimensions and implement ``get_q_values`` to score (state, action) pairs.
    """

    @property
    @abc.abstractmethod
    def state_dim(self) -> int:
        """Returns the state dimension."""

    @property
    @abc.abstractmethod
    def action_dim(self) -> int:
        """Returns the action dimension."""

    @abc.abstractmethod
    def get_q_values(
        self,
        state_batch: torch.Tensor,
        action_batch: torch.Tensor,
        curr_available_actions_batch: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Returns Q(s, a) for a batch of (state, action) pairs.

        Args:
            state_batch (torch.Tensor): a batch of state tensors
                (batch_size, state_dim)
            action_batch (torch.Tensor): a batch of action tensors
                (batch_size, action_dim)
            curr_available_actions_batch (torch.Tensor, optional): a batch of
                currently available actions
                (batch_size, available_action_space_size, action_dim)

        Returns:
            Q-values of the (state, action) pairs: (batch_size)
        """
class DistributionalQValueNetwork(abc.ABC, nn.Module):
    """
    Abstract interface for estimators of the state-action value distribution
    (Q-value distribution).

    A concrete subclass is an ``nn.Module`` that reports its state and action
    dimensions, describes the quantiles it uses, and implements
    ``get_q_value_distribution`` for a batch of (state, action) pairs.
    Examples include Categorical DQN, Quantile DQN, IQN etc.
    """

    @property
    @abc.abstractmethod
    def state_dim(self) -> int:
        """Returns the state dimension."""

    @property
    @abc.abstractmethod
    def action_dim(self) -> int:
        """Returns the action dimension."""

    @property
    @abc.abstractmethod
    def num_quantiles(self) -> int:
        """Returns the number of particles used to approximate the quantile distribution."""

    @property
    @abc.abstractmethod
    def quantiles(self) -> torch.Tensor:
        """Returns the quantiles of the approximate value distribution."""

    @property
    @abc.abstractmethod
    def quantile_midpoints(self) -> torch.Tensor:
        """Returns the midpoints of the quantiles."""

    @abc.abstractmethod
    def get_q_value_distribution(
        self,
        state_batch: torch.Tensor,
        action_batch: torch.Tensor,
    ) -> torch.Tensor:
        """Returns Z(s, a), a probability distribution over Q-values, given s and a.

        Note that under a risk neutral measure, Q(s, a) = E[Z(s, a)].

        Args:
            state_batch (torch.Tensor): a batch of state tensors
                (batch_size, state_dim)
            action_batch (torch.Tensor): a batch of action tensors
                (batch_size, action_dim)

        Returns:
            approximation of the distribution of Q-values of the
            (state, action) pairs
        """
Classes
class DistributionalQValueNetwork (*args, **kwargs)
-
An interface for estimators of the state-action value distribution (Q-value distribution). These are value neural networks with a special method for computing the Q-value distribution and the expected Q-values for a state-action pair. Examples include Categorical DQN, Quantile DQN, IQN, etc.
Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class DistributionalQValueNetwork(abc.ABC, nn.Module):
    """
    Abstract interface for estimators of the state-action value distribution
    (Q-value distribution).

    A concrete subclass is an ``nn.Module`` that reports its state and action
    dimensions, describes the quantiles it uses, and implements
    ``get_q_value_distribution`` for a batch of (state, action) pairs.
    Examples include Categorical DQN, Quantile DQN, IQN etc.
    """

    @property
    @abc.abstractmethod
    def state_dim(self) -> int:
        """Returns the state dimension."""

    @property
    @abc.abstractmethod
    def action_dim(self) -> int:
        """Returns the action dimension."""

    @property
    @abc.abstractmethod
    def num_quantiles(self) -> int:
        """Returns the number of particles used to approximate the quantile distribution."""

    @property
    @abc.abstractmethod
    def quantiles(self) -> torch.Tensor:
        """Returns the quantiles of the approximate value distribution."""

    @property
    @abc.abstractmethod
    def quantile_midpoints(self) -> torch.Tensor:
        """Returns the midpoints of the quantiles."""

    @abc.abstractmethod
    def get_q_value_distribution(
        self,
        state_batch: torch.Tensor,
        action_batch: torch.Tensor,
    ) -> torch.Tensor:
        """Returns Z(s, a), a probability distribution over Q-values, given s and a.

        Note that under a risk neutral measure, Q(s, a) = E[Z(s, a)].

        Args:
            state_batch (torch.Tensor): a batch of state tensors
                (batch_size, state_dim)
            action_batch (torch.Tensor): a batch of action tensors
                (batch_size, action_dim)

        Returns:
            approximation of the distribution of Q-values of the
            (state, action) pairs
        """
Ancestors
- abc.ABC
- torch.nn.modules.module.Module
Subclasses
Instance variables
var action_dim : int
-
Returns action dimension
Expand source code
@property
@abc.abstractmethod
def action_dim(self) -> int:
    """Returns the action dimension."""
var num_quantiles : int
-
Returns number of particles for approximating the quantile distribution
Expand source code
@property
@abc.abstractmethod
def num_quantiles(self) -> int:
    """Returns the number of particles used to approximate the quantile distribution."""
    ...
var quantile_midpoints : torch.Tensor
-
Returns midpoints of the quantiles
Expand source code
@property
@abc.abstractmethod
def quantile_midpoints(self) -> torch.Tensor:
    """Returns the midpoints of the quantiles."""
    ...
var quantiles : torch.Tensor
-
Returns quantiles of the approximate value distribution
Expand source code
@property
@abc.abstractmethod
def quantiles(self) -> torch.Tensor:
    """Returns the quantiles of the approximate value distribution."""
    ...
var state_dim : int
-
Returns state dimension
Expand source code
@property
@abc.abstractmethod
def state_dim(self) -> int:
    """Returns the state dimension."""
Methods
def get_q_value_distribution(self, state_batch: torch.Tensor, action_batch: torch.Tensor) ‑> torch.Tensor
-
Returns Z(s, a), a probability distribution over q values, given s and a. Note that under a risk neutral measure, Q(s,a) = E[Z(s, a)].
Args
state_batch
:torch.Tensor
- a batch of state tensors (batch_size, state_dim)
action_batch
:torch.Tensor
- a batch of action tensors (batch_size, action_dim)
Returns
approximation of distribution of Q-values of (state, action) pairs
Expand source code
@abc.abstractmethod
def get_q_value_distribution(
    self,
    state_batch: torch.Tensor,
    action_batch: torch.Tensor,
) -> torch.Tensor:
    """Returns Z(s, a), a probability distribution over Q-values, given s and a.

    Note that under a risk neutral measure, Q(s, a) = E[Z(s, a)].

    Args:
        state_batch (torch.Tensor): a batch of state tensors
            (batch_size, state_dim)
        action_batch (torch.Tensor): a batch of action tensors
            (batch_size, action_dim)

    Returns:
        approximation of the distribution of Q-values of the
        (state, action) pairs
    """
class QValueNetwork (*args, **kwargs)
-
An interface for state-action value (Q-value) estimators (typically, neural networks). These are value neural networks with a special method for computing the Q-value for a state-action pair.
Initializes internal Module state, shared by both nn.Module and ScriptModule.
Expand source code
class QValueNetwork(abc.ABC, nn.Module):
    """
    Abstract interface for state-action value (Q-value) estimators.

    Implementations are typically neural networks. On top of being an
    ``nn.Module``, a concrete subclass must report its state and action
    dimensions and implement ``get_q_values`` to score (state, action) pairs.
    """

    @property
    @abc.abstractmethod
    def state_dim(self) -> int:
        """Returns the state dimension."""

    @property
    @abc.abstractmethod
    def action_dim(self) -> int:
        """Returns the action dimension."""

    @abc.abstractmethod
    def get_q_values(
        self,
        state_batch: torch.Tensor,
        action_batch: torch.Tensor,
        curr_available_actions_batch: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Returns Q(s, a) for a batch of (state, action) pairs.

        Args:
            state_batch (torch.Tensor): a batch of state tensors
                (batch_size, state_dim)
            action_batch (torch.Tensor): a batch of action tensors
                (batch_size, action_dim)
            curr_available_actions_batch (torch.Tensor, optional): a batch of
                currently available actions
                (batch_size, available_action_space_size, action_dim)

        Returns:
            Q-values of the (state, action) pairs: (batch_size)
        """
Ancestors
- abc.ABC
- torch.nn.modules.module.Module
Subclasses
Instance variables
var action_dim : int
-
Returns action dimension
Expand source code
@property
@abc.abstractmethod
def action_dim(self) -> int:
    """Returns the action dimension."""
var state_dim : int
-
Returns state dimension
Expand source code
@property
@abc.abstractmethod
def state_dim(self) -> int:
    """Returns the state dimension."""
Methods
def get_q_values(self, state_batch: torch.Tensor, action_batch: torch.Tensor, curr_available_actions_batch: Optional[torch.Tensor] = None) ‑> torch.Tensor
-
Returns Q(s, a), given s and a
Args
state_batch
:torch.Tensor
- a batch of state tensors (batch_size, state_dim)
action_batch
:torch.Tensor
- a batch of action tensors (batch_size, action_dim)
curr_available_actions_batch
:torch.Tensor, optional
- a batch of currently available actions (batch_size, available_action_space_size, action_dim)
Returns
Q-values of (state, action) pairs: (batch_size)
Expand source code
@abc.abstractmethod
def get_q_values(
    self,
    state_batch: torch.Tensor,
    action_batch: torch.Tensor,
    curr_available_actions_batch: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Returns Q(s, a) for a batch of (state, action) pairs.

    Args:
        state_batch (torch.Tensor): a batch of state tensors
            (batch_size, state_dim)
        action_batch (torch.Tensor): a batch of action tensors
            (batch_size, action_dim)
        curr_available_actions_batch (torch.Tensor, optional): a batch of
            currently available actions
            (batch_size, available_action_space_size, action_dim)

    Returns:
        Q-values of the (state, action) pairs: (batch_size)
    """