Module pearl.user_envs.wrappers.sparse_reward
Expand source code
# pyre-ignore-all-errors
import numpy as np
try:
import gymnasium as gym
except ModuleNotFoundError:
print("gymnasium module not found.")
import math
class PendulumSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the Pendulum environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its dense reward with the sparse one."""
        observation, _dense_reward, done, truncated, info = self.env.step(action)
        return observation, self.sparse_reward(), done, truncated, info

    def sparse_reward(self):
        """Return 1 when the pendulum angle is within +/-15 degrees of upright, else 0."""
        theta, _theta_dot = self.env.state
        threshold = 15 / 180 * np.pi
        normalized = self.angle_normalize(theta)
        return 1 if -threshold < normalized < threshold else 0

    def angle_normalize(self, x):
        """Wrap an angle into the interval [-pi, pi)."""
        return ((x + np.pi) % (2 * np.pi)) - np.pi
class AcrobotSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the Acrobot environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its dense reward with the sparse one."""
        observation, _dense_reward, done, truncated, info = self.env.step(action)
        return observation, self.sparse_reward(), done, truncated, info

    def sparse_reward(self):
        """True when the tip height -cos(th1) - cos(th2 + th1) exceeds 1.0."""
        state = self.env.state
        theta1 = state[0]
        theta2 = state[1]
        tip_height = -np.cos(theta1) - np.cos(theta2 + theta1)
        return bool(tip_height > 1.0)
class MountainCarSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the Mountain Car environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its dense reward with the sparse one."""
        observation, _dense_reward, done, truncated, info = self.env.step(action)
        return observation, self.sparse_reward(), done, truncated, info

    def sparse_reward(self):
        """True once the car reaches both the goal position and goal velocity."""
        position, velocity = self.env.state
        at_goal_position = position >= self.env.goal_position
        at_goal_velocity = velocity >= self.env.goal_velocity
        return bool(at_goal_position and at_goal_velocity)
class PuckWorldSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the PuckWorld environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its reward with the sparse variant."""
        obs, reward, done, truncated, info = self.env.step(action)
        sparse_reward = self.sparse_reward()
        return obs, sparse_reward, done, truncated, info

    def sparse_reward(self):
        """Compute a reward that is nonzero only inside a creep's radius.

        Positive (up to 1.0, scaled) when the player is within the good
        creep's radius, negative (down to -2.0, scaled) when within the bad
        creep's outer radius, zero elsewhere. The sum is divided by 1000 to
        keep magnitudes small.
        """
        state = self.env.get_ob(self.env.game.getGameState())
        # Observation layout as indexed here: player (x, y) at 0-1,
        # good creep (x, y) at 4-5, bad creep (x, y) at 6-7.
        px, py = state[0], state[1]
        gx, gy = state[4], state[5]
        bx, by = state[6], state[7]
        dist_to_good = math.hypot(px - gx, py - gy)
        dist_to_bad = math.hypot(px - bx, py - by)
        reward = 0.0
        good_radius = self.env.game.CREEP_GOOD["radius"]
        bad_radius = self.env.game.CREEP_BAD["radius_outer"]
        if dist_to_good < good_radius:
            # Closer to the good creep's center -> larger positive reward.
            reward += 1.0 * (good_radius - dist_to_good) / float(good_radius)
        if dist_to_bad < bad_radius:
            # Inside the bad creep's outer radius -> negative penalty.
            reward += 2.0 * (dist_to_bad - bad_radius) / float(bad_radius)
        return reward / 1000
Classes
class AcrobotSparseRewardWrapper (env)
-
Sparse Reward wrapper for the Acrobot environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class AcrobotSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Acrobot environment.""" def __init__(self, env): super(AcrobotSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): s = self.env.state return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.0)
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): s = self.env.state return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.0)
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info
class MountainCarSparseRewardWrapper (env)
-
Sparse Reward wrapper for the Mountain Car environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class MountainCarSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Mountain Car environment.""" def __init__(self, env): super(MountainCarSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): position, velocity = self.env.state return bool( position >= self.env.goal_position and velocity >= self.env.goal_velocity )
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): position, velocity = self.env.state return bool( position >= self.env.goal_position and velocity >= self.env.goal_velocity )
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info
class PendulumSparseRewardWrapper (env)
-
Sparse Reward wrapper for the Pendulum environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class PendulumSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Pendulum environment.""" def __init__(self, env): super(PendulumSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): th, thdot = self.env.state threshold = 15 / 180 * np.pi if ( self.angle_normalize(th) < threshold and self.angle_normalize(th) > -threshold ): sparse_reward = 1 else: sparse_reward = 0 return sparse_reward def angle_normalize(self, x): return ((x + np.pi) % (2 * np.pi)) - np.pi
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def angle_normalize(self, x)
-
Expand source code
def angle_normalize(self, x): return ((x + np.pi) % (2 * np.pi)) - np.pi
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): th, thdot = self.env.state threshold = 15 / 180 * np.pi if ( self.angle_normalize(th) < threshold and self.angle_normalize(th) > -threshold ): sparse_reward = 1 else: sparse_reward = 0 return sparse_reward
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info
class PuckWorldSparseRewardWrapper (env)
-
Sparse Reward wrapper for the PuckWorld environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class PuckWorldSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Mountain Car environment.""" def __init__(self, env): super(PuckWorldSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): x = self.env.get_ob(self.env.game.getGameState()) px = x[0] py = x[1] gx = x[4] gy = x[5] bx = x[6] by = x[7] dx = px - gx dy = py - gy dist_to_good = math.sqrt(dx * dx + dy * dy) dx = px - bx dy = py - by dist_to_bad = math.sqrt(dx * dx + dy * dy) reward = 0.0 gr = self.env.game.CREEP_GOOD["radius"] br = self.env.game.CREEP_BAD["radius_outer"] if dist_to_good < gr: reward += 1.0 * (gr - dist_to_good) / float(gr) if dist_to_bad < br: reward += 2.0 * (dist_to_bad - br) / float(br) return reward / 1000
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): x = self.env.get_ob(self.env.game.getGameState()) px = x[0] py = x[1] gx = x[4] gy = x[5] bx = x[6] by = x[7] dx = px - gx dy = py - gy dist_to_good = math.sqrt(dx * dx + dy * dy) dx = px - bx dy = py - by dist_to_bad = math.sqrt(dx * dx + dy * dy) reward = 0.0 gr = self.env.game.CREEP_GOOD["radius"] br = self.env.game.CREEP_BAD["radius_outer"] if dist_to_good < gr: reward += 1.0 * (gr - dist_to_good) / float(gr) if dist_to_bad < br: reward += 2.0 * (dist_to_bad - br) / float(br) return reward / 1000
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info