Module pearl.user_envs.wrappers.sparse_reward
Expand source code
# pyre-ignore-all-errors
import numpy as np
try:
import gymnasium as gym
except ModuleNotFoundError:
print("gymnasium module not found.")
import math
class PendulumSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the Pendulum environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its dense reward with the sparse one."""
        observation, _dense_reward, done, truncated, info = self.env.step(action)
        return observation, self.sparse_reward(), done, truncated, info

    def sparse_reward(self):
        """Return 1 when the pendulum angle is within +/-15 degrees of upright, else 0."""
        theta, _theta_dot = self.env.state
        threshold = 15 / 180 * np.pi
        normalized = self.angle_normalize(theta)
        return 1 if -threshold < normalized < threshold else 0

    def angle_normalize(self, x):
        """Wrap an angle into the interval [-pi, pi)."""
        return ((x + np.pi) % (2 * np.pi)) - np.pi
class AcrobotSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the Acrobot environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its dense reward with the sparse one."""
        observation, _dense_reward, done, truncated, info = self.env.step(action)
        return observation, self.sparse_reward(), done, truncated, info

    def sparse_reward(self):
        """True when the tip height -cos(th1) - cos(th2 + th1) exceeds 1.0."""
        state = self.env.state
        theta1 = state[0]
        theta2 = state[1]
        tip_height = -np.cos(theta1) - np.cos(theta2 + theta1)
        return bool(tip_height > 1.0)
class MountainCarSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the Mountain Car environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its dense reward with the sparse one."""
        observation, _dense_reward, done, truncated, info = self.env.step(action)
        return observation, self.sparse_reward(), done, truncated, info

    def sparse_reward(self):
        """True once the car reaches both the goal position and goal velocity."""
        position, velocity = self.env.state
        at_goal_position = position >= self.env.goal_position
        at_goal_velocity = velocity >= self.env.goal_velocity
        return bool(at_goal_position and at_goal_velocity)
class PuckWorldSparseRewardWrapper(gym.Wrapper):
    r"""Sparse Reward wrapper for the PuckWorld environment."""

    def __init__(self, env):
        super().__init__(env)

    def step(self, action):
        """Step the wrapped env, replacing its reward with the sparse variant."""
        obs, reward, done, truncated, info = self.env.step(action)
        sparse_reward = self.sparse_reward()
        return obs, sparse_reward, done, truncated, info

    def sparse_reward(self):
        """Compute a reward that is nonzero only inside a creep's radius.

        Positive (up to 1.0, scaled) when the player is within the good
        creep's radius, negative (down to -2.0, scaled) when within the bad
        creep's outer radius, zero elsewhere. The sum is divided by 1000 to
        keep magnitudes small.
        """
        state = self.env.get_ob(self.env.game.getGameState())
        # Observation layout as indexed here: player (x, y) at 0-1,
        # good creep (x, y) at 4-5, bad creep (x, y) at 6-7.
        px, py = state[0], state[1]
        gx, gy = state[4], state[5]
        bx, by = state[6], state[7]
        dist_to_good = math.hypot(px - gx, py - gy)
        dist_to_bad = math.hypot(px - bx, py - by)
        reward = 0.0
        good_radius = self.env.game.CREEP_GOOD["radius"]
        bad_radius = self.env.game.CREEP_BAD["radius_outer"]
        if dist_to_good < good_radius:
            # Closer to the good creep's center -> larger positive reward.
            reward += 1.0 * (good_radius - dist_to_good) / float(good_radius)
        if dist_to_bad < bad_radius:
            # Inside the bad creep's outer radius -> negative penalty.
            reward += 2.0 * (dist_to_bad - bad_radius) / float(bad_radius)
        return reward / 1000
Classes
class AcrobotSparseRewardWrapper (env)
-
Sparse Reward wrapper for the Acrobot environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class AcrobotSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Acrobot environment.""" def __init__(self, env): super(AcrobotSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): s = self.env.state return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.0)
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): s = self.env.state return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.0)
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info
class MountainCarSparseRewardWrapper (env)
-
Sparse Reward wrapper for the Mountain Car environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class MountainCarSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Mountain Car environment.""" def __init__(self, env): super(MountainCarSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): position, velocity = self.env.state return bool( position >= self.env.goal_position and velocity >= self.env.goal_velocity )
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): position, velocity = self.env.state return bool( position >= self.env.goal_position and velocity >= self.env.goal_velocity )
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info
class PendulumSparseRewardWrapper (env)
-
Sparse Reward wrapper for the Pendulum environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class PendulumSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Pendulum environment.""" def __init__(self, env): super(PendulumSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): th, thdot = self.env.state threshold = 15 / 180 * np.pi if ( self.angle_normalize(th) < threshold and self.angle_normalize(th) > -threshold ): sparse_reward = 1 else: sparse_reward = 0 return sparse_reward def angle_normalize(self, x): return ((x + np.pi) % (2 * np.pi)) - np.pi
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def angle_normalize(self, x)
-
Expand source code
def angle_normalize(self, x): return ((x + np.pi) % (2 * np.pi)) - np.pi
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): th, thdot = self.env.state threshold = 15 / 180 * np.pi if ( self.angle_normalize(th) < threshold and self.angle_normalize(th) > -threshold ): sparse_reward = 1 else: sparse_reward = 0 return sparse_reward
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info
class PuckWorldSparseRewardWrapper (env)
-
Sparse Reward wrapper for the PuckWorld environment.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.Args
env
- The environment to wrap
Expand source code
class PuckWorldSparseRewardWrapper(gym.Wrapper): r"""Sparse Reward wrapper for the Mountain Car environment.""" def __init__(self, env): super(PuckWorldSparseRewardWrapper, self).__init__(env) def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info def sparse_reward(self): x = self.env.get_ob(self.env.game.getGameState()) px = x[0] py = x[1] gx = x[4] gy = x[5] bx = x[6] by = x[7] dx = px - gx dy = py - gy dist_to_good = math.sqrt(dx * dx + dy * dy) dx = px - bx dy = py - by dist_to_bad = math.sqrt(dx * dx + dy * dy) reward = 0.0 gr = self.env.game.CREEP_GOOD["radius"] br = self.env.game.CREEP_BAD["radius_outer"] if dist_to_good < gr: reward += 1.0 * (gr - dist_to_good) / float(gr) if dist_to_bad < br: reward += 2.0 * (dist_to_bad - br) / float(br) return reward / 1000
Ancestors
- gymnasium.core.Wrapper
- gymnasium.core.Env
- typing.Generic
Methods
def sparse_reward(self)
-
Expand source code
def sparse_reward(self): x = self.env.get_ob(self.env.game.getGameState()) px = x[0] py = x[1] gx = x[4] gy = x[5] bx = x[6] by = x[7] dx = px - gx dy = py - gy dist_to_good = math.sqrt(dx * dx + dy * dy) dx = px - bx dy = py - by dist_to_bad = math.sqrt(dx * dx + dy * dy) reward = 0.0 gr = self.env.game.CREEP_GOOD["radius"] br = self.env.game.CREEP_BAD["radius_outer"] if dist_to_good < gr: reward += 1.0 * (gr - dist_to_good) / float(gr) if dist_to_bad < br: reward += 2.0 * (dist_to_bad - br) / float(br) return reward / 1000
def step(self, action)
-
Uses the :meth:
step
of the :attr:env
that can be overwritten to change the returned data.Expand source code
def step(self, action): obs, reward, done, truncated, info = self.env.step(action) sparse_reward = self.sparse_reward() return obs, sparse_reward, done, truncated, info