import numpy as np
from loco_mujoco.utils.math import mat2angle_xy
[docs]
class RewardInterface:
"""
Interface to specify a reward function.
"""
[docs]
def __call__(self, state, action, next_state, absorbing):
"""
Compute the reward.
Args:
state (np.ndarray): last state;
action (np.ndarray): applied action;
next_state (np.ndarray): current state.
Returns:
The reward for the current transition.
"""
raise NotImplementedError
[docs]
def reset_state(self):
"""
Reset the state of the object.
"""
pass
[docs]
class NoReward(RewardInterface):
"""
A reward function that returns always 0.
"""
[docs]
def __call__(self, state, action, next_state, absorbing):
return 0
[docs]
class PosReward(RewardInterface):
def __init__(self, pos_idx):
self._pos_idx = pos_idx
[docs]
def __call__(self, state, action, next_state, absorbing):
pos = state[self._pos_idx]
return pos
[docs]
class CustomReward(RewardInterface):
def __init__(self, reward_callback=None):
self._reward_callback = reward_callback
[docs]
def __call__(self, state, action, next_state, absorbing):
if self._reward_callback is not None:
return self._reward_callback(state, action, next_state)
else:
return 0
[docs]
class TargetVelocityReward(RewardInterface):
def __init__(self, target_velocity, x_vel_idx):
self._target_vel = target_velocity
self._x_vel_idx = x_vel_idx
[docs]
def __call__(self, state, action, next_state, absorbing):
x_vel = state[self._x_vel_idx]
return np.exp(- np.square(x_vel - self._target_vel))
[docs]
class MultiTargetVelocityReward(RewardInterface):
def __init__(self, target_velocity, x_vel_idx, env_id_len, scalings):
self._target_vel = target_velocity
self._env_id_len = env_id_len
self._scalings = scalings
self._x_vel_idx = x_vel_idx
[docs]
def __call__(self, state, action, next_state, absorbing):
x_vel = state[self._x_vel_idx]
env_id = state[-self._env_id_len:]
# convert binary array to index
ind = np.packbits(env_id.astype(int), bitorder='big') >> (8 - env_id.shape[0])
ind = ind[0]
scaling = self._scalings[ind]
# calculate target vel
target_vel = self._target_vel * scaling
return np.exp(- np.square(x_vel - target_vel))
[docs]
class VelocityVectorReward(RewardInterface):
def __init__(self, x_vel_idx, y_vel_idx, angle_idx, goal_vel_idx):
self._x_vel_idx = x_vel_idx
self._y_vel_idx = y_vel_idx
self._angle_idx = angle_idx
self._goal_vel_idx = goal_vel_idx
[docs]
def __call__(self, state, action, next_state, absorbing):
# get current velocity vector in x-y-plane
curr_velocity_xy = np.array([state[self._x_vel_idx], state[self._y_vel_idx]])
# get desired velocity vector in x-y-plane
cos_sine = state[self._angle_idx]
des_vel = state[self._goal_vel_idx] * cos_sine
return np.exp(-5.0*np.linalg.norm(curr_velocity_xy - des_vel))