Source code for loco_mujoco.utils.reward

import numpy as np
from loco_mujoco.utils.math import mat2angle_xy



[docs]
class RewardInterface:
    """
    Interface to specify a reward function.

    """


[docs]
    def __call__(self, state, action, next_state, absorbing):
        """
        Compute the reward.

        Args:
            state (np.ndarray): last state;
            action (np.ndarray): applied action;
            next_state (np.ndarray): current state.

        Returns:
            The reward for the current transition.

        """
        raise NotImplementedError



[docs]
    def reset_state(self):
        """
        Reset the state of the object.

        """
        pass





[docs]
class NoReward(RewardInterface):
    """
    A reward function that returns always 0.

    """


[docs]
    def __call__(self, state, action, next_state, absorbing):
        return 0





[docs]
class PosReward(RewardInterface):

    def __init__(self, pos_idx):
        self._pos_idx = pos_idx


[docs]
    def __call__(self, state, action, next_state, absorbing):
        pos = state[self._pos_idx]
        return pos





[docs]
class CustomReward(RewardInterface):

    def __init__(self, reward_callback=None):
        self._reward_callback = reward_callback


[docs]
    def __call__(self, state, action, next_state, absorbing):
        if self._reward_callback is not None:
            return self._reward_callback(state, action, next_state)
        else:
            return 0





[docs]
class TargetVelocityReward(RewardInterface):

    def __init__(self, target_velocity, x_vel_idx):
        self._target_vel = target_velocity
        self._x_vel_idx = x_vel_idx


[docs]
    def __call__(self, state, action, next_state, absorbing):
        x_vel = state[self._x_vel_idx]
        return np.exp(- np.square(x_vel - self._target_vel))





[docs]
class MultiTargetVelocityReward(RewardInterface):

    def __init__(self, target_velocity, x_vel_idx, env_id_len, scalings):
        self._target_vel = target_velocity
        self._env_id_len = env_id_len
        self._scalings = scalings
        self._x_vel_idx = x_vel_idx


[docs]
    def __call__(self, state, action, next_state, absorbing):
        x_vel = state[self._x_vel_idx]
        env_id = state[-self._env_id_len:]

        # convert binary array to index
        ind = np.packbits(env_id.astype(int), bitorder='big') >> (8 - env_id.shape[0])
        ind = ind[0]
        scaling = self._scalings[ind]

        # calculate target vel
        target_vel = self._target_vel * scaling

        return np.exp(- np.square(x_vel - target_vel))





[docs]
class VelocityVectorReward(RewardInterface):

    def __init__(self, x_vel_idx, y_vel_idx, angle_idx, goal_vel_idx):
        self._x_vel_idx = x_vel_idx
        self._y_vel_idx = y_vel_idx
        self._angle_idx = angle_idx
        self._goal_vel_idx = goal_vel_idx


[docs]
    def __call__(self, state, action, next_state, absorbing):

        # get current velocity vector in x-y-plane
        curr_velocity_xy = np.array([state[self._x_vel_idx], state[self._y_vel_idx]])

        # get desired velocity vector in x-y-plane
        cos_sine = state[self._angle_idx]
        des_vel = state[self._goal_vel_idx] * cos_sine

        return np.exp(-5.0*np.linalg.norm(curr_velocity_xy - des_vel))