Source code for loco_mujoco.environments.quadrupeds.unitreeA1

import os
import warnings
from pathlib import Path
from copy import deepcopy

from dm_control import mjcf
from mushroom_rl.utils.running_stats import *
from mushroom_rl.utils.mujoco import *
from mushroom_rl.utils.angles import mat_to_euler, euler_to_mat

import loco_mujoco
from loco_mujoco.environments import ValidTaskConf
from loco_mujoco.environments import LocoEnv
from loco_mujoco.utils.reward import VelocityVectorReward
from loco_mujoco.utils.math import rotate_obs
from loco_mujoco.utils.goals import GoalDirectionVelocity
from loco_mujoco.utils.math import mat2angle_xy, angle2mat_xy, transform_angle_2pi
from loco_mujoco.utils.checks import check_validity_task_mode_dataset



[docs]
class UnitreeA1(LocoEnv):

    """
    Description
    ------------

    Mujoco simulation of Unitree A1 model.

    Tasks
    -----------------
    * **Simple**: The robot has to walk forward with a fixed speed of 0.5 m/s.
    * **Hard**: The robot has to walk in 8 different directions with a fixed speed of 0.5 m/s.


    Dataset Types
    -----------------
    The available dataset types for this environment can be found at: :ref:`env-label`.


    Observation Space
    -----------------

    The observation space has the following properties *by default* (i.e., only obs with Disabled == False):

    | For simple task: :code:`(min=-inf, max=inf, dim=37, dtype=float32)`
    | For hard task: :code:`(min=-inf, max=inf, dim=37, dtype=float32)`

    Some observations are **disabled by default**, but can be turned on. The detailed observation space is:

    ===== ========================================================= ========= ========= ======== === ========================
    0     Position of Joint trunk_tz                                -inf      inf       False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    1     Position of Joint trunk_list                              -inf      inf       False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    2     Position of Joint trunk_tilt                              -inf      inf       False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    3     Position of Joint trunk_rotation                          -inf      inf       False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    4     Position of Joint FR_hip_joint                            -0.802851 0.802851  False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    5     Position of Joint FR_thigh_joint                          -1.0472   4.18879   False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    6     Position of Joint FR_calf_joint                           -2.69653  -0.916298 False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    7     Position of Joint FL_hip_joint                            -0.802851 0.802851  False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    8     Position of Joint FL_thigh_joint                          -1.0472   4.18879   False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    9     Position of Joint FL_calf_joint                           -2.69653  -0.916298 False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    10    Position of Joint RR_hip_joint                            -0.802851 0.802851  False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    11    Position of Joint RR_thigh_joint                          -1.0472   4.18879   False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    12    Position of Joint RR_calf_joint                           -2.69653  -0.916298 False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    13    Position of Joint RL_hip_joint                            -0.802851 0.802851  False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    14    Position of Joint RL_thigh_joint                          -1.0472   4.18879   False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    15    Position of Joint RL_calf_joint                           -2.69653  -0.916298 False    1   Angle [rad]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    16    Velocity of Joint trunk_tx                                -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    17    Velocity of Joint trunk_ty                                -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    18    Velocity of Joint trunk_tz                                -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    19    Velocity of Joint trunk_list                              -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    20    Velocity of Joint trunk_tilt                              -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    21    Velocity of Joint trunk_rotation                          -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    22    Velocity of Joint FR_hip_joint                            -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    23    Velocity of Joint FR_thigh_joint                          -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    24    Velocity of Joint FR_calf_joint                           -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    25    Velocity of Joint FL_hip_joint                            -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    26    Velocity of Joint FL_thigh_joint                          -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    27    Velocity of Joint FL_calf_joint                           -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    28    Velocity of Joint RR_hip_joint                            -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    29    Velocity of Joint RR_thigh_joint                          -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    30    Velocity of Joint RR_calf_joint                           -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    31    Velocity of Joint RL_hip_joint                            -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    32    Velocity of Joint RL_thigh_joint                          -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    33    Velocity of Joint RL_calf_joint                           -inf      inf       False    1   Angular Velocity [rad/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    34    Desired Velocity Angle represented as Sine-Cosine Feature 0.0       1         False    2   None
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    36    Desired Velocity                                          0.0       inf       False    1   Velocity [m/s]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    37    3D linear Forces between Front Left Foot and Floor        0.0       inf       True     3   Force [N]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    40    3D linear Forces between Front Right Foot and Floor       0.0       inf       True     3   Force [N]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    43    3D linear Forces between Back Left Foot and Floor         0.0       inf       True     3   Force [N]
    ----- --------------------------------------------------------- --------- --------- -------- --- ------------------------
    46    3D linear Forces between Back Right Foot and Floor        0.0       inf       True     3   Force [N]
    ===== ========================================================= ========= ========= ======== === ========================

    Action Space
    ------------

    | The action space has the following properties *by default* (i.e., only actions with Disabled == False):
    | :code:`(min=-1, max=1, dim=12, dtype=float32)`

    ===== =========== =========== =========== ========
    Index Name in XML Control Min Control Max Disabled
    ===== =========== =========== =========== ========
    0     FR_hip      -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    1     FR_thigh    -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    2     FR_calf     -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    3     FL_hip      -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    4     FL_thigh    -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    5     FL_calf     -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    6     RR_hip      -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    7     RR_thigh    -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    8     RR_calf     -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    9     RL_hip      -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    10    RL_thigh    -1.0        1.0         False
    ----- ----------- ----------- ----------- --------
    11    RL_calf     -1.0        1.0         False
    ===== =========== =========== =========== ========


    Rewards
    --------

    Reward function based on the difference between the desired velocity vector and the actual center of mass velocity
    vector in horizontal plane. The desired velocity vector is given by the dataset to imitate.

    **Class**: :class:`loco_mujoco.utils.reward.VelocityVectorReward`


    Initial States
    ---------------

    The initial state is sampled by default from the dataset to imitate.

    Terminal States
    ----------------

    The terminal state is reached when the robot falls, or rather starts falling. The condition to check if the robot
    is falling is based on the orientation of the robot and the height of the center of mass. More details can be found
    in the  :code:`_has_fallen` method of the environment.

    Methods
    ------------

    """

    valid_task_confs = ValidTaskConf(tasks=["simple", "hard"],
                                     data_types=["real", "perfect"])

    def __init__(self, action_mode="torque", setup_random_rot=False,
                 default_target_velocity=0.5, camera_params=None, **kwargs):
        """
        Constructor.

        Args:
            action_mode (str): Either "torque", "position", or "position_difference". Defines the action controller.
            setup_random_rot (bool): If True, the robot is initialized with a random rotation.
            default_target_velocity (float): Default target velocity set in the goal, when no trajectory
                data is provided.
            camera_params (dict): Dictionary defining some of the camera parameters for visualization.

        """

        # Choose xml file (either for torque or position control)
        if action_mode == "torque":
            xml_path = (Path(__file__).resolve().parent.parent / "data" / "quadrupeds" /
                        "unitree_a1_torque.xml").as_posix()
        else:
            xml_path = (Path(__file__).resolve().parent.parent / "data" / "quadrupeds" /
                        "unitree_a1_position.xml").as_posix()

        self._action_mode = action_mode
        action_spec = self._get_action_specification()

        observation_spec = self._get_observation_specification()

        collision_groups = [("floor", ["floor"]),
                            ("foot_FR", ["FR_foot"]),
                            ("foot_FL", ["FL_foot"]),
                            ("foot_RR", ["RR_foot"]),
                            ("foot_RL", ["RL_foot"])]

        # append observation_spec with the direction arrow and add it to the xml file
        observation_spec.append(("dir_arrow", "dir_arrow", ObservationType.SITE_ROT))
        xml_handle = self._add_dir_vector_to_xml_handle(mjcf.from_path(xml_path))

        self.setup_random_rot = setup_random_rot

        # setup goal including the desired direction and velocity
        self._goal = GoalDirectionVelocity()
        self._goal.set_goal(0.0, default_target_velocity)

        if camera_params is None:
            # make the camera by default a bit higher
            camera_params = dict(follow=dict(distance=3.5, elevation=-20.0, azimuth=90.0))
        super().__init__(xml_handle, action_spec, observation_spec,  collision_groups,
                         camera_params=camera_params, **kwargs)


[docs]
    def setup(self, obs):
        """
        Function to setup the initial state of the simulation. Initialization can be done either
        randomly, from a certain initial, or from the default initial state of the model.

        Args:
            obs (np.array): Observation to initialize the environment from;

        """

        self._reward_function.reset_state()

        if obs is not None:
            self._init_sim_from_obs(obs)
        else:
            if not self.trajectories and self._random_start:
                raise ValueError("Random start not possible without trajectory data.")
            elif not self.trajectories and self._init_step_no is not None:
                raise ValueError("Setting an initial step is not possible without trajectory data.")
            elif self._init_step_no is not None and self._random_start:
                raise ValueError("Either use a random start or set an initial step, not both.")

            if self.trajectories is not None:
                if self._random_start:
                    sample = self.trajectories.reset_trajectory()
                    if self.setup_random_rot:
                        angle = np.random.uniform(0, 2 * np.pi)
                        sample = rotate_obs(sample, angle,  *self._get_relevant_idx_rotation())
                elif self._init_step_no:
                    traj_len = self.trajectories.trajectory_length
                    n_traj = self.trajectories.nnumber_of_trajectories
                    assert self._init_step_no <= traj_len * n_traj
                    substep_no = int(self._init_step_no % traj_len)
                    traj_no = int(self._init_step_no / traj_len)
                    sample = self.trajectories.reset_trajectory(substep_no, traj_no)
                else:
                    # sample random trajectory and use the first sample
                    sample = self.trajectories.reset_trajectory(substep_no=0)
                    if self.setup_random_rot:
                        angle = np.random.uniform(0, 2 * np.pi)
                        sample = rotate_obs(sample, angle,  *self._get_relevant_idx_rotation())

                # set the goal
                rot_mat = self.trajectories.get_from_sample(sample, "dir_arrow")
                angle = mat2angle_xy(rot_mat)
                desired_vel = self.trajectories.get_from_sample(sample, "goal_speed")
                self._goal.set_goal(angle, desired_vel)

                # set the state of the simulation
                self.set_sim_state(sample)



[docs]
    def set_sim_state(self, sample):
        """
        Sets the state of the simulation according to an observation.

        Args:
            sample (list or np.array): Sample used to set the state of the simulation.

        """

        # set simulation state
        sample = sample[:-1]    # remove goal velocity
        super(UnitreeA1, self).set_sim_state(sample)



[docs]
    def create_dataset(self, ignore_keys=None):
        """
        Creates a dataset from the specified trajectories.

        Args:
            ignore_keys (list): List of keys to ignore in the dataset.

        Returns:
            Dictionary containing states, next_states and absorbing flags. For the states the shape is
            (N_traj x N_samples_per_traj, dim_state), while the absorbing flag has the shape is
            (N_traj x N_samples_per_traj).

        """

        if self._dataset is None:

            if ignore_keys is None:
                ignore_keys = ["q_trunk_tx", "q_trunk_ty"]

            if self.trajectories is not None:
                rot_mat_idx_arrow = self._get_idx("dir_arrow")
                state_callback_params = dict(rot_mat_idx_arrow=rot_mat_idx_arrow,
                                             goal_velocity_idx=self._goal_velocity_idx)
                dataset = self.trajectories.create_dataset(ignore_keys=ignore_keys,
                                                           state_callback=self._modify_observation_callback,
                                                           state_callback_params=state_callback_params)
            else:
                raise ValueError("No trajectory was passed to the environment. "
                                 "To create a dataset pass a trajectory first.")

            self._dataset = deepcopy(dataset)

            return dataset

        else:
            return deepcopy(self._dataset)



[docs]
    def get_kinematic_obs_mask(self):
        """
        Returns a mask (np.array) for the observation specified in observation_spec (or part of it).

        """

        return np.arange(len(self.obs_helper.observation_spec))



[docs]
    def load_dataset_and_get_traj_files(self, dataset_path, freq=None):
        """
        Calculates a dictionary containing the kinematics given a dataset. If freq is provided,
        the x and z positions are calculated based on the velocity.

        Args:
            dataset_path (str): Path to the dataset.
            freq (float): Frequency of the data in obs.

        Returns:
            Dictionary containing the keys specified in observation_spec with the corresponding
            values from the dataset.

        """

        dataset = np.load(str(Path(loco_mujoco.__file__).resolve().parent / dataset_path))
        self._dataset = deepcopy({k: d for k, d in dataset.items()})

        states = dataset["states"]
        last = dataset["last"]

        states = np.atleast_2d(states)
        rel_keys = [obs_spec[0] for obs_spec in self.obs_helper.observation_spec]
        num_data = len(states)
        trajectories = dict()
        for i, key in enumerate(rel_keys):
            if i < 2:
                if freq is None:
                    # fill with zeros for x and y position
                    data = np.zeros(num_data)
                else:
                    # compute positions from velocities
                    dt = 1 / float(freq)
                    assert len(states) > 2
                    vel_idx = rel_keys.index("d" + key) - 2
                    data = [0.0]
                    for j, o in enumerate(states[:-1, vel_idx], 1):
                        if last is not None and last[j - 1] == 1:
                            data.append(0.0)
                        else:
                            data.append(data[-1] + dt * o)
                    data = np.array(data)
            elif key == "dir_arrow":
                sin_cos = states[:, i-2:i]
                angle = np.arctan2(sin_cos[:, 1], sin_cos[:, 0]) #+ np.pi/2
                if num_data > 1:
                    data = [angle2mat_xy(a).reshape((9,)) for a in angle]
                else:
                    data = angle2mat_xy(angle).reshape((9,))
                # calculate goal_speed
                dq_trunk_tx = states[:, rel_keys.index("dq_trunk_tx")-2]
                dq_trunk_ty = states[:, rel_keys.index("dq_trunk_ty")-2]
                vels = np.stack([dq_trunk_tx, dq_trunk_ty], axis=1)
                goal_speed = np.linalg.norm(vels, axis=1)
                goal_speed = np.mean(goal_speed) * np.ones_like(goal_speed)
                trajectories["goal_speed"] = goal_speed
            else:
                data = states[:, i - 2]
            trajectories[key] = data

        # add split points
        if len(states) > 2:
            trajectories["split_points"] = np.concatenate([[0], np.squeeze(np.argwhere(last == 1) + 1)])

        return trajectories


    def _get_observation_space(self):
        """
        Returns a tuple of the lows and highs (np.array) of the observation space.

        """
        dir_arrow_idx = self._get_idx("dir_arrow")
        sim_low, sim_high = (self.info.observation_space.low[2:],
                             self.info.observation_space.high[2:])
        sin_cos_angle_low = [-1, -1]
        sin_cos_angle_high = [1, 1]
        goal_vel_low = -np.inf
        goal_vel_high = np.inf
        sim_low = np.concatenate([sim_low[:dir_arrow_idx[0]], sin_cos_angle_low, [goal_vel_low]])
        sim_high = np.concatenate([sim_high[:dir_arrow_idx[0]], sin_cos_angle_high, [goal_vel_high]])

        if self._use_foot_forces:
            grf_low, grf_high = (-np.ones((12,)) * np.inf,
                                 np.ones((12,)) * np.inf)
            return (np.concatenate([sim_low, grf_low]),
                    np.concatenate([sim_high, grf_high]))
        else:
            return sim_low, sim_high

    def _simulation_post_step(self):
        """
        Sets the correct rotation of the goal arrow and the calculates the
        statistics of the ground forces if required. This function is
        called after each step.

        """

        self._set_goal_arrow()
        super()._simulation_post_step()

    def _create_observation(self, obs):
        """
        Creates a full vector of observations.

        Args:
            obs (np.array): Observation vector to be modified or extended;

        Returns:
            New observation vector (np.array);

        """
        rot_mat_idx_arrow = self._get_idx("dir_arrow")

        obs = np.concatenate([obs[2:], [self._goal.get_velocity()]]).flatten()

        obs = self._modify_observation_callback(obs, rot_mat_idx_arrow, self._goal_velocity_idx)

        if self._use_foot_forces:
            obs = np.concatenate([obs,
                                  self.mean_grf.mean / 1000.,
                                  ]).flatten()

        return obs

    def _get_reward_function(self, reward_type, reward_params):
        """
        Constructs a reward function.

        Args:
            reward_type (string): Name of the reward.
            reward_params (dict): Parameters of the reward function.

        Returns:
            Reward function.

        """

        if reward_type == "velocity_vector":
            x_vel_idx = self.get_obs_idx("dq_trunk_tx")[0]
            y_vel_idx = self.get_obs_idx("dq_trunk_ty")[0]
            angle_idx = [-3, -2]
            goal_vel_idx = [-1]
            goal_reward_func = VelocityVectorReward(x_vel_idx=x_vel_idx, y_vel_idx=y_vel_idx,
                                                    angle_idx=angle_idx, goal_vel_idx=goal_vel_idx)
        else:
            goal_reward_func = super()._get_reward_function(reward_type, reward_params)

        return goal_reward_func

    def _has_fallen(self, obs, return_err_msg=False):
        """
        Checks if a model has fallen.

        Args:
            obs (np.array): Current observation.
            return_err_msg (bool): If True, an error message with violations is returned.

        Returns:
            True, if the model has fallen for the current observation, False otherwise.
            Optionally an error message is returned.

        """

        trunk_euler = self._get_from_obs(obs, ["q_trunk_list", "q_trunk_tilt"])
        trunk_height = self._get_from_obs(obs, ["q_trunk_tz"])

        trunk_list_condition = (trunk_euler[0] < -0.2793) or (trunk_euler[0] > 0.2793)
        trunk_tilt_condition = (trunk_euler[1] < -0.192) or (trunk_euler[1] > 0.192)
        trunk_height_condition = trunk_height[0] < -.24
        trunk_condition = (trunk_list_condition or trunk_tilt_condition or trunk_height_condition)

        if return_err_msg:
            error_msg = ""
            if trunk_list_condition:
                error_msg += "trunk_list_condition violated.\n"
            elif trunk_tilt_condition:
                error_msg += "trunk_tilt_condition violated.\n"
            elif trunk_height_condition:
                error_msg += "trunk_height_condition violated. %f \n" % trunk_height

            return trunk_condition, error_msg
        else:
            return trunk_condition

    def _get_relevant_idx_rotation(self):
        """
        Returns the indices relevant for rotating the observation space
        around the vertical axis.

        """

        keys = self.obs_helper.get_all_observation_keys()
        idx_rot = keys.index("q_trunk_rotation")
        idx_xvel = keys.index("dq_trunk_tx")
        idx_yvel = keys.index("dq_trunk_ty")
        return idx_rot, idx_xvel, idx_yvel

    def _get_ground_forces(self):
        """
        Returns the ground forces (np.array).

        """

        grf = np.concatenate([self._get_collision_force("floor", "foot_FL")[:3],
                              self._get_collision_force("floor", "foot_FR")[:3],
                              self._get_collision_force("floor", "foot_RL")[:3],
                              self._get_collision_force("floor", "foot_RR")[:3]])

        return grf

    def _set_goal_arrow(self):
        """
        Sets the rotation of the goal arrow based on the current trunk
        rotation angle and the current goal direction.

        """

        desired_angle = self._goal.get_direction()

        rot_mat = euler_to_mat(np.array([np.pi/2, 0, desired_angle]))

        # and set the rotation of the cylinder
        self._data.site("dir_arrow").xmat = (rot_mat).reshape((9,))

        # calc position of the ball corresponding to the arrow
        self._data.site("dir_arrow_ball").xpos = self._data.body("dir_arrow").xpos + \
                                                 [-0.1 * np.sin(desired_angle), 0.1 * np.cos(desired_angle), 0]

    def _init_sim_from_obs(self, obs):
        """
        Initializes the simulation from an observation.

        Args:
            obs (np.array): The observation to set the simulation state to.

        """
        raise TypeError("Initializing from observation is currently not supported in this environment. ")

    def _get_interpolate_map_params(self):
        """
        Returns all parameters needed to do the interpolation mapping for the respective environment.

        """
        keys = self.get_all_observation_keys()
        rot_mat_idx = keys.index("dir_arrow")
        trunk_list_idx = keys.index("q_trunk_list")
        trunk_tilt_idx = keys.index("q_trunk_tilt")
        trunk_rot_idx = keys.index("q_trunk_rotation")

        return dict(rot_mat_idx=rot_mat_idx, trunk_orientation_idx=[trunk_list_idx, trunk_tilt_idx, trunk_rot_idx])

    def _get_interpolate_remap_params(self):
        """
        Returns all parameters needed to do the interpolation remapping for the respective environment.

        """
        keys = self.get_all_observation_keys()
        angle_idx = keys.index("dir_arrow")
        trunk_list_idx = keys.index("q_trunk_list")
        trunk_tilt_idx = keys.index("q_trunk_tilt")
        trunk_rot_idx = keys.index("q_trunk_rotation")
        position_indices = [keys.index(key) for key in keys if key.startswith("q_")]
        velocity_indices = [keys.index(key) for key in keys if key.startswith("dq_")]
        ctrl_dt = self.dt

        return dict(angle_idx=angle_idx, trunk_orientation_idx=[trunk_list_idx, trunk_tilt_idx, trunk_rot_idx],
                    position_indices=position_indices, velocity_indices=velocity_indices, ctrl_dt=ctrl_dt)


[docs]
    @staticmethod
    def generate(task="simple", dataset_type="real", debug=False, **kwargs):
        """
        Returns a Unitree environment corresponding to the specified task.

        Args:
            task (str): Main task to solve. Either "simple" or "hard". "simple" is a straight walking
                task. "hard" is a walking task in 8 direction. This makes this environment goal conditioned.
            dataset_type (str): "real" or "perfect". "real" uses real motion capture data as the
                reference trajectory. This data does not perfectly match the kinematics
                and dynamics of this environment, hence it is more challenging. "perfect" uses
                a perfect dataset.
            debug (bool): If True, the smaller test datasets are used for debugging purposes.

        Returns:
            An MDP of the Unitree A1 Robot.

        """
        check_validity_task_mode_dataset(UnitreeA1.__name__, task, None, dataset_type,
                                         *UnitreeA1.valid_task_confs.get_all())

        if "reward_type" in kwargs.keys():
            reward_type = kwargs["reward_type"]
            reward_params = kwargs["reward_params"]
            del kwargs["reward_type"]
            del kwargs["reward_params"]
        else:
            reward_type = "velocity_vector"
            reward_params = dict()

        # Generate the MDP
        # todo: once the trajectory is learned without random init rotation, activate the latter.
        if task == "simple":
            if dataset_type == "real":
                path = "datasets/quadrupeds/real/walk_straight.npz"
            elif dataset_type == "perfect":
                path = "datasets/quadrupeds/perfect/unitreea1_simple/perfect_expert_dataset_det.npz"
            use_mini_dataset = not os.path.exists(Path(loco_mujoco.__file__).resolve().parent / path)
            if debug or use_mini_dataset:
                if use_mini_dataset:
                    warnings.warn("Datasets not found, falling back to test datasets. Please download and install "
                                  "the datasets to use this environment for imitation learning!")
                path = path.split("/")
                path.insert(3, "mini_datasets")
                path = "/".join(path)
            mdp = UnitreeA1(reward_type=reward_type, reward_params=reward_params, **kwargs)
            traj_path = Path(loco_mujoco.__file__).resolve().parent / path
        elif task == "hard":
            if dataset_type == "real":
                path = "datasets/quadrupeds/real/walk_8_dir.npz"
            elif dataset_type == "perfect":
                path = "datasets/quadrupeds/perfect/unitreea1_hard/perfect_expert_dataset_det.npz"
            use_mini_dataset = not os.path.exists(Path(loco_mujoco.__file__).resolve().parent / path)
            if debug or use_mini_dataset:
                if use_mini_dataset:
                    warnings.warn("Datasets not found, falling back to test datasets. Please download and install "
                                  "the datasets to use this environment for imitation learning!")
                path = path.split("/")
                path.insert(3, "mini_datasets")
                path = "/".join(path)
            mdp = UnitreeA1(reward_type=reward_type, reward_params=reward_params, **kwargs)
            traj_path = Path(loco_mujoco.__file__).resolve().parent / path

        # Load the trajectory
        env_freq = 1 / mdp._timestep  # hz
        desired_contr_freq = 1 / mdp.dt  # hz
        n_substeps = env_freq // desired_contr_freq

        if dataset_type == "real":
            traj_data_freq = 500  # hz
            traj_params = dict(traj_path=traj_path,
                               traj_dt=(1 / traj_data_freq),
                               control_dt=(1 / desired_contr_freq))
        elif dataset_type == "perfect":
            if "use_foot_forces" in kwargs.keys():
                assert kwargs["use_foot_forces"] is False
            if "action_mode" in kwargs.keys():
                assert kwargs["action_mode"] == "torque"
            if "default_target_velocity" in kwargs.keys():
                assert kwargs["default_target_velocity"] == 0.5
            traj_data_freq = 100  # hz
            traj_files = mdp.load_dataset_and_get_traj_files(path, traj_data_freq)
            traj_params = dict(traj_files=traj_files,
                               traj_dt=(1 / traj_data_freq),
                               control_dt=(1 / desired_contr_freq))

        mdp.load_trajectory(traj_params)

        return mdp


    @property
    def _goal_velocity_idx(self):
        """
        The goal speed is not in the observation helper, but only in the trajectory. This is a workaround
        to access it anyways.
        Note: This is the goal velocity in index *before* the modify_observation_callback is applied!

        """
        return 43

    @staticmethod
    def _modify_observation_callback(obs, rot_mat_idx_arrow, goal_velocity_idx):
        """
        Transforms the rotation matrix from obs to a sin-cos feature.

        Args:
            obs (np.array): Generated observation.
            rot_mat_idx_arrow (int): Index of the beginning rotation matrix in the observation.
            goal_velocity_idx (int): Index of the goal speed in the observation.

        Returns:
            The final environment observation for the agent.

        """

        rot_mat_arrow = obs[rot_mat_idx_arrow].reshape((3, 3))
        # convert mat to angle
        angle = mat2angle_xy(rot_mat_arrow)
        # transform the angle to be in [-pi, pi]
        angle = transform_angle_2pi(angle)
        # rotate by 90 degrees
        angle -= np.pi / 2
        # make sin-cos transformation
        angle = np.array([np.cos(angle), np.sin(angle)])

        # get goal velocity
        goal_velocity = obs[goal_velocity_idx]

        # concatenate everything to new obs
        new_obs = np.concatenate([obs[:rot_mat_idx_arrow[0]], angle, [goal_velocity]])

        return new_obs

    @staticmethod
    def _add_dir_vector_to_xml_handle(xml_handle):
        """
        Adds a direction vector to the Mujoco XML visualizing the goal direction.

        Args:
            xml_handle: Handle to Mujoco XML.

        Returns:
            Modified Mujoco XML handle.

        """

        # find trunk and attach direction arrow
        trunk = xml_handle.find("body", "trunk")
        trunk.add("body", name="dir_arrow", pos="0 0 0.15")
        dir_vec = xml_handle.find("body", "dir_arrow")
        # todo: once Mujoco support cones, make an actual arrow (its a requested feature).
        dir_vec.add("site", name="dir_arrow_ball", type="sphere", size=".03", pos="-.1 0 0")
        dir_vec.add("site", name="dir_arrow", type="cylinder", size=".01", fromto="0 0 -.1 0 0 .1")

        return xml_handle

    @staticmethod
    def _get_observation_specification():
        """
        Getter for the observation space specification.

        Returns:
            A list of tuples containing the specification of each observation
            space entry.

        """

        observation_spec = [
            # ------------------- JOINT POS -------------------
            # --- Trunk ---
            ("q_trunk_tx", "trunk_tx", ObservationType.JOINT_POS),
            ("q_trunk_ty", "trunk_ty", ObservationType.JOINT_POS),
            ("q_trunk_tz", "trunk_tz", ObservationType.JOINT_POS),
            ("q_trunk_list", "trunk_list", ObservationType.JOINT_POS),
            ("q_trunk_tilt", "trunk_tilt", ObservationType.JOINT_POS),
            ("q_trunk_rotation", "trunk_rotation", ObservationType.JOINT_POS),
            # --- Front ---
            ("q_FR_hip_joint", "FR_hip_joint", ObservationType.JOINT_POS),
            ("q_FR_thigh_joint", "FR_thigh_joint", ObservationType.JOINT_POS),
            ("q_FR_calf_joint", "FR_calf_joint", ObservationType.JOINT_POS),
            ("q_FL_hip_joint", "FL_hip_joint", ObservationType.JOINT_POS),
            ("q_FL_thigh_joint", "FL_thigh_joint", ObservationType.JOINT_POS),
            ("q_FL_calf_joint", "FL_calf_joint", ObservationType.JOINT_POS),
            # --- Rear ---
            ("q_RR_hip_joint", "RR_hip_joint", ObservationType.JOINT_POS),
            ("q_RR_thigh_joint", "RR_thigh_joint", ObservationType.JOINT_POS),
            ("q_RR_calf_joint", "RR_calf_joint", ObservationType.JOINT_POS),
            ("q_RL_hip_joint", "RL_hip_joint", ObservationType.JOINT_POS),
            ("q_RL_thigh_joint", "RL_thigh_joint", ObservationType.JOINT_POS),
            ("q_RL_calf_joint", "RL_calf_joint", ObservationType.JOINT_POS),
            # ------------------- JOINT VEL -------------------
            # --- Trunk ---
            ("dq_trunk_tx", "trunk_tx", ObservationType.JOINT_VEL),
            ("dq_trunk_ty", "trunk_ty", ObservationType.JOINT_VEL),
            ("dq_trunk_tz", "trunk_tz", ObservationType.JOINT_VEL),
            ("dq_trunk_list", "trunk_list", ObservationType.JOINT_VEL),
            ("dq_trunk_tilt", "trunk_tilt", ObservationType.JOINT_VEL),
            ("dq_trunk_rotation", "trunk_rotation", ObservationType.JOINT_VEL),
            # --- Front ---
            ("dq_FR_hip_joint", "FR_hip_joint", ObservationType.JOINT_VEL),
            ("dq_FR_thigh_joint", "FR_thigh_joint", ObservationType.JOINT_VEL),
            ("dq_FR_calf_joint", "FR_calf_joint", ObservationType.JOINT_VEL),
            ("dq_FL_hip_joint", "FL_hip_joint", ObservationType.JOINT_VEL),
            ("dq_FL_thigh_joint", "FL_thigh_joint", ObservationType.JOINT_VEL),
            ("dq_FL_calf_joint", "FL_calf_joint", ObservationType.JOINT_VEL),
            # --- Rear ---
            ("dq_RR_hip_joint", "RR_hip_joint", ObservationType.JOINT_VEL),
            ("dq_RR_thigh_joint", "RR_thigh_joint", ObservationType.JOINT_VEL),
            ("dq_RR_calf_joint", "RR_calf_joint", ObservationType.JOINT_VEL),
            ("dq_RL_hip_joint", "RL_hip_joint", ObservationType.JOINT_VEL),
            ("dq_RL_thigh_joint", "RL_thigh_joint", ObservationType.JOINT_VEL),
            ("dq_RL_calf_joint", "RL_calf_joint", ObservationType.JOINT_VEL)]

        return observation_spec

    @staticmethod
    def _get_action_specification():
        """
        Getter for the action space specification.

        Returns:
            A list of tuples containing the specification of each action
            space entry.

        """

        action_spec = [
            "FR_hip", "FR_thigh", "FR_calf",
            "FL_hip", "FL_thigh", "FL_calf",
            "RR_hip", "RR_thigh", "RR_calf",
            "RL_hip", "RL_thigh", "RL_calf"]

        return action_spec

    @staticmethod
    def _interpolate_map(traj, **interpolate_map_params):
        """
        A mapping that is supposed to transform a trajectory into a space where interpolation is
        allowed. E.g., maps a rotation matrix to a set of angles.

        Args:
            traj (list): List of np.arrays containing each observations. Each np.array
                has the shape (n_trajectories, n_samples, (dim_observation)). If dim_observation
                is one the shape of the array is just (n_trajectories, n_samples).
            interpolate_map_params: Set of parameters needed to do the interpolation by the Unitree environment.

        Returns:
            A np.array with shape (n_observations, n_trajectories, n_samples). dim_observation
            has to be one.

        """

        rot_mat_idx = interpolate_map_params["rot_mat_idx"]
        trunk_orientation_idx = interpolate_map_params["trunk_orientation_idx"]
        traj_list = [list() for j in range(len(traj))]
        for i in range(len(traj_list)):
            # if the state is a rotation
            if i in trunk_orientation_idx:  # todo: not sure if this is actually needed.
                # change it to the nearest rotation presentation to the previous state
                # -> no huge jumps between -pi and pi for example
                traj_list[i] = list(np.unwrap(traj[i]))
            else:
                traj_list[i] = list(traj[i])
        # turn matrices into angles todo: this is slow, implement vectorized implementation
        traj_list[rot_mat_idx] = np.array([mat2angle_xy(mat) for mat in traj[rot_mat_idx]])
        return np.array(traj_list)

    @staticmethod
    def _interpolate_remap(traj, **interpolate_remap_params):
        """
        The corresponding backwards transformation to _interpolation_map.

        Args:
            traj (np.array): Trajectory as np.array with shape (n_observations, n_trajectories, n_samples).
            dim_observation is one.
            interpolate_remap_params: Set of parameters needed to do the interpolation by the Unitree environment.

        Returns:
            List of np.arrays containing each observations. Each np.array has the shape
            (n_trajectories, n_samples, (dim_observation)). If dim_observation
            is one the shape of the array is just (n_trajectories, n_samples).

        """

        angle_idx = interpolate_remap_params["angle_idx"]
        trunk_orientation_idx = interpolate_remap_params["trunk_orientation_idx"]
        position_indices = interpolate_remap_params["position_indices"]
        velocity_indices = interpolate_remap_params["velocity_indices"]
        ctrl_dt = interpolate_remap_params["ctrl_dt"]
        traj_list = [list() for j in range(len(traj))]
        for i in range(len(traj_list)):
            # if the state is a rotation
            if i in trunk_orientation_idx:
                # make sure it is in range -pi,pi
                traj_list[i] = [transform_angle_2pi(angle) for angle in traj[i]]
            elif i in velocity_indices:
                # the interpolation is problematic in the joint velocities for the Unitree. Recalculate them here based
                # on the positions
                joint_position_idx = position_indices[velocity_indices.index(i)]
                joint_position = traj[joint_position_idx]
                traj_list[i] = [0.0] + list((joint_position[1:] - joint_position[:-1]) / ctrl_dt)
            else:
                traj_list[i] = list(traj[i])

        # transforms angles into rotation matrices todo: this is slow, implement vectorized implementation
        traj_list[angle_idx] = np.array([angle2mat_xy(angle).reshape(9,) for angle in traj[angle_idx]])
        return traj_list