class CustomReward(reward_callback=None)[source]

Bases: RewardInterface

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.

class MultiTargetVelocityReward(target_velocity, x_vel_idx, env_id_len, scalings)[source]

Bases: RewardInterface

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.

class NoReward[source]

Bases: RewardInterface

A reward function that always returns 0.

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.

class PosReward(pos_idx)[source]

Bases: RewardInterface

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.

class RewardInterface[source]

Bases: object

Interface to specify a reward function.

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.

reset_state()[source]

Reset the state of the object.

class TargetVelocityReward(target_velocity, x_vel_idx)[source]

Bases: RewardInterface

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.

class VelocityVectorReward(x_vel_idx, y_vel_idx, angle_idx, goal_vel_idx)[source]

Bases: RewardInterface

__call__(state, action, next_state, absorbing)[source]

Compute the reward.

Args:

state (np.ndarray): last state; action (np.ndarray): applied action; next_state (np.ndarray): current state; absorbing: flag indicating whether the current state is absorbing.

Returns:

The reward for the current transition.