# Source code for rl_coach.filters.action.box_masking

#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Union

import numpy as np

from rl_coach.core_types import ActionType
from rl_coach.filters.action.action_filter import ActionFilter
from rl_coach.spaces import BoxActionSpace


class BoxMasking(ActionFilter):
    """
    Masks part of a box action space so that the agent can only act inside a sub-range of it. For example,
    if the original action space is between -1 and 1, then this filter can be used in order to constrain the
    agent actions to the range 0 to 1 instead. This essentially masks the range -1 to 0 from the agent.
    The resulting action space is shifted: it always starts from 0 and has the size of the unmasked area
    (high - low). Actions are mapped back into the target space by adding the low masking bound.
    """
    def __init__(self,
                 masked_target_space_low: Union[None, int, float, np.ndarray],
                 masked_target_space_high: Union[None, int, float, np.ndarray]):
        """
        :param masked_target_space_low: the lowest values that can be chosen in the target action space.
                                        May be None, in which case set_masking must be called before the
                                        filter is validated or used.
        :param masked_target_space_high: the highest values that can be chosen in the target action space.
                                         May be None, in which case set_masking must be called before the
                                         filter is validated or used.
        """
        self._store_masking(masked_target_space_low, masked_target_space_high)
        super().__init__()

    def _store_masking(self, low: Union[None, int, float, np.ndarray],
                       high: Union[None, int, float, np.ndarray]) -> None:
        """Record the masking bounds and the offset that is added to every filtered action."""
        self.masked_target_space_low = low
        self.masked_target_space_high = high
        # the agent-facing space starts at 0, so shifting by the low bound maps it back to the target space
        self.offset = low

    def _build_input_action_space(self, output_action_space: BoxActionSpace) -> BoxActionSpace:
        """Build the agent-facing space: same shape as the output space, spanning [0, high - low]."""
        return BoxActionSpace(output_action_space.shape,
                              low=0,
                              high=self.masked_target_space_high - self.masked_target_space_low)

    def set_masking(self, masked_target_space_low: Union[None, int, float, np.ndarray],
                    masked_target_space_high: Union[None, int, float, np.ndarray]):
        """
        Replace the masking bounds. If an output action space was already attached, the new bounds are
        validated against it and the input action space is rebuilt.

        :param masked_target_space_low: the lowest values that can be chosen in the target action space
        :param masked_target_space_high: the highest values that can be chosen in the target action space
        :raises ValueError: if an output action space is attached and the new bounds are not valid for it.
                            In that case the previous bounds are kept.
        """
        previous_bounds = (self.masked_target_space_low, self.masked_target_space_high)
        self._store_masking(masked_target_space_low, masked_target_space_high)

        # nothing to validate against until get_unfiltered_action_space has attached an output space
        if self.output_action_space is None:
            return

        try:
            self.validate_output_action_space(self.output_action_space)
        except Exception:
            # do not leave the filter holding a mask that is inconsistent with its input action space
            self._store_masking(*previous_bounds)
            raise
        self.input_action_space = self._build_input_action_space(self.output_action_space)

    def validate_output_action_space(self, output_action_space: BoxActionSpace):
        """
        Check that the masking bounds are set and are compatible with the given output action space.

        :param output_action_space: the action space that filtered actions are mapped into
        :raises ValueError: if the space is not a BoxActionSpace, if the masking bounds were not set,
                            if either bound falls outside the space's [low, high] range, or if the low
                            bound exceeds the high bound
        """
        if not isinstance(output_action_space, BoxActionSpace):
            raise ValueError("BoxMasking only works with an output space of type BoxActionSpace. "
                             "The given output space is {}".format(output_action_space))

        mask_low = self.masked_target_space_low
        mask_high = self.masked_target_space_high
        if mask_low is None or mask_high is None:
            raise ValueError("The masking target space size was not set. Please call set_masking.")

        space_low = output_action_space.low
        space_high = output_action_space.high
        if not (np.all(space_low <= mask_low) and np.all(mask_low <= space_high)):
            raise ValueError("The low values for masking the action space ({}) are not within the range of the "
                             "target space (low = {}, high = {})"
                             .format(mask_low, space_low, space_high))
        if not (np.all(space_low <= mask_high) and np.all(mask_high <= space_high)):
            raise ValueError("The high values for masking the action space ({}) are not within the range of the "
                             "target space (low = {}, high = {})"
                             .format(mask_high, space_low, space_high))
        # an inverted mask would yield an input space whose high bound is below its low bound of 0
        if not np.all(mask_low <= mask_high):
            raise ValueError("The low values for masking the action space ({}) must not exceed the high "
                             "values ({})".format(mask_low, mask_high))

    def get_unfiltered_action_space(self, output_action_space: BoxActionSpace) -> BoxActionSpace:
        """
        Attach the output action space and return the action space the agent should act in.

        :param output_action_space: the action space that filtered actions are mapped into
        :return: a BoxActionSpace with the same shape as the output space, ranging from 0 to (high - low)
        :raises ValueError: if the masking bounds are unset or not valid for the given space
        """
        # validate first so that unset bounds fail with a clear message instead of a TypeError on None - None
        self.validate_output_action_space(output_action_space)
        self.output_action_space = output_action_space
        self.input_action_space = self._build_input_action_space(output_action_space)
        return self.input_action_space

    def filter(self, action: ActionType) -> ActionType:
        """
        Map an action from the agent-facing (zero-based) space into the target action space.

        :param action: an action chosen in the input action space
        :return: the action shifted by the low masking bound
        """
        return action + self.offset