Source code for gym_gridverse.envs.inner_env

import abc
from typing import Optional, Tuple

from gym_gridverse.action import Action
from gym_gridverse.observation import Observation
from gym_gridverse.spaces import ActionSpace, ObservationSpace, StateSpace
from gym_gridverse.state import State

# Names exported by ``from <this module> import *``.
__all__ = ['InnerEnv']


class InnerEnv(metaclass=abc.ABCMeta):
    """Inner environment

    Inner environments provide an interface primarily based on python objects,
    with states represented by :py:class:`~gym_gridverse.state.State`,
    observations by :py:class:`~gym_gridverse.observation.Observation`, and
    actions by :py:class:`~gym_gridverse.action.Action`.

    Derived classes implement the *functional* interface
    (:py:meth:`functional_reset`, :py:meth:`functional_step`,
    :py:meth:`functional_observation`) plus :py:meth:`set_seed`.  This base
    class builds the *stateful* interface (:py:meth:`reset`,
    :py:meth:`step`, :py:attr:`state`, :py:attr:`observation`) on top of it
    by storing the current state and a memoized observation.
    """

    def __init__(
        self,
        state_space: StateSpace,
        action_space: ActionSpace,
        observation_space: ObservationSpace,
    ):
        """Stores the spaces and starts with no state and no observation

        Args:
            state_space (StateSpace): stored as ``self.state_space``
            action_space (ActionSpace): stored as ``self.action_space``
            observation_space (ObservationSpace): stored as
                ``self.observation_space``
        """
        self.state_space = state_space
        self.action_space = action_space
        self.observation_space = observation_space

        # Both stay ``None`` until :py:meth:`reset` is called (state) or
        # :py:attr:`observation` is first accessed (observation).
        self._state: Optional[State] = None
        self._observation: Optional[Observation] = None

    @abc.abstractmethod
    def set_seed(self, seed: Optional[int] = None):
        """Sets the seed of the environment

        The exact seeding semantics are defined by the derived class.

        Args:
            seed (Optional[int]): the seed; defaults to ``None``
        """
        # An explicit exception (rather than ``assert False``) still fires
        # when python runs with ``-O``, which strips assert statements.
        raise NotImplementedError("Must be implemented by derived class")

    @abc.abstractmethod
    def functional_reset(self) -> State:
        """Returns a new state"""
        raise NotImplementedError("Must be implemented by derived class")

    @abc.abstractmethod
    def functional_step(
        self, state: State, action: Action
    ) -> Tuple[State, float, bool]:
        """Returns next state, reward, and done flag"""
        raise NotImplementedError("Must be implemented by derived class")

    @abc.abstractmethod
    def functional_observation(self, state: State) -> Observation:
        """Returns observation"""
        raise NotImplementedError("Must be implemented by derived class")

    def reset(self) -> None:
        """Resets the state

        Internally calls :py:meth:`functional_reset` to reset the state;  also
        resets the observation, so that an updated observation will be
        generated upon request.
        """
        self._state = self.functional_reset()
        # Invalidate the memoized observation; it belonged to the old state.
        self._observation = None

    def step(self, action: Action) -> Tuple[float, bool]:
        """Runs the dynamics for one timestep, and returns reward and done flag

        Internally calls :py:meth:`functional_step` to update the state;  also
        resets the observation, so that an updated observation will be
        generated upon request.

        Args:
            action (Action): the chosen action to apply

        Returns:
            Tuple[float, bool]: reward and terminal

        Raises:
            RuntimeError: if no state is available (see :py:attr:`state`)
        """
        # Going through the ``state`` property (not ``_state``) makes a step
        # on an environment that was never reset fail loudly.
        self._state, reward, done = self.functional_step(self.state, action)
        # Invalidate the memoized observation; it belonged to the old state.
        self._observation = None
        return reward, done

    @property
    def state(self) -> State:
        """Return the current state

        Returns:
            State:

        Raises:
            RuntimeError: if the stored state is ``None``, e.g. because
                :py:meth:`reset` has not been called yet
        """
        if self._state is None:
            raise RuntimeError(
                'The state was not set properly;  was the environment reset?'
            )

        return self._state

    @property
    def observation(self) -> Observation:
        """Returns the current observation

        Internally calls :py:meth:`functional_observation` to generate the
        current observation based on the current state.  The observation is
        generated lazily, such that at most one observation is generated for
        each state.  As a consequence, this will return the same observation
        until the state is reset/updated, even if the observation function is
        stochastic.

        Returns:
            Observation:

        Raises:
            RuntimeError: if no state is available (see :py:attr:`state`)
        """
        # memoizing observation because observation function can be stochastic
        if self._observation is None:
            self._observation = self.functional_observation(self.state)

        return self._observation