# Source code for gym_gridverse.envs.inner_env

import abc
from typing import Optional, Tuple

from gym_gridverse.action import Action
from gym_gridverse.observation import Observation
from gym_gridverse.spaces import ActionSpace, ObservationSpace, StateSpace
from gym_gridverse.state import State

# Public API of this module: only ``InnerEnv`` is exported by a star-import.
__all__ = ['InnerEnv']


class InnerEnv(metaclass=abc.ABCMeta):
    """Inner environment

    Inner environments provide an interface primarily based on python objects,
    with states represented by :py:class:`~gym_gridverse.state.State`,
    observations by :py:class:`~gym_gridverse.observation.Observation`, and
    actions by :py:class:`~gym_gridverse.action.Action`.

    Derived classes implement the functional interface
    (:py:meth:`set_seed`, :py:meth:`functional_reset`,
    :py:meth:`functional_step`, :py:meth:`functional_observation`); the
    stateful interface (:py:meth:`reset`, :py:meth:`step`, :py:attr:`state`,
    :py:attr:`observation`) is built on top of it by this class.
    """

    def __init__(
        self,
        state_space: StateSpace,
        action_space: ActionSpace,
        observation_space: ObservationSpace,
    ):
        """Stores the spaces and starts with no state and no observation

        Args:
            state_space (StateSpace): stored as :py:attr:`state_space`
            action_space (ActionSpace): stored as :py:attr:`action_space`
            observation_space (ObservationSpace): stored as
                :py:attr:`observation_space`
        """
        self.state_space = state_space
        self.action_space = action_space
        self.observation_space = observation_space

        # `None` until `reset` is called
        self._state: Optional[State] = None
        # `None` means "not generated yet for the current state"
        self._observation: Optional[Observation] = None

    @abc.abstractmethod
    def set_seed(self, seed: Optional[int] = None):
        """Sets the seed; how it is used is defined by the derived class

        Args:
            seed (Optional[int]): the seed, or `None`
        """
        # an exception (rather than `assert False`) survives `python -O`
        raise NotImplementedError("Must be implemented by derived class")

    @abc.abstractmethod
    def functional_reset(self) -> State:
        """Returns a new state"""
        raise NotImplementedError("Must be implemented by derived class")

    @abc.abstractmethod
    def functional_step(
        self, state: State, action: Action
    ) -> Tuple[State, float, bool]:
        """Returns next state, reward, and done flag"""
        raise NotImplementedError("Must be implemented by derived class")

    @abc.abstractmethod
    def functional_observation(self, state: State) -> Observation:
        """Returns observation"""
        raise NotImplementedError("Must be implemented by derived class")

    def reset(self) -> None:
        """Resets the state

        Internally calls :py:meth:`functional_reset` to reset the state; also
        resets the observation, so that an updated observation will be
        generated upon request.
        """
        self._state = self.functional_reset()
        self._observation = None

    def step(self, action: Action) -> Tuple[float, bool]:
        """Runs the dynamics for one timestep, and returns reward and done flag

        Internally calls :py:meth:`functional_step` to update the state; also
        resets the observation, so that an updated observation will be
        generated upon request.

        Args:
            action (Action): the chosen action to apply

        Returns:
            Tuple[float, bool]: reward and terminal

        Raises:
            RuntimeError: if there is no current state (see :py:attr:`state`)
        """
        # goes through the `state` property so that a missing reset is reported
        self._state, reward, done = self.functional_step(self.state, action)
        self._observation = None
        return reward, done

    @property
    def state(self) -> State:
        """Return the current state

        Returns:
            State:

        Raises:
            RuntimeError: if the state has not been set yet
        """
        if self._state is None:
            raise RuntimeError(
                'The state was not set properly; was the environment reset?'
            )

        return self._state

    @property
    def observation(self) -> Observation:
        """Returns the current observation

        Internally calls :py:meth:`functional_observation` to generate the
        current observation based on the current state.  The observation is
        generated lazily, such that at most one observation is generated for
        each state.  As a consequence, this will return the same observation
        until the state is reset/updated, even if the observation function is
        stochastic.

        Returns:
            Observation:

        Raises:
            RuntimeError: if there is no current state (see :py:attr:`state`)
        """
        # memoizing observation because observation function can be stochastic
        if self._observation is None:
            self._observation = self.functional_observation(self.state)

        return self._observation