gym_gridverse.envs package#

Subpackages#

Submodules#

gym_gridverse.envs.gridworld module#

class GridWorld(state_space, action_space, observation_space, reset_function, transition_function, observation_function, reward_function, termination_function)[source]#

Bases: InnerEnv

Implementation of the InnerEnv interface.

Initializes a GridWorld from the given components.

__init__(state_space, action_space, observation_space, reset_function, transition_function, observation_function, reward_function, termination_function)[source]#

Initializes a GridWorld from the given components.

set_seed(seed=None)[source]#
functional_reset()[source]#

Returns a new state

Return type

State

functional_step(state, action)[source]#

Returns next state, reward, and done flag

Return type

Tuple[State, float, bool]

functional_observation(state)[source]#

Returns observation

Return type

Observation

gym_gridverse.envs.inner_env module#

class InnerEnv(state_space, action_space, observation_space)[source]#

Bases: object

Inner environment

Inner environments provide an interface primarily based on python objects, with states represented by State, observations by Observation, and actions by Action.

abstract set_seed(seed=None)[source]#
abstract functional_reset()[source]#

Returns a new state

Return type

State

abstract functional_step(state, action)[source]#

Returns next state, reward, and done flag

Return type

Tuple[State, float, bool]

abstract functional_observation(state)[source]#

Returns observation

Return type

Observation

reset()[source]#

Resets the state

Internally calls functional_reset() to reset the state; also resets the observation, so that an updated observation will be generated upon request.

step(action)[source]#

Runs the dynamics for one timestep, and returns reward and done flag

Internally calls functional_step() to update the state; also resets the observation, so that an updated observation will be generated upon request.

Parameters

action (Action) – the chosen action to apply

Returns

reward and terminal

Return type

Tuple[float, bool]

property state: gym_gridverse.state.State#

Returns the current state

Return type

State

property observation: gym_gridverse.observation.Observation#

Returns the current observation

Internally calls functional_observation() to generate the current observation based on the current state. The observation is generated lazily, such that at most one observation is generated for each state. As a consequence, this will return the same observation until the state is reset/updated, even if the observation function is stochastic.

Return type

Observation
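The lazy, at-most-one-observation-per-state behaviour described above can be sketched in plain Python. The names below (`LazyObservationSketch`, `_functional_observation`) are hypothetical stand-ins for illustration, not the library's implementation:

```python
import random

class LazyObservationSketch:
    """Caches at most one observation per state, like InnerEnv.observation."""

    def __init__(self):
        self._state = 0
        self._observation = None  # cache, invalidated on every state change

    def step(self):
        self._state += 1
        self._observation = None  # invalidate cache; next access regenerates

    def _functional_observation(self, state):
        # stochastic observation function: a noisy reading of the state
        return state + random.random()

    @property
    def observation(self):
        # generated lazily, so repeated access returns the cached value
        if self._observation is None:
            self._observation = self._functional_observation(self._state)
        return self._observation

env = LazyObservationSketch()
first = env.observation
second = env.observation  # identical to first, despite the stochastic function
env.step()
third = env.observation   # state changed, so a fresh observation is drawn
```

Repeated reads return the same observation until the state changes, mirroring the caching behaviour documented for `observation`.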

gym_gridverse.envs.observation_functions module#

class ObservationFunction(*args, **kwargs)[source]#

Bases: Protocol

class ObservationFunctionRegistry(dict=None, /, **kwargs)[source]#

Bases: FunctionRegistry

get_protocol_parameters(signature)[source]#
Return type

List[Parameter]

check_signature(function)[source]#
observation_function_registry = {'from_visibility': <function from_visibility>, 'fully_transparent': <function fully_transparent>, 'partially_occluded': <function partially_occluded>, 'raytracing': <function raytracing>, 'stochastic_raytracing': <function stochastic_raytracing>}#

Observation function registry

from_visibility(state, *, area, visibility_function, rng=None)[source]#
Return type

Observation

fully_transparent(state, *, area, rng=None)[source]#
Return type

Observation

partially_occluded(state, *, area, rng=None)[source]#
Return type

Observation

raytracing(state, *, area, rng=None)[source]#
Return type

Observation

stochastic_raytracing(state, *, area, rng=None)[source]#
Return type

Observation

factory(name, **kwargs)[source]#
Return type

ObservationFunction
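The registry-plus-factory pattern documented above can be sketched generically. Everything here (the registry dict, the `register` decorator, and the stand-in `fully_transparent` body) is a simplified illustration, not the library's actual `FunctionRegistry` code:

```python
from functools import partial

observation_registry = {}  # name -> observation-style function

def register(name):
    def decorator(function):
        observation_registry[name] = function
        return function
    return decorator

@register('fully_transparent')
def fully_transparent(state, *, area, rng=None):
    # stand-in body; a real implementation would build an Observation
    return ('obs', state, area)

def factory(name, **kwargs):
    """Looks the function up by name and binds its keyword arguments."""
    try:
        function = observation_registry[name]
    except KeyError as exc:
        raise ValueError(f'invalid observation function name {name}') from exc
    return partial(function, **kwargs)

observation_function = factory('fully_transparent', area=(7, 7))
print(observation_function('some-state'))  # ('obs', 'some-state', (7, 7))
```

Binding the keyword arguments up front yields a function that only needs the state, which is the shape the environment expects.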

gym_gridverse.envs.reset_functions module#

class ResetFunction(*args, **kwargs)[source]#

Bases: Protocol

Signature that all reset functions must follow

class ResetFunctionRegistry(dict=None, /, **kwargs)[source]#

Bases: FunctionRegistry

get_protocol_parameters(signature)[source]#
Return type

List[Parameter]

check_signature(function)[source]#
reset_function_registry = {'empty': <function empty>, 'rooms': <function rooms>, 'dynamic_obstacles': <function dynamic_obstacles>, 'keydoor': <function keydoor>, 'crossing': <function crossing>, 'teleport': <function teleport>, 'memory': <function memory>, 'memory_rooms': <function memory_rooms>}#

Reset function registry

empty(shape, random_agent=False, random_exit=False, *, rng=None)[source]#

An empty environment

Return type

State

rooms(shape, layout, *, rng=None)[source]#
Return type

State

dynamic_obstacles(shape, num_obstacles, random_agent=False, *, rng=None)[source]#

An environment with dynamically moving obstacles

Parameters
  • shape (Shape) – shape of grid

  • num_obstacles (int) – number of dynamic obstacles

  • random_agent (bool, optional) – whether to place the agent at a random position; if False, the agent is placed in a corner

  • rng (Generator, optional) – random number generator

Return type

State

keydoor(shape, *, rng=None)[source]#

An environment with a key and a door

Creates a height x width (including outer walls) grid with a random column of walls. The agent and a yellow key are randomly dropped left of the column, while the exit is placed in the bottom right. For example:

#########
# @#    #
#  D    #
#K #   G#
#########
Return type

State

crossing(shape, num_rivers, object_type, *, rng=None)[source]#

An environment with “rivers” to be crossed

Creates a height x width (including outer walls) grid with random rows/columns of objects called “rivers”. The agent needs to navigate river openings to reach the exit. For example:

#########
#@    # #
#### ####
#     # #
## ######
#       #
#     # #
#     #E#
#########
Parameters
  • shape (Shape) – shape (odd height and width) of grid

  • num_rivers (int) – number of rivers

  • object_type (Type[GridObject]) – river’s object type

  • rng (Generator, optional) – random number generator

Return type

State

teleport(shape, *, rng=None)[source]#
Return type

State

memory(shape, colors, *, rng=None)[source]#
Return type

State

memory_rooms(shape, layout, colors, num_beacons, num_exits, *, rng=None)[source]#
Return type

State

factory(name, **kwargs)[source]#
Return type

ResetFunction

gym_gridverse.envs.reward_functions module#

class RewardFunction(*args, **kwargs)[source]#

Bases: Protocol

Signature that all reward functions must follow

RewardReductionFunction#

Signature for a float reduction function

alias of Callable[[Iterator[float]], float]
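Any callable mapping `Iterator[float]` to `float` satisfies this alias; the builtins `sum`, `max`, and `min` qualify, as does a custom reduction. A plain-Python sketch (`mean_reduction` is a hypothetical example, not part of the library):

```python
from typing import Iterator

def mean_reduction(rewards: Iterator[float]) -> float:
    """A RewardReductionFunction: averages the rewards (0.0 if empty)."""
    values = list(rewards)
    return sum(values) / len(values) if values else 0.0

print(mean_reduction(iter([1.0, 2.0, 3.0])))  # 2.0
```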

class RewardFunctionRegistry(dict=None, /, **kwargs)[source]#

Bases: FunctionRegistry

get_protocol_parameters(signature)[source]#
Return type

List[Parameter]

check_signature(function)[source]#
reward_function_registry = {'reduce': <function reduce>, 'reduce_sum': <function reduce_sum>, 'overlap': <function overlap>, 'living_reward': <function living_reward>, 'reach_exit': <function reach_exit>, 'bump_moving_obstacle': <function bump_moving_obstacle>, 'proportional_to_distance': <function proportional_to_distance>, 'getting_closer': <function getting_closer>, 'getting_closer_shortest_path': <function getting_closer_shortest_path>, 'bump_into_wall': <function bump_into_wall>, 'actuate_door': <function actuate_door>, 'pickndrop': <function pickndrop>, 'reach_exit_memory': <function reach_exit_memory>}#

Reward function registry

reduce(state, action, next_state, *, reward_functions, reduction, rng=None)[source]#

reduction of multiple reward functions into a single reward value

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward_functions (Sequence[RewardFunction]) –

  • reduction (RewardReductionFunction) –

  • rng (Generator, optional) –

Returns

the reduction of the evaluated input reward functions

Return type

float

reduce_sum(state, action, next_state, *, reward_functions, rng=None)[source]#

utility reward function which sums other reward functions

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward_functions (Sequence[RewardFunction]) –

  • rng (Generator, optional) –

Returns

sum of the evaluated input reward functions

Return type

float
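A minimal sketch of such a summing reducer; the lambdas stand in for real RewardFunction instances and are purely illustrative:

```python
def reduce_sum_sketch(state, action, next_state, *, reward_functions, rng=None):
    # evaluates each reward function on the same transition and sums them
    return sum(f(state, action, next_state, rng=rng) for f in reward_functions)

# dummy reward functions (illustration only)
living = lambda s, a, ns, rng=None: -1.0
exit_bonus = lambda s, a, ns, rng=None: 5.0 if ns == 'exit' else 0.0

print(reduce_sum_sketch('s0', 'move', 'exit',
                        reward_functions=[living, exit_bonus]))  # 4.0
```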

overlap(state, action, next_state, *, object_type, reward_on=1.0, reward_off=0.0, rng=None)[source]#

reward for the agent occupying the same position as another object

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • object_type (Type[GridObject]) –

  • reward_on (float) – reward for when agent is on the object

  • reward_off (float) – reward for when agent is not on the object

  • rng (Generator, optional) –

Returns

one of the two input rewards

Return type

float

living_reward(state, action, next_state, *, reward=-1.0, rng=None)[source]#

a living reward which does not depend on states or actions

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward (float) – the constant living reward provided at every timestep

  • rng (Generator, optional) –

Returns

the input reward

Return type

float

reach_exit(state, action, next_state, *, reward_on=1.0, reward_off=0.0, rng=None)[source]#

reward for the Agent being on an Exit

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward_on (float) – reward for when agent is on exit

  • reward_off (float) – reward for when agent is not on exit

  • rng (Generator, optional) –

Returns

one of the two input rewards

Return type

float

bump_moving_obstacle(state, action, next_state, *, reward=-1.0, rng=None)[source]#

reward for the Agent bumping into a MovingObstacle

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward (float) – reward for when Agent bumps a MovingObstacle

  • rng (Generator, optional) –

Returns

the input reward or 0.0

Return type

float

proportional_to_distance(state, action, next_state, *, distance_function=<function Position.manhattan_distance>, object_type, reward_per_unit_distance=-1.0, rng=None)[source]#

reward proportional to distance to object

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • distance_function (DistanceFunction) –

  • object_type (Type[GridObject]) – type of unique object in grid

  • reward_per_unit_distance (float) – reward per unit distance

  • rng (Generator, optional) –

Returns

input reward times distance to object

Return type

float

getting_closer(state, action, next_state, *, distance_function=<function Position.manhattan_distance>, object_type, reward_closer=1.0, reward_further=-1.0, rng=None)[source]#

reward for getting closer or further to object

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • distance_function (DistanceFunction) –

  • object_type (Type[GridObject]) – type of unique object in grid

  • reward_closer (float) – reward for when agent gets closer to object

  • reward_further (float) – reward for when agent gets farther from object

  • rng (Generator, optional) –

Returns

one of the input rewards, or 0.0 if distance has not changed

Return type

float

dijkstra(layout, source_position)[source]#
Return type

ndarray

getting_closer_shortest_path(state, action, next_state, *, object_type, reward_closer=1.0, reward_further=-1.0, rng=None)[source]#

reward for getting closer or further to object, assuming normal navigation dynamics

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • object_type (Type[GridObject]) – type of unique object in grid

  • reward_closer (float) – reward for when agent gets closer to object

  • reward_further (float) – reward for when agent gets farther from object

  • rng (Generator, optional) –

Returns

one of the input rewards, or 0.0 if distance has not changed

Return type

float

bump_into_wall(state, action, next_state, *, reward=-1.0, rng=None)[source]#

Returns reward when bumping into a wall, otherwise 0

Bumping is tested by seeing whether the intended move would end up with the agent on a wall.

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward (float) – (optional) The reward to provide if bumping into a wall

  • rng (Generator, optional) –

actuate_door(state, action, next_state, *, reward_open=1.0, reward_close=-1.0, rng=None)[source]#

Returns reward_open when opening and reward_close when closing door.

Opening/closing is checked by making sure the actuate action is performed, and checking the status of the door in front of the agent.

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward_open (float) – (optional) The reward to provide if opening a door

  • reward_close (float) – (optional) The reward to provide if closing a door

  • rng (Generator, optional) –

pickndrop(state, action, next_state, *, object_type, reward_pick=1.0, reward_drop=-1.0, rng=None)[source]#

Returns reward_pick / reward_drop when an object is picked / dropped.

Picking/dropping is checked by the agent’s object, and not the action.

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward_pick (float) – (optional) The reward to provide when picking up the object

  • reward_drop (float) – (optional) The reward to provide when dropping the object

  • rng (Generator, optional) –

reach_exit_memory(state, action, next_state, *, reward_good=1.0, reward_bad=-1.0, rng=None)[source]#

reward for the Agent being on the good or the bad Exit

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • reward_good (float) – reward for when agent is on the good exit

  • reward_bad (float) – reward for when agent is on the bad exit

  • rng (Generator, optional) –

Returns

one of the two input rewards

Return type

float

factory(name, **kwargs)[source]#
Return type

RewardFunction

gym_gridverse.envs.terminating_functions module#

class TerminatingFunction(*args, **kwargs)[source]#

Bases: Protocol

Signature for functions to determine whether a transition is terminal

TerminatingReductionFunction#

Signature for a boolean reduction function

alias of Callable[[Iterator[bool]], bool]
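The builtins `any` and `all` already satisfy this signature (they correspond to reduce_any and reduce_all below), and custom reductions work too. A plain-Python sketch (`at_least_two` is a hypothetical example):

```python
from typing import Iterator

def at_least_two(flags: Iterator[bool]) -> bool:
    """A TerminatingReductionFunction: terminate when two or more flags hold."""
    return sum(flags) >= 2  # True counts as 1, False as 0

print(any([False, True, False]))                 # True
print(at_least_two(iter([True, True, False])))   # True
print(at_least_two(iter([True, False, False])))  # False
```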

class TerminatingFunctionRegistry(dict=None, /, **kwargs)[source]#

Bases: FunctionRegistry

get_protocol_parameters(signature)[source]#
Return type

List[Parameter]

check_signature(function)[source]#
terminating_function_registry = {'reduce': <function reduce>, 'reduce_any': <function reduce_any>, 'reduce_all': <function reduce_all>, 'overlap': <function overlap>, 'reach_exit': <function reach_exit>, 'bump_moving_obstacle': <function bump_moving_obstacle>, 'bump_into_wall': <function bump_into_wall>}#

Terminating function registry

reduce(state, action, next_state, *, terminating_functions, reduction, rng=None)[source]#

reduction of multiple terminating functions into a single boolean value

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • terminating_functions (Sequence[TerminatingFunction]) –

  • reduction (TerminatingReductionFunction) –

Returns

reduction operator over the input terminating functions

Return type

bool

reduce_any(state, action, next_state, *, terminating_functions, rng=None)[source]#

utility function that terminates when any of the input functions terminates

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • terminating_functions (Sequence[TerminatingFunction]) –

Returns

OR operator over the input terminating functions

Return type

bool

reduce_all(state, action, next_state, *, terminating_functions, rng=None)[source]#

utility function that terminates when all of the input functions terminate

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • terminating_functions (Sequence[TerminatingFunction]) –

Returns

AND operator over the input terminating functions

Return type

bool

overlap(state, action, next_state, *, object_type, rng=None)[source]#

terminating condition for agent occupying same position as an object

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

  • object_type (Type[GridObject]) –

Returns

True if next_state agent is on object of type object_type

Return type

bool

reach_exit(state, action, next_state, *, rng=None)[source]#

terminating condition for Agent reaching the Exit

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

Returns

True if next_state agent is on exit

Return type

bool

bump_moving_obstacle(state, action, next_state, *, rng=None)[source]#

terminating condition for Agent bumping a moving obstacle

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

Returns

True if next_state agent is on a MovingObstacle

Return type

bool

bump_into_wall(state, action, next_state, *, rng=None)[source]#

Terminating condition for Agent bumping into a wall

Tests whether the intended next agent position from state contains a Wall

Parameters
  • state (State) –

  • action (Action) –

  • next_state (State) –

Returns

True if next_state agent attempted to move onto a wall cell

Return type

bool

factory(name, **kwargs)[source]#
Return type

TerminatingFunction

gym_gridverse.envs.transition_functions module#

Functions to model dynamics

class TransitionFunction(*args, **kwargs)[source]#

Bases: Protocol

Signature that all transition functions must follow

class TransitionFunctionRegistry(dict=None, /, **kwargs)[source]#

Bases: FunctionRegistry

get_protocol_parameters(signature)[source]#
Return type

List[Parameter]

check_signature(function)[source]#
transition_function_registry = {'chain': <function chain>, 'move_agent': <function move_agent>, 'turn_agent': <function turn_agent>, 'pickndrop': <function pickndrop>, 'move_obstacles': <function move_obstacles>, 'actuate_door': <function actuate_door>, 'actuate_box': <function actuate_box>, 'teleport': <function teleport>}#

Transition function registry

chain(state, action, *, transition_functions, rng=None)[source]#

Run multiple transition functions in a row

Parameters
  • state (State) –

  • action (Action) –

  • transition_functions (Sequence[TransitionFunction]) – transition functions

  • rng (Generator, optional) –

Returns

None
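The chaining behaviour can be sketched with dict states and dummy in-place sub-functions (illustration only; real transition functions mutate State objects instead):

```python
def chain_sketch(state, action, *, transition_functions, rng=None):
    # applies each in-place transition function to the same state, in order
    for transition_function in transition_functions:
        transition_function(state, action, rng=rng)

# dummy in-place transition functions (illustration only)
def move(state, action, *, rng=None):
    state['position'] += 1

def tick(state, action, *, rng=None):
    state['time'] += 1

state = {'position': 0, 'time': 0}
chain_sketch(state, 'forward', transition_functions=[move, tick])
print(state)  # {'position': 1, 'time': 1}
```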

move_agent(state, action, *, rng=None)[source]#

Applies translation to agent (e.g. up/down/left/right)

Leaves the state unaffected if any other action was taken instead

Parameters
  • state (State) –

  • action (Action) –

Return type

None

turn_agent(state, action, *, rng=None)[source]#

Turns agent according to action (e.g. turn left/right)

Leaves the state unaffected if any other action was taken instead

Parameters
  • state (State) –

  • action (Action) –

Return type

None

pickndrop(state, action, *, rng=None)[source]#

Implements the effect of the pickup and drop action

Pickup applies to the item in front of the agent. There are multiple scenarios:

  • There is no (pick-up-able) item in front of the agent:
    • The agent is not holding any object -> no effect

    • The agent is holding an object:
      • Position in front of agent is a Floor -> drop current object

      • Position in front is not a Floor -> no effect

  • There is a (pick-up-able) item in front of the agent:
    • The agent is not holding any object -> pick up, leaving a Floor in its place

    • The agent is holding an object -> swap items

Parameters
  • state (State) –

  • action (Action) –

  • rng (Generator, optional) –

Return type

None

move_obstacles(state, action, *, rng=None)[source]#

Moves moving obstacles randomly

Randomly moves each MovingObstacle to a neighbouring Floor cell, if possible.

Parameters
  • state (State) – current state

  • action (Action) – action taken by agent (ignored)

Return type

None

actuate_door(state, action, *, rng=None)[source]#

Attempts to open the door in front of the agent

When not holding a key of the correct color:

open or closed -> open
locked -> locked

When holding the correct key:

any state -> open

Return type

None

actuate_box(state, action, *, rng=None)[source]#

Attempts to open the box in front of the agent, replacing the box with its content

Return type

None

teleport(state, action, *, rng=None)[source]#

Teleports the agent if positioned on the telepod

Return type

None

factory(name, **kwargs)[source]#
Return type

TransitionFunction

transition_with_copy(transition_function, state, action, *, rng=None)[source]#

Utility to perform a non-in-place version of a transition function.

NOTE: This is not a transition function (transition functions are in-place by definition).

Parameters
  • transition_function (TransitionFunction) –

  • state (State) –

  • action (Action) –

  • rng (Generator, optional) –

Return type

State
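The copy-then-mutate pattern can be sketched with `copy.deepcopy` and a dict state (the actual function operates on State objects; the dummy `move` function is purely illustrative):

```python
from copy import deepcopy

def transition_with_copy_sketch(transition_function, state, action, *, rng=None):
    # deep-copies the state, applies the in-place transition to the copy,
    # and returns the copy; the original state is left untouched
    next_state = deepcopy(state)
    transition_function(next_state, action, rng=rng)
    return next_state

def move(state, action, *, rng=None):  # dummy in-place transition function
    state['position'] += 1

state = {'position': 0}
next_state = transition_with_copy_sketch(move, state, 'forward')
print(state, next_state)  # {'position': 0} {'position': 1}
```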

gym_gridverse.envs.utils module#

get_next_position(position, orientation, action)[source]#

Returns the tentative next position according to action

NOTE: Assumes successful action and free unobstructed movement.

Parameters
  • position (Position) – current agent position

  • orientation (Orientation) – current agent orientation

  • action (Action) – action taken by agent

Returns

tentative next position

Return type

Position
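The update can be sketched with per-orientation deltas in (row, col) coordinates. The coordinate convention (rows grow downward) and the string action/orientation names are assumptions for illustration; the library's Action and Orientation enums may differ, and the real function also handles the other movement actions:

```python
# (row, col) delta for moving forward in each orientation (assumed convention)
FORWARD_DELTA = {
    'N': (-1, 0),
    'S': (1, 0),
    'E': (0, 1),
    'W': (0, -1),
}

def get_next_position_sketch(position, orientation, action):
    """Tentative next position, assuming successful unobstructed movement."""
    if action != 'MOVE_FORWARD':
        return position  # non-movement actions leave the position unchanged
    dr, dc = FORWARD_DELTA[orientation]
    return (position[0] + dr, position[1] + dc)

print(get_next_position_sketch((2, 2), 'N', 'MOVE_FORWARD'))  # (1, 2)
print(get_next_position_sketch((2, 2), 'E', 'TURN_LEFT'))     # (2, 2)
```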

gym_gridverse.envs.visibility_functions module#

class VisibilityFunction(*args, **kwargs)[source]#

Bases: Protocol

class VisibilityFunctionRegistry(dict=None, /, **kwargs)[source]#

Bases: FunctionRegistry

get_protocol_parameters(signature)[source]#
Return type

List[Parameter]

check_signature(function)[source]#
visibility_function_registry = {'fully_transparent': <function fully_transparent>, 'partially_occluded': <function partially_occluded>, 'raytracing': <function raytracing>, 'stochastic_raytracing': <function stochastic_raytracing>}#

Visibility function registry

fully_transparent(grid, position, *, rng=None)[source]#
Return type

ndarray

partially_occluded(grid, position, *, rng=None)[source]#
Return type

ndarray

raytracing(grid, position, *, absolute_counts=True, threshold=1, rng=None)[source]#
Return type

ndarray

stochastic_raytracing(grid, position, *, rng=None)[source]#
Return type

ndarray

factory(name, **kwargs)[source]#
Return type

VisibilityFunction

Module contents#