Using Environments#

In the following sections, we show simple random-agent control loops which use GV’s “outer” interface and the OpenAI Gym interface, respectively.

Using the GV “Outer” interface#

The “outer” interface will be explained in detail in the Design section of this tutorial. For now, it suffices to know that it provides an alternative way of interacting with an environment.
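In particular, whereas a Gym environment returns the observation from step(), the “outer” interface returns only the reward and done flag, and exposes the current state and observation as dictionaries of numpy arrays. A minimal interaction sketch (assuming env was constructed as in the make_env() function of the script below):

import random

env.reset()
action = random.choice(env.action_space.actions)
reward, done = env.step(action)  # step() returns only (reward, done)
observation = env.observation    # dict of numpy arrays
state = env.state                # dict of numpy arrays (available because a state representation was provided)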

This script is also available as scripts/gv_control_loop_outer.py.
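For example, given a GV environment YAML configuration (replace the placeholder path below with your own file), the script can be run as:

python scripts/gv_control_loop_outer.py path/to/env.yaml --fps 2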

#!/usr/bin/env python
import argparse
import itertools as itt
import random
import time
from typing import Dict

import numpy as np

from gym_gridverse.envs.yaml.factory import factory_env_from_yaml
from gym_gridverse.outer_env import OuterEnv
from gym_gridverse.representations.observation_representations import (
    make_observation_representation,
)
from gym_gridverse.representations.state_representations import (
    make_state_representation,
)


def make_env(path: str) -> OuterEnv:
    """Makes a GV "outer" environment."""
    inner_env = factory_env_from_yaml(path)
    state_representation = make_state_representation(
        'default',
        inner_env.state_space,
    )
    observation_representation = make_observation_representation(
        'default',
        inner_env.observation_space,
    )
    return OuterEnv(
        inner_env,
        state_representation=state_representation,
        observation_representation=observation_representation,
    )


def print_compact(data: Dict[str, np.ndarray]):
    """Converts numpy arrays into lists before printing, for more compact output."""
    compact_data = {k: v.tolist() for k, v in data.items()}
    print(compact_data)


def main(args):
    env = make_env(args.path)
    env.reset()

    spf = 1 / args.fps

    for ei in itt.count():
        print(f'# Episode {ei}')
        print()

        env.reset()
        print('state:')
        print_compact(env.state)
        print('observation:')
        print_compact(env.observation)
        time.sleep(spf)

        for ti in itt.count():
            print(f'episode: {ei}')
            print(f'time: {ti}')

            action = random.choice(env.action_space.actions)
            reward, done = env.step(action)

            print(f'action: {action}')
            print(f'reward: {reward}')
            print('state:')
            print_compact(env.state)
            print('observation:')
            print_compact(env.observation)
            print(f'done: {done}')
            print()

            time.sleep(spf)

            if done:
                break


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='env YAML file')
    parser.add_argument(
        '--fps', type=float, default=1.0, help='frames per second'
    )
    main(parser.parse_args())

Using the OpenAI Gym interface#

The OpenAI Gym interface is implemented by GymEnvironment. In addition to the fields defined by the Gym interface itself, this class also provides access to the state_space and state attributes.
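For instance (a sketch, assuming env is a GymEnvironment as constructed by the make_env() function in the script below):

observation = env.reset()  # standard Gym API
print(env.state_space)     # GV-specific attribute
print(env.state)           # GV-specific attribute: the current state, a dict of numpy arrays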

This script is also available as scripts/gv_control_loop_gym.py.
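The script accepts either a registered Gym id or a path to a GV YAML file; for example (the id below is illustrative, any registered GridVerse id works):

python scripts/gv_control_loop_gym.py GV-Empty-8x8-v0
python scripts/gv_control_loop_gym.py path/to/env.yaml --fps 2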

#!/usr/bin/env python
import argparse
import itertools as itt
import time
from typing import Dict

import gym
import numpy as np

from gym_gridverse.envs.yaml.factory import factory_env_from_yaml
from gym_gridverse.gym import GymEnvironment
from gym_gridverse.outer_env import OuterEnv
from gym_gridverse.representations.observation_representations import (
    make_observation_representation,
)
from gym_gridverse.representations.state_representations import (
    make_state_representation,
)


def make_env(id_or_path: str) -> GymEnvironment:
    """Makes a GV gym environment."""
    try:
        print('Loading using gym.make')
        env = gym.make(id_or_path)

    except gym.error.Error:
        print(f'Environment with id {id_or_path} not found.')
        print('Loading using YAML')
        inner_env = factory_env_from_yaml(id_or_path)
        state_representation = make_state_representation(
            'default',
            inner_env.state_space,
        )
        observation_representation = make_observation_representation(
            'default',
            inner_env.observation_space,
        )
        outer_env = OuterEnv(
            inner_env,
            state_representation=state_representation,
            observation_representation=observation_representation,
        )
        env = GymEnvironment(outer_env)

    else:
        if not isinstance(env, GymEnvironment):
            raise ValueError(
                f'gym id {id_or_path} is not associated with a GridVerse environment'
            )

    return env


def print_compact(data: Dict[str, np.ndarray]):
    """Converts numpy arrays into lists before printing, for more compact output."""
    compact_data = {k: v.tolist() for k, v in data.items()}
    print(compact_data)


def main(args):
    env = make_env(args.id_or_path)
    env.reset()

    spf = 1 / args.fps

    for ei in itt.count():
        print(f'# Episode {ei}')
        print()

        observation = env.reset()
        env.render()

        print('observation:')
        print_compact(observation)
        print()

        time.sleep(spf)

        for ti in itt.count():
            print(f'episode: {ei}')
            print(f'time: {ti}')

            action = env.action_space.sample()
            observation, reward, done, _ = env.step(action)
            env.render()

            print(f'action: {action}')
            print(f'reward: {reward}')
            print('observation:')
            print_compact(observation)
            print(f'done: {done}')
            print()

            time.sleep(spf)

            if done:
                break


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('id_or_path', help='Gym id or GV YAML file')
    parser.add_argument(
        '--fps', type=float, default=1.0, help='frames per second'
    )
    main(parser.parse_args())