Using Environments#
In the following sections, we show simple random-agent control loops that use GV’s “outer” interface and the OpenAI Gym interface, respectively.
Using the GV “Outer” interface#
The “outer” interface will be explained in detail in the Design section of this tutorial. For now, it will suffice to know that it provides an alternative interface to interact with an environment.
This script is also available as `scripts/gv_control_loop_outer.py`.
#!/usr/bin/env python
import argparse
import itertools as itt
import random
import time
from typing import Dict
import numpy as np
from gym_gridverse.envs.yaml.factory import factory_env_from_yaml
from gym_gridverse.outer_env import OuterEnv
from gym_gridverse.representations.observation_representations import (
make_observation_representation,
)
from gym_gridverse.representations.state_representations import (
make_state_representation,
)
def make_env(path: str) -> OuterEnv:
    """Build a GV "outer" environment from a YAML file.

    The inner environment is loaded from ``path`` and wrapped in an
    ``OuterEnv`` together with the 'default' state and observation
    representations, each built from the inner environment's own space.

    Args:
        path: location of the environment YAML file.

    Returns:
        The ``OuterEnv`` wrapping the loaded inner environment.
    """
    inner = factory_env_from_yaml(path)

    # Built in the same order as they are listed: state first, then observation.
    representations = {
        'state_representation': make_state_representation(
            'default', inner.state_space
        ),
        'observation_representation': make_observation_representation(
            'default', inner.observation_space
        ),
    }
    return OuterEnv(inner, **representations)
def print_compact(data: Dict[str, np.ndarray]):
    """Print ``data`` with every array converted to a (nested) list.

    Lists print far more compactly than numpy arrays, which keeps the
    control-loop output readable.

    Args:
        data: mapping from names to numpy arrays.
    """
    as_lists = {}
    for name, array in data.items():
        as_lists[name] = array.tolist()
    print(as_lists)
def main(args):
    """Run a random agent on a GV "outer" environment, one episode after another.

    For every step the chosen action, the reward, the resulting state and
    observation, and the ``done`` flag are printed, then the loop sleeps for
    one frame. The episode loop never ends on its own; interrupt the process
    to stop it.

    Args:
        args: parsed command-line arguments. ``args.path`` is the environment
            YAML file and ``args.fps`` the number of frames per second.

    Raises:
        ValueError: if ``args.fps`` is not a strictly positive number.
    """
    # Reject zero, negative and NaN rates up front: otherwise `1 / fps` raises a
    # bare ZeroDivisionError, or `time.sleep` fails later with an unrelated
    # message, after the environment has already been built.
    if not args.fps > 0:
        raise ValueError(f'fps must be a positive number, got {args.fps}')

    env = make_env(args.path)
    spf = 1 / args.fps  # seconds per frame

    # No reset is needed here: every episode starts by resetting the environment.
    for ei in itt.count():
        print(f'# Episode {ei}')
        print()

        env.reset()
        print('state:')
        print_compact(env.state)
        print('observation:')
        print_compact(env.observation)
        time.sleep(spf)

        for ti in itt.count():
            print(f'episode: {ei}')
            print(f'time: {ti}')

            # Random agent: pick uniformly among the available actions.
            action = random.choice(env.action_space.actions)
            reward, done = env.step(action)

            print(f'action: {action}')
            print(f'reward: {reward}')
            print('state:')
            print_compact(env.state)
            print('observation:')
            print_compact(env.observation)
            print(f'done: {done}')
            print()
            time.sleep(spf)

            if done:
                break
if __name__ == "__main__":
    # Command-line entry point: a positional env YAML path plus an optional
    # frame rate that controls how fast the control loop prints.
    cli = argparse.ArgumentParser()
    cli.add_argument('path', help='env YAML file')
    cli.add_argument(
        '--fps',
        help='frames per second',
        default=1.0,
        type=float,
    )
    cli_args = cli.parse_args()
    main(cli_args)
Using the OpenAI Gym interface#
The OpenAI Gym interface is implemented by `GymEnvironment`. In addition to the fields defined by the gym interface itself, this class provides access to the `state_space` and `state` attributes.
This script is also available as `scripts/gv_control_loop_gym.py`.
#!/usr/bin/env python
import argparse
import itertools as itt
import time
from typing import Dict
import gym
import numpy as np
from gym_gridverse.envs.yaml.factory import factory_env_from_yaml
from gym_gridverse.gym import GymEnvironment
from gym_gridverse.outer_env import OuterEnv
from gym_gridverse.representations.observation_representations import (
make_observation_representation,
)
from gym_gridverse.representations.state_representations import (
make_state_representation,
)
def make_env(id_or_path: str) -> GymEnvironment:
    """Build a GV gym environment from a gym id or a YAML file.

    ``id_or_path`` is first tried as a gym id via ``gym.make``. If that raises
    ``gym.error.Error``, it is treated as the path of a GV YAML file instead,
    and the loaded environment is wrapped in an ``OuterEnv`` (with the
    'default' state and observation representations) and then in a
    ``GymEnvironment``.

    Args:
        id_or_path: a registered gym id, or the path of a GV YAML file.

    Returns:
        The resulting ``GymEnvironment``.

    Raises:
        ValueError: if ``gym.make`` succeeds but the environment it returns is
            not a ``GymEnvironment``.
    """
    print('Loading using gym.make')
    try:
        made = gym.make(id_or_path)
    except gym.error.Error:
        print(f'Environment with id {id_or_path} not found.')
        print('Loading using YAML')

        inner = factory_env_from_yaml(id_or_path)
        state_rep = make_state_representation('default', inner.state_space)
        observation_rep = make_observation_representation(
            'default', inner.observation_space
        )
        wrapped = OuterEnv(
            inner,
            state_representation=state_rep,
            observation_representation=observation_rep,
        )
        return GymEnvironment(wrapped)

    # Only environments obtained through gym.make need the type check.
    if isinstance(made, GymEnvironment):
        return made

    raise ValueError(
        f'gym id {id_or_path} is not associated with a GridVerse environment'
    )
def print_compact(data: Dict[str, np.ndarray]):
    """Print a dict of numpy arrays as a dict of plain (nested) lists.

    Converting to lists first gives a much more compact printout than the
    default numpy array formatting.

    Args:
        data: mapping from names to numpy arrays.
    """
    listified = dict((key, value.tolist()) for key, value in data.items())
    print(listified)
def main(args):
    """Run a random agent on a GV gym environment, one episode after another.

    Each episode resets the environment, renders it and prints the initial
    observation. Each step then samples an action from the action space,
    applies it, renders, and prints the action, reward, observation and
    ``done`` flag before sleeping for one frame. The episode loop never ends
    on its own; interrupt the process to stop it.

    Args:
        args: parsed command-line arguments. ``args.id_or_path`` is a gym id
            or a GV YAML file and ``args.fps`` the number of frames per second.

    Raises:
        ValueError: if ``args.fps`` is not a strictly positive number.
    """
    # Reject zero, negative and NaN rates up front: otherwise `1 / fps` raises a
    # bare ZeroDivisionError, or `time.sleep` fails later with an unrelated
    # message, after the environment has already been built.
    if not args.fps > 0:
        raise ValueError(f'fps must be a positive number, got {args.fps}')

    env = make_env(args.id_or_path)
    spf = 1 / args.fps  # seconds per frame

    # No reset is needed here: every episode starts by resetting the environment.
    for ei in itt.count():
        print(f'# Episode {ei}')
        print()

        observation = env.reset()
        env.render()
        print('observation:')
        print_compact(observation)
        print()
        time.sleep(spf)

        for ti in itt.count():
            print(f'episode: {ei}')
            print(f'time: {ti}')

            # Random agent: sample an action from the action space.
            action = env.action_space.sample()
            # The fourth element returned by step is not used here.
            observation, reward, done, _ = env.step(action)
            env.render()

            print(f'action: {action}')
            print(f'reward: {reward}')
            print('observation:')
            print_compact(observation)
            print(f'done: {done}')
            print()
            time.sleep(spf)

            if done:
                break
if __name__ == "__main__":
    # Command-line entry point: a positional gym id or GV YAML path plus an
    # optional frame rate that controls how fast the control loop runs.
    cli = argparse.ArgumentParser()
    cli.add_argument('id_or_path', help='Gym id or GV YAML file')
    cli.add_argument(
        '--fps',
        help='frames per second',
        default=1.0,
        type=float,
    )
    cli_args = cli.parse_args()
    main(cli_args)