Environments#
Pre-built Gymnasium environments for robot learning.
Overview#
GenesisLab provides pre-built environments that follow the Gymnasium API:
- Standard reset() and step() interface
- Vectorized environments for parallel training
- Compatible with popular RL libraries
- Easy to customize through configuration
Available Environments#
Quadruped Locomotion#
Flat Terrain:
- GenesisLab-Go2-Flat-v0: Unitree Go2 on flat ground
- GenesisLab-A1-Flat-v0: Unitree A1 on flat ground
- GenesisLab-B2-Flat-v0: Unitree B2 on flat ground
- GenesisLab-ANYmal-Flat-v0: ANYmal-D on flat ground
Rough Terrain:
- GenesisLab-Go2-Rough-v0: Unitree Go2 with terrain curriculum
- GenesisLab-A1-Rough-v0: Unitree A1 with terrain curriculum
- GenesisLab-ANYmal-Rough-v0: ANYmal-D with terrain curriculum
Humanoid Locomotion#
- GenesisLab-H1-Flat-v0: Unitree H1 humanoid
- GenesisLab-G1-Flat-v0: Unitree G1 humanoid
- GenesisLab-H1-Rough-v0: H1 with terrain curriculum
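All of these IDs land in the Gymnasium registry once genesislab.envs is imported (see Basic Usage below), so they can be enumerated at runtime:

import gymnasium as gym
import genesislab.envs  # importing registers the environments

# List every GenesisLab ID currently in the Gymnasium registry
genesislab_ids = sorted(env_id for env_id in gym.registry if env_id.startswith("GenesisLab-"))
print("\n".join(genesislab_ids))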
Basic Usage#
Creating an Environment#
import gymnasium as gym
import genesislab.envs # Register environments
# Create environment
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
# Get info
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")
# Reset
obs, info = env.reset()
# Step
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)
# Cleanup
env.close()
With Custom Configuration#
from genesislab.tasks.go2_flat import Go2FlatEnvCfg
# Modify configuration
cfg = Go2FlatEnvCfg()
cfg.scene.num_envs = 8192
cfg.scene.env_spacing = 5.0
cfg.rewards.forward_vel.weight = 2.0
# Create environment with custom config
env = gym.make("GenesisLab-Go2-Flat-v0", cfg=cfg)
With Visualization#
# Create environment with viewer
env = gym.make(
    "GenesisLab-Go2-Flat-v0",
    num_envs=1,      # Single env for easier viewing
    headless=False   # Enable viewer
)
obs, _ = env.reset()
for _ in range(1000):
    action = policy(obs)  # replace with your trained policy
    obs, reward, terminated, truncated, info = env.step(action)
Environment Interface#
Gymnasium API#
All GenesisLab environments implement the standard Gymnasium interface:
class GenesisLabEnv(gym.Env):
    """
    GenesisLab Gymnasium environment.

    Attributes:
        observation_space: Observation space definition
        action_space: Action space definition
        num_envs: Number of parallel environments
        scene: Underlying LabScene instance
    """

    def reset(
        self,
        seed: int | None = None,
        options: dict | None = None
    ) -> tuple[np.ndarray, dict]:
        """
        Reset environment.

        Args:
            seed: Random seed for reproducibility
            options: Additional reset options

        Returns:
            Tuple of:
            - observations: Numpy array [num_envs, obs_dim]
            - info: Dictionary with reset information
        """
        pass

    def step(
        self,
        action: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
        """
        Step environment.

        Args:
            action: Action array [num_envs, action_dim]

        Returns:
            Tuple of:
            - observations: [num_envs, obs_dim]
            - rewards: [num_envs]
            - terminated: [num_envs] boolean
            - truncated: [num_envs] boolean
            - info: Dictionary with step information
        """
        pass
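A quick sanity check of this batched contract on a live environment (a sketch; obs_dim and action_dim depend on the task):

env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=16)

obs, info = env.reset(seed=42)
assert obs.shape[0] == env.num_envs            # [num_envs, obs_dim]

action = env.action_space.sample()             # [num_envs, action_dim]
obs, rew, term, trunc, info = env.step(action)
assert rew.shape == (env.num_envs,)            # one reward per env
assert term.shape == trunc.shape == (env.num_envs,)

env.close()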
Vectorized Interface#
GenesisLab environments are natively vectorized:
# Create with many parallel environments
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
# All operations are batched
obs, _ = env.reset() # [4096, 48]
actions = policy(obs) # [4096, 12]
obs, rew, term, trunc, info = env.step(actions)
No need for gym.vector.VectorEnv wrappers!
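Because rewards and done flags come back as batched arrays, per-env bookkeeping stays vectorized; a short sketch accumulating returns across the whole batch:

import numpy as np

obs, _ = env.reset(seed=0)
returns = np.zeros(env.num_envs)

for _ in range(100):
    actions = env.action_space.sample()   # batched: [num_envs, action_dim]
    obs, rew, term, trunc, _ = env.step(actions)
    returns += rew                        # per-env running returns

print(f"Mean return across {env.num_envs} envs: {returns.mean():.2f}")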
Integration with RL Libraries#
Stable Baselines3#
import gymnasium as gym
from stable_baselines3 import PPO
import genesislab.envs  # Register environments
# Create environment
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
# Train
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    n_steps=24,  # Must divide episode length
    batch_size=4096,
    learning_rate=3e-4
)
model.learn(total_timesteps=10_000_000)
model.save("go2_policy")
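The saved policy can be reloaded and evaluated with standard Stable Baselines3 calls; deterministic=True disables exploration noise:

# Reload the trained policy and roll it out
model = PPO.load("go2_policy")
obs, _ = env.reset()
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)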
RSL RL#
import gymnasium as gym

from rsl_rl.runners import OnPolicyRunner
from genesislab.tasks.go2_flat import Go2FlatEnvCfg, Go2FlatTrainCfg

# Create configurations
env_cfg = Go2FlatEnvCfg()
train_cfg = Go2FlatTrainCfg()
# Create environment
env = gym.make("GenesisLab-Go2-Flat-v0", cfg=env_cfg)
# Create runner
runner = OnPolicyRunner(env, train_cfg, log_dir="logs/go2")
# Train
runner.learn(num_learning_iterations=1000)
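After training, the runner can hand back a plain callable for rollout or deployment; a short sketch (get_inference_policy is an rsl_rl OnPolicyRunner method; observation tensor conversion is assumed to be handled by the environment):

# Export an inference policy from the trained runner and roll it out
policy = runner.get_inference_policy(device="cpu")
obs, _ = env.reset()
actions = policy(obs)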
CleanRL#
import genesislab.envs
# GenesisLab environments work with CleanRL scripts
# Just pass the environment name
env_name = "GenesisLab-Go2-Flat-v0"
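For example, a CleanRL PPO script that accepts an environment ID flag could be launched like this (script name illustrative):

python ppo_continuous_action.py --env-id GenesisLab-Go2-Flat-v0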
Custom Environment Registration#
Registering a New Environment#
from gymnasium.envs.registration import register

from genesislab.envs import GenesisLabEnv
from genesislab.utils import configclass  # assumed import path for configclass

# Define task configuration
@configclass
class MyTaskCfg:
    # ... configuration
    pass
# Register environment
register(
    id="GenesisLab-MyTask-v0",
    entry_point="genesislab.envs:GenesisLabEnv",
    kwargs={
        "cfg": MyTaskCfg(),
    }
)
# Use registered environment
env = gym.make("GenesisLab-MyTask-v0")
Environment Factory#
from genesislab.envs import make_env
# Create environment using factory
env = make_env(
    task="go2_flat",
    num_envs=4096,
    headless=True,
    cfg_overrides={
        "scene.env_spacing": 5.0,
        "rewards.forward_vel.weight": 2.0
    }
)
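The dotted keys address nested attributes of the config object. If you need the same override pattern outside the factory, a minimal sketch (apply_overrides is an illustrative helper, not part of the GenesisLab API):

from genesislab.tasks.go2_flat import Go2FlatEnvCfg

def apply_overrides(cfg, overrides: dict):
    # Walk dotted paths like "scene.env_spacing" and set the leaf attribute
    for path, value in overrides.items():
        *parents, leaf = path.split(".")
        node = cfg
        for name in parents:
            node = getattr(node, name)
        setattr(node, leaf, value)

cfg = Go2FlatEnvCfg()
apply_overrides(cfg, {"scene.env_spacing": 5.0, "rewards.forward_vel.weight": 2.0})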
Environment Wrappers#
Recording Wrapper#
from genesislab.envs.wrappers import RecordVideoWrapper
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=1, headless=False)
env = RecordVideoWrapper(env, video_folder="videos/", episode_trigger=lambda x: x % 10 == 0)
# Videos are automatically saved every 10 episodes
Observation Normalization#
from genesislab.envs.wrappers import NormalizeObservationWrapper
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
env = NormalizeObservationWrapper(env, clip_obs=10.0)
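Conceptually, such a wrapper standardizes each observation with running statistics and clips the result to [-clip_obs, clip_obs]; a sketch of the per-step computation (assumed behavior; eps is a small constant for numerical stability):

import numpy as np

def normalize(obs, mean, var, clip_obs, eps=1e-8):
    # Standardize with running mean/variance, then clip extreme values
    return np.clip((obs - mean) / np.sqrt(var + eps), -clip_obs, clip_obs)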
Reward Scaling#
from genesislab.envs.wrappers import RewardScalingWrapper
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
env = RewardScalingWrapper(env, scale=0.1)
Environment Information#
Accessing Scene and Managers#
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
# Access underlying scene
scene = env.unwrapped.scene
# Access managers
obs_manager = scene.observation_manager
reward_manager = scene.reward_manager
action_manager = scene.action_manager
# Get term information
print("Observation terms:", list(obs_manager.terms.keys()))
print("Reward terms:", list(reward_manager.terms.keys()))
Episode Statistics#
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
obs, _ = env.reset()
episode_rewards = []
episode_lengths = []
for _ in range(10000):
    action = policy(obs)
    obs, reward, terminated, truncated, info = env.step(action)

    # Collect episode statistics
    if "episode" in info:
        episode_rewards.append(info["episode"]["r"])
        episode_lengths.append(info["episode"]["l"])
print(f"Mean reward: {np.mean(episode_rewards):.2f}")
print(f"Mean length: {np.mean(episode_lengths):.0f}")
Performance Tips#
Optimal Number of Environments#
# Too few: Underutilizes GPU
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=256) # ❌
# Good: Balances speed and memory
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096) # ✅
# Very large: May run out of memory
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=16384) # ⚠️
Headless Mode#
# Always use headless=True for training
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096, headless=True) # ✅
# Only visualize for debugging with few envs
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=1, headless=False) # For debugging
Next Steps#
- See the creating tasks tutorial
- Check the training tutorial
- Review the task configurations