Environments#

Pre-built Gymnasium environments for robot learning.

Overview#

GenesisLab provides pre-built environments that follow the Gymnasium API:

  • Standard reset() and step() interface

  • Vectorized environments for parallel training

  • Compatible with popular RL libraries

  • Easy to customize through configuration

Available Environments#

Quadruped Locomotion#

Flat Terrain:

  • GenesisLab-Go2-Flat-v0: Unitree Go2 on flat ground

  • GenesisLab-A1-Flat-v0: Unitree A1 on flat ground

  • GenesisLab-B2-Flat-v0: Unitree B2 on flat ground

  • GenesisLab-ANYmal-Flat-v0: ANYmal-D on flat ground

Rough Terrain:

  • GenesisLab-Go2-Rough-v0: Unitree Go2 with terrain curriculum

  • GenesisLab-A1-Rough-v0: Unitree A1 with terrain curriculum

  • GenesisLab-ANYmal-Rough-v0: ANYmal-D with terrain curriculum

Humanoid Locomotion#

  • GenesisLab-H1-Flat-v0: Unitree H1 humanoid

  • GenesisLab-G1-Flat-v0: Unitree G1 humanoid

  • GenesisLab-H1-Rough-v0: H1 with terrain curriculum

Basic Usage#

Creating an Environment#

import gymnasium as gym
import genesislab.envs  # Register environments

# Create environment
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)

# Get info
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

# Reset
obs, info = env.reset()

# Step
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)

# Cleanup
env.close()

With Custom Configuration#

from genesislab.tasks.go2_flat import Go2FlatEnvCfg

# Modify configuration
cfg = Go2FlatEnvCfg()
cfg.scene.num_envs = 8192
cfg.scene.env_spacing = 5.0
cfg.rewards.forward_vel.weight = 2.0

# Create environment with custom config
env = gym.make("GenesisLab-Go2-Flat-v0", cfg=cfg)

With Visualization#

# Create environment with viewer
env = gym.make(
    "GenesisLab-Go2-Flat-v0",
    num_envs=1,  # Single env for easier viewing
    headless=False  # Enable viewer
)

obs, _ = env.reset()

for _ in range(1000):
    action = policy(obs)  # `policy` is your trained policy or controller function
    obs, reward, terminated, truncated, info = env.step(action)

Environment Interface#

Gymnasium API#

All GenesisLab environments implement the standard Gymnasium interface:

class GenesisLabEnv(gym.Env):
    """
    GenesisLab Gymnasium environment.
    
    Attributes:
        observation_space: Observation space definition
        action_space: Action space definition
        num_envs: Number of parallel environments
        scene: Underlying LabScene instance
    """
    
    def reset(
        self,
        seed: int | None = None,
        options: dict | None = None
    ) -> tuple[np.ndarray, dict]:
        """
        Reset environment.
        
        Args:
            seed: Random seed for reproducibility
            options: Additional reset options
            
        Returns:
            Tuple of:
            - observations: NumPy array of shape [num_envs, obs_dim]
            - info: Dictionary with reset information
        """
        pass
    
    def step(
        self,
        action: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
        """
        Step environment.
        
        Args:
            action: Action array [num_envs, action_dim]
            
        Returns:
            Tuple of:
            - observations: [num_envs, obs_dim]
            - rewards: [num_envs]
            - terminated: [num_envs] boolean
            - truncated: [num_envs] boolean
            - info: Dictionary with step information
        """
        pass

Vectorized Interface#

GenesisLab environments are natively vectorized:

# Create with many parallel environments
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)

# All operations are batched
obs, _ = env.reset()  # [4096, 48]
actions = policy(obs)  # [4096, 12]
obs, rew, term, trunc, info = env.step(actions)

Because observations, rewards, and termination flags are already batched across all parallel environments, there is no need to wrap GenesisLab environments in gym.vector.VectorEnv.

Integration with RL Libraries#

Stable Baselines3#

from stable_baselines3 import PPO
import genesislab.envs

# Create environment
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)

# Train
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    n_steps=24,  # Must divide episode length
    batch_size=4096,
    learning_rate=3e-4
)

model.learn(total_timesteps=10_000_000)
model.save("go2_policy")

RSL RL#

from rsl_rl.runners import OnPolicyRunner
from genesislab.tasks.go2_flat import Go2FlatEnvCfg, Go2FlatTrainCfg

# Create environment
env_cfg = Go2FlatEnvCfg()
train_cfg = Go2FlatTrainCfg()

# Create environment
env = gym.make("GenesisLab-Go2-Flat-v0", cfg=env_cfg)

# Create runner
runner = OnPolicyRunner(env, train_cfg, log_dir="logs/go2")

# Train
runner.learn(num_learning_iterations=1000)

CleanRL#

import genesislab.envs

# GenesisLab environments work with CleanRL training scripts out of the box:
# simply pass the registered environment name shown below to the script.
env_name = "GenesisLab-Go2-Flat-v0"

Custom Environment Registration#

Registering a New Environment#

from gymnasium.envs.registration import register
from genesislab.envs import GenesisLabEnv

# Define task configuration
# NOTE: the ``configclass`` decorator must be imported from genesislab's
# configuration utilities (module path depends on your installation).
@configclass
class MyTaskCfg:
    # ... configuration
    pass

# Register environment
register(
    id="GenesisLab-MyTask-v0",
    entry_point="genesislab.envs:GenesisLabEnv",
    kwargs={
        "cfg": MyTaskCfg(),
    }
)

# Use registered environment
env = gym.make("GenesisLab-MyTask-v0")

Environment Factory#

from genesislab.envs import make_env

# Create environment using factory
env = make_env(
    task="go2_flat",
    num_envs=4096,
    headless=True,
    cfg_overrides={
        "scene.env_spacing": 5.0,
        "rewards.forward_vel.weight": 2.0
    }
)

Environment Wrappers#

Recording Wrapper#

from genesislab.envs.wrappers import RecordVideoWrapper

env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=1, headless=False)
env = RecordVideoWrapper(env, video_folder="videos/", episode_trigger=lambda x: x % 10 == 0)

# Videos are automatically saved every 10 episodes

Observation Normalization#

from genesislab.envs.wrappers import NormalizeObservationWrapper

env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
env = NormalizeObservationWrapper(env, clip_obs=10.0)

Reward Scaling#

from genesislab.envs.wrappers import RewardScalingWrapper

env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
env = RewardScalingWrapper(env, scale=0.1)

Environment Information#

Accessing Scene and Managers#

env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)

# Access underlying scene
scene = env.unwrapped.scene

# Access managers
obs_manager = scene.observation_manager
reward_manager = scene.reward_manager
action_manager = scene.action_manager

# Get term information
print("Observation terms:", list(obs_manager.terms.keys()))
print("Reward terms:", list(reward_manager.terms.keys()))

Episode Statistics#

env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)
obs, _ = env.reset()

episode_rewards = []
episode_lengths = []

for _ in range(10000):
    action = policy(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    
    # Collect episode statistics
    if "episode" in info:
        episode_rewards.append(info["episode"]["r"])
        episode_lengths.append(info["episode"]["l"])

print(f"Mean reward: {np.mean(episode_rewards):.2f}")
print(f"Mean length: {np.mean(episode_lengths):.0f}")

Performance Tips#

Optimal Number of Environments#

# Too few: Underutilizes GPU
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=256)  # ❌

# Good: Balances speed and memory
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096)  # ✅

# Very large: May run out of memory
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=16384)  # ⚠️

Headless Mode#

# Always use headless=True for training
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=4096, headless=True)  # ✅

# Only visualize for debugging with few envs
env = gym.make("GenesisLab-Go2-Flat-v0", num_envs=1, headless=False)  # For debugging

Next Steps#