Improve ingredient documentation.

HumanCompatibleAI · Jul 7, 2023 · f47d6e7 · f47d6e7
1 parent 80a4ca5
commit f47d6e7
Show file tree

Hide file tree

Showing 11 changed files with 44 additions and 14 deletions.
diff --git a/src/imitation/scripts/ingredients/__init__.py b/src/imitation/scripts/ingredients/__init__.py
@@ -1 +1 @@
-"""Ingredients for scripts."""
+"""Ingredients for Sacred experiments."""
diff --git a/src/imitation/scripts/ingredients/bc.py b/src/imitation/scripts/ingredients/bc.py
@@ -1,4 +1,4 @@
-"""Ingredients for training a BC policy."""
+"""This ingredient provides a BC algorithm instance, either loaded from disk or constructed from scratch."""
 import warnings
 from typing import Optional, Sequence
 

diff --git a/src/imitation/scripts/ingredients/demonstrations.py b/src/imitation/scripts/ingredients/demonstrations.py
@@ -1,4 +1,8 @@
-"""Ingredient for scripts learning from demonstrations."""
+"""This ingredient provides (expert) demonstrations to learn from.
+
+The demonstrations are loaded from disk, loaded from the HuggingFace Dataset Hub, or
+sampled from the expert policy provided by the expert ingredient.
+"""
 
 import logging
 import pathlib
@@ -25,11 +29,15 @@
 
 @demonstrations_ingredient.config
 def config():
+ # Source of the rollouts: "local", or "{algo}-huggingface" to load them from the HuggingFace Dataset Hub.
  rollout_type = "local"
- # path to file containing rollouts. If rollout_path is None
- # and rollout_type is local, they are sampled from the expert.
+
+ # Path to the file containing rollouts. If None and rollout_type is "local", they are sampled from the expert policy.
  rollout_path = None
- n_expert_demos = None # Num demos used or sampled. None loads every demo possible.
+
+ # Num demos used or sampled. None loads every demo possible.
+ n_expert_demos = None
+
  locals() # quieten flake8
 
 

diff --git a/src/imitation/scripts/ingredients/environment.py b/src/imitation/scripts/ingredients/environment.py
@@ -1,4 +1,4 @@
-"""Environment Ingredient for sacred experiments."""
+"""This ingredient provides a vectorized gym environment."""
 import contextlib
 from typing import Any, Generator, Mapping
 

diff --git a/src/imitation/scripts/ingredients/expert.py b/src/imitation/scripts/ingredients/expert.py
@@ -1,4 +1,16 @@
-"""Common configuration elements for loading of expert policies."""
+"""This ingredient provides an expert policy.
+
+The expert policy is loaded from disk, loaded from the HuggingFace Model Hub, or is
+a test policy (e.g., random or zero).
+The supported policy types are:
+
+- `ppo` and `sac`: A policy trained with SB3. Needs a `path` in the `loader_kwargs`.
+- `<algo>-huggingface` (algo can be `ppo` or `sac`): A policy trained with SB3 and uploaded to the HuggingFace Model
+ Hub. Will load the model from the repo `<organization>/<algo>-<env_name>`.
+ You can set the organization with the `organization` key in `loader_kwargs`. The default is `HumanCompatibleAI`.
+- `random`: A policy that takes random actions.
+- `zero`: A policy that always outputs an action of all zeros.
+"""
 import sacred
 
 from imitation.policies import serialize

diff --git a/src/imitation/scripts/ingredients/logging.py b/src/imitation/scripts/ingredients/logging.py
@@ -1,4 +1,8 @@
-"""Logging ingredient for scripts."""
+"""This ingredient provides a number of logging utilities.
+
+It is responsible for logging to WandB, TensorBoard, and stdout.
+It also creates a symlink to the Sacred logging directory inside the log directory.
+"""
 
 import logging
 import pathlib

diff --git a/src/imitation/scripts/ingredients/policy.py b/src/imitation/scripts/ingredients/policy.py
@@ -1,4 +1,4 @@
-"""Ingredient implementation for a SB3 policy."""
+"""This ingredient provides a newly constructed stable-baselines3 policy."""
 
 import logging
 from typing import Any, Mapping, Type

diff --git a/src/imitation/scripts/ingredients/policy_evaluation.py b/src/imitation/scripts/ingredients/policy_evaluation.py
@@ -1,4 +1,7 @@
-"""Sacred ingredient for evaluating a policy on a VecEnv."""
+"""This ingredient evaluates a learned policy.
+
+It applies the required wrappers, collects rollouts, and computes statistics over those rollouts.
+"""
 
 from typing import Mapping, Union
 

diff --git a/src/imitation/scripts/ingredients/reward.py b/src/imitation/scripts/ingredients/reward.py
@@ -1,4 +1,4 @@
-"""Common configuration elements for reward network training."""
+"""This ingredient provides a reward network."""
 
 import logging
 import typing

diff --git a/src/imitation/scripts/ingredients/rl.py b/src/imitation/scripts/ingredients/rl.py
@@ -1,4 +1,7 @@
-"""Common configuration elements for reinforcement learning."""
+"""This ingredient provides a reinforcement learning algorithm from stable-baselines3.
+
+The algorithm instance is either freshly constructed or loaded from a file.
+"""
 
 import logging
 import warnings

diff --git a/src/imitation/scripts/ingredients/wb.py b/src/imitation/scripts/ingredients/wb.py
@@ -1,4 +1,4 @@
-"""Weights & Biases configuration elements for scripts."""
+"""This ingredient provides Weights & Biases logging."""
 
 import logging
 from typing import Any, Mapping, Optional