Include notebook tutorials in documentation #565

Merged: 17 commits, Sep 26, 2022
10 changes: 7 additions & 3 deletions .circleci/config.yml
@@ -235,14 +235,18 @@ jobs:
name: codespell
command: codespell -I .codespell.skip --skip='*.pyc,tests/testdata/*,*.ipynb,*.csv' ${SRC_FILES}

docs:
doctest:
executor: static-analysis-medium

steps:
- dependencies-linux
- run:
name: sphinx
command: pushd docs/ && make clean && make doctest && make html && popd
environment:
# Note: we don't want to execute the example notebooks in this step since
# this happens in a separate readthedocs job anyway.
NB_EXECUTION_MODE: "off"
command: pushd docs/ && make clean && make doctest && popd

type:
executor: static-analysis-medium
@@ -351,7 +355,7 @@ workflows:
- lint:
context:
- docker-hub-creds
- docs:
- doctest:
context:
- docker-hub-creds
- type:
2 changes: 1 addition & 1 deletion README.md
@@ -90,7 +90,7 @@ See [examples/quickstart.py](examples/quickstart.py) for an example script that

### Density reward baseline

We also implement a density-based reward baseline. You can find an [example notebook here](examples/7_train_density.ipynb).
We also implement a density-based reward baseline. You can find an [example notebook here](docs/tutorials/7_train_density.ipynb).

# Citations (BibTeX)

2 changes: 1 addition & 1 deletion docs/algorithms/airl.rst
@@ -17,7 +17,7 @@ Notes
Example
=======

Detailed example notebook: `4_train_airl.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/4_train_airl.ipynb>`_
Detailed example notebook: :doc:`../tutorials/4_train_airl`

.. testcode::

2 changes: 1 addition & 1 deletion docs/algorithms/bc.rst
@@ -15,7 +15,7 @@ approaches to learning from demonstrations).
Example
=======

Detailed example notebook: `1_train_bc.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/1_train_bc.ipynb>`_
Detailed example notebook: :doc:`../tutorials/1_train_bc`

.. testcode::

2 changes: 1 addition & 1 deletion docs/algorithms/dagger.rst
@@ -19,7 +19,7 @@ Notes
Example
=======

Detailed example notebook: `2_train_dagger.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/2_train_dagger.ipynb>`_
Detailed example notebook: :doc:`../tutorials/2_train_dagger`

.. testcode::

2 changes: 1 addition & 1 deletion docs/algorithms/density.rst
@@ -5,7 +5,7 @@ Density-based reward modeling
Example
=======

Detailed example notebook: `7_train_density.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/7_train_density.ipynb>`_
Detailed example notebook: :doc:`../tutorials/7_train_density`

.. testcode::

2 changes: 1 addition & 1 deletion docs/algorithms/gail.rst
@@ -15,7 +15,7 @@ Notes
Example
=======

Detailed example notebook: `3_train_gail.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/3_train_gail.ipynb>`_
Detailed example notebook: :doc:`../tutorials/3_train_gail`

.. testcode::

2 changes: 1 addition & 1 deletion docs/algorithms/mce_irl.rst
@@ -7,7 +7,7 @@ Implements `Modeling Interaction via the Principle of Maximum Causal Entropy <ht
Example
=======

Detailed example notebook: `6_train_mce.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/6_train_mce.ipynb>`_
Detailed example notebook: :doc:`../tutorials/6_train_mce`

.. testcode::

2 changes: 1 addition & 1 deletion docs/algorithms/preference_comparisons.rst
@@ -18,7 +18,7 @@ Notes
Example
=======

Detailed example notebook: `5_train_preference_comparisons.ipynb <https://github.com/HumanCompatibleAI/imitation/blob/master/examples/5_train_preference_comparisons.ipynb>`_
Detailed example notebook: :doc:`../tutorials/5_train_preference_comparisons`

.. testcode::

8 changes: 7 additions & 1 deletion docs/conf.py
@@ -18,7 +18,7 @@


# -- Project information -----------------------------------------------------

import os
from importlib import metadata

project = "imitation"
@@ -44,12 +44,18 @@
"sphinx_copybutton",
"sphinx_github_changelog",
"sphinx.ext.doctest",
"myst_nb",
]

napoleon_google_docstring = True
napoleon_numpy_docstring = False
autosummary_generate = True

nb_execution_mode = os.getenv("NB_EXECUTION_MODE", "cache")
nb_execution_timeout = 120
nb_merge_streams = True
nb_output_stderr = "remove"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

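
Taken together with the CircleCI change above: the `doctest` job sets `NB_EXECUTION_MODE=off`, while `conf.py` falls back to myst-nb's `cache` execution mode when the variable is unset (readthedocs executes the notebooks itself). A hedged sketch of mirroring that CI job locally, using only the make targets the job already invokes:

```python
import os
import subprocess

# Mirror the CircleCI "doctest" job locally: build the doctests with notebook
# execution disabled. conf.py reads NB_EXECUTION_MODE and defaults to
# myst-nb's "cache" mode when it is unset.
env = dict(os.environ, NB_EXECUTION_MODE="off")
for target in ("clean", "doctest"):
    subprocess.run(["make", target], cwd="docs", check=True, env=env)
```
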
13 changes: 13 additions & 0 deletions docs/index.rst
@@ -51,6 +51,19 @@ Main Features
algorithms/mce_irl
algorithms/preference_comparisons

.. toctree::
:maxdepth: 2
:caption: Tutorials
:hidden:

tutorials/1_train_bc
tutorials/2_train_dagger
tutorials/3_train_gail
tutorials/4_train_airl
tutorials/5_train_preference_comparisons
tutorials/5a_train_preference_comparisons_with_cnn
tutorials/6_train_mce
tutorials/7_train_density

API Reference
~~~~~~~~~~~~~
docs/tutorials/1_train_bc.ipynb
@@ -4,6 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/1_train_bc.ipynb)\n",
"# Train an Agent using Behavior Cloning\n",
"\n",
"Behavior cloning is the most naive approach to imitation learning. \n",
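
As a rough sketch of what the behavior cloning tutorial boils down to (supervised learning on expert observation-action pairs), here is a minimal, hedged example. The random demonstrations are placeholders for real expert data, and the constructor arguments are assumptions about the imitation API around this release; newer versions may also require an `rng` argument.

```python
import gym
import numpy as np
from imitation.algorithms import bc
from imitation.data import types

# Minimal sketch (not the tutorial itself): behavior cloning is supervised
# learning on (observation, action) pairs. The random "demonstrations" below
# are placeholders; a real run would use expert rollouts instead.
rng = np.random.default_rng(0)
env = gym.make("CartPole-v1")
n = 64
demonstrations = types.Transitions(
    obs=rng.normal(size=(n, 4)).astype(np.float32),
    acts=rng.integers(0, 2, size=n),
    infos=np.array([{}] * n),
    next_obs=rng.normal(size=(n, 4)).astype(np.float32),
    dones=np.zeros(n, dtype=bool),
)
bc_trainer = bc.BC(
    observation_space=env.observation_space,
    action_space=env.action_space,
    demonstrations=demonstrations,
)
bc_trainer.train(n_epochs=1)
```
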
docs/tutorials/2_train_dagger.ipynb
@@ -4,6 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/2_train_dagger.ipynb)\n",
"# Train an Agent using the DAgger Algorithm\n",
"\n",
"The DAgger algorithm is an extension of behavior cloning. \n",
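
For intuition, a toy sketch of the DAgger loop itself rather than imitation's `SimpleDAggerTrainer`: each round rolls out the current learner, asks the expert to label the states the learner actually visited, aggregates those labels, and retrains. Everything below is a made-up stand-in, not the tutorial's environment or API.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression


def expert_action(states):
    # Stand-in "expert": act based on the sign of the first state feature.
    return (states[:, 0] > 0.0).astype(int)


rng = np.random.default_rng(0)
all_states, all_actions = [], []
learner = LogisticRegression()
visited = rng.normal(size=(64, 2))  # round 0: states reached by an initial policy
for _ in range(4):
    all_states.append(visited)
    all_actions.append(expert_action(visited))  # expert relabels visited states
    learner.fit(np.vstack(all_states), np.concatenate(all_actions))
    # Next round: a toy stand-in for states visited while acting with the learner.
    probe = rng.normal(size=(64, 2))
    visited = probe + 0.2 * learner.predict(probe).reshape(-1, 1)
```
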
docs/tutorials/3_train_gail.ipynb
@@ -4,6 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/3_train_gail.ipynb)\n",
"# Train an Agent using Generative Adversarial Imitation Learning\n",
"\n",
"The idea of generative adversarial imitation learning is to train a discriminator network to distinguish between expert trajectories and learner trajectories.\n",
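
Schematically, that discriminator is a binary classifier over state-action pairs, and the learner's reward is derived from its output. A hedged torch sketch of the idea (placeholder shapes and features, not the library's `GAIL` or `RewardNet` interface):

```python
import torch
import torch.nn as nn

# Schematic GAIL discriminator step: classify expert vs. learner (s, a) pairs.
obs_dim, act_dim = 4, 2
discriminator = nn.Sequential(
    nn.Linear(obs_dim + act_dim, 64), nn.ReLU(), nn.Linear(64, 1)
)
optimizer = torch.optim.Adam(discriminator.parameters(), lr=1e-3)

expert_batch = torch.randn(32, obs_dim + act_dim)   # placeholder expert (s, a) features
learner_batch = torch.randn(32, obs_dim + act_dim)  # placeholder learner (s, a) features

logits = torch.cat([discriminator(expert_batch), discriminator(learner_batch)])
labels = torch.cat([torch.ones(32, 1), torch.zeros(32, 1)])  # expert = 1, learner = 0
loss = nn.functional.binary_cross_entropy_with_logits(logits, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()

# One common learner reward: -log(1 - D(s, a)), i.e. softplus of the logit.
reward = nn.functional.softplus(discriminator(learner_batch)).detach()
```
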
docs/tutorials/4_train_airl.ipynb
@@ -4,7 +4,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train an Agent using Adversarial Inverse Reinforcement Learning\n"
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/4_train_airl.ipynb)\n",
"# Train an Agent using Adversarial Inverse Reinforcement Learning"
]
},
{
docs/tutorials/5_train_preference_comparisons.ipynb
@@ -4,6 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/5_train_preference_comparisons.ipynb)\n",
"# Learning a Reward Function using Preference Comparisons\n",
"\n",
"The preference comparisons algorithm learns a reward function by comparing trajectory segments to each other."
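
The comparison step is typically a Bradley-Terry style cross-entropy loss over pairs of trajectory fragments. A small torch sketch of that idea (placeholder fragment tensors and reward net, not the library's trainer classes):

```python
import torch
import torch.nn as nn

# Schematic preference-comparison update: model P(fragment A preferred over B)
# from the summed predicted rewards, then fit the reward net to the labels.
reward_net = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 1))
optimizer = torch.optim.Adam(reward_net.parameters(), lr=1e-3)

frag_a = torch.randn(16, 20, 4)  # placeholder: 16 fragments of 20 observations each
frag_b = torch.randn(16, 20, 4)
prefs = torch.randint(0, 2, (16,)).float()  # 1 if fragment A preferred, else 0

return_a = reward_net(frag_a).sum(dim=(1, 2))  # summed predicted reward per fragment
return_b = reward_net(frag_b).sum(dim=(1, 2))
logits = return_a - return_b                   # P(A preferred) = sigmoid(logits)
loss = nn.functional.binary_cross_entropy_with_logits(logits, prefs)
optimizer.zero_grad()
loss.backward()
optimizer.step()
```
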
docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb
@@ -5,6 +5,8 @@
"id": "1635a6fd",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb)\n",
"\n",
"# Learning a Reward Function using Preference Comparisons on Atari\n",
"\n",
"In this case, we will use a convolutional neural network for our policy and reward model. We will also shape the learned reward model with the policy's learned value function, since these shaped rewards will be more informative for training - incentivizing agents to move to high-value states. In the interests of execution time, we will only do a little bit of training - much less than in the previous preference comparison notebook. To run this notebook, be sure to install the `atari` extras, for example by running `pip install imitation[atari]`."
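
The value-function shaping mentioned above is, schematically, potential-based shaping of the learned reward. A brief sketch (the value network and batches are placeholders, not the library's shaped-reward wrapper):

```python
import torch
import torch.nn as nn

# Schematic potential-based shaping: r'(s, a, s') = r(s, a, s') + gamma * V(s') - V(s).
# Shaping makes the reward more informative while preserving the optimal policy.
gamma = 0.99
value_fn = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 1))  # placeholder V

obs = torch.randn(32, 8)          # placeholder batched observations
next_obs = torch.randn(32, 8)
learned_reward = torch.randn(32)  # placeholder output of the learned reward model

shaped_reward = (
    learned_reward + gamma * value_fn(next_obs).squeeze(-1) - value_fn(obs).squeeze(-1)
)
```
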
docs/tutorials/6_train_mce.ipynb
@@ -4,6 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/6_train_mce.ipynb)\n",
"# Learn a Reward Function using Maximum Conditional Entropy Inverse Reinforcement Learning\n",
"\n",
"Here, we're going to take a tabular environment with a pre-defined reward function, Cliffworld, and solve for the optimal policy. We then generate demonstrations from this policy, and use them to learn an approximation to the true reward function with MCE IRL. Finally, we directly compare the learned reward to the ground-truth reward (which we have access to in this example)."
@@ -81,7 +82,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training the reward function\n",
"## Training the reward function\n",
"\n",
"The true reward here is not linear in the reduced feature space (i.e $(x,y)$ coordinates). Finding an appropriate linear reward is impossible, but an MLP should Just Work™."
]
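
To illustrate the linearity point in plain torch (standing in for imitation's reward networks, not using them): a reward that is linear in the (x, y) features cannot fit a target that is non-linear in those features, while a small MLP can.

```python
import torch
import torch.nn as nn

# Quick illustration: fit a linear model and a small MLP to a "reward" that is
# non-linear in (x, y), here a Gaussian bump at the origin.
xy = torch.rand(512, 2) * 8 - 4
target = torch.exp(-(xy ** 2).sum(dim=1, keepdim=True))


def fit(model, steps=500):
    opt = torch.optim.Adam(model.parameters(), lr=1e-2)
    for _ in range(steps):
        loss = nn.functional.mse_loss(model(xy), target)
        opt.zero_grad()
        loss.backward()
        opt.step()
    return loss.item()


print("linear MSE:", fit(nn.Linear(2, 1)))
print("MLP MSE:   ", fit(nn.Sequential(nn.Linear(2, 32), nn.Tanh(), nn.Linear(32, 1))))
```
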
docs/tutorials/7_train_density.ipynb
@@ -4,6 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[download this notebook here](https:/HumanCompatibleAI/imitation/blob/master/docs/tutorials/7_train_density.ipynb)\n",
"# Learning a Reward Function using Kernel Density\n",
"\n",
"This demo shows how to train a `Pendulum` agent (exciting!) with our simple density-based imitation learning baselines. `DensityTrainer` has a few interesting parameters, but the key ones are:\n",
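
Schematically, the density baseline fits a kernel density model to expert state-action pairs and scores the learner's pairs by log-density. A hedged sklearn sketch of the idea (placeholder data, not `DensityTrainer`'s actual interface):

```python
import numpy as np
from sklearn.neighbors import KernelDensity

# Schematic density-based reward: fit a KDE on expert (s, a) features, then
# score the learner's (s, a) features; higher log-density under the expert
# model is treated as higher reward. Random data stands in for real rollouts.
rng = np.random.default_rng(0)
expert_sa = rng.normal(size=(500, 4))            # expert state-action features
learner_sa = rng.normal(loc=0.5, size=(100, 4))  # learner state-action features

kde = KernelDensity(kernel="gaussian", bandwidth=0.5).fit(expert_sa)
pseudo_reward = kde.score_samples(learner_sa)    # log-density per (s, a) pair
print(pseudo_reward.mean())
```
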
5 changes: 4 additions & 1 deletion setup.py
@@ -79,7 +79,10 @@
"furo==2022.6.21",
"sphinx-copybutton==0.5.0",
"sphinx-github-changelog~=1.2.0",
]
"myst-nb==0.16.0",
"ipykernel~=6.15.2",
"seals==0.1.2",
] + ATARI_REQUIRE


def get_readme() -> str:
18 changes: 11 additions & 7 deletions tests/test_examples.py
@@ -7,7 +7,7 @@
from typing import Iterable, Sequence

import pytest
from pytest_notebook import execution, notebook
import pytest_notebook as ptnb


def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]:
@@ -28,24 +28,28 @@ def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]:

THIS_DIR = pathlib.Path(__file__).absolute().parent
EXAMPLES_DIR = THIS_DIR / ".." / "examples"
TUTORIALS_DIR = THIS_DIR / ".." / "docs" / "tutorials"

SH_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.sh"))
NB_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.ipynb"))
TUTORIAL_PATHS = _paths_to_strs(TUTORIALS_DIR.glob("*.ipynb"))
PY_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.py"))


@pytest.mark.parametrize("nb_path", NB_PATHS)
def test_run_example_notebooks(nb_path) -> None:
"""Smoke test ensuring that example notebooks run without error.
# Note: This is excluded from coverage since coverage is computed on Linux. However,
# it is covered by the macOS and Windows runners.
@pytest.mark.skipif(sys.platform == "linux", reason="Linux is covered by readthedocs.")
@pytest.mark.parametrize("nb_path", TUTORIAL_PATHS)
def test_run_tutorial_notebooks(nb_path) -> None: # pragma: no cover
"""Smoke test ensuring that tutorial notebooks run without error.

The `pytest_notebook` package also includes regression test functionality against
saved notebook outputs, if we want to check that later.

Args:
nb_path: Path to the notebook to test.
"""
nb = notebook.load_notebook(nb_path)
result = execution.execute_notebook(nb, cwd=EXAMPLES_DIR, timeout=120)
nb = ptnb.notebook.load_notebook(nb_path)
result = ptnb.execution.execute_notebook(nb, cwd=TUTORIALS_DIR, timeout=120)
assert result.exec_error is None


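
For reference, a hedged way to run only these notebook smoke tests locally through pytest's Python API; the `-k` expression is just a name match against `test_run_tutorial_notebooks` above.

```python
import pytest

# Run only the tutorial-notebook smoke tests from the repo root.
# Requires the pytest_notebook dependency used by the test suite.
pytest.main(["tests/test_examples.py", "-k", "tutorial"])
```
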