From d9acb71ce528dfe0fc9b76a4693adc8b2e5aaa4b Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Wed, 14 Sep 2022 14:07:27 +0200 Subject: [PATCH 01/16] Integrate the tutorial notebook using nbsphinx in the documentation. --- Dockerfile | 5 +++-- README.md | 2 +- docs/algorithms/airl.rst | 2 +- docs/algorithms/bc.rst | 2 +- docs/algorithms/dagger.rst | 2 +- docs/algorithms/density.rst | 2 +- docs/algorithms/gail.rst | 2 +- docs/algorithms/mce_irl.rst | 2 +- docs/algorithms/preference_comparisons.rst | 2 +- docs/conf.py | 1 + docs/index.rst | 13 +++++++++++++ {examples => docs/tutorials}/1_train_bc.ipynb | 0 {examples => docs/tutorials}/2_train_dagger.ipynb | 0 {examples => docs/tutorials}/3_train_gail.ipynb | 0 {examples => docs/tutorials}/4_train_airl.ipynb | 0 .../tutorials}/5_train_preference_comparisons.ipynb | 0 .../5a_train_preference_comparisons_with_cnn.ipynb | 0 {examples => docs/tutorials}/6_train_mce.ipynb | 0 {examples => docs/tutorials}/7_train_density.ipynb | 0 setup.py | 1 + tests/test_examples.py | 7 ++++--- 21 files changed, 30 insertions(+), 13 deletions(-) rename {examples => docs/tutorials}/1_train_bc.ipynb (100%) rename {examples => docs/tutorials}/2_train_dagger.ipynb (100%) rename {examples => docs/tutorials}/3_train_gail.ipynb (100%) rename {examples => docs/tutorials}/4_train_airl.ipynb (100%) rename {examples => docs/tutorials}/5_train_preference_comparisons.ipynb (100%) rename {examples => docs/tutorials}/5a_train_preference_comparisons_with_cnn.ipynb (100%) rename {examples => docs/tutorials}/6_train_mce.ipynb (100%) rename {examples => docs/tutorials}/7_train_density.ipynb (100%) diff --git a/Dockerfile b/Dockerfile index c5efd64aa..6c4166d5d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # base stage contains just binary dependencies. # This is used in the CI build. 
-FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 AS base +FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 AS base ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update -q \ @@ -27,7 +27,8 @@ RUN apt-get update -q \ virtualenv \ xpra \ xserver-xorg-dev \ - patchelf \ + patchelf \ + pandoc \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/README.md b/README.md index 465d27512..5d298efe7 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ See [examples/quickstart.py](examples/quickstart.py) for an example script that ### Density reward baseline -We also implement a density-based reward baseline. You can find an [example notebook here](examples/7_train_density.ipynb). +We also implement a density-based reward baseline. You can find an [example notebook here](docs/tutorials/7_train_density.ipynb). # Citations (BibTeX) diff --git a/docs/algorithms/airl.rst b/docs/algorithms/airl.rst index 48c80d436..13fbec8b2 100644 --- a/docs/algorithms/airl.rst +++ b/docs/algorithms/airl.rst @@ -17,7 +17,7 @@ Notes Example ======= -Detailed example notebook: `4_train_airl.ipynb `_ +Detailed example notebook: :doc:`../tutorials/4_train_airl` .. testcode:: diff --git a/docs/algorithms/bc.rst b/docs/algorithms/bc.rst index a60abc064..4e254be0c 100644 --- a/docs/algorithms/bc.rst +++ b/docs/algorithms/bc.rst @@ -15,7 +15,7 @@ approaches to learning from demonstrations). Example ======= -Detailed example notebook: `1_train_bc.ipynb `_ +Detailed example notebook: :doc:`../tutorials/1_train_bc` .. testcode:: diff --git a/docs/algorithms/dagger.rst b/docs/algorithms/dagger.rst index cf71d4b1d..7b0521784 100644 --- a/docs/algorithms/dagger.rst +++ b/docs/algorithms/dagger.rst @@ -19,7 +19,7 @@ Notes Example ======= -Detailed example notebook: `2_train_dagger.ipynb `_ +Detailed example notebook: :doc:`../tutorials/2_train_dagger` .. 
testcode:: diff --git a/docs/algorithms/density.rst b/docs/algorithms/density.rst index 42734cdbe..ed822bbbc 100644 --- a/docs/algorithms/density.rst +++ b/docs/algorithms/density.rst @@ -5,7 +5,7 @@ Density-based reward modeling Example ======= -Detailed example notebook: `7_train_density.ipynb `_ +Detailed example notebook: :doc:`../tutorials/7_train_density` .. testcode:: diff --git a/docs/algorithms/gail.rst b/docs/algorithms/gail.rst index 34a278e65..584cb36f9 100644 --- a/docs/algorithms/gail.rst +++ b/docs/algorithms/gail.rst @@ -15,7 +15,7 @@ Notes Example ======= -Detailed example notebook: `3_train_gail.ipynb `_ +Detailed example notebook: :doc:`../tutorials/3_train_gail` .. testcode:: diff --git a/docs/algorithms/mce_irl.rst b/docs/algorithms/mce_irl.rst index 42005cea2..0b2e8ce21 100644 --- a/docs/algorithms/mce_irl.rst +++ b/docs/algorithms/mce_irl.rst @@ -7,7 +7,7 @@ Implements `Modeling Interaction via the Principle of Maximum Causal Entropy `_ +Detailed example notebook: :doc:`../tutorials/6_train_mce` .. testcode:: diff --git a/docs/algorithms/preference_comparisons.rst b/docs/algorithms/preference_comparisons.rst index fc1667c24..34aaa40c5 100644 --- a/docs/algorithms/preference_comparisons.rst +++ b/docs/algorithms/preference_comparisons.rst @@ -18,7 +18,7 @@ Notes Example ======= -Detailed example notebook: `5_train_preference_comparisons.ipynb `_ +Detailed example notebook: :doc:`../tutorials/5_train_preference_comparisons` .. testcode:: diff --git a/docs/conf.py b/docs/conf.py index 2815a603c..d43c52270 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,6 +44,7 @@ "sphinx_copybutton", "sphinx_github_changelog", "sphinx.ext.doctest", + "nbsphinx", ] napoleon_google_docstring = True diff --git a/docs/index.rst b/docs/index.rst index 86914c16a..d5947de4e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,6 +51,19 @@ Main Features algorithms/mce_irl algorithms/preference_comparisons +.. 
toctree:: + :maxdepth: 2 + :caption: Tutorials + :hidden: + + tutorials/1_train_bc + tutorials/2_train_dagger + tutorials/3_train_gail + tutorials/4_train_airl + tutorials/5_train_preference_comparisons + tutorials/5a_train_preference_comparisons_with_cnn + tutorials/6_train_mce + tutorials/7_train_density API Reference ~~~~~~~~~~~~~ diff --git a/examples/1_train_bc.ipynb b/docs/tutorials/1_train_bc.ipynb similarity index 100% rename from examples/1_train_bc.ipynb rename to docs/tutorials/1_train_bc.ipynb diff --git a/examples/2_train_dagger.ipynb b/docs/tutorials/2_train_dagger.ipynb similarity index 100% rename from examples/2_train_dagger.ipynb rename to docs/tutorials/2_train_dagger.ipynb diff --git a/examples/3_train_gail.ipynb b/docs/tutorials/3_train_gail.ipynb similarity index 100% rename from examples/3_train_gail.ipynb rename to docs/tutorials/3_train_gail.ipynb diff --git a/examples/4_train_airl.ipynb b/docs/tutorials/4_train_airl.ipynb similarity index 100% rename from examples/4_train_airl.ipynb rename to docs/tutorials/4_train_airl.ipynb diff --git a/examples/5_train_preference_comparisons.ipynb b/docs/tutorials/5_train_preference_comparisons.ipynb similarity index 100% rename from examples/5_train_preference_comparisons.ipynb rename to docs/tutorials/5_train_preference_comparisons.ipynb diff --git a/examples/5a_train_preference_comparisons_with_cnn.ipynb b/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb similarity index 100% rename from examples/5a_train_preference_comparisons_with_cnn.ipynb rename to docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb diff --git a/examples/6_train_mce.ipynb b/docs/tutorials/6_train_mce.ipynb similarity index 100% rename from examples/6_train_mce.ipynb rename to docs/tutorials/6_train_mce.ipynb diff --git a/examples/7_train_density.ipynb b/docs/tutorials/7_train_density.ipynb similarity index 100% rename from examples/7_train_density.ipynb rename to docs/tutorials/7_train_density.ipynb diff 
--git a/setup.py b/setup.py index f28dd5f7d..b95aa0e8a 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ "furo==2022.6.21", "sphinx-copybutton==0.5.0", "sphinx-github-changelog~=1.2.0", + "nbsphinx~=0.8.9", ] diff --git a/tests/test_examples.py b/tests/test_examples.py index 83e963f72..3c900d3eb 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -28,14 +28,15 @@ def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: THIS_DIR = pathlib.Path(__file__).absolute().parent EXAMPLES_DIR = THIS_DIR / ".." / "examples" +TUTORIALS_DIR = THIS_DIR / ".." / "docs" / "tutorials" SH_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.sh")) -NB_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.ipynb")) +NB_PATHS = _paths_to_strs(TUTORIALS_DIR.glob("*.ipynb")) PY_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.py")) @pytest.mark.parametrize("nb_path", NB_PATHS) -def test_run_example_notebooks(nb_path) -> None: +def test_run_tutorial_notebooks_notebooks(nb_path) -> None: """Smoke test ensuring that example notebooks run without error. The `pytest_notebook` package also includes regression test functionality against @@ -45,7 +46,7 @@ def test_run_example_notebooks(nb_path) -> None: nb_path: Path to the notebook to test. 
""" nb = notebook.load_notebook(nb_path) - result = execution.execute_notebook(nb, cwd=EXAMPLES_DIR, timeout=120) + result = execution.execute_notebook(nb, cwd=TUTORIALS_DIR, timeout=120) assert result.exec_error is None From 775cb3f997bc470ed52042913b50a193dc280591 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Wed, 14 Sep 2022 14:35:59 +0200 Subject: [PATCH 02/16] Add missing dependencies for building docs --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b95aa0e8a..f17319adf 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,9 @@ "sphinx-copybutton==0.5.0", "sphinx-github-changelog~=1.2.0", "nbsphinx~=0.8.9", -] + "ipykernel~=6.15.2", + "seals==0.1.2", +] + ATARI_REQUIRE def get_readme() -> str: From 516506af76a26e8c04b673ef57f5d698b2605993 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:17:13 +0200 Subject: [PATCH 03/16] Fix caption hierarchy in 6_train_mce.ipynb --- docs/tutorials/6_train_mce.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/6_train_mce.ipynb b/docs/tutorials/6_train_mce.ipynb index e3aeb87bc..f845c0583 100644 --- a/docs/tutorials/6_train_mce.ipynb +++ b/docs/tutorials/6_train_mce.ipynb @@ -81,7 +81,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Training the reward function\n", + "## Training the reward function\n", "\n", "The true reward here is not linear in the reduced feature space (i.e $(x,y)$ coordinates). Finding an appropriate linear reward is impossible, but an MLP should Just Work™." ] From 714b6b8c9077ef085fe6446e715aad010d2ec208 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:20:00 +0200 Subject: [PATCH 04/16] Switch from nbsphinx to myst_nb to make use of notebook caching. 
--- docs/conf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index d43c52270..a51f24c08 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,13 +44,16 @@ "sphinx_copybutton", "sphinx_github_changelog", "sphinx.ext.doctest", - "nbsphinx", + "myst_nb", ] napoleon_google_docstring = True napoleon_numpy_docstring = False autosummary_generate = True +nb_execution_mode = "cache" +nb_execution_timeout = 120 + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] From dc6aee0e0d980cd7e4358a9bd8acf8a0c3ae874b Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:21:00 +0200 Subject: [PATCH 05/16] Revert changes to docker file and switch back to original image. --- Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6c4166d5d..c5efd64aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # base stage contains just binary dependencies. # This is used in the CI build. -FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 AS base +FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 AS base ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update -q \ @@ -27,8 +27,7 @@ RUN apt-get update -q \ virtualenv \ xpra \ xserver-xorg-dev \ - patchelf \ - pandoc \ + patchelf \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 387a217bec3f91eeeed14e28f3557083fc9578db Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:28:11 +0200 Subject: [PATCH 06/16] Change doc dependencies from nbsphinx to myst-nb. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f17319adf..cb2637722 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ "furo==2022.6.21", "sphinx-copybutton==0.5.0", "sphinx-github-changelog~=1.2.0", - "nbsphinx~=0.8.9", + "myst-nb==0.16.0", "ipykernel~=6.15.2", "seals==0.1.2", ] + ATARI_REQUIRE From eeeac22a08adbb08953efbba54be33d8b803b1f9 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:40:23 +0200 Subject: [PATCH 07/16] Make the output of notebooks in the documentation look prettier. --- docs/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index a51f24c08..875fb8db9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,6 +53,8 @@ nb_execution_mode = "cache" nb_execution_timeout = 120 +nb_merge_streams = True +nb_output_stderr = "remove" # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] From 9552c52e4086d3769641d619a3de0fea78796ddd Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:48:00 +0200 Subject: [PATCH 08/16] Remove redundant html building in sphinx doctest and rename the job to doctest to make the intent clear. 
--- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index aa653bff3..2283f7215 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -235,14 +235,14 @@ jobs: name: codespell command: codespell -I .codespell.skip --skip='*.pyc,tests/testdata/*,*.ipynb,*.csv' ${SRC_FILES} - docs: + doctest: executor: static-analysis-medium steps: - dependencies-linux - run: name: sphinx - command: pushd docs/ && make clean && make doctest && make html && popd + command: pushd docs/ && make clean && make doctest && popd type: executor: static-analysis-medium From 879ec4b1aa62a40a1a84a5e7954d6cc3b66d095a Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:49:04 +0200 Subject: [PATCH 09/16] Remove redundant test_run_tutorial_notebooks test (they are executed by sphinx now). --- tests/test_examples.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/test_examples.py b/tests/test_examples.py index 3c900d3eb..dc403134a 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -31,25 +31,9 @@ def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: TUTORIALS_DIR = THIS_DIR / ".." / "docs" / "tutorials" SH_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.sh")) -NB_PATHS = _paths_to_strs(TUTORIALS_DIR.glob("*.ipynb")) PY_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.py")) -@pytest.mark.parametrize("nb_path", NB_PATHS) -def test_run_tutorial_notebooks_notebooks(nb_path) -> None: - """Smoke test ensuring that example notebooks run without error. - - The `pytest_notebook` package also includes regression test functionality against - saved notebook outputs, if we want to check that later. - - Args: - nb_path: Path to the notebook to test. 
- """ - nb = notebook.load_notebook(nb_path) - result = execution.execute_notebook(nb, cwd=TUTORIALS_DIR, timeout=120) - assert result.exec_error is None - - @pytest.mark.parametrize("py_path", PY_PATHS) def test_run_example_py_scripts(py_path): """Smoke test ensuring that python example scripts run without error.""" From bb99e86820689feb9e3571919423d402e906e9e4 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 15:52:29 +0200 Subject: [PATCH 10/16] Fix circleci config. --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2283f7215..42a150d51 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -351,7 +351,7 @@ workflows: - lint: context: - docker-hub-creds - - docs: + - doctest: context: - docker-hub-creds - type: From 6b2335324065c1f3e59c7820f80a5e406d71e312 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 16:09:59 +0200 Subject: [PATCH 11/16] Remove unused imports from test_examples.py --- tests/test_examples.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_examples.py b/tests/test_examples.py index dc403134a..26b9ca9b5 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -7,7 +7,6 @@ from typing import Iterable, Sequence import pytest -from pytest_notebook import execution, notebook def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: From 5e397cad097a24d00853a9542d0fcc887456229c Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 15 Sep 2022 18:06:21 +0200 Subject: [PATCH 12/16] Prevent notebooks to be evaluated during doctest. 
--- .circleci/config.yml | 4 ++++ docs/conf.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 42a150d51..8af7eb5cc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -242,6 +242,10 @@ jobs: - dependencies-linux - run: name: sphinx + environment: + # Note: we don't want to execute the example notebooks in this step since + # this happens in a separate readthedocs job anyway. + NB_EXECUTION_MODE: "off" command: pushd docs/ && make clean && make doctest && popd type: diff --git a/docs/conf.py b/docs/conf.py index 875fb8db9..84ddfca03 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- - +import os from importlib import metadata project = "imitation" @@ -51,7 +51,7 @@ napoleon_numpy_docstring = False autosummary_generate = True -nb_execution_mode = "cache" +nb_execution_mode = os.getenv("NB_EXECUTION_MODE", "cache") nb_execution_timeout = 120 nb_merge_streams = True nb_output_stderr = "remove" From ee887231c64353e17cf5db393da9b667107dbe19 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Mon, 19 Sep 2022 12:36:49 +0200 Subject: [PATCH 13/16] Add download links to tutorial notebooks. 
--- docs/tutorials/1_train_bc.ipynb | 1 + docs/tutorials/2_train_dagger.ipynb | 1 + docs/tutorials/3_train_gail.ipynb | 1 + docs/tutorials/4_train_airl.ipynb | 3 ++- docs/tutorials/5_train_preference_comparisons.ipynb | 1 + docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb | 2 ++ docs/tutorials/6_train_mce.ipynb | 1 + docs/tutorials/7_train_density.ipynb | 1 + 8 files changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/1_train_bc.ipynb b/docs/tutorials/1_train_bc.ipynb index 75a4659ee..30e9126c1 100644 --- a/docs/tutorials/1_train_bc.ipynb +++ b/docs/tutorials/1_train_bc.ipynb @@ -4,6 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/1_train_bc.ipynb)\n", "# Train an Agent using Behavior Cloning\n", "\n", "Behavior cloning is the most naive approach to imitation learning. \n", diff --git a/docs/tutorials/2_train_dagger.ipynb b/docs/tutorials/2_train_dagger.ipynb index 0cea53f77..c1fee45c8 100644 --- a/docs/tutorials/2_train_dagger.ipynb +++ b/docs/tutorials/2_train_dagger.ipynb @@ -4,6 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/2_train_dagger.ipynb)\n", "# Train an Agent using the DAgger Algorithm\n", "\n", "The DAgger algorithm is an extension of behavior cloning. 
\n", diff --git a/docs/tutorials/3_train_gail.ipynb b/docs/tutorials/3_train_gail.ipynb index 7ef9ee55b..33125614b 100644 --- a/docs/tutorials/3_train_gail.ipynb +++ b/docs/tutorials/3_train_gail.ipynb @@ -4,6 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/3_train_gail.ipynb)\n", "# Train an Agent using Generative Adversarial Imitation Learning\n", "\n", "The idea of generative adversarial imitation learning is to train a discriminator network to distinguish between expert trajectories and learner trajectories.\n", diff --git a/docs/tutorials/4_train_airl.ipynb b/docs/tutorials/4_train_airl.ipynb index 4a67824af..a9f7fbb4c 100644 --- a/docs/tutorials/4_train_airl.ipynb +++ b/docs/tutorials/4_train_airl.ipynb @@ -4,7 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Train an Agent using Adversarial Inverse Reinforcement Learning\n" + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/4_train_airl.ipynb)\n", + "# Train an Agent using Adversarial Inverse Reinforcement Learning" ] }, { diff --git a/docs/tutorials/5_train_preference_comparisons.ipynb b/docs/tutorials/5_train_preference_comparisons.ipynb index b3c382f2b..2d27bc4b3 100644 --- a/docs/tutorials/5_train_preference_comparisons.ipynb +++ b/docs/tutorials/5_train_preference_comparisons.ipynb @@ -4,6 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/5_train_preference_comparisons.ipynb)\n", "# Learning a Reward Function using Preference Comparisons\n", "\n", "The preference comparisons algorithm learns a reward function by comparing trajectory segments to each other." 
diff --git a/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb b/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb index 594de3745..0ee7427a0 100644 --- a/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb +++ b/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb @@ -5,6 +5,8 @@ "id": "1635a6fd", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/5a_train_preference_comparisons_with_cnn.ipynb)\n", + "\n", "# Learning a Reward Function using Preference Comparisons on Atari\n", "\n", "In this case, we will use a convolutional neural network for our policy and reward model. We will also shape the learned reward model with the policy's learned value function, since these shaped rewards will be more informative for training - incentivizing agents to move to high-value states. In the interests of execution time, we will only do a little bit of training - much less than in the previous preference comparison notebook. To run this notebook, be sure to install the `atari` extras, for example by running `pip install imitation[atari]`." diff --git a/docs/tutorials/6_train_mce.ipynb b/docs/tutorials/6_train_mce.ipynb index f845c0583..cb215747e 100644 --- a/docs/tutorials/6_train_mce.ipynb +++ b/docs/tutorials/6_train_mce.ipynb @@ -4,6 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/6_train_mce.ipynb)\n", "# Learn a Reward Function using Maximum Conditional Entropy Inverse Reinforcement Learning\n", "\n", "Here, we're going to take a tabular environment with a pre-defined reward function, Cliffworld, and solve for the optimal policy. We then generate demonstrations from this policy, and use them to learn an approximation to the true reward function with MCE IRL. 
Finally, we directly compare the learned reward to the ground-truth reward (which we have access to in this example)." diff --git a/docs/tutorials/7_train_density.ipynb b/docs/tutorials/7_train_density.ipynb index ecbd78b7f..df75e7057 100644 --- a/docs/tutorials/7_train_density.ipynb +++ b/docs/tutorials/7_train_density.ipynb @@ -4,6 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "[download this notebook here](https://github.com/HumanCompatibleAI/imitation/blob/master/docs/tutorials/7_train_density.ipynb)\n", "# Learning a Reward Function using Kernel Density\n", "\n", "This demo shows how to train a `Pendulum` agent (exciting!) with our simple density-based imitation learning baselines. `DensityTrainer` has a few interesting parameters, but the key ones are:\n", From ece9544963300dcd1ae7845a802d4e652c7def5c Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Mon, 19 Sep 2022 12:46:11 +0200 Subject: [PATCH 14/16] Re-enable notebook tests only on mac and windows. --- tests/test_examples.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_examples.py b/tests/test_examples.py index 26b9ca9b5..e523e40f9 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -7,6 +7,7 @@ from typing import Iterable, Sequence import pytest +from pytest_notebook import execution, notebook def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: @@ -30,9 +31,26 @@ def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: TUTORIALS_DIR = THIS_DIR / ".." / "docs" / "tutorials" SH_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.sh")) +TUTORIAL_PATHS = _paths_to_strs(TUTORIALS_DIR.glob("*.ipynb")) PY_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.py")) +@pytest.mark.skipif(sys.platform == "linux", reason="Linux is covered by readthedocs.") +@pytest.mark.parametrize("nb_path", TUTORIAL_PATHS) +def test_run_tutorial_notebooks(nb_path) -> None: + """Smoke test ensuring that tutorial notebooks run without error. 
+ + The `pytest_notebook` package also includes regression test functionality against + saved notebook outputs, if we want to check that later. + + Args: + nb_path: Path to the notebook to test. + """ + nb = notebook.load_notebook(nb_path) + result = execution.execute_notebook(nb, cwd=TUTORIALS_DIR, timeout=120) + assert result.exec_error is None + + @pytest.mark.parametrize("py_path", PY_PATHS) def test_run_example_py_scripts(py_path): """Smoke test ensuring that python example scripts run without error.""" From 6e3047799defa17f8c398be36fcb26429b96cab8 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Tue, 20 Sep 2022 13:46:53 +0200 Subject: [PATCH 15/16] Fix import style in test_examples.py --- tests/test_examples.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_examples.py b/tests/test_examples.py index e523e40f9..43b2ff5ae 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -7,7 +7,7 @@ from typing import Iterable, Sequence import pytest -from pytest_notebook import execution, notebook +import pytest_notebook as ptnb def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: @@ -46,8 +46,8 @@ def test_run_tutorial_notebooks(nb_path) -> None: Args: nb_path: Path to the notebook to test. """ - nb = notebook.load_notebook(nb_path) - result = execution.execute_notebook(nb, cwd=TUTORIALS_DIR, timeout=120) + nb = ptnb.notebook.load_notebook(nb_path) + result = ptnb.execution.execute_notebook(nb, cwd=TUTORIALS_DIR, timeout=120) assert result.exec_error is None From a364a0e7d5b12d120d3ccae408f7026afa8c349b Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Tue, 20 Sep 2022 13:55:08 +0200 Subject: [PATCH 16/16] Add no cover pragma to example notebooks test. 
--- tests/test_examples.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_examples.py b/tests/test_examples.py index 43b2ff5ae..214fa0415 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -35,9 +35,11 @@ def _paths_to_strs(x: Iterable[pathlib.Path]) -> Sequence[str]: PY_PATHS = _paths_to_strs(EXAMPLES_DIR.glob("*.py")) +# Note: This is excluded from coverage since it is computed on linux. However, it is +# covered by mac and windows runners. @pytest.mark.skipif(sys.platform == "linux", reason="Linux is covered by readthedocs.") @pytest.mark.parametrize("nb_path", TUTORIAL_PATHS) -def test_run_tutorial_notebooks(nb_path) -> None: +def test_run_tutorial_notebooks(nb_path) -> None: # pragma: no cover """Smoke test ensuring that tutorial notebooks run without error. The `pytest_notebook` package also includes regression test functionality against