From b4b8c1ad207c339c29167e88ee0f13e4d9001de8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 24 Aug 2025 14:36:00 +0200 Subject: [PATCH 1/3] start work on pre-commit hooks --- .pre-commit-config.yaml | 13 ++++++++-- pre_commit_hooks/check_notebooks.py | 38 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 pre_commit_hooks/check_notebooks.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8155bbd..488ac4a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -files: '.py' +files: '.py|.ipynb' exclude: '.git' default_stages: [pre-commit] @@ -23,4 +23,13 @@ repos: - repo: https://github.com/christopher-hacker/enforce-notebook-run-order rev: 2.0.1 hooks: - - id: enforce-notebook-run-order \ No newline at end of file + - id: enforce-notebook-run-order + + - repo: local + hooks: + - id: check_notebooks + name: check notebooks + entry: python ./pre_commit_hooks/check_notebooks.py + language: system + files: '.ipynb' + diff --git a/pre_commit_hooks/check_notebooks.py b/pre_commit_hooks/check_notebooks.py new file mode 100644 index 0000000..7aa14bf --- /dev/null +++ b/pre_commit_hooks/check_notebooks.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import argparse +from collections.abc import Sequence + +import nbformat + + +def test_no_errors_or_warnings_in_output(notebook): + """checks if all example Jupyter notebooks have clear std-err output + (i.e., no errors or warnings) visible; except acceptable + diagnostics from the joblib package""" + for cell in notebook.cells: + if cell.cell_type == "code": + for output in cell.outputs: + if "name" in output and output["name"] == "stderr": + if not output["text"].startswith("[Parallel(n_jobs="): + raise AssertionError(output["text"]) + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + + retval = 0 + for filename in args.filenames: + with open(filename, encoding="utf8") as notebook_file: + notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) + try: + test_no_errors_or_warnings_in_output(notebook) + except ValueError as exc: + retval = 1 + return retval + + +if __name__ == "__main__": + raise SystemExit(main()) From 2de8ac2e5af514861c161e1f5a026f2b9289a434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 24 Aug 2025 17:52:48 +0200 Subject: [PATCH 2/3] divide ito check_badges and check_notebooks; add utils --- .pre-commit-config.yaml | 6 +- pre_commit_hooks/check_badges.py | 96 ++++++++++++++++++ pre_commit_hooks/check_notebooks.py | 49 ++++++++++ pre_commit_hooks/utils.py | 45 +++++++++ test_notebooks.py | 147 ---------------------------- 5 files changed, 195 insertions(+), 148 deletions(-) create mode 100644 pre_commit_hooks/check_badges.py create mode 100644 pre_commit_hooks/utils.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 488ac4a..a756a2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,4 +32,8 @@ repos: entry: python ./pre_commit_hooks/check_notebooks.py language: system files: '.ipynb' - + - id: check_badges + name: check badges + entry: python ./pre_commit_hooks/check_badges.py + language: system + files: '.ipynb' \ No newline at end of file diff --git a/pre_commit_hooks/check_badges.py b/pre_commit_hooks/check_badges.py new file mode 100644 index 0000000..4ff6c76 --- /dev/null +++ b/pre_commit_hooks/check_badges.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import argparse +from collections.abc import Sequence + +import nbformat + +from utils import find_files, relative_path, repo_path + +COLAB_HEADER = f"""import sys +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_path().name}-examples')""" + + +def _preview_badge_markdown(absolute_path): + svg_badge_url = ( + "https://img.shields.io/static/v1?" + + "label=render%20on&logo=github&color=87ce3e&message=GitHub" + ) + link = ( + f"https://github.com/open-atmos/{repo_path().name}/blob/main/" + + f"{relative_path(absolute_path)}" + ) + return f"[![preview notebook]({svg_badge_url})]({link})" + + +def _mybinder_badge_markdown(absolute_path): + svg_badge_url = "https://mybinder.org/badge_logo.svg" + link = ( + f"https://mybinder.org/v2/gh/open-atmos/{repo_path().name}.git/main?urlpath=lab/tree/" + + f"{relative_path(absolute_path)}" + ) + return f"[![launch on mybinder.org]({svg_badge_url})]({link})" + + +def _colab_badge_markdown(absolute_path): + svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" + link = ( + f"https://colab.research.google.com/github/open-atmos/{repo_path().name}/blob/main/" + + f"{relative_path(absolute_path)}" + ) + return f"[![launch on Colab]({svg_badge_url})]({link})" + + +def test_first_cell_contains_three_badges(notebook_filename): + """checks if all notebooks feature Github preview, mybinder and Colab badges + (in the first cell)""" + with open(notebook_filename, encoding="utf8") as fp: + nb = nbformat.read(fp, nbformat.NO_CONVERT) + assert len(nb.cells) > 0 + assert nb.cells[0].cell_type == "markdown" + lines = nb.cells[0].source.split("\n") + assert len(lines) == 3 + assert lines[0] == _preview_badge_markdown(notebook_filename) + assert lines[1] == _mybinder_badge_markdown(notebook_filename) + assert lines[2] == _colab_badge_markdown(notebook_filename) + + +def test_second_cell_is_a_markdown_cell(notebook_filename): + """checks if all notebooks have their second cell with some markdown + (hopefully clarifying what the example is about)""" + with open(notebook_filename, encoding="utf8") as fp: + nb = nbformat.read(fp, nbformat.NO_CONVERT) + assert len(nb.cells) > 1 + assert nb.cells[1].cell_type == "markdown" + + +def test_third_cell_contains_colab_header(notebook_filename): + """checks if all notebooks feature a Colab-magic cell""" + with open(notebook_filename, encoding="utf8") as fp: + nb = nbformat.read(fp, nbformat.NO_CONVERT) + assert len(nb.cells) > 2 + assert nb.cells[2].cell_type == "code" + assert nb.cells[2].source == COLAB_HEADER + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + + retval = 0 + for filename in args.filenames: + try: + test_first_cell_contains_three_badges(filename) + test_second_cell_is_a_markdown_cell(filename) + test_third_cell_contains_colab_header(filename) + except ValueError as exc: + retval = 1 + return retval + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pre_commit_hooks/check_notebooks.py b/pre_commit_hooks/check_notebooks.py index 7aa14bf..5afa44f 100644 --- a/pre_commit_hooks/check_notebooks.py +++ b/pre_commit_hooks/check_notebooks.py @@ -6,6 +6,13 @@ import nbformat +def test_cell_contains_output(notebook): + """checks if all notebook cells have an output present""" + for cell in notebook.cells: + if cell.cell_type == "code" and cell.source != "": + assert cell.execution_count is not None + + def test_no_errors_or_warnings_in_output(notebook): """checks if all example Jupyter notebooks have clear std-err output (i.e., no errors or warnings) visible; except acceptable @@ -18,6 +25,47 @@ def test_no_errors_or_warnings_in_output(notebook): raise AssertionError(output["text"]) +def test_show_plot_used_instead_of_matplotlib(notebook): + """checks if plotting is done with open_atmos_jupyter_utils show_plot()""" + matplot_used = False + show_plot_used = False + for cell in notebook.cells: + if cell.cell_type == "code": + if ( + "pyplot.show()" in cell.source + or "plt.show()" in cell.source + or "from matplotlib import pyplot" in cell.source + ): + matplot_used = True + if "show_plot()" in cell.source: + show_plot_used = True + if matplot_used and not show_plot_used: + raise AssertionError( + "if using matplotlib, please use open_atmos_jupyter_utils.show_plot()" + ) + + +def test_show_anim_used_instead_of_matplotlib(notebook): + """checks if animation generation is done with open_atmos_jupyter_utils show_anim()""" + matplot_used = False + show_anim_used = False + for cell in notebook.cells: + if cell.cell_type == "code": + if ( + "funcAnimation" in cell.source + or "matplotlib.animation" in cell.source + or "from matplotlib import animation" in cell.source + ): + matplot_used = True + if "show_anim()" in cell.source: + show_anim_used = True + if matplot_used and not show_anim_used: + raise AssertionError( + """if using matplotlib for animations, + please use open_atmos_jupyter_utils.show_anim()""" + ) + + def main(argv: Sequence[str] | None = None) -> int: parser = argparse.ArgumentParser() parser.add_argument("filenames", nargs="*", help="Filenames to check.") @@ -28,6 +76,7 @@ def main(argv: Sequence[str] | None = None) -> int: with open(filename, encoding="utf8") as notebook_file: notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) try: + test_cell_contains_output(notebook) test_no_errors_or_warnings_in_output(notebook) except ValueError as exc: retval = 1 diff --git a/pre_commit_hooks/utils.py b/pre_commit_hooks/utils.py new file mode 100644 index 0000000..948be01 --- /dev/null +++ b/pre_commit_hooks/utils.py @@ -0,0 +1,45 @@ +""" +Utils functions to reuse in different parts of the codebase +""" + +import os +import pathlib + +from git.cmd import Git + + +def find_files(path_to_folder_from_project_root=".", file_extension=None): + """ + Returns all files in a current git repo. + The list of returned files may be filtered with `file_extension` param. + """ + all_files = [ + path + for path in Git( + Git(path_to_folder_from_project_root).rev_parse("--show-toplevel") + ) + .ls_files() + .split("\n") + if os.path.isfile(path) + ] + if file_extension is not None: + return list(filter(lambda path: path.endswith(file_extension), all_files)) + + return all_files + + +def relative_path(absolute_path): + """returns a path relative to the repo base (converting backslashes to slashes on Windows)""" + relpath = os.path.relpath(absolute_path, repo_path().absolute()) + posixpath_to_make_it_usable_in_urls_even_on_windows = pathlib.Path( + relpath + ).as_posix() + return posixpath_to_make_it_usable_in_urls_even_on_windows + + +def repo_path(): + """returns absolute path to the repo base (ignoring .git location if in a submodule)""" + path = pathlib.Path(__file__) + while not (path.is_dir() and Git(path).rev_parse("--git-dir") == ".git"): + path = path.parent + return path diff --git a/test_notebooks.py b/test_notebooks.py index ba28eb4..1b3eadb 100644 --- a/test_notebooks.py +++ b/test_notebooks.py @@ -28,23 +28,6 @@ SI = pint.UnitRegistry() -def _relative_path(absolute_path): - """returns a path relative to the repo base (converting backslashes to slashes on Windows)""" - relpath = os.path.relpath(absolute_path, _repo_path().absolute()) - posixpath_to_make_it_usable_in_urls_even_on_windows = pathlib.Path( - relpath - ).as_posix() - return posixpath_to_make_it_usable_in_urls_even_on_windows - - -def _repo_path(): - """returns absolute path to the repo base (ignoring .git location if in a submodule)""" - path = pathlib.Path(__file__) - while not (path.is_dir() and Git(path).rev_parse("--git-dir") == ".git"): - path = path.parent - return path - - COLAB_HEADER = f"""import sys if 'google.colab' in sys.modules: !pip --quiet install open-atmos-jupyter-utils @@ -84,20 +67,6 @@ def test_file_size(notebook_filename): assert os.stat(notebook_filename).st_size * SI.byte < 2 * SI.megabyte -def test_no_errors_or_warnings_in_output(notebook_filename): - """checks if all example Jupyter notebooks have clear std-err output - (i.e., no errors or warnings) visible; with exception of acceptable - diagnostics from the joblib package""" - with open(notebook_filename, encoding="utf8") as notebook_file: - notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) - for cell in notebook.cells: - if cell.cell_type == "code": - for output in cell.outputs: - if "name" in output and output["name"] == "stderr": - if not output["text"].startswith("[Parallel(n_jobs="): - raise AssertionError(output["text"]) - - def test_jetbrains_bug_py_66491(notebook_filename): """checks if all notebooks have the execution_count key for each cell in JSON what is required by GitHub renderer and what happens not to be the case if generating the notebook @@ -111,119 +80,3 @@ def test_jetbrains_bug_py_66491(notebook_filename): + " (could be due to a bug in PyCharm," + " see https://youtrack.jetbrains.com/issue/PY-66491 )" ) - - -def _preview_badge_markdown(absolute_path): - svg_badge_url = ( - "https://img.shields.io/static/v1?" - + "label=render%20on&logo=github&color=87ce3e&message=GitHub" - ) - link = ( - f"https://github.com/open-atmos/{_repo_path().name}/blob/main/" - + f"{_relative_path(absolute_path)}" - ) - return f"[![preview notebook]({svg_badge_url})]({link})" - - -def _mybinder_badge_markdown(abslute_path): - svg_badge_url = "https://mybinder.org/badge_logo.svg" - link = ( - f"https://mybinder.org/v2/gh/open-atmos/{_repo_path().name}.git/main?urlpath=lab/tree/" - + f"{_relative_path(abslute_path)}" - ) - return f"[![launch on mybinder.org]({svg_badge_url})]({link})" - - -def _colab_badge_markdown(absolute_path): - svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" - link = ( - f"https://colab.research.google.com/github/open-atmos/{_repo_path().name}/blob/main/" - + f"{_relative_path(absolute_path)}" - ) - return f"[![launch on Colab]({svg_badge_url})]({link})" - - -def test_first_cell_contains_three_badges(notebook_filename): - """checks if all notebooks feature Github preview, mybinder and Colab badges - (in the first cell)""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - assert len(nb.cells) > 0 - assert nb.cells[0].cell_type == "markdown" - lines = nb.cells[0].source.split("\n") - assert len(lines) == 3 - assert lines[0] == _preview_badge_markdown(notebook_filename) - assert lines[1] == _mybinder_badge_markdown(notebook_filename) - assert lines[2] == _colab_badge_markdown(notebook_filename) - - -def test_second_cell_is_a_markdown_cell(notebook_filename): - """checks if all notebooks have their second cell with some markdown - (hopefully clarifying what the example is about)""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - assert len(nb.cells) > 1 - assert nb.cells[1].cell_type == "markdown" - - -def test_third_cell_contains_colab_header(notebook_filename): - """checks if all notebooks feature a Colab-magic cell""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - assert len(nb.cells) > 2 - assert nb.cells[2].cell_type == "code" - assert nb.cells[2].source == COLAB_HEADER - - -def test_cell_contains_output(notebook_filename): - """checks if all notebook cells have an output present""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - for cell in nb.cells: - if cell.cell_type == "code" and cell.source != "": - assert cell.execution_count is not None - - -def test_show_plot_used_instead_of_matplotlib(notebook_filename): - """checks if plotting is done with open_atmos_jupyter_utils show_plot()""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - matplot_used = False - show_plot_used = False - for cell in nb.cells: - if cell.cell_type == "code": - if ( - "pyplot.show()" in cell.source - or "plt.show()" in cell.source - or "from matplotlib import pyplot" in cell.source - ): - matplot_used = True - if "show_plot()" in cell.source: - show_plot_used = True - if matplot_used and not show_plot_used: - raise AssertionError( - "if using matplotlib, please use open_atmos_jupyter_utils.show_plot()" - ) - - -def test_show_anim_used_instead_of_matplotlib(notebook_filename): - """checks if animation generation is done with open_atmos_jupyter_utils show_anim()""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - matplot_used = False - show_anim_used = False - for cell in nb.cells: - if cell.cell_type == "code": - if ( - "funcAnimation" in cell.source - or "matplotlib.animation" in cell.source - or "from matplotlib import animation" in cell.source - ): - matplot_used = True - if "show_anim()" in cell.source: - show_anim_used = True - if matplot_used and not show_anim_used: - raise AssertionError( - """if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""" - ) From 9902a892325b5a64a82f1621972715829c8917f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Sun, 24 Aug 2025 17:55:18 +0200 Subject: [PATCH 3/3] cleanup --- pre_commit_hooks/check_badges.py | 2 +- test_notebooks.py | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/pre_commit_hooks/check_badges.py b/pre_commit_hooks/check_badges.py index 4ff6c76..021deb4 100644 --- a/pre_commit_hooks/check_badges.py +++ b/pre_commit_hooks/check_badges.py @@ -5,7 +5,7 @@ import nbformat -from utils import find_files, relative_path, repo_path +from utils import relative_path, repo_path COLAB_HEADER = f"""import sys if 'google.colab' in sys.modules: diff --git a/test_notebooks.py b/test_notebooks.py index 1b3eadb..1f02ff9 100644 --- a/test_notebooks.py +++ b/test_notebooks.py @@ -11,13 +11,11 @@ import gc import os -import pathlib import warnings import nbformat import pint import pytest -from git.cmd import Git from .utils import find_files @@ -28,13 +26,6 @@ SI = pint.UnitRegistry() -COLAB_HEADER = f"""import sys -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{_repo_path().name}-examples')""" - - @pytest.fixture( params=find_files(file_extension=".ipynb"), name="notebook_filename",