From 5d073669defa9ea91797d178ed26edfa180778ae Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sat, 22 Jan 2022 16:54:51 +0800 Subject: [PATCH 1/7] Added my_nb_path.py and my_nb_color.py my_nb_path.py: to reinstate the ability for notebooks to import custom modules within the git repository -- the ability which was disabled by PR #5. my_nb_color.py: colorize notebook outputs on a best-effort basis. Depends on rich and loguru. --- .../notebooks/my_nb_path.py | 71 +++++++++++++++++++ {{cookiecutter.repo_name}}/src/my_nb_color.py | 69 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 {{cookiecutter.repo_name}}/notebooks/my_nb_path.py create mode 100644 {{cookiecutter.repo_name}}/src/my_nb_color.py diff --git a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py new file mode 100644 index 0000000..873d157 --- /dev/null +++ b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py @@ -0,0 +1,71 @@ +"""Allow notebooks to import custom modules at a few pre-defined places within this project's +git repository. + +When imported, adds ``GITROOT``, ``GITROOT/src/``, and ``GITROOT/notebooks`` to `sys.path`. + +Place this file in the same directory as your ``.ipynb`` files. If ``.ipynb`` files are organized +into subfolders, please ensure this file is presented in each subfolder. Example: + +.. code-block:: bash + + GITROOT + |-- .git # GITROOT/ must be a git repository + |-- notebooks # Jupyter notebooks + | |-- folder-a + | | |-- my_nb_path.py # Importable by *.ipynb in this subfolder + | | |-- nb-abc.ipynb + | | `-- nb-xyz.ipynb + | |-- my_nb_path.py # Importable by *.ipynb in this subfolder + | |-- nb-01.ipynb + | `-- nb-02.ipynb + `-- src + `-- my_custom_module + |-- __init__.py + `-- ... + +Usage by ``.ipynb``: + +.. code-block:: python + + # Allow this notebook to import from ``GITROOT/``, ``GITROOT/src/``, and + # ``GITROOT/notebooks``. 
+ import my_nb_path + + # Test-drive importing a custom module under ``GITROOT/src/``. + import my_custom_module + +Background: we used to rely on ``ipython_config.py`` in the current working directory files. +However, IPython 8.0.1, 7.31.1 and 5.11 onwards disable this approach to prevent potential +Execution with Unnecessary Privileges, as described +[here](https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699). + +So now, each ``.ipynb`` must explicitly modify its own `sys.path`, and this module is provided for +convenience of writing such a logic. +""" +import os +import subprocess +import sys +from pathlib import Path +from typing import Union + +def sys_path_append(o: Union[str, os.PathLike]) -> None: + posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix() + if posix_path not in sys.path: + sys.path.insert(0, posix_path) + +# Add GIT_ROOT/ and a few other subdirs +_p = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT +) + +if _p.returncode == 0: + _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n' + _git_root_p = Path(_git_root) + + my_sys_paths = [ + _git_root_p, + _git_root_p / "src", + _git_root_p / "notebooks", + ] + for sp in my_sys_paths: + sys_path_append(sp) diff --git a/{{cookiecutter.repo_name}}/src/my_nb_color.py b/{{cookiecutter.repo_name}}/src/my_nb_color.py new file mode 100644 index 0000000..87efe53 --- /dev/null +++ b/{{cookiecutter.repo_name}}/src/my_nb_color.py @@ -0,0 +1,69 @@ +"""Convenience module to setup color prints and logs in a Jupyter notebook. + +Dependencies: `loguru`, `rich`. 
+ +Basic usage by an ``.ipynb``: + + >>> # Colorize notebook outputs + >>> from my_nb_color import print, rprint, oprint + >>> + >>> # Test-drive different behavior of print functionalities + >>> d = {"A" * 200, "B" * 200} + >>> print("Colored:", d) + >>> rprint("Colored and wrapped:", d) + >>> oprint("Plain (i.e., Python's original):", d) + >>> d # Or: display(d) + >>> + >>> # Test-drive loguru + >>> from loguru import logger + >>> for f in (logger.debug, logger.info, logger.success, logger.error): + >>> f("Hello World!") + + +Robust usage by an ``.ipynb``, typically when the notebook file needs to run in an environment where +colors are not desired or dependencies are not installed: + + >>> # Best-effort to colorize notebook outputs + >>> try: + >>> from my_nb_color import print, rprint, oprint + >>> except ModuleNotFoundError: + >>> pass + >>> + >>> # Test-drive different behavior of print functionalities + >>> d = {"A" * 200, "B" * 200} + >>> print("Colored:", d) + >>> rprint("Colored and wrapped:", d) + >>> oprint("Plain (i.e., Python's original):", d) + >>> d # Or: display(d) +""" +# Try to setup rich. +try: + import rich +except ModuleNotFoundError: + print = rprint = oprint = print +else: + oprint = print # In-case needed + + rich.reconfigure(force_terminal=True, force_jupyter=False) + rich.pretty.install() + print = rich.get_console().out + rprint = rich.get_console().print + + +# Try to setup loguru. 
+try: + from loguru import logger +except ModuleNotFoundError: + pass +else: + try: + get_ipython() + except NameError: + colorize = None + else: + colorize = True + + import sys + + logger.configure(handlers=[dict(sink=sys.stderr, colorize=colorize)]) + del colorize From af04fe8f7429ec7bcc2f899391e44a20c0f5dddf Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sat, 22 Jan 2022 17:26:15 +0800 Subject: [PATCH 2/7] Updated the example notebook with custom sys paths and color outputs --- .../notebooks/skeleton.ipynb | 78 +++++++++++++++++-- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb index 11d7c3c..3b736d9 100644 --- a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb +++ b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb @@ -13,8 +13,9 @@ "- Best viewed using Jupyter Lab.\n", "- The title is a styled sentence rather than `h1`, to prevent it being showed and numbered in TOC.\n", "\n", - "
NOTE: this skeleton notebook is primarily for reading. To run it\n", - "completely, you need to install additional dependencies imported in the cell below.

" + "**NOTE:** this skeleton notebook is meant for reading. To run it,\n", + "please install additional dependencies imported in the second next cell which starts with line\n", + "`# Dependencies required`." ] }, { @@ -28,12 +29,23 @@ "%load_ext autoreload\n", "%autoreload 2\n", "\n", - "# Follow isort>=5 style: 'import ...' statements before 'from ... import ...'.\n", + "import my_nb_path\n", + "from my_nb_color import print, rprint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dependencies required\n", "import ndpretty\n", "import numpy as np\n", "import pandas as pd\n", "import sagemaker as sm\n", "from IPython.display import Markdown\n", + "from loguru import logger\n", "from smallmatter.ds import mask_df # See: https://github.com/aws-samples/smallmatter-package/\n", "\n", "# A few standard SageMaker's stanzas. Use type annotation to be verbose.\n", @@ -359,6 +371,60 @@ "# NOTE: the rendered output won't persist." 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Colored outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Colored: \u001b[1m{\u001b[0m\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m, \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\u001b[1m}\u001b[0m\n", + "Colored and wrapped:\n", + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n", + "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n", + "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n", + " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n", + "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n", + "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n", + "\n", + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n", + " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"\u001b[32m2022-01-22 17:23:03.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[34m\u001b[1mHello World!\u001b[0m\n", + "\u001b[32m2022-01-22 17:23:03.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mHello World!\u001b[0m\n", + "\u001b[32m2022-01-22 17:23:03.531\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[32m\u001b[1mHello World!\u001b[0m\n", + "\u001b[32m2022-01-22 17:23:03.532\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[31m\u001b[1mHello World!\u001b[0m\n" + ] + } + ], + "source": [ + "d = {\"A\" * 200, \"B\" * 200}\n", + "print(\"Colored:\", d)\n", + "rprint(\"Colored and wrapped:\", d)\n", + "display(d)\n", + "\n", + "for f in (logger.debug, logger.info, logger.success, logger.error):\n", + " f(\"Hello World!\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -400,9 +466,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Environment (virtualenv_p39x)", + "display_name": "Environment (virtualenv_ds-p310)", "language": "python", - "name": "virtualenv_p39x" + "name": "virtualenv_ds-p310" }, "language_info": { "codemirror_mode": { @@ -414,7 +480,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.10.2" }, "toc-autonumbering": true, "toc-showcode": false, From 485355a47fbb41d153a364cbed7efa6f8e79121b Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sun, 23 Jan 2022 00:14:04 +0800 Subject: [PATCH 3/7] Reorganized sample notebook --- .../notebooks/skeleton.ipynb | 132 ++++++++++-------- 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb index 
3b736d9..4ec4031 100644 --- a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb +++ b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb @@ -29,7 +29,8 @@ "%load_ext autoreload\n", "%autoreload 2\n", "\n", - "import my_nb_path\n", + "# Make sure my_nb_path is imported first (and when isort is used, it needs to be told).\n", + "import my_nb_path # isort: skip\n", "from my_nb_color import print, rprint" ] }, @@ -157,6 +158,67 @@ "# Improved output" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Colored outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Colored: \u001b[1m{\u001b[0m\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m, \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\u001b[1m}\u001b[0m\n", + "Colored and wrapped:\n", + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n", + "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n", + "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n", + " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n", + "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n", + "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n", + "\n", + "\u001b[1m{\u001b[0m\n", + " 
\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n", + " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2022-01-22 17:23:03.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[34m\u001b[1mHello World!\u001b[0m\n", + "\u001b[32m2022-01-22 17:23:03.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mHello World!\u001b[0m\n", + "\u001b[32m2022-01-22 17:23:03.531\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[32m\u001b[1mHello World!\u001b[0m\n", + "\u001b[32m2022-01-22 17:23:03.532\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[31m\u001b[1mHello World!\u001b[0m\n" + ] + } + ], + "source": [ + "d = {\"A\" * 200, \"B\" * 200}\n", + "print(\"Colored:\", d)\n", + "rprint(\"Colored and wrapped:\", d)\n", + "display(d)\n", + "\n", + "for f in (logger.debug, logger.info, logger.success, logger.error):\n", + " f(\"Hello World!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataframes" + ] + }, { "cell_type": "code", "execution_count": null, @@ -165,8 +227,8 @@ { "data": { "text/markdown": [ - "## Plain dataframe\n", - "**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"" + "### Plain dataframe\n", + "**NOTE:** this also appears in TOC as \"*2.2.1. 
Plain dataframe*\"" ], "text/plain": [ "" @@ -233,7 +295,7 @@ { "data": { "text/markdown": [ - "## Masked dataframe\n", + "### Masked dataframe\n", "Sometime, we would like to version the output of this cell into the git repo, to help readers to\n", "quickly see the shape of a dataframe.\n", "\n", @@ -322,17 +384,19 @@ ")\n", "df_b = pd.DataFrame(\n", " {\n", - " \"userid\": [1000, 2000, 3000],\n", + " \"userid\": [1000, 2000, 3000], # Illustration only. Usually read from somewhere.\n", " \"pca_a\": [0.1, 0.2, 0.3],\n", " \"pca_b\": [-0.3, 0.01, 0.7],\n", " }\n", ")\n", "\n", "display(\n", - " Markdown('## Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"'),\n", + " Markdown(\n", + " '### Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\"'\n", + " ),\n", " df_a,\n", " Markdown(\n", - " \"\"\"## Masked dataframe\n", + " \"\"\"### Masked dataframe\n", "Sometime, we would like to version the output of this cell into the git repo, to help readers to\n", "quickly see the shape of a dataframe.\n", "\n", @@ -371,60 +435,6 @@ "# NOTE: the rendered output won't persist." 
] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Colored outputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Colored: \u001b[1m{\u001b[0m\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m, \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\u001b[1m}\u001b[0m\n", - "Colored and wrapped:\n", - "\u001b[1m{\u001b[0m\n", - " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n", - "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n", - "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n", - " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n", - "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n", - "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n", - "\n", - "\u001b[1m{\u001b[0m\n", - " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n", - " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"\u001b[32m2022-01-22 17:23:03.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[34m\u001b[1mHello World!\u001b[0m\n", - "\u001b[32m2022-01-22 17:23:03.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mHello World!\u001b[0m\n", - "\u001b[32m2022-01-22 17:23:03.531\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[32m\u001b[1mHello World!\u001b[0m\n", - "\u001b[32m2022-01-22 17:23:03.532\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[31m\u001b[1mHello World!\u001b[0m\n" - ] - } - ], - "source": [ - "d = {\"A\" * 200, \"B\" * 200}\n", - "print(\"Colored:\", d)\n", - "rprint(\"Colored and wrapped:\", d)\n", - "display(d)\n", - "\n", - "for f in (logger.debug, logger.info, logger.success, logger.error):\n", - " f(\"Hello World!\")" - ] - }, { "cell_type": "markdown", "metadata": {}, From 8042c21fc98e1c6f417c10e93238697c7cc4e803 Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sun, 23 Jan 2022 00:36:33 +0800 Subject: [PATCH 4/7] Updated my_nb_path.py and my_nb_color.py --- .../notebooks/my_nb_path.py | 32 ++++++++--------- {{cookiecutter.repo_name}}/src/my_nb_color.py | 35 +++---------------- 2 files changed, 20 insertions(+), 47 deletions(-) diff --git a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py index 873d157..2fce3b8 100644 --- a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py +++ b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py @@ -1,7 +1,7 @@ """Allow notebooks to import custom modules at a few pre-defined places within this project's git repository. -When imported, adds ``GITROOT``, ``GITROOT/src/``, and ``GITROOT/notebooks`` to `sys.path`. 
+When imported, adds ``GITROOT``, ``GITROOT/src``, and ``GITROOT/notebooks`` to `sys.path`. Place this file in the same directory as your ``.ipynb`` files. If ``.ipynb`` files are organized into subfolders, please ensure this file is presented in each subfolder. Example: @@ -9,13 +9,13 @@ .. code-block:: bash GITROOT - |-- .git # GITROOT/ must be a git repository - |-- notebooks # Jupyter notebooks + |-- .git # Signify this is a git repository + |-- notebooks # Parent folder of Jupyter notebooks | |-- folder-a - | | |-- my_nb_path.py # Importable by *.ipynb in this subfolder + | | |-- my_nb_path.py # Importable by nb-abc.ipynb and nb-xyz.ipynb | | |-- nb-abc.ipynb | | `-- nb-xyz.ipynb - | |-- my_nb_path.py # Importable by *.ipynb in this subfolder + | |-- my_nb_path.py # Importable by nb-01.ipynb and nb-02.ipynb | |-- nb-01.ipynb | `-- nb-02.ipynb `-- src @@ -25,22 +25,20 @@ Usage by ``.ipynb``: -.. code-block:: python - - # Allow this notebook to import from ``GITROOT/``, ``GITROOT/src/``, and - # ``GITROOT/notebooks``. - import my_nb_path - - # Test-drive importing a custom module under ``GITROOT/src/``. - import my_custom_module + >>> # Allow this notebook to import from GITROOT, GITROOT/src, and GITROOT/notebooks. + >>> # This module must be imported before importing any other custom modules under GITROOT. + >>> # The isort directive prevents the statement to be moved around when isort is used. + >>> import my_nb_path # isort: skip + >>> + >>> # Test-drive importing a custom module under GITROOT/src. + >>> import my_custom_module Background: we used to rely on ``ipython_config.py`` in the current working directory files. -However, IPython 8.0.1, 7.31.1 and 5.11 onwards disable this approach to prevent potential -Execution with Unnecessary Privileges, as described +However, IPython 8.0.1+, 7.31.1+ and 5.11+ disable this behavior for security reason as described [here](https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699). 
-So now, each ``.ipynb`` must explicitly modify its own `sys.path`, and this module is provided for -convenience of writing such a logic. +So now, each ``.ipynb`` must explicitly modify its own `sys.path` which is what this module offers +as convenience. """ import os import subprocess diff --git a/{{cookiecutter.repo_name}}/src/my_nb_color.py b/{{cookiecutter.repo_name}}/src/my_nb_color.py index 87efe53..f13a467 100644 --- a/{{cookiecutter.repo_name}}/src/my_nb_color.py +++ b/{{cookiecutter.repo_name}}/src/my_nb_color.py @@ -12,38 +12,23 @@ >>> print("Colored:", d) >>> rprint("Colored and wrapped:", d) >>> oprint("Plain (i.e., Python's original):", d) - >>> d # Or: display(d) + >>> display(d) >>> >>> # Test-drive loguru >>> from loguru import logger >>> for f in (logger.debug, logger.info, logger.success, logger.error): >>> f("Hello World!") +""" +import sys -Robust usage by an ``.ipynb``, typically when the notebook file needs to run in an environment where -colors are not desired or dependencies are not installed: - - >>> # Best-effort to colorize notebook outputs - >>> try: - >>> from my_nb_color import print, rprint, oprint - >>> except ModuleNotFoundError: - >>> pass - >>> - >>> # Test-drive different behavior of print functionalities - >>> d = {"A" * 200, "B" * 200} - >>> print("Colored:", d) - >>> rprint("Colored and wrapped:", d) - >>> oprint("Plain (i.e., Python's original):", d) - >>> d # Or: display(d) -""" # Try to setup rich. 
try: import rich except ModuleNotFoundError: print = rprint = oprint = print else: - oprint = print # In-case needed - + oprint = print # In-case plain old behavior is needed rich.reconfigure(force_terminal=True, force_jupyter=False) rich.pretty.install() print = rich.get_console().out @@ -56,14 +41,4 @@ except ModuleNotFoundError: pass else: - try: - get_ipython() - except NameError: - colorize = None - else: - colorize = True - - import sys - - logger.configure(handlers=[dict(sink=sys.stderr, colorize=colorize)]) - del colorize + logger.configure(handlers=[dict(sink=sys.stderr, colorize=True)]) From ecdb6d88c8adf1d91be551dca5e7046b5f9dc936 Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sun, 23 Jan 2022 00:46:13 +0800 Subject: [PATCH 5/7] Updated README.md with my_nb_*.py --- README.md | 4 +++- {{cookiecutter.repo_name}}/README.md | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6244340..789c0b4 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,12 @@ By using this template, your data science project is auto-generated as follows: ``` . |-- notebooks # A directory to place all notebooks files. 
-| `-- *.ipynb +| |-- *.ipynb +| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH |-- setup.py # To pip install your Python module (if module name specified to cookiecutter) |-- src | |-- my_custom_module # Your custom module +| |-- my_nb_color.py # Imported by *.ipynb to colorize their outputs | `-- source_dir # You can further create this subdir for SageMaker entrypoint scripts |-- tests # Unit tests diff --git a/{{cookiecutter.repo_name}}/README.md b/{{cookiecutter.repo_name}}/README.md index 92708f6..209d8f9 100644 --- a/{{cookiecutter.repo_name}}/README.md +++ b/{{cookiecutter.repo_name}}/README.md @@ -8,14 +8,16 @@ {{cookiecutter.repo_name}} |-- bin # CLI scripts |-- notebooks -| `-- *.ipynb # Jupyter notebooks +| |-- *.ipynb # Jupyter notebooks +| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH {% if cookiecutter.package_name != "" -%} |-- setup.py # To install {{cookiecutter.repo_name}} as a Python module {% endif -%} |-- src # Python modules developed in this project {% if cookiecutter.package_name != "" -%} -| `-- {{cookiecutter.repo_name}} +| |-- {{cookiecutter.repo_name}} {% endif -%} +| `-- my_nb_color.py # Imported by *.ipynb to colorize their outputs `-- tests # Unit tests # Miscellaneous files From 6265692ecf7db3be7b5a85726eb5fa5f2383c595 Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sun, 23 Jan 2022 00:53:01 +0800 Subject: [PATCH 6/7] Multiple updates - removed python_interpreter question - removed left-over ipython_config.py - unreviewed setup.py immediately raises exception --- cookiecutter.json | 4 --- hooks/post_gen_project.py | 2 +- {{cookiecutter.repo_name}}/ipython_config.py | 38 -------------------- {{cookiecutter.repo_name}}/setup.py | 10 +++--- 4 files changed, 6 insertions(+), 48 deletions(-) delete mode 100644 {{cookiecutter.repo_name}}/ipython_config.py diff --git a/cookiecutter.json b/cookiecutter.json index 2014bb2..893409e 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -8,9 +8,5 
@@ "MIT License", "Apache-2.0 License" ], - "python_interpreter": [ - "python3", - "python" - ], "package_name": "" } diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index 4916e98..38c047e 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -50,7 +50,7 @@ def rm(s: Path) -> None: message.append("# - review LICENSE") if package_name != "": message += [ - "# - review and update setup.py, then remove the exception at the end.", + "# - review and update setup.py, then remove the exception at the start.", "# - consider to adopt versioneer to version your package.", ] message += [ diff --git a/{{cookiecutter.repo_name}}/ipython_config.py b/{{cookiecutter.repo_name}}/ipython_config.py deleted file mode 100644 index d97c322..0000000 --- a/{{cookiecutter.repo_name}}/ipython_config.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import subprocess -from pathlib import Path -from typing import Union - -#################################################################################################### -# Additional PYTHONPATH to allow notebooks to import custom modules at a few pre-defined places. 
- - -def sys_path_append(o: Union[str, os.PathLike]) -> str: - posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix() - return 'sys.path.insert(0, "{}")'.format(posix_path) - - -_pythonpath = [ - "import sys, os", - sys_path_append(os.getcwd()), -] - -# Add GIT_ROOT/ and a few other subdirs -try: - _p = subprocess.run( - ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - - if _p.returncode == 0: - _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n' - _git_root_p: Path = Path(_git_root) - _pythonpath += [ - sys_path_append(_git_root_p), # GIT_ROOT - sys_path_append(_git_root_p / "src"), # GIT_ROOT/src - sys_path_append(_git_root_p / "notebooks"), # GIT_ROOT/notebooks - ] -except: # noqa: E722 - pass - -c.InteractiveShellApp.exec_lines = _pythonpath # type: ignore # noqa: F821 -#################################################################################################### diff --git a/{{cookiecutter.repo_name}}/setup.py b/{{cookiecutter.repo_name}}/setup.py index 85a7ad6..46f5f88 100644 --- a/{{cookiecutter.repo_name}}/setup.py +++ b/{{cookiecutter.repo_name}}/setup.py @@ -1,3 +1,8 @@ +raise ValueError( + "Baseline setup.py from cookiecutter aws-samples/python-data-science-template. " + "Please review and modify accordingly, then remove this exception" +) + import os from typing import List @@ -55,8 +60,3 @@ def read(fname) -> str: python_requires=">=3.6.0", install_requires=required_packages, ) - -raise ValueError( - "Baseline setup.py from cookiecutter verdimrc/py-ds-template. 
" - "Please review and modify accordingly, then remove this exception" -) From e8191adf3d5bf4e8066660e101dcaf5b4562b570 Mon Sep 17 00:00:00 2001 From: Verdi March Date: Sun, 23 Jan 2022 01:00:35 +0800 Subject: [PATCH 7/7] Update my_nb_path.py --- {{cookiecutter.repo_name}}/notebooks/my_nb_path.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py index 2fce3b8..0571245 100644 --- a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py +++ b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py @@ -33,8 +33,8 @@ >>> # Test-drive importing a custom module under GITROOT/src. >>> import my_custom_module -Background: we used to rely on ``ipython_config.py`` in the current working directory files. -However, IPython 8.0.1+, 7.31.1+ and 5.11+ disable this behavior for security reason as described +Background: we used to rely on ``ipython_config.py`` in the current working directory. However, +IPython 8.0.1+, 7.31.1+ and 5.11+ disable this behavior for security reason as described [here](https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699). So now, each ``.ipynb`` must explicitly modify its own `sys.path` which is what this module offers