diff --git a/README.md b/README.md
index 6244340..789c0b4 100644
--- a/README.md
+++ b/README.md
@@ -36,10 +36,12 @@ By using this template, your data science project is auto-generated as follows:
```
.
|-- notebooks # A directory to place all notebooks files.
-| `-- *.ipynb
+| |-- *.ipynb
+| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH
|-- setup.py # To pip install your Python module (if module name specified to cookiecutter)
|-- src
| |-- my_custom_module # Your custom module
+| |-- my_nb_color.py # Imported by *.ipynb to colorize their outputs
| `-- source_dir # You can further create this subdir for SageMaker entrypoint scripts
|-- tests # Unit tests
diff --git a/cookiecutter.json b/cookiecutter.json
index 2014bb2..893409e 100644
--- a/cookiecutter.json
+++ b/cookiecutter.json
@@ -8,9 +8,5 @@
"MIT License",
"Apache-2.0 License"
],
- "python_interpreter": [
- "python3",
- "python"
- ],
"package_name": ""
}
diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py
index 4916e98..38c047e 100644
--- a/hooks/post_gen_project.py
+++ b/hooks/post_gen_project.py
@@ -50,7 +50,7 @@ def rm(s: Path) -> None:
message.append("# - review LICENSE")
if package_name != "":
message += [
- "# - review and update setup.py, then remove the exception at the end.",
+ "# - review and update setup.py, then remove the exception at the start.",
"# - consider to adopt versioneer to version your package.",
]
message += [
diff --git a/{{cookiecutter.repo_name}}/README.md b/{{cookiecutter.repo_name}}/README.md
index 92708f6..209d8f9 100644
--- a/{{cookiecutter.repo_name}}/README.md
+++ b/{{cookiecutter.repo_name}}/README.md
@@ -8,14 +8,16 @@
{{cookiecutter.repo_name}}
|-- bin # CLI scripts
|-- notebooks
-| `-- *.ipynb # Jupyter notebooks
+| |-- *.ipynb # Jupyter notebooks
+| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH
{% if cookiecutter.package_name != "" -%}
|-- setup.py # To install {{cookiecutter.repo_name}} as a Python module
{% endif -%}
|-- src # Python modules developed in this project
{% if cookiecutter.package_name != "" -%}
-| `-- {{cookiecutter.repo_name}}
+| |-- {{cookiecutter.repo_name}}
{% endif -%}
+| `-- my_nb_color.py # Imported by *.ipynb to colorize their outputs
`-- tests # Unit tests
# Miscellaneous files
diff --git a/{{cookiecutter.repo_name}}/ipython_config.py b/{{cookiecutter.repo_name}}/ipython_config.py
deleted file mode 100644
index d97c322..0000000
--- a/{{cookiecutter.repo_name}}/ipython_config.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import os
-import subprocess
-from pathlib import Path
-from typing import Union
-
-####################################################################################################
-# Additional PYTHONPATH to allow notebooks to import custom modules at a few pre-defined places.
-
-
-def sys_path_append(o: Union[str, os.PathLike]) -> str:
- posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix()
- return 'sys.path.insert(0, "{}")'.format(posix_path)
-
-
-_pythonpath = [
- "import sys, os",
- sys_path_append(os.getcwd()),
-]
-
-# Add GIT_ROOT/ and a few other subdirs
-try:
- _p = subprocess.run(
- ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
- )
-
- if _p.returncode == 0:
- _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n'
- _git_root_p: Path = Path(_git_root)
- _pythonpath += [
- sys_path_append(_git_root_p), # GIT_ROOT
- sys_path_append(_git_root_p / "src"), # GIT_ROOT/src
- sys_path_append(_git_root_p / "notebooks"), # GIT_ROOT/notebooks
- ]
-except: # noqa: E722
- pass
-
-c.InteractiveShellApp.exec_lines = _pythonpath # type: ignore # noqa: F821
-####################################################################################################
diff --git a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py
new file mode 100644
index 0000000..0571245
--- /dev/null
+++ b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py
@@ -0,0 +1,69 @@
+"""Allow notebooks to import custom modules at a few pre-defined places within this project's
+git repository.
+
+When imported, adds ``GITROOT``, ``GITROOT/src``, and ``GITROOT/notebooks`` to `sys.path`.
+
+Place this file in the same directory as your ``.ipynb`` files. If ``.ipynb`` files are organized
+into subfolders, please ensure this file is present in each subfolder. Example:
+
+.. code-block:: bash
+
+ GITROOT
+ |-- .git # Signify this is a git repository
+ |-- notebooks # Parent folder of Jupyter notebooks
+ | |-- folder-a
+ | | |-- my_nb_path.py # Importable by nb-abc.ipynb and nb-xyz.ipynb
+ | | |-- nb-abc.ipynb
+ | | `-- nb-xyz.ipynb
+ | |-- my_nb_path.py # Importable by nb-01.ipynb and nb-02.ipynb
+ | |-- nb-01.ipynb
+ | `-- nb-02.ipynb
+ `-- src
+ `-- my_custom_module
+ |-- __init__.py
+ `-- ...
+
+Usage by ``.ipynb``:
+
+ >>> # Allow this notebook to import from GITROOT, GITROOT/src, and GITROOT/notebooks.
+ >>> # This module must be imported before importing any other custom modules under GITROOT.
+ >>> # The isort directive prevents the statement from being moved around when isort is used.
+ >>> import my_nb_path # isort: skip
+ >>>
+ >>> # Test-drive importing a custom module under GITROOT/src.
+ >>> import my_custom_module
+
+Background: we used to rely on ``ipython_config.py`` in the current working directory. However,
+IPython 8.0.1+, 7.31.1+ and 5.11+ disable this behavior for security reasons, as described
+[here](https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699).
+
+So now, each ``.ipynb`` must explicitly modify its own `sys.path`, which is what this module offers
+as a convenience.
+"""
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Union
+
+def sys_path_append(o: Union[str, os.PathLike]) -> None:
+ posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix()
+ if posix_path not in sys.path:
+ sys.path.insert(0, posix_path)
+
+# Add GIT_ROOT/ and a few other subdirs
+_p = subprocess.run(
+ ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+)
+
+if _p.returncode == 0:
+ _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n'
+ _git_root_p = Path(_git_root)
+
+ my_sys_paths = [
+ _git_root_p,
+ _git_root_p / "src",
+ _git_root_p / "notebooks",
+ ]
+ for sp in my_sys_paths:
+ sys_path_append(sp)
diff --git a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
index 11d7c3c..4ec4031 100644
--- a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
+++ b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
@@ -13,8 +13,9 @@
"- Best viewed using Jupyter Lab.\n",
"- The title is a styled sentence rather than `h1`, to prevent it being showed and numbered in TOC.\n",
"\n",
- "
NOTE: this skeleton notebook is primarily for reading. To run it\n",
- "completely, you need to install additional dependencies imported in the cell below.
"
+ "**NOTE:** this skeleton notebook is meant for reading. To run it,\n",
+ "please install the additional dependencies imported in the cell after next, which starts with the line\n",
+ "`# Dependencies required`."
]
},
{
@@ -28,12 +29,24 @@
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
- "# Follow isort>=5 style: 'import ...' statements before 'from ... import ...'.\n",
+ "# Make sure my_nb_path is imported first (and when isort is used, it needs to be told).\n",
+ "import my_nb_path # isort: skip\n",
+ "from my_nb_color import print, rprint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dependencies required\n",
"import ndpretty\n",
"import numpy as np\n",
"import pandas as pd\n",
"import sagemaker as sm\n",
"from IPython.display import Markdown\n",
+ "from loguru import logger\n",
"from smallmatter.ds import mask_df # See: https://github.com/aws-samples/smallmatter-package/\n",
"\n",
"# A few standard SageMaker's stanzas. Use type annotation to be verbose.\n",
@@ -145,6 +158,67 @@
"# Improved output"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Colored outputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Colored: \u001b[1m{\u001b[0m\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m, \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\u001b[1m}\u001b[0m\n",
+ "Colored and wrapped:\n",
+ "\u001b[1m{\u001b[0m\n",
+ " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n",
+ "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n",
+ "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n",
+ " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n",
+ "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n",
+ "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n",
+ "\u001b[1m}\u001b[0m\n",
+ "\n",
+ "\u001b[1m{\u001b[0m\n",
+ " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n",
+ " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n",
+ "\u001b[1m}\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2022-01-22 17:23:03.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[34m\u001b[1mHello World!\u001b[0m\n",
+ "\u001b[32m2022-01-22 17:23:03.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mHello World!\u001b[0m\n",
+ "\u001b[32m2022-01-22 17:23:03.531\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[32m\u001b[1mHello World!\u001b[0m\n",
+ "\u001b[32m2022-01-22 17:23:03.532\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[31m\u001b[1mHello World!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "d = {\"A\" * 200, \"B\" * 200}\n",
+ "print(\"Colored:\", d)\n",
+ "rprint(\"Colored and wrapped:\", d)\n",
+ "display(d)\n",
+ "\n",
+ "for f in (logger.debug, logger.info, logger.success, logger.error):\n",
+ " f(\"Hello World!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Dataframes"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -153,8 +227,8 @@
{
"data": {
"text/markdown": [
- "## Plain dataframe\n",
- "**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\""
+ "### Plain dataframe\n",
+ "**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\""
],
"text/plain": [
""
@@ -221,7 +295,7 @@
{
"data": {
"text/markdown": [
- "## Masked dataframe\n",
+ "### Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
@@ -310,17 +384,19 @@
")\n",
"df_b = pd.DataFrame(\n",
" {\n",
- " \"userid\": [1000, 2000, 3000],\n",
+ " \"userid\": [1000, 2000, 3000], # Illustration only. Usually read from somewhere.\n",
" \"pca_a\": [0.1, 0.2, 0.3],\n",
" \"pca_b\": [-0.3, 0.01, 0.7],\n",
" }\n",
")\n",
"\n",
"display(\n",
- " Markdown('## Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"'),\n",
+ " Markdown(\n",
+ " '### Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\"'\n",
+ " ),\n",
" df_a,\n",
" Markdown(\n",
- " \"\"\"## Masked dataframe\n",
+ " \"\"\"### Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
@@ -400,9 +476,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Environment (virtualenv_p39x)",
+ "display_name": "Environment (virtualenv_ds-p310)",
"language": "python",
- "name": "virtualenv_p39x"
+ "name": "virtualenv_ds-p310"
},
"language_info": {
"codemirror_mode": {
@@ -414,7 +490,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.2"
+ "version": "3.10.2"
},
"toc-autonumbering": true,
"toc-showcode": false,
diff --git a/{{cookiecutter.repo_name}}/setup.py b/{{cookiecutter.repo_name}}/setup.py
index 85a7ad6..46f5f88 100644
--- a/{{cookiecutter.repo_name}}/setup.py
+++ b/{{cookiecutter.repo_name}}/setup.py
@@ -1,3 +1,8 @@
+raise ValueError(
+ "Baseline setup.py from cookiecutter aws-samples/python-data-science-template. "
+ "Please review and modify accordingly, then remove this exception"
+)
+
import os
from typing import List
@@ -55,8 +60,3 @@ def read(fname) -> str:
python_requires=">=3.6.0",
install_requires=required_packages,
)
-
-raise ValueError(
- "Baseline setup.py from cookiecutter verdimrc/py-ds-template. "
- "Please review and modify accordingly, then remove this exception"
-)
diff --git a/{{cookiecutter.repo_name}}/src/my_nb_color.py b/{{cookiecutter.repo_name}}/src/my_nb_color.py
new file mode 100644
index 0000000..f13a467
--- /dev/null
+++ b/{{cookiecutter.repo_name}}/src/my_nb_color.py
@@ -0,0 +1,44 @@
+"""Convenience module to set up colored prints and logs in a Jupyter notebook.
+
+Dependencies: `loguru`, `rich`.
+
+Basic usage by an ``.ipynb``:
+
+ >>> # Colorize notebook outputs
+ >>> from my_nb_color import print, rprint, oprint
+ >>>
+ >>> # Test-drive different behavior of print functionalities
+ >>> d = {"A" * 200, "B" * 200}
+ >>> print("Colored:", d)
+ >>> rprint("Colored and wrapped:", d)
+ >>> oprint("Plain (i.e., Python's original):", d)
+ >>> display(d)
+ >>>
+ >>> # Test-drive loguru
+ >>> from loguru import logger
+ >>> for f in (logger.debug, logger.info, logger.success, logger.error):
+ ...     f("Hello World!")
+"""
+import sys
+
+
+# Try to set up rich.
+try:
+ import rich
+except ModuleNotFoundError:
+ print = rprint = oprint = print
+else:
+ oprint = print # In-case plain old behavior is needed
+ rich.reconfigure(force_terminal=True, force_jupyter=False)
+ rich.pretty.install()
+ print = rich.get_console().out
+ rprint = rich.get_console().print
+
+
+# Try to set up loguru.
+try:
+ from loguru import logger
+except ModuleNotFoundError:
+ pass
+else:
+ logger.configure(handlers=[dict(sink=sys.stderr, colorize=True)])