Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ By using this template, your data science project is auto-generated as follows:
```
.
|-- notebooks # A directory to place all notebooks files.
| `-- *.ipynb
| |-- *.ipynb
| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH
|-- setup.py # To pip install your Python module (if module name specified to cookiecutter)
|-- src
| |-- my_custom_module # Your custom module
| |-- my_nb_color.py # Imported by *.ipynb to colorize their outputs
| `-- source_dir # You can further create this subdir for SageMaker entrypoint scripts
|-- tests # Unit tests

Expand Down
4 changes: 0 additions & 4 deletions cookiecutter.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,5 @@
"MIT License",
"Apache-2.0 License"
],
"python_interpreter": [
"python3",
"python"
],
"package_name": ""
}
2 changes: 1 addition & 1 deletion hooks/post_gen_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def rm(s: Path) -> None:
message.append("# - review LICENSE")
if package_name != "":
message += [
"# - review and update setup.py, then remove the exception at the end.",
"# - review and update setup.py, then remove the exception at the start.",
"# - consider to adopt versioneer to version your package.",
]
message += [
Expand Down
6 changes: 4 additions & 2 deletions {{cookiecutter.repo_name}}/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
{{cookiecutter.repo_name}}
|-- bin # CLI scripts
|-- notebooks
| `-- *.ipynb # Jupyter notebooks
| |-- *.ipynb # Jupyter notebooks
| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH
{% if cookiecutter.package_name != "" -%}
|-- setup.py # To install {{cookiecutter.repo_name}} as a Python module
{% endif -%}
|-- src # Python modules developed in this project
{% if cookiecutter.package_name != "" -%}
| `-- {{cookiecutter.repo_name}}
| |-- {{cookiecutter.repo_name}}
{% endif -%}
| `-- my_nb_color.py # Imported by *.ipynb to colorize their outputs
`-- tests # Unit tests

# Miscellaneous files
Expand Down
38 changes: 0 additions & 38 deletions {{cookiecutter.repo_name}}/ipython_config.py

This file was deleted.

69 changes: 69 additions & 0 deletions {{cookiecutter.repo_name}}/notebooks/my_nb_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Allow notebooks to import custom modules at a few pre-defined places within this project's
git repository.

When imported, adds ``GITROOT``, ``GITROOT/src``, and ``GITROOT/notebooks`` to `sys.path`.

Place this file in the same directory as your ``.ipynb`` files. If ``.ipynb`` files are organized
into subfolders, please ensure a copy of this file is present in each subfolder. Example:

.. code-block:: bash

GITROOT
|-- .git # Signify this is a git repository
|-- notebooks # Parent folder of Jupyter notebooks
| |-- folder-a
| | |-- my_nb_path.py # Importable by nb-abc.ipynb and nb-xyz.ipynb
| | |-- nb-abc.ipynb
| | `-- nb-xyz.ipynb
| |-- my_nb_path.py # Importable by nb-01.ipynb and nb-02.ipynb
| |-- nb-01.ipynb
| `-- nb-02.ipynb
`-- src
`-- my_custom_module
|-- __init__.py
`-- ...

Usage by ``.ipynb``:

>>> # Allow this notebook to import from GITROOT, GITROOT/src, and GITROOT/notebooks.
>>> # This module must be imported before importing any other custom modules under GITROOT.
>>> # The isort directive prevents the statement from being moved around when isort is used.
>>> import my_nb_path # isort: skip
>>>
>>> # Test-drive importing a custom module under GITROOT/src.
>>> import my_custom_module

Background: we used to rely on ``ipython_config.py`` in the current working directory. However,
IPython 8.0.1+, 7.31.1+ and 5.11+ disable this behavior for security reasons, as described
[here](https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699).

So now, each ``.ipynb`` must explicitly modify its own `sys.path`, which is what this module offers
as a convenience.
"""
import os
import subprocess
import sys
from pathlib import Path
from typing import Union

def sys_path_append(o: Union[str, os.PathLike]) -> None:
    """Put ``o`` at the front of `sys.path` unless it is already listed.

    Despite the name, the entry is inserted at index 0 rather than appended, so the most recently
    added directory is searched first.

    Args:
        o: Directory to make importable, given as a string or a path-like object. It is stored in
            `sys.path` in its POSIX (forward-slash) string form.
    """
    # Path() accepts str, os.PathLike and Path alike, so no isinstance() branching is needed.
    posix_path: str = Path(o).as_posix()
    if posix_path not in sys.path:
        sys.path.insert(0, posix_path)

# Add GIT_ROOT/ and a few other subdirs.
#
# Ask git for the top-level directory of the repository that contains the current working
# directory. This is best-effort: when git is unavailable, or the working directory is not inside
# a git repository, `sys.path` is left untouched.
try:
    _p = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE,
        # Keep stderr out of stdout, otherwise any warning printed by git would end up in the path.
        stderr=subprocess.DEVNULL,
    )
except OSError:
    # The git executable could not be started (e.g., not installed); importing this module must
    # not fail because of that.
    _p = None

if _p is not None and _p.returncode == 0:
    # Drop the trailing line terminator, whichever form it takes.
    _git_root: str = _p.stdout.decode("utf-8").rstrip("\r\n")
    _git_root_p = Path(_git_root)

    # Each entry is inserted at the front of sys.path, so the last one listed is searched first.
    my_sys_paths = [
        _git_root_p,
        _git_root_p / "src",
        _git_root_p / "notebooks",
    ]
    for sp in my_sys_paths:
        sys_path_append(sp)
100 changes: 88 additions & 12 deletions {{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
"- Best viewed using Jupyter Lab.\n",
"- The title is a styled sentence rather than `h1`, to prevent it being showed and numbered in TOC.\n",
"\n",
"<div style='color:firebrick'><b>NOTE:</b> this skeleton notebook is primarily for reading. To run it\n",
"completely, you need to install additional dependencies imported in the cell below.</div><br>"
"<font style='color:firebrick'>**NOTE:** this skeleton notebook is meant for reading. To run it,\n",
"please install additional dependencies imported in the second next cell which starts with line\n",
"`# Dependencies required`.</font>"
]
},
{
Expand All @@ -28,12 +29,24 @@
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"# Follow isort>=5 style: 'import ...' statements before 'from ... import ...'.\n",
"# Make sure my_nb_path is imported first (and when isort is used, it needs to be told).\n",
"import my_nb_path # isort: skip\n",
"from my_nb_color import print, rprint"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dependencies required\n",
"import ndpretty\n",
"import numpy as np\n",
"import pandas as pd\n",
"import sagemaker as sm\n",
"from IPython.display import Markdown\n",
"from loguru import logger\n",
"from smallmatter.ds import mask_df # See: https://github.com/aws-samples/smallmatter-package/\n",
"\n",
"# A few standard SageMaker's stanzas. Use type annotation to be verbose.\n",
Expand Down Expand Up @@ -145,6 +158,67 @@
"# Improved output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Colored outputs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Colored: \u001b[1m{\u001b[0m\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m, \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\u001b[1m}\u001b[0m\n",
"Colored and wrapped:\n",
"\u001b[1m{\u001b[0m\n",
" \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n",
"\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n",
"\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n",
" \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n",
"\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n",
"\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n",
"\u001b[1m}\u001b[0m\n",
"\n",
"\u001b[1m{\u001b[0m\n",
" \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n",
" \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n",
"\u001b[1m}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2022-01-22 17:23:03.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[34m\u001b[1mHello World!\u001b[0m\n",
"\u001b[32m2022-01-22 17:23:03.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mHello World!\u001b[0m\n",
"\u001b[32m2022-01-22 17:23:03.531\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[32m\u001b[1mHello World!\u001b[0m\n",
"\u001b[32m2022-01-22 17:23:03.532\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[31m\u001b[1mHello World!\u001b[0m\n"
]
}
],
"source": [
"d = {\"A\" * 200, \"B\" * 200}\n",
"print(\"Colored:\", d)\n",
"rprint(\"Colored and wrapped:\", d)\n",
"display(d)\n",
"\n",
"for f in (logger.debug, logger.info, logger.success, logger.error):\n",
" f(\"Hello World!\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataframes"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -153,8 +227,8 @@
{
"data": {
"text/markdown": [
"## Plain dataframe\n",
"**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\""
"### Plain dataframe\n",
"**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\""
],
"text/plain": [
"<IPython.core.display.Markdown object>"
Expand Down Expand Up @@ -221,7 +295,7 @@
{
"data": {
"text/markdown": [
"## Masked dataframe\n",
"### Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
Expand Down Expand Up @@ -310,17 +384,19 @@
")\n",
"df_b = pd.DataFrame(\n",
" {\n",
" \"userid\": [1000, 2000, 3000],\n",
" \"userid\": [1000, 2000, 3000], # Illustration only. Usually read from somewhere.\n",
" \"pca_a\": [0.1, 0.2, 0.3],\n",
" \"pca_b\": [-0.3, 0.01, 0.7],\n",
" }\n",
")\n",
"\n",
"display(\n",
" Markdown('## Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"'),\n",
" Markdown(\n",
" '### Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\"'\n",
" ),\n",
" df_a,\n",
" Markdown(\n",
" \"\"\"## Masked dataframe\n",
" \"\"\"### Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
Expand Down Expand Up @@ -400,9 +476,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Environment (virtualenv_p39x)",
"display_name": "Environment (virtualenv_ds-p310)",
"language": "python",
"name": "virtualenv_p39x"
"name": "virtualenv_ds-p310"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -414,7 +490,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"version": "3.10.2"
},
"toc-autonumbering": true,
"toc-showcode": false,
Expand Down
10 changes: 5 additions & 5 deletions {{cookiecutter.repo_name}}/setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
raise ValueError(
"Baseline setup.py from cookiecutter aws-samples/python-data-science-template. "
"Please review and modify accordingly, then remove this exception"
)

import os
from typing import List

Expand Down Expand Up @@ -55,8 +60,3 @@ def read(fname) -> str:
python_requires=">=3.6.0",
install_requires=required_packages,
)

raise ValueError(
"Baseline setup.py from cookiecutter verdimrc/py-ds-template. "
"Please review and modify accordingly, then remove this exception"
)
44 changes: 44 additions & 0 deletions {{cookiecutter.repo_name}}/src/my_nb_color.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Convenience module to setup color prints and logs in a Jupyter notebook.

Dependencies: `loguru`, `rich`.

Basic usage by an ``.ipynb``:

>>> # Colorize notebook outputs
>>> from my_nb_color import print, rprint, oprint
>>>
>>> # Test-drive different behavior of print functionalities
>>> d = {"A" * 200, "B" * 200}
>>> print("Colored:", d)
>>> rprint("Colored and wrapped:", d)
>>> oprint("Plain (i.e., Python's original):", d)
>>> display(d)
>>>
>>> # Test-drive loguru
>>> from loguru import logger
>>> for f in (logger.debug, logger.info, logger.success, logger.error):
...     f("Hello World!")
"""
import sys


# Try to set up rich. It is an optional dependency: without it, ``print``, ``rprint`` and
# ``oprint`` are all the builtin ``print``.
try:
    # Import the submodule explicitly. A bare ``import rich`` does not by itself guarantee that
    # the ``rich.pretty`` attribute used below has been loaded.
    import rich.pretty
except ModuleNotFoundError:
    print = rprint = oprint = print
else:
    oprint = print  # In case the plain old behavior is needed
    rich.reconfigure(force_terminal=True, force_jupyter=False)
    rich.pretty.install()
    # See the module docstring: ``print`` gives colored output, ``rprint`` colored and wrapped.
    print = rich.get_console().out
    rprint = rich.get_console().print


# Try to set up loguru: when it is installed, replace its handlers with a single colorized
# handler that writes to stderr.
try:
    from loguru import logger
except ModuleNotFoundError:
    pass  # loguru is optional; nothing to configure without it
else:
    _stderr_handler = {"sink": sys.stderr, "colorize": True}
    logger.configure(handlers=[_stderr_handler])