Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ By using this template, your data science project is auto-generated as follows:
```
.
|-- notebooks # A directory to place all notebook files.
| |-- *.ipynb
| `-- ipython_config.py # IPython magic to let *.ipynb treat src/ as PYTHONPATH
| `-- *.ipynb
|-- setup.py # To pip install your Python module (if module name specified to cookiecutter)
|-- src
| |-- my_custom_module # Your custom module
Expand All @@ -52,7 +51,6 @@ By using this template, your data science project is auto-generated as follows:
|-- .vscenv # Sample dot env with PYTHONPATH config (for IDEs/editors that support this)
|-- LICENSE # Boilerplate (auto-generated content based on what is specified to cookiecutter)
|-- README.md # Template for you to customize
|-- ipython_config.py # Sample copy of ipython_config.py (same as notebook/ipython_config.py)
|-- pyproject.toml # Sample setting for Python code formatter
`-- tox.ini # Sample configurations for Python toolchains
```
Expand Down
2 changes: 1 addition & 1 deletion hooks/post_gen_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def rm(s: Path) -> None:
"#",
"# Recommended next steps:",
f"# - cd {cwd}",
"# - git init (this is needed for the ipython_config.py magic to work)",
"# - git init",
"# - pre-commit autoupdate",
"# - pre-commit install",
"# - review README.md",
Expand Down
17 changes: 14 additions & 3 deletions {{cookiecutter.repo_name}}/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
rev: v4.0.1
hooks:
- id: check-json
- id: check-merge-conflict
Expand All @@ -13,16 +13,27 @@ repos:
- id: detect-private-key
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/asottile/pyupgrade
rev: v2.29.1
hooks:
- id: pyupgrade
- repo: https://github.com/myint/autoflake
rev: v1.4
hooks:
- id: autoflake
args: [--ignore-init-module-imports, --in-place]
- repo: https://github.com/timothycrosley/isort
rev: 5.8.0
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: 20.8b1
rev: 21.12b0
hooks:
- id: black
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.2.2
hooks:
- id: nbqa-pyupgrade
args: [--py36-plus]
- id: nbqa-isort
- id: nbqa-black
4 changes: 1 addition & 3 deletions {{cookiecutter.repo_name}}/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
{{cookiecutter.repo_name}}
|-- bin # CLI scripts
|-- notebooks
| |-- *.ipynb # Jupyter notebooks
| `-- ipython_config.py # IPython magic to let *.ipynb treat src/ as PYTHONPATH
| `-- *.ipynb # Jupyter notebooks
{% if cookiecutter.package_name != "" -%}
|-- setup.py # To install {{cookiecutter.repo_name}} as a Python module
{% endif -%}
Expand All @@ -27,7 +26,6 @@
|-- .vscenv # Dot env with PYTHONPATH config (for IDEs/editors that support this)
|-- LICENSE # License
|-- README.md # Template document
|-- ipython_config.py # A copy of ipython_config.py (same as notebook/ipython_config.py)
|-- pyproject.toml # Setting for Python code formatter
`-- tox.ini # Settings for select Python toolchains
```
Expand Down
2 changes: 1 addition & 1 deletion {{cookiecutter.repo_name}}/bin/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# CLI scripts

This folder holds command line interface (CLI) scripts. This scripts typically provide entry points to kick off common tasks in your data science project, such as model training or inference.
This folder holds command line interface (CLI) scripts. These scripts typically provide entry points to kick off common tasks in your data science project, such as model training or inference.
38 changes: 0 additions & 38 deletions {{cookiecutter.repo_name}}/notebooks/ipython_config.py

This file was deleted.

38 changes: 23 additions & 15 deletions {{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
"import ndpretty\n",
"import numpy as np\n",
"import pandas as pd\n",
"import sagemaker as sm\n",
"from IPython.display import Markdown\n",
"from smallmatter.ds import mask_df # See: https://github.com/aws-samples/smallmatter-package/\n",
"\n",
"# A few standard SageMaker's stanzas. Use type annotation to be verbose.\n",
"import sagemaker as sm\n",
"role: str = sm.get_execution_role()\n",
"sess = sm.Session()\n",
"region: str = sess.boto_session.region_name"
Expand Down Expand Up @@ -71,15 +71,15 @@
"####################################################################################################\n",
"# Change me\n",
"####################################################################################################\n",
"bucket_name = 'my-bucket-name'\n",
"prefix_name = 'some/prefix'\n",
"bucket_name = \"my-bucket-name\"\n",
"prefix_name = \"some/prefix\"\n",
"####################################################################################################\n",
"\n",
"\n",
"####################################################################################################\n",
"# Do not change the next lines, as they're derived and will be recomputed automatically.\n",
"####################################################################################################\n",
"s3_prefix = f's3://{bucket_name}/{prefix_name}'.rstrip('/')\n",
"s3_prefix = f\"s3://{bucket_name}/{prefix_name}\".rstrip(\"/\")\n",
"\n",
"# Synchronize Python variable and environment variable.\n",
"%set_env S3_PREFIX=$s3_prefix\n",
Expand Down Expand Up @@ -299,28 +299,36 @@
],
"source": [
"def mask_userid(df: pd.DataFrame) -> pd.DataFrame:\n",
" return mask_df(df, cols=['userid'])\n",
" return mask_df(df, cols=[\"userid\"])\n",
"\n",
"df_a = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})\n",
"df_b = pd.DataFrame({\n",
" 'userid': [1000,2000,3000],\n",
" 'pca_a': [0.1, 0.2, 0.3],\n",
" 'pca_b': [-0.3, 0.01, 0.7]\n",
" })\n",
"\n",
"df_a = pd.DataFrame(\n",
" {\n",
" \"a\": [1, 2, 3],\n",
" \"b\": [4, 5, 6],\n",
" }\n",
")\n",
"df_b = pd.DataFrame(\n",
" {\n",
" \"userid\": [1000, 2000, 3000],\n",
" \"pca_a\": [0.1, 0.2, 0.3],\n",
" \"pca_b\": [-0.3, 0.01, 0.7],\n",
" }\n",
")\n",
"\n",
"display(\n",
" Markdown('## Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"'),\n",
" df_a,\n",
"\n",
" Markdown('''## Masked dataframe\n",
" Markdown(\n",
" \"\"\"## Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
"However, when the dataframe contains sensitive values, care must be taken to\n",
"**<font style='color:firebrick;background-color:yellow'>NEVER</font>** version these values to git.\n",
"Otherwise, as you all know, once checked into the git history, it can be tedious and challenging to\n",
"undo the versioning.\n",
"'''\n",
"\"\"\"\n",
" ),\n",
" mask_userid(df_b),\n",
")"
Expand All @@ -343,7 +351,7 @@
"source": [
"# Affect globally\n",
"ndpretty.default()\n",
"np.random.rand(9,9)\n",
"np.random.rand(9, 9)\n",
"\n",
"# NOTE: without ndpretty.default(), use this form:\n",
"# ndpretty.ndarray_html(np.random.rand(3, 4))\n",
Expand Down