diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 30bc8a4e..a4993a49 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,12 +25,12 @@ repos: - id: python-no-log-warn - id: text-unicode-replacement-char - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.12 + rev: v0.15.13 hooks: - id: ruff-format - id: ruff-check - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.11.13 + rev: 0.11.14 hooks: - id: uv-lock - repo: local @@ -60,14 +60,14 @@ repos: mdformat-pyproject>=0.0.2, ] files: (docs/.) - exclude: (docs/source/reference_guides/hookspecs\.md|docs/source/reference_guides/api/.*\.md|docs/source/how_to_guides/capture_warnings\.md|docs/source/how_to_guides/hashing_inputs_of_tasks\.md|docs/source/how_to_guides/provisional_nodes_and_task_generators\.md|docs/source/how_to_guides/remote_files\.md|docs/source/how_to_guides/writing_custom_nodes\.md|docs/source/tutorials/defining_dependencies_products\.md|docs/source/tutorials/using_a_data_catalog\.md) + exclude: (docs/source/reference_guides/hookspecs\.md|docs/source/reference_guides/api/.*\.md|docs/source/how_to_guides/capture_warnings\.md|docs/source/how_to_guides/hashing_inputs_of_tasks\.md|docs/source/how_to_guides/provisional_nodes_and_task_generators\.md|docs/source/how_to_guides/remote_files\.md|docs/source/how_to_guides/writing_custom_nodes\.md|docs/source/tutorials/defining_dependencies_products\.md|docs/source/tutorials/using_a_data_catalog\.md|docs/source/tutorials/write_a_task\.md|docs/source/tutorials/visualizing_the_dag\.md|docs/source/how_to_guides/bp_complex_task_repetitions\.md|docs/source/tutorials/repeating_tasks_with_different_inputs\.md) - repo: https://github.com/kynan/nbstripout rev: 0.9.1 hooks: - id: nbstripout exclude: (docs) - repo: https://github.com/crate-ci/typos - rev: v1 + rev: v1.46.1 hooks: - id: typos exclude: (\.ipynb) diff --git a/README.md b/README.md index b72fb5d6..4319db52 100644 --- a/README.md +++ b/README.md @@ -21,29 
+21,23 @@ ______________________________________________________________________ -pytask is a workflow management system that facilitates reproducible data analyses. Its -features include: +pytask facilitates reproducible data analyses. Its features are: -- **Automatic discovery of tasks.** -- **Lazy evaluation.** If a task, its dependencies, and its products have not changed, - do not execute it. -- **Debug mode.** +- **Automatic discovery of tasks** +- **Lazy execution**: Skip tasks when inputs and outputs are unchanged. +- **Debug mode**: [Jump into the debugger](https://pytask-dev.readthedocs.io/en/stable/tutorials/debugging.html) - if a task fails, get feedback quickly, and be more productive. -- **Repeat a task with different inputs.** - [Loop over task functions](https://pytask-dev.readthedocs.io/en/stable/tutorials/repeating_tasks_with_different_inputs.html) - to run the same task with different inputs. -- **Select tasks via expressions.** Run only a subset of tasks with - [expressions and marker expressions](https://pytask-dev.readthedocs.io/en/stable/tutorials/selecting_tasks.html). -- **Easily extensible with plugins**. pytask is built on - [pluggy](https://pluggy.readthedocs.io/en/latest/), a plugin management framework that - allows you to adjust pytask to your needs. Plugins are available for + if a task fails. +- **Repeat a task with different inputs**: + [Loop over task functions](https://pytask-dev.readthedocs.io/en/stable/tutorials/repeating_tasks_with_different_inputs.html). +- **Task Selection**: Run subsets with + [expressions and markers](https://pytask-dev.readthedocs.io/en/stable/tutorials/selecting_tasks.html). +- **Plugins**: Extend pytask with plugins for [parallelization](https://github.com/pytask-dev/pytask-parallel), [LaTeX](https://github.com/pytask-dev/pytask-latex), [R](https://github.com/pytask-dev/pytask-r), and - [Stata](https://github.com/pytask-dev/pytask-stata) and more can be found - [here](https://github.com/topics/pytask). 
Learn more about plugins in - [this tutorial](https://pytask-dev.readthedocs.io/en/stable/tutorials/plugins.html). + [Stata](https://github.com/pytask-dev/pytask-stata), and + [more](https://pytask-dev.readthedocs.io/en/stable/tutorials/plugins.html). # Installation diff --git a/docs/source/_static/md/commands/build-arguments.md b/docs/source/_static/md/commands/build-arguments.md deleted file mode 100644 index c7ad3809..00000000 --- a/docs/source/_static/md/commands/build-arguments.md +++ /dev/null @@ -1,3 +0,0 @@ -| Argument | Description | -| ----------------------- | ---------------------------------------------------------- | -| [PATHS]... | Paths where pytask looks for task files and configuration. | diff --git a/docs/source/_static/md/commands/build-options.md b/docs/source/_static/md/commands/build-options.md deleted file mode 100644 index 3b8fe0e7..00000000 --- a/docs/source/_static/md/commands/build-options.md +++ /dev/null @@ -1,42 +0,0 @@ -| Option | Default | Description | -| ---------------------------------------------------------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | -| -c, --config FILE | - | Path to configuration file. | -| --capture [fd\|no\|sys\|tee-sys] | fd | Per task capturing method. | -| --database-url TEXT | - | Url to the database. | -| --debug-pytask | false | Trace all function calls in the plugin framework. | -| --disable-warnings | false | Disables the summary for warnings. | -| --dry-run | false | Perform a dry-run. | -| --editor-url-scheme TEXT | file | Use file, vscode, pycharm or a custom url scheme to add URLs to task ids to quickly jump to the task definition. Use no_link to disable URLs. | -| --explain | false | Explain why tasks need to be executed by showing what changed. 
| -| -f, --force | false | Execute a task even if it succeeded successfully before. | -| --hook-module TEXT | - | Path to a Python module that contains hook implementations. | -| --ignore TEXT | - | A pattern to ignore files or directories. Refer to 'pathlib.Path.match' for more info. | -| -k EXPRESSION | - | Select tasks via expressions on task ids. | -| --log-cli / --no-log-cli | --no-log-cli | Enable live log display during task execution. | -| --log-cli-date-format TEXT | - | Log date format used by the logging module for live logs. | -| --log-cli-format TEXT | - | Log format used by the logging module for live logs. | -| --log-cli-level LEVEL | - | CLI logging level. | -| --log-date-format TEXT | %H:%M:%S | Log date format used by the logging module. | -| --log-file TEXT | - | Path to a file where logging will be written. | -| --log-file-date-format TEXT | - | Log date format used by the logging module for the log file. | -| --log-file-format TEXT | - | Log format used by the logging module for the log file. | -| --log-file-level LEVEL | - | Log file logging level. | -| --log-file-mode [w\|a] | w | Log file open mode. | -| --log-format TEXT | %(levelname)-8s %(name)s:%(filename)s:%(lineno)d %(message)s | Log format used by the logging module. | -| --log-level LEVEL | - | Level of messages to catch/display. Not set by default, so it depends on the logger configuration. | -| -m MARKER_EXPRESSION | - | Select tasks via marker expressions. | -| --max-failures FLOAT RANGE | inf | Stop after some failures. | -| --n-entries-in-table INTEGER RANGE | 15 | How many entries to display in the table during the execution. Tasks which are running are always displayed. | -| --pdb | false | Start the interactive debugger on errors. | -| --pdbcls module_name:class_name | - | Start a custom debugger on errors. For example: --pdbcls=IPython.terminal.debugger:TerminalPdb | -| -s | false | Shortcut for --capture=no. 
| -| --show-capture [no\|stdout\|stderr\|log\|all] | all | Choose which captured output should be shown for failed tasks. | -| --show-errors-immediately | false | Show errors with tracebacks as soon as the task fails. | -| --show-locals | false | Show local variables in tracebacks. | -| --show-traceback / --show-no-traceback | --show-traceback | Choose whether tracebacks should be displayed or not. | -| --sort-table / --do-not-sort-table | --sort-table | Sort the table of tasks at the end of the execution. | -| --strict-markers | false | Raise errors for unknown markers. | -| --trace | false | Enter debugger in the beginning of each task. | -| -v, --verbose INTEGER RANGE | 1 | Make pytask verbose (>= 0) or quiet (= 0). | -| -x, --stop-after-first-failure | false | Stop after the first failure. | -| `-h, --help` | - | Show this message and exit. | diff --git a/docs/source/_static/md/commands/clean-arguments.md b/docs/source/_static/md/commands/clean-arguments.md deleted file mode 100644 index c7ad3809..00000000 --- a/docs/source/_static/md/commands/clean-arguments.md +++ /dev/null @@ -1,3 +0,0 @@ -| Argument | Description | -| ----------------------- | ---------------------------------------------------------- | -| [PATHS]... | Paths where pytask looks for task files and configuration. | diff --git a/docs/source/_static/md/commands/clean-options.md b/docs/source/_static/md/commands/clean-options.md deleted file mode 100644 index 5c458c80..00000000 --- a/docs/source/_static/md/commands/clean-options.md +++ /dev/null @@ -1,15 +0,0 @@ -| Option | Default | Description | -| ------------------------------------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| -c, --config FILE | - | Path to configuration file. | -| -d, --directories | false | Remove whole directories. 
| -| --database-url TEXT | - | Url to the database. | -| -e, --exclude PATTERN | - | A filename pattern to exclude files from the cleaning process. | -| --editor-url-scheme TEXT | file | Use file, vscode, pycharm or a custom url scheme to add URLs to task ids to quickly jump to the task definition. Use no_link to disable URLs. | -| --hook-module TEXT | - | Path to a Python module that contains hook implementations. | -| --ignore TEXT | - | A pattern to ignore files or directories. Refer to 'pathlib.Path.match' for more info. | -| -k EXPRESSION | - | Select tasks via expressions on task ids. | -| -m MARKER_EXPRESSION | - | Select tasks via marker expressions. | -| --mode [dry-run\|force\|interactive] | dry-run | Choose 'dry-run' to print the paths of files/directories which would be removed, 'interactive' for a confirmation prompt for every path, and 'force' to remove all unknown paths at once. | -| -q, --quiet | false | Do not print the names of the removed paths. | -| --strict-markers | false | Raise errors for unknown markers. | -| `-h, --help` | - | Show this message and exit. | diff --git a/docs/source/_static/md/commands/collect-arguments.md b/docs/source/_static/md/commands/collect-arguments.md deleted file mode 100644 index c7ad3809..00000000 --- a/docs/source/_static/md/commands/collect-arguments.md +++ /dev/null @@ -1,3 +0,0 @@ -| Argument | Description | -| ----------------------- | ---------------------------------------------------------- | -| [PATHS]... | Paths where pytask looks for task files and configuration. 
| diff --git a/docs/source/_static/md/commands/collect-options.md b/docs/source/_static/md/commands/collect-options.md deleted file mode 100644 index 0eda5828..00000000 --- a/docs/source/_static/md/commands/collect-options.md +++ /dev/null @@ -1,12 +0,0 @@ -| Option | Default | Description | -| ------------------------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------- | -| -c, --config FILE | - | Path to configuration file. | -| --database-url TEXT | - | Url to the database. | -| --editor-url-scheme TEXT | file | Use file, vscode, pycharm or a custom url scheme to add URLs to task ids to quickly jump to the task definition. Use no_link to disable URLs. | -| --hook-module TEXT | - | Path to a Python module that contains hook implementations. | -| --ignore TEXT | - | A pattern to ignore files or directories. Refer to 'pathlib.Path.match' for more info. | -| -k EXPRESSION | - | Select tasks via expressions on task ids. | -| -m MARKER_EXPRESSION | - | Select tasks via marker expressions. | -| --nodes | false | Show a task's dependencies and products. | -| --strict-markers | false | Raise errors for unknown markers. | -| `-h, --help` | - | Show this message and exit. | diff --git a/docs/source/_static/md/commands/command-list.md b/docs/source/_static/md/commands/command-list.md deleted file mode 100644 index af659a97..00000000 --- a/docs/source/_static/md/commands/command-list.md +++ /dev/null @@ -1,9 +0,0 @@ -| Command | Description | -| ----------------------- | ------------------------------------------------------------- | -| [`build`](build.md) | Collect tasks, execute them and report the results. | -| [`clean`](clean.md) | Clean the provided paths by removing files unknown to pytask. | -| [`collect`](collect.md) | Collect tasks and report information about them. | -| [`dag`](dag.md) | Create a visualization of the directed acyclic graph. 
| -| [`lock`](lock.md) | Inspect and update recorded task state in the lockfile. | -| [`markers`](markers.md) | Show all registered markers. | -| [`profile`](profile.md) | Show information about resource consumption. | diff --git a/docs/source/_static/md/commands/dag-arguments.md b/docs/source/_static/md/commands/dag-arguments.md deleted file mode 100644 index c7ad3809..00000000 --- a/docs/source/_static/md/commands/dag-arguments.md +++ /dev/null @@ -1,3 +0,0 @@ -| Argument | Description | -| ----------------------- | ---------------------------------------------------------- | -| [PATHS]... | Paths where pytask looks for task files and configuration. | diff --git a/docs/source/_static/md/commands/dag-options.md b/docs/source/_static/md/commands/dag-options.md deleted file mode 100644 index 8dc55edf..00000000 --- a/docs/source/_static/md/commands/dag-options.md +++ /dev/null @@ -1,9 +0,0 @@ -| Option | Default | Description | -| -------------------------------------------------- | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | -| -c, --config FILE | - | Path to configuration file. | -| --database-url TEXT | - | Url to the database. | -| --hook-module TEXT | - | Path to a Python module that contains hook implementations. | -| -l, --layout TEXT | dot | The layout determines the structure of the graph. Here you find an overview of all available layouts: https://graphviz.org/docs/layouts. | -| -o, --output-path FILE | dag.pdf | The output path of the visualization. The format is inferred from the file extension. | -| -r, --rank-direction [TB\|LR\|BT\|RL] | TB | The direction of the directed graph. It can be ordered from top to bottom, TB, left to right, LR, bottom to top, BT, or right to left, RL. | -| `-h, --help` | - | Show this message and exit. 
| diff --git a/docs/source/_static/md/commands/markers-arguments.md b/docs/source/_static/md/commands/markers-arguments.md deleted file mode 100644 index c7ad3809..00000000 --- a/docs/source/_static/md/commands/markers-arguments.md +++ /dev/null @@ -1,3 +0,0 @@ -| Argument | Description | -| ----------------------- | ---------------------------------------------------------- | -| [PATHS]... | Paths where pytask looks for task files and configuration. | diff --git a/docs/source/_static/md/commands/markers-options.md b/docs/source/_static/md/commands/markers-options.md deleted file mode 100644 index aaf5aea0..00000000 --- a/docs/source/_static/md/commands/markers-options.md +++ /dev/null @@ -1,5 +0,0 @@ -| Option | Default | Description | -| ------------------------------- | ------- | ----------------------------------------------------------- | -| -c, --config FILE | - | Path to configuration file. | -| --hook-module TEXT | - | Path to a Python module that contains hook implementations. | -| `-h, --help` | - | Show this message and exit. | diff --git a/docs/source/_static/md/commands/profile-arguments.md b/docs/source/_static/md/commands/profile-arguments.md deleted file mode 100644 index c7ad3809..00000000 --- a/docs/source/_static/md/commands/profile-arguments.md +++ /dev/null @@ -1,3 +0,0 @@ -| Argument | Description | -| ----------------------- | ---------------------------------------------------------- | -| [PATHS]... | Paths where pytask looks for task files and configuration. 
| diff --git a/docs/source/_static/md/commands/profile-options.md b/docs/source/_static/md/commands/profile-options.md deleted file mode 100644 index 4b1f5e0c..00000000 --- a/docs/source/_static/md/commands/profile-options.md +++ /dev/null @@ -1,9 +0,0 @@ -| Option | Default | Description | -| ------------------------------------- | ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | -| -c, --config FILE | - | Path to configuration file. | -| --database-url TEXT | - | Url to the database. | -| --editor-url-scheme TEXT | file | Use file, vscode, pycharm or a custom url scheme to add URLs to task ids to quickly jump to the task definition. Use no_link to disable URLs. | -| --export [no\|json\|csv] | no | Export the profile in the specified format. | -| --hook-module TEXT | - | Path to a Python module that contains hook implementations. | -| --ignore TEXT | - | A pattern to ignore files or directories. Refer to 'pathlib.Path.match' for more info. | -| `-h, --help` | - | Show this message and exit. | diff --git a/docs/source/_static/md/commands/root-options.md b/docs/source/_static/md/commands/root-options.md deleted file mode 100644 index a13dfe4a..00000000 --- a/docs/source/_static/md/commands/root-options.md +++ /dev/null @@ -1,4 +0,0 @@ -| Option | Description | -| ---------------------- | --------------------------- | -| --version | Show the version and exit. | -| `-h, --help` | Show this message and exit. | diff --git a/docs/source/explanations/comparison_to_other_tools.md b/docs/source/explanations/comparison_to_other_tools.md index 0f2dbded..d6c633a4 100644 --- a/docs/source/explanations/comparison_to_other_tools.md +++ b/docs/source/explanations/comparison_to_other_tools.md @@ -2,8 +2,8 @@ There exist some alternatives to pytask which are listed below. 
The short descriptions don't do them justice and you should check them out to see which -[workflow management system](../glossary.md#workflow-management-system) (WFM) fits you -and your use case best. +[workflow management system](../reference_guides/glossary.md#workflow-management-system) +(WFM) fits you and your use case best. Feel free to contribute to this list and add points which you found particularly favorable. The list also serves as an inspiration for pytask to adopt features present @@ -93,9 +93,9 @@ Azure. ## [pipefunc](https://github.com/pipefunc/pipefunc) pipefunc is a lightweight library for creating function pipelines as -[directed acyclic graphs (DAGs)](../glossary.md#dag) in pure Python. It automatically -handles execution order, supports map-reduce operations, parallel execution, and -provides resource profiling. +[directed acyclic graphs (DAGs)](../reference_guides/glossary.md#dag) in pure Python. It +automatically handles execution order, supports map-reduce operations, parallel +execution, and provides resource profiling. ## [Hamilton](https://github.com/dagworks-inc/hamilton) diff --git a/docs/source/explanations/pluggy.md b/docs/source/explanations/pluggy.md index d6edc08f..06e6d8e4 100644 --- a/docs/source/explanations/pluggy.md +++ b/docs/source/explanations/pluggy.md @@ -1,16 +1,19 @@ # pluggy and the Plugin Architecture -pluggy is at the heart of pytask and enables its [plugin](../glossary.md#plugin) system. -The mechanism to achieve extensibility is called [hooking](../glossary.md#hooking). +pluggy is at the heart of pytask and enables its +[plugin](../reference_guides/glossary.md#plugin) system. The mechanism to achieve +extensibility is called [hooking](../reference_guides/glossary.md#hooking). -At specific points, pytask, or more generally the [host](../glossary.md#host), -implements [entry-points](../glossary.md#entry-point) called -[hook specifications](../glossary.md#hook-specification). 
At these entry-points, the -host sends a message to all plugins which target this entry-point. The message's -recipient is implemented by the plugin and called a -[hook implementation](../glossary.md#hook-implementation). The hook implementation -receives the message and can decide whether to send a response or not. Then, the host -gets the responses and can choose whether to process all or just the first valid return. +At specific points, pytask, or more generally the +[host](../reference_guides/glossary.md#host), implements +[entry-points](../reference_guides/glossary.md#entry-point) called +[hook specifications](../reference_guides/glossary.md#hook-specification). At these +entry-points, the host sends a message to all plugins which target this entry-point. The +message's recipient is implemented by the plugin and called a +[hook implementation](../reference_guides/glossary.md#hook-implementation). The hook +implementation receives the message and can decide whether to send a response or not. +Then, the host gets the responses and can choose whether to process all or just the +first valid return. In contrast to some other mechanisms to change the behavior of a program (like method overriding and monkey patching), hooking excels at allowing multiple plugins to work diff --git a/docs/source/explanations/why_pytask.md b/docs/source/explanations/why_pytask.md index 16afcf43..9998afef 100644 --- a/docs/source/explanations/why_pytask.md +++ b/docs/source/explanations/why_pytask.md @@ -1,9 +1,9 @@ # Why pytask? There are a lot of -[workflow management systems](../glossary.md#workflow-management-system) out there with -existing communities that accumulated a lot of experience over time. So why bother -creating another workflow management system? +[workflow management systems](../reference_guides/glossary.md#workflow-management-system) +out there with existing communities that accumulated a lot of experience over time. 
So +why bother creating another workflow management system? pytask is created having a particular audience in mind. Many researchers are not computer scientists first. Instead, they acquired some programming skills throughout @@ -26,9 +26,9 @@ pytask tries to address this point in many ways. 1. pytask integrates with other tools used in the scientific community, such as R and Julia, and offers solutions to bridge the gap between a - [workflow management system](../glossary.md#workflow-management-system) written in - Python and scripts in another language. For example, pytask makes paths to - dependencies and products available in the scripts. + [workflow management system](../reference_guides/glossary.md#workflow-management-system) + written in Python and scripts in another language. For example, pytask makes + paths to dependencies and products available in the scripts. 1. The plugin system lets power users tailor pytask to their needs by adding additional functionality. It makes pytask extraordinarily versatile and offers people from diff --git a/docs/source/how_to_guides/bp_complex_task_repetitions.md b/docs/source/how_to_guides/bp_complex_task_repetitions.md index 7b35a80e..a886d64b 100644 --- a/docs/source/how_to_guides/bp_complex_task_repetitions.md +++ b/docs/source/how_to_guides/bp_complex_task_repetitions.md @@ -30,16 +30,15 @@ are growing over time and you run into these problems. ## Solution The main idea for the solution is quickly explained. We will, first, formalize -dimensions into objects using -[`dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass). +dimensions into objects using [`dataclasses.dataclass`][]. Secondly, we will combine dimensions in multi-dimensional objects such that we only have to iterate over instances of this object in a single loop. Here and for the lack of a better name, we will call the object an experiment. 
Lastly, we will also use the -[`pytask.DataCatalog`](../api/core_classes_and_exceptions.md#pytask.DataCatalog) to not -be bothered with defining paths. +[`pytask.DataCatalog`](../reference_guides/api/core_classes_and_exceptions.md#pytask.DataCatalog) +to not be bothered with defining paths. !!! note @@ -69,8 +68,9 @@ we use them. As you see, we lost a level of indentation and we moved all the generations of names and paths to the dimensions and multi-dimensional objects. -Using a [`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) allows us to -hash the model and reexecute the task if we define other model settings. +Using a +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) +allows us to hash the model and reexecute the task if we define other model settings. ## Adding another level @@ -115,6 +115,7 @@ pytask -k logit Some repeated tasks are costly to run - costly in terms of computing power, memory, or runtime. If you change a task module, you might accidentally trigger all other tasks in -the module to be rerun. Use the [`@pytask.mark.persist`](../api/marks.md#pytask.mark) -decorator, which is explained in more detail in this +the module to be rerun. Use the +[`@pytask.mark.persist`](../reference_guides/api/marks.md#pytaskmarkpersist) decorator, +which is explained in more detail in this [tutorial](../tutorials/making_tasks_persist.md). diff --git a/docs/source/how_to_guides/bp_structure_of_task_files.md b/docs/source/how_to_guides/bp_structure_of_task_files.md index 570a20f7..78ccd9fb 100644 --- a/docs/source/how_to_guides/bp_structure_of_task_files.md +++ b/docs/source/how_to_guides/bp_structure_of_task_files.md @@ -15,8 +15,8 @@ are looking for orientation or inspiration, here are some tips. files and calling Python functions on the task's inputs. IO should not be handled in any other function. 
- Non-task functions in the task module are - [`private functions`](../glossary.md#private-function) and only used within this - task module. The functions should not have side-effects. + [`private functions`](../reference_guides/glossary.md#private-function) and only + used within this task module. The functions should not have side-effects. - It should never be necessary to import from task modules. So if you need a function in multiple task modules, put it in a separate module (which does not start with `task_`). @@ -39,8 +39,9 @@ to focus on one thing. The second reason is about runtime. If a task module is changed, all tasks within the module are re-run. If the runtime of all tasks in the module is high, you wait longer for your tasks to finish or until an error occurs which prolongs your feedback loops and -hurts your productivity. Use [`@pytask.mark.persist`](../api/marks.md#pytaskmarkpersist) -if you want to avoid accidentally triggering an expensive task. It is also explained in +hurts your productivity. Use +[`@pytask.mark.persist`](../reference_guides/api/marks.md#pytaskmarkpersist) if you want +to avoid accidentally triggering an expensive task. It is also explained in [this tutorial](../tutorials/making_tasks_persist.md). ### Structure of the module @@ -50,11 +51,13 @@ For the following example, let us assume that the task module contains one task. The task function should be the first function in the module. It should have a descriptive name and a docstring which explains what the task accomplishes. -It should be the only [`public function`](../glossary.md#public-function) in the module -which means the only function without a leading underscore. This is a convention to keep -[`public functions`](../glossary.md#public-function) separate from -[`private functions`](../glossary.md#private-function) (with a leading underscore) where -the latter must only be used in the same module and not imported elsewhere. 
+It should be the only +[`public function`](../reference_guides/glossary.md#public-function) in the module which +means the only function without a leading underscore. This is a convention to keep +[`public functions`](../reference_guides/glossary.md#public-function) separate from +[`private functions`](../reference_guides/glossary.md#private-function) (with a leading +underscore) where the latter must only be used in the same module and not imported +elsewhere. The body of the task function should contain two things: @@ -76,8 +79,8 @@ are used to accomplish this and only this task. Here is an example of a task module which conforms to all advice. -```python ---8 < --"docs_src/how_to_guides/bp_structure_of_task_files.py" +```py +--8<-- "docs_src/how_to_guides/bp_structure_of_task_files.py" ``` !!! note diff --git a/docs/source/how_to_guides/capture_warnings.md b/docs/source/how_to_guides/capture_warnings.md index ef455494..83f29cc2 100644 --- a/docs/source/how_to_guides/capture_warnings.md +++ b/docs/source/how_to_guides/capture_warnings.md @@ -14,9 +14,8 @@ Running pytask produces ## Controlling warnings -You can use the -[`filterwarnings`](../reference_guides/configuration.md#filterwarnings) option in -`pyproject.toml` to configure pytask's behavior when it comes to warnings. +You can use the [`filterwarnings`](../reference_guides/configuration.md#filterwarnings) +option in `pyproject.toml` to configure pytask's behavior when it comes to warnings. 
The syntax for specifying warnings filters is the same as in the [Python standard library](https://docs.python.org/3/library/warnings.html#the-warnings-filter), @@ -49,7 +48,7 @@ the Python documentation and there are also ## `@pytask.mark.filterwarnings` -You can use [`@pytask.mark.filterwarnings`](../api/marks.md#pytask.mark.filterwarnings) +You can use [`@pytask.mark.filterwarnings`](../reference_guides/api/marks.md#pytaskmarkfilterwarnings) to add warning filters to specific test items, allowing you to have finer control of which warnings should be captured at the test, class or even module level: diff --git a/docs/source/how_to_guides/extending_pytask.md b/docs/source/how_to_guides/extending_pytask.md index 8466e343..05084c13 100644 --- a/docs/source/how_to_guides/extending_pytask.md +++ b/docs/source/how_to_guides/extending_pytask.md @@ -1,14 +1,14 @@ # Extending pytask pytask can be extended since it is built upon -[pluggy](https://pluggy.readthedocs.io/en/latest/), a [plugin](../glossary.md#plugin) -system for Python. +[pluggy](https://pluggy.readthedocs.io/en/latest/), a +[plugin](../reference_guides/glossary.md#plugin) system for Python. How does it work? Throughout the execution, pytask arrives at -[entry-points](../glossary.md#entry-point), called hook functions. When pytask calls a -hook function it loops through -[hook implementations](../glossary.md#hook-implementation) and each hook implementation -can alter the result of the entrypoint. +[entry-points](../reference_guides/glossary.md#entry-point), called hook functions. When +pytask calls a hook function it loops through +[hook implementations](../reference_guides/glossary.md#hook-implementation) and each +hook implementation can alter the result of the entrypoint. The full list of hook functions is specified in [hookspecs](../reference_guides/hookspecs.md). @@ -17,13 +17,13 @@ More general information about pluggy can be found in its [documentation](https://pluggy.readthedocs.io/en/latest/). 
There are two ways to add new -[hook implementations](../glossary.md#hook-implementation). +[hook implementations](../reference_guides/glossary.md#hook-implementation). 1. Using the [`pytask build --hook-module`](../reference_guides/commands.md#pytask-build--hook-module) commandline option or the `hook_module` configuration value. -1. Packaging your [plugin](../glossary.md#plugin) as a Python package to publish and - share it. +1. Packaging your [plugin](../reference_guides/glossary.md#plugin) as a Python package + to publish and share it. @@ -57,8 +57,8 @@ hook_module = ["myproject.hooks"] In `hooks.py` we can add another commandline option to [`pytask build`](../reference_guides/commands.md#pytask-build) by providing an -additional [hook implementation](../glossary.md#hook-implementation) for the -[hook specification](../glossary.md#hook-specification) +additional [hook implementation](../reference_guides/glossary.md#hook-implementation) +for the [hook specification](../reference_guides/glossary.md#hook-specification) `_pytask.hookspecs.pytask_extend_command_line_interface`. ```py title="hooks.py" @@ -99,10 +99,10 @@ This section explains some steps which are required for all plugins. #### Set up the setuptools entry-point -pytask discovers plugins via `setuptools` [entry-points](../glossary.md#entry-point). -Following the approach advocated for by -[setuptools_scm](https://github.com/pypa/setuptools_scm), the entry-point is specified -in `pyproject.toml`. +pytask discovers plugins via `setuptools` +[entry-points](../reference_guides/glossary.md#entry-point). Following the approach +advocated for by [setuptools_scm](https://github.com/pypa/setuptools_scm), the +entry-point is specified in `pyproject.toml`. ```toml title="pyproject.toml" [project] @@ -137,10 +137,11 @@ The entry-point for pytask is called `"pytask"` and points to a module called #### `plugin.py` -`plugin.py` is the [entry-point](../glossary.md#entry-point) for pytask to your package. 
-You can put all of your hook implementations in this module, but it is recommended to -imitate the structure of pytask and its modules. For example, all hook implementations -which change the configuration should be implemented in `pytask_plugin.config`. +`plugin.py` is the [entry-point](../reference_guides/glossary.md#entry-point) for pytask +to your package. You can put all of your hook implementations in this module, but it is +recommended to imitate the structure of pytask and its modules. For example, all hook +implementations which change the configuration should be implemented in +`pytask_plugin.config`. If you follow the recommendations, the only content in `plugin.py` is a single hook implementation which registers other hook implementations of your plugin. The following diff --git a/docs/source/how_to_guides/hashing_inputs_of_tasks.md b/docs/source/how_to_guides/hashing_inputs_of_tasks.md index 976c2e36..165bd5db 100644 --- a/docs/source/how_to_guides/hashing_inputs_of_tasks.md +++ b/docs/source/how_to_guides/hashing_inputs_of_tasks.md @@ -2,28 +2,28 @@ Any input to a task function is parsed by pytask's nodes. For example, [`pathlib.Path`][]s are parsed by -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode)s. The -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode) handles among other things +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode)s. The +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode) handles among other things how changes in the underlying file are detected. If an input is not parsed by any more specific node type, the general -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) is used. +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) is used. In the following example, the argument `text` will be parsed as a -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode). 
+[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode). ```py --8<-- "docs_src/how_to_guides/hashing_inputs_of_tasks_example_1_py310.py" ``` By default, pytask does not detect changes in -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) and if the value would +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) and if the value would change (without changing the task module), pytask would not rerun the task. We can also hash the value of -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode)s so that pytask knows +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode)s so that pytask knows when the input changed. For that, we need to use the -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) explicitly and set +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) explicitly and set `hash = True`. ```py @@ -54,7 +54,7 @@ information). pytask will hash them using the `hashlib` module to create a stabl `list` and `dict` are not hashable by default. Luckily, there are libraries who provide this functionality like `deepdiff`. We can use them to pass a function to the -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) that generates a stable +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) that generates a stable hash. First, install `deepdiff`. diff --git a/docs/source/how_to_guides/how_to_influence_build_order.md b/docs/source/how_to_guides/how_to_influence_build_order.md index 100b9160..bcb71f04 100644 --- a/docs/source/how_to_guides/how_to_influence_build_order.md +++ b/docs/source/how_to_guides/how_to_influence_build_order.md @@ -7,14 +7,16 @@ products are not fully specified. You can influence the order in which tasks are executed by assigning preferences to -tasks. 
Use [`@pytask.mark.try_first`](../api/marks.md#pytask.mark.try_first) to execute -a task as early as possible and -[`@pytask.mark.try_last`](../api/marks.md#pytask.mark.try_last) to defer execution. +tasks. Use +[`@pytask.mark.try_first`](../reference_guides/api/marks.md#pytaskmarktry_first) to +execute a task as early as possible and +[`@pytask.mark.try_last`](../reference_guides/api/marks.md#pytaskmarktry_last) to defer +execution. !!! note A little bit more background: Tasks, dependencies and products form a - [directed acyclic graph (DAG)](../glossary.md#dag). A + [directed acyclic graph (DAG)](../reference_guides/glossary.md#dag). A [topological ordering](https://en.wikipedia.org/wiki/Topological_sorting) determines the order in which tasks are executed such that tasks are not run until their predecessors have been executed. You should not assume a fixed ordering in which tasks are executed. @@ -40,8 +42,9 @@ make the output visible in the terminal) --8<-- "docs/source/_static/md/try-first.md" -Replacing [`pytask.mark.try_first`](../api/marks.md#pytask.mark.try_first) with -[`pytask.mark.try_last`](../api/marks.md#pytask.mark.try_last) yields +Replacing +[`pytask.mark.try_first`](../reference_guides/api/marks.md#pytaskmarktry_first) with +[`pytask.mark.try_last`](../reference_guides/api/marks.md#pytaskmarktry_last) yields ```py title="task_example.py" import pytask diff --git a/docs/source/how_to_guides/interfaces_for_dependencies_products.md b/docs/source/how_to_guides/interfaces_for_dependencies_products.md index cba3b305..da79d6af 100644 --- a/docs/source/how_to_guides/interfaces_for_dependencies_products.md +++ b/docs/source/how_to_guides/interfaces_for_dependencies_products.md @@ -39,11 +39,12 @@ You can pass a value to a task as a default argument. It is possible to include the value in the type annotation. It is especially helpful if you pass a -[`pytask.PNode`](../api/nodes_and_tasks.md#pytask.PNode) to the task. 
If you passed a -node as the default argument, type checkers like mypy would expect the node to enter the -task, but the value injected into the task depends on the nodes -[`pytask.PNode.load`](../api/nodes_and_tasks.md#pytask.PNode.load) method. For a -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode) +[`pytask.PNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode) to the task. +If you passed a node as the default argument, type checkers like mypy would expect the +node to enter the task, but the value injected into the task depends on the node's +[`pytask.PNode.load`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode.load) +method. For a +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode) ```py --8<-- "docs_src/how_to_guides/interfaces/dependencies_annotation.py" @@ -72,8 +73,8 @@ applies to dependencies and products alike. ### `Product` annotation The syntax is the same as [default argument](#default-argument), but the -[`pytask.Product`](../api/utilities_and_typing.md#pytask.Product) annotation turns the -argument into a task product. +[`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product) +annotation turns the argument into a task product. ```py --8<-- "docs_src/how_to_guides/interfaces/products_annotation.py" @@ -82,8 +83,8 @@ argument into a task product. ### `Product` annotation with value The syntax is the same as [annotation](#annotation), but the -[`pytask.Product`](../api/utilities_and_typing.md#pytask.Product) annotation turns the -argument into a task product. +[`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product) +annotation turns the argument into a task product. ```py --8<-- "docs_src/how_to_guides/interfaces/products_annotation_with_pnode.py" @@ -116,7 +117,7 @@ the return type.
It allows us to treat the returns of the task function as produ In situations where the task return is the product like [return annotation](#return-annotation), but you cannot modify the type annotation of the return, use the argument `produces` of the -[`@task`](../api/nodes_and_tasks.md#pytask.task) decorator. +[`@task`](../reference_guides/api/nodes_and_tasks.md#pytask.task) decorator. Pass the node or value you otherwise include in the type annotation to `produces`. diff --git a/docs/source/how_to_guides/logging.md b/docs/source/how_to_guides/logging.md index 9072af59..792668a7 100644 --- a/docs/source/how_to_guides/logging.md +++ b/docs/source/how_to_guides/logging.md @@ -222,7 +222,7 @@ log_file_date_format = "%Y-%m-%d %H:%M:%S" ## Use logging with the programmatic interface The same options are available via -[`pytask.build`](../api/functional_interfaces.md#build-workflow). +[`pytask.build`](../reference_guides/api/functional_interfaces.md#build-workflow). ```py title="build.py" from pytask import build diff --git a/docs/source/how_to_guides/migrating_from_scripts_to_pytask.md b/docs/source/how_to_guides/migrating_from_scripts_to_pytask.md index 0b385aa3..622f7998 100644 --- a/docs/source/how_to_guides/migrating_from_scripts_to_pytask.md +++ b/docs/source/how_to_guides/migrating_from_scripts_to_pytask.md @@ -10,9 +10,9 @@ With pytask, you can enjoy features like: - **Parallelization**. Use [pytask-parallel](https://github.com/pytask-dev/pytask-parallel) to speed up your scripts by running them in parallel. -- **Cross-language projects**. pytask has several [plugins](../glossary.md#plugin) for - running scripts written in other popular languages: - [pytask-r](https://github.com/pytask-dev/pytask-r), +- **Cross-language projects**. 
pytask has several + [plugins](../reference_guides/glossary.md#plugin) for running scripts written in + other popular languages: [pytask-r](https://github.com/pytask-dev/pytask-r), [pytask-julia](https://github.com/pytask-dev/pytask-julia), and [pytask-stata](https://github.com/pytask-dev/pytask-stata). @@ -59,8 +59,8 @@ An `if __name__ == "__main__"` block must be deleted. To let pytask know the order in which to execute tasks and when to re-run them, you'll need to specify task dependencies and products. Add dependencies as arguments to the function with default values. Do the same for products, but also add the special -[`pytask.Product`](../api/utilities_and_typing.md#pytask.Product) annotation with -`Annotated[Path, Product]`. For example: +[`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product) +annotation with `Annotated[Path, Product]`. For example: ```py --8<-- "docs_src/how_to_guides/migrating_from_scripts_to_pytask_4.py" @@ -103,10 +103,10 @@ $ pytask -n 4 pytask wants to help you get your job done, and sometimes a different programming language can make your life easier. Thus, pytask has several -[plugins](../glossary.md#plugin) to integrate code written in R, Julia, and Stata. Here, -we explore how to incorporate an R script with +[plugins](../reference_guides/glossary.md#plugin) to integrate code written in R, Julia, +and Stata. Here, we explore how to incorporate an R script with [pytask-r](https://github.com/pytask-dev/pytask-r). You can also find more information -about the [plugin](../glossary.md#plugin) in the repo's readme. +about the [plugin](../reference_guides/glossary.md#plugin) in the repo's readme. First, we will install the package. 
diff --git a/docs/source/how_to_guides/move_project_to_another_machine.md b/docs/source/how_to_guides/move_project_to_another_machine.md index a43fc15d..ed73488a 100644 --- a/docs/source/how_to_guides/move_project_to_another_machine.md +++ b/docs/source/how_to_guides/move_project_to_another_machine.md @@ -62,10 +62,11 @@ Make sure custom node IDs and state values stay stable across machines: - Use project-relative IDs instead of absolute paths. - Prefer file content hashes over timestamps. - Avoid machine-specific paths or timestamps in custom - [`state()`](../api/nodes_and_tasks.md#pytask.PNode.state) implementations. + [`state()`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode.state) + implementations. - Provide a custom hash function for - [`PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) values that are not - natively stable. + [`PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) values + that are not natively stable. Most projects that only use built-in nodes do not need extra work here. diff --git a/docs/source/how_to_guides/provisional_nodes_and_task_generators.md b/docs/source/how_to_guides/provisional_nodes_and_task_generators.md index 24f8f666..fffbbdab 100644 --- a/docs/source/how_to_guides/provisional_nodes_and_task_generators.md +++ b/docs/source/how_to_guides/provisional_nodes_and_task_generators.md @@ -3,7 +3,7 @@ pytask's execution model can usually be separated into three phases. 1. Collection of tasks, dependencies, and products. -1. Building the [DAG](../glossary.md#dag). +1. Building the [DAG](../reference_guides/glossary.md#dag). 1. Executing the tasks. But, in some situations, pytask needs to be more flexible. @@ -28,28 +28,28 @@ as the task module because it is a relative path. 
``` Since the names of the files are not known when pytask is started, we need to use a -[`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode) to define the +[`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode) to define the task's product. With a -[`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode) we can specify +[`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode) we can specify where pytask can find the files. The files are described with a root path (default is the directory of the task module) and a glob pattern (default is `*`). -When we use the [`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode) +When we use the [`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode) as a product annotation, we get access to the `root_dir` as a [`pathlib.Path`][] object inside the function, which allows us to store the files. !!! note - The [`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode) is a + The [`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode) is a provisional node that implements - [`pytask.PProvisionalNode`](../api/nodes_and_tasks.md#pytask.PProvisionalNode). A - provisional node is not a [`pytask.PNode`](../api/nodes_and_tasks.md#pytask.PNode), but + [`pytask.PProvisionalNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PProvisionalNode). A + provisional node is not a [`pytask.PNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode), but when its - [`pytask.PProvisionalNode.collect`](../api/nodes_and_tasks.md#pytask.PProvisionalNode.collect) + [`pytask.PProvisionalNode.collect`](../reference_guides/api/nodes_and_tasks.md#pytask.PProvisionalNode.collect) method is called, it returns actual nodes. 
A - [`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode), for example, - returns a [`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode). + [`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode), for example, + returns a [`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode). ## Depending on provisional nodes @@ -64,11 +64,11 @@ downloaded. ``` To reference the files that will be downloaded, we use -[`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode) as a dependency. +[`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode) as a dependency. Before the task is executed, the list of files in the folder defined by the root path and the pattern are automatically collected and passed to the task. -If we use a [`pytask.DirectoryNode`](../api/nodes_and_tasks.md#pytask.DirectoryNode) with +If we use a [`pytask.DirectoryNode`](../reference_guides/api/nodes_and_tasks.md#pytask.DirectoryNode) with the same `root_dir` and `pattern` in both tasks, pytask will automatically recognize that the second task depends on the first. If that is not true, you might need to make this dependency more explicit by using `@task(after=...)`, which is explained diff --git a/docs/source/how_to_guides/remote_files.md b/docs/source/how_to_guides/remote_files.md index 3b709b90..0efab204 100644 --- a/docs/source/how_to_guides/remote_files.md +++ b/docs/source/how_to_guides/remote_files.md @@ -4,7 +4,7 @@ So far, we have only dealt with local files in the tutorials and guides. But the lots of use cases to deal with remote files. - You distribute the workflow without the data and want to make it easy for others to - get started. So, some tasks reference remote files instead of local files. + get started. So, some tasks reference remote files instead of local files. - You store the workflow results in remote storage to save and distribute them. 
pytask uses [universal-pathlib](https://github.com/fsspec/universal_pathlib) to work @@ -41,7 +41,7 @@ ImportError: Install s3fs to access S3 Some filesystems are supported [out-of-the-box](https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations). [Others](https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations) -are available as [plugins](../glossary.md#plugin) and require additional packages. +are available as [plugins](../reference_guides/glossary.md#plugin) and require additional packages. After installing s3fs, rerun the command. diff --git a/docs/source/how_to_guides/the_data_catalog.md b/docs/source/how_to_guides/the_data_catalog.md index 2c805a34..58470f1a 100644 --- a/docs/source/how_to_guides/the_data_catalog.md +++ b/docs/source/how_to_guides/the_data_catalog.md @@ -1,20 +1,21 @@ # The `DataCatalog` - Revisited This guide explains more details about the -[`pytask.DataCatalog`](../api/core_classes_and_exceptions.md#pytask.DataCatalog) that -were left out of the [tutorial](../tutorials/using_a_data_catalog.md). Please, read the -tutorial for a basic understanding. +[`pytask.DataCatalog`](../reference_guides/api/core_classes_and_exceptions.md#pytask.DataCatalog) +that were left out of the [tutorial](../tutorials/using_a_data_catalog.md). Please, read +the tutorial for a basic understanding. ## Changing the default node The data catalog uses the -[`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) by default to -serialize any kind of Python object. You can use any other node that follows the -[`pytask.PNode`](../api/nodes_and_tasks.md#pytask.PNode) protocol and register it when -creating the data catalog. +[`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) by +default to serialize any kind of Python object. 
You can use any other node that follows +the [`pytask.PNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode) protocol +and register it when creating the data catalog. -For example, use the [`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) -as the default. +For example, use the +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) as +the default. ```python from pytask import PythonNode @@ -27,8 +28,8 @@ data_catalog = DataCatalog(default_node=PythonNode) Or, learn to write your node by reading [writing custom nodes](writing_custom_nodes.md). Here, is an example for a -[`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) that uses cloudpickle -instead of the normal `pickle` module. +[`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) that +uses cloudpickle instead of the normal `pickle` module. ```py --8<-- "docs_src/how_to_guides/the_data_catalog.py" diff --git a/docs/source/how_to_guides/update_the_lockfile_to_match_project_state.md b/docs/source/how_to_guides/update_the_lockfile_to_match_project_state.md index 135c2496..f625a244 100644 --- a/docs/source/how_to_guides/update_the_lockfile_to_match_project_state.md +++ b/docs/source/how_to_guides/update_the_lockfile_to_match_project_state.md @@ -119,5 +119,4 @@ or update the current state of collected tasks. - [`pytask lock`](../reference_guides/commands.md#pytask-lock) - [`pytask build`](../reference_guides/commands.md#pytask-build) -- [Portability](portability.md) - [The lockfile](../reference_guides/lockfile.md) diff --git a/docs/source/how_to_guides/using_task_returns.md b/docs/source/how_to_guides/using_task_returns.md index b0b26db2..44bc7d3d 100644 --- a/docs/source/how_to_guides/using_task_returns.md +++ b/docs/source/how_to_guides/using_task_returns.md @@ -22,8 +22,8 @@ where the return of the function, a string, should be stored. 
``` It works because internally the path is converted to a -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode) that is able to store -objects of type `str` and `bytes`. +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode) that is +able to store objects of type `str` and `bytes`. !!! note @@ -60,9 +60,9 @@ of the previous interfaces. Each return is mapped to its node by respecting its position in the tuple. -In fact, any [PyTree](../glossary.md#pytree) can be used. The only requirement is that -the PyTree of nodes defined to capture the function returns has the same structure as -the returns or is a shallower tree. +In fact, any [PyTree](../reference_guides/glossary.md#pytree) can be used. The only +requirement is that the PyTree of nodes defined to capture the function returns has the +same structure as the returns or is a shallower tree. The following example shows how a task function with a complex structure of returns is mapped to the defined nodes. diff --git a/docs/source/how_to_guides/writing_custom_nodes.md b/docs/source/how_to_guides/writing_custom_nodes.md index 37a199dd..3395b3d9 100644 --- a/docs/source/how_to_guides/writing_custom_nodes.md +++ b/docs/source/how_to_guides/writing_custom_nodes.md @@ -2,9 +2,9 @@ In the previous tutorials and how-to guides, you learned that dependencies and products can be represented as plain Python objects with -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode) or as paths where every +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode) or as paths where every [`pathlib.Path`][] is converted to a -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode). +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode). In this how-to guide, you will learn about the general concept of nodes and how to write your own to improve your workflows. 
@@ -21,7 +21,7 @@ paths to point to inputs and outputs and call [`pandas.read_pickle`][] and ``` To remove IO operations from the task and delegate them to pytask, we will replicate the -[`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) that automatically +[`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) that automatically loads and stores Python objects. And we pass the value to `df` via [`typing.Annotated`][] to preserve @@ -49,15 +49,15 @@ A custom node needs to follow an interface so that pytask can perform several ac - Load and save values when tasks are executed. This interface is defined by protocols. A custom node must follow at least the protocol -[`pytask.PNode`](../api/nodes_and_tasks.md#pytask.PNode) or, even better, -[`pytask.PPathNode`](../api/nodes_and_tasks.md#pytask.PPathNode) if it is based on a path. -The common node for paths, [`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode), -follows the protocol [`pytask.PPathNode`](../api/nodes_and_tasks.md#pytask.PPathNode). +[`pytask.PNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode) or, even better, +[`pytask.PPathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PPathNode) if it is based on a path. +The common node for paths, [`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode), +follows the protocol [`pytask.PPathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PPathNode). ## `PickleNode` -Since our [`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) will only -vary slightly from [`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode), we use +Since our [`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) will only +vary slightly from [`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode), we use it as a template, and with some minor modifications, we arrive at the following class. 
```py @@ -67,68 +67,68 @@ it as a template, and with some minor modifications, we arrive at the following Here are some explanations. - The node does not need to inherit from the protocol - [`pytask.PPathNode`](../api/nodes_and_tasks.md#pytask.PPathNode), but you can do it to - be more explicit. + [`pytask.PPathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PPathNode), but you can do it to + be more explicit. - The node has two attributes - - `name` identifies the node in the DAG, so the name must be unique. - - `path` holds the path to the file and identifies the node as a path node that is - handled slightly differently than normal nodes within pytask. + - `name` identifies the node in the DAG, so the name must be unique. + - `path` holds the path to the file and identifies the node as a path node that is + handled slightly differently than normal nodes within pytask. - The node has an additional property that computes the signature of the node. The - signature is a hash and a unique identifier for the node. For most nodes it will be - a hash of the path or the name. + signature is a hash and a unique identifier for the node. For most nodes it will be + a hash of the path or the name. - `signature` and lockfile `id` are different concepts. - - `signature` is the runtime identity in pytask's in-memory DAG. - - lockfile `id` is the portable key stored in `pytask.lock`. + - `signature` is the runtime identity in pytask's in-memory DAG. + - lockfile `id` is the portable key stored in `pytask.lock`. - For custom nodes, make sure the lockfile id stays stable and unique within a task. + For custom nodes, make sure the lockfile id stays stable and unique within a task. - The classmethod - [`pytask.PickleNode.from_path`](../api/nodes_and_tasks.md#pytask.PickleNode.from_path) - is a convenient method to instantiate the class. 
+ [`pytask.PickleNode.from_path`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode.from_path) + is a convenient method to instantiate the class. - The method - [`pytask.PickleNode.state`](../api/nodes_and_tasks.md#pytask.PickleNode.state) yields - a value that signals the node's state. If the value changes, pytask knows it needs - to regenerate the workflow. We can use the timestamp of when the node was last - modified. + [`pytask.PickleNode.state`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode.state) yields + a value that signals the node's state. If the value changes, pytask knows it needs + to regenerate the workflow. We can use the timestamp of when the node was last + modified. - pytask calls - [`pytask.PickleNode.load`](../api/nodes_and_tasks.md#pytask.PickleNode.load) when it - collects the values of function arguments to run the function. The argument - `is_product` signals that the node is loaded as a product with a - [`pytask.Product`](../api/utilities_and_typing.md#pytask.Product) annotation or via - `produces`. + [`pytask.PickleNode.load`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode.load) when it + collects the values of function arguments to run the function. The argument + `is_product` signals that the node is loaded as a product with a + [`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product) annotation or via + `produces`. - When the node is loaded as a dependency, we want to inject the value of the pickle - file. In the other case, the node returns itself so users can call - [`pytask.PickleNode.save`](../api/nodes_and_tasks.md#pytask.PickleNode.save) - themselves. + When the node is loaded as a dependency, we want to inject the value of the pickle + file. In the other case, the node returns itself so users can call + [`pytask.PickleNode.save`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode.save) + themselves. 
-- [`pytask.PickleNode.save`](../api/nodes_and_tasks.md#pytask.PickleNode.save) is called - when a task function returns and allows to save the return values. +- [`pytask.PickleNode.save`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode.save) is called + when a task function returns and allows saving the return values. ## Improvements Usually, you would like your custom node to work with [`pathlib.Path`][] objects and [`upath.UPath`][] objects allowing to work with remote filesystems. To simplify getting the state of the node, you can use the -[`pytask.get_state_of_path`](../api/utilities_and_typing.md#pytask.get_state_of_path) +[`pytask.get_state_of_path`](../reference_guides/api/utilities_and_typing.md#pytask.get_state_of_path) function. ## Conclusion Nodes are an important in concept pytask. They allow to pytask to build a -[DAG](../glossary.md#dag) and generate a workflow, and they also allow users to extract +[DAG](../reference_guides/glossary.md#dag) and generate a workflow, and they also allow users to extract IO operations from the task function into the nodes. pytask only implements two node types, -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode) and -[`pytask.PythonNode`](../api/nodes_and_tasks.md#pytask.PythonNode), but many more are -possible. In the future, there should probably be a [plugin](../glossary.md#plugin) that +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode) and +[`pytask.PythonNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PythonNode), but many more are +possible. In the future, there should probably be a [plugin](../reference_guides/glossary.md#plugin) that implements nodes for many other data sources like AWS S3 or databases. See [Kedro datasets](https://docs.kedro.org/en/stable/kedro_datasets.html) for one example.
diff --git a/docs/source/reference_guides/api/cli_and_programmatic.md b/docs/source/reference_guides/api/cli_and_programmatic.md index 77e18a68..afc77eb3 100644 --- a/docs/source/reference_guides/api/cli_and_programmatic.md +++ b/docs/source/reference_guides/api/cli_and_programmatic.md @@ -15,17 +15,5 @@ For command usage and options, see the ## CLI Types ::: pytask.ColoredCommand - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.ColoredGroup - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.EnumChoice - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" diff --git a/docs/source/reference_guides/api/core_classes_and_exceptions.md b/docs/source/reference_guides/api/core_classes_and_exceptions.md index 48d79a22..b0abbb53 100644 --- a/docs/source/reference_guides/api/core_classes_and_exceptions.md +++ b/docs/source/reference_guides/api/core_classes_and_exceptions.md @@ -3,52 +3,16 @@ ## Exceptions ::: pytask.PytaskError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" show_root_heading: true show_signature: true ::: pytask.CollectionError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.ConfigurationError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.ExecutionError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.ResolvingDependenciesError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.NodeNotCollectedError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.NodeNotFoundError - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ## Core Classes ::: pytask.Session - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.DataCatalog - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" diff --git a/docs/source/reference_guides/api/functional_interfaces.md b/docs/source/reference_guides/api/functional_interfaces.md index fb907cc8..501fab67 100644 --- a/docs/source/reference_guides/api/functional_interfaces.md +++ 
b/docs/source/reference_guides/api/functional_interfaces.md @@ -3,7 +3,7 @@ This page documents the public functional interfaces for using pytask from Python. For end-to-end usage examples, see the -[how-to guide on the functional interface](../how_to_guides/functional_interface.md). +[how-to guide on the functional interface](../../how_to_guides/functional_interface.md). ## Build Workflow diff --git a/docs/source/reference_guides/api/marks.md b/docs/source/reference_guides/api/marks.md index 94ac52ef..9d7031de 100644 --- a/docs/source/reference_guides/api/marks.md +++ b/docs/source/reference_guides/api/marks.md @@ -12,7 +12,16 @@ manually here. ``` Prevent execution of a task when all neighboring nodes exist, even if something changed. -See [making tasks persist](../tutorials/making_tasks_persist.md). +See [making tasks persist](../../tutorials/making_tasks_persist.md). + +### `pytask.mark.filterwarnings` + +```python +@pytask.mark.filterwarnings("ignore:warning message") +``` + +Add warning filters to a task. +See [capture warnings](../../how_to_guides/capture_warnings.md). ### `pytask.mark.skip` @@ -21,7 +30,7 @@ See [making tasks persist](../tutorials/making_tasks_persist.md). ``` Skip a task and all downstream tasks. -See [skipping tasks](../tutorials/skipping_tasks.md). +See [skipping tasks](../../tutorials/skipping_tasks.md). ### `pytask.mark.skipif` @@ -30,7 +39,7 @@ See [skipping tasks](../tutorials/skipping_tasks.md). ``` Skip a task and all downstream tasks when `condition` is `True`. -See [skipping tasks](../tutorials/skipping_tasks.md). +See [skipping tasks](../../tutorials/skipping_tasks.md). ### `pytask.mark.try_first` @@ -39,7 +48,7 @@ See [skipping tasks](../tutorials/skipping_tasks.md). ``` Prefer running a task as early as possible in the DAG. -See [how to influence build order](../how_to_guides/how_to_influence_build_order.md). +See [how to influence build order](../../how_to_guides/how_to_influence_build_order.md). 
### `pytask.mark.try_last` @@ -48,28 +57,16 @@ See [how to influence build order](../how_to_guides/how_to_influence_build_order ``` Prefer running a task as late as possible in the DAG. -See [how to influence build order](../how_to_guides/how_to_influence_build_order.md). +See [how to influence build order](../../how_to_guides/how_to_influence_build_order.md). ## Mark Classes ::: pytask.Mark - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" - show_root_heading: true - show_signature: true + show_root_heading: true + show_signature: true ::: pytask.mark ::: pytask.MarkDecorator - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.MarkGenerator - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ## Mark Utilities diff --git a/docs/source/reference_guides/api/nodes_and_tasks.md b/docs/source/reference_guides/api/nodes_and_tasks.md index 79f03fd3..b3636ef6 100644 --- a/docs/source/reference_guides/api/nodes_and_tasks.md +++ b/docs/source/reference_guides/api/nodes_and_tasks.md @@ -3,55 +3,19 @@ ## Protocols ::: pytask.PNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" show_root_heading: true show_signature: true ::: pytask.PPathNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.PTask - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.PTaskWithPath - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.PProvisionalNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ## Nodes ::: pytask.PathNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.PickleNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.PythonNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.DirectoryNode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.parse_dependencies_from_task_function ::: pytask.parse_products_from_task_function @@ -59,17 +23,5 @@ ::: pytask.task ::: pytask.Task - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.TaskWithoutPath 
- options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.CollectionMetadata - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" diff --git a/docs/source/reference_guides/api/outcomes_reports_and_warnings.md b/docs/source/reference_guides/api/outcomes_reports_and_warnings.md index dc29f5b8..682df8a3 100644 --- a/docs/source/reference_guides/api/outcomes_reports_and_warnings.md +++ b/docs/source/reference_guides/api/outcomes_reports_and_warnings.md @@ -3,81 +3,29 @@ ## Outcomes ::: pytask.ExitCode - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" show_root_heading: true show_signature: true ::: pytask.CollectionOutcome - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.TaskOutcome - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.Exit - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.Persisted - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.Skipped - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.SkippedAncestorFailed - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.SkippedUnchanged - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.count_outcomes ## Reports ::: pytask.CollectionReport - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.ExecutionReport - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.DagReport - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ## Tracebacks ::: pytask.Traceback - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ## Warnings ::: pytask.WarningReport - options: - filters: - - "!^_[^_].*" - - "!^__.*__$" ::: pytask.parse_warning_filter ::: pytask.warning_record_to_str diff --git a/docs/source/reference_guides/configuration.md b/docs/source/reference_guides/configuration.md index df2552e0..ed04272c 100644 --- a/docs/source/reference_guides/configuration.md +++ b/docs/source/reference_guides/configuration.md @@ -96,7 +96,7 @@ editor_url_scheme = "no_link" ### `hook_module` Register 
additional modules containing -[hook implementations](../glossary.md#hook-implementation). +[hook implementations](../reference_guides/glossary.md#hook-implementation). ```toml hook_modules = ["myproject.hooks", "hooks.py"] @@ -130,6 +130,24 @@ ignore = "some_file.py" ignore = ["some_directory/*", "some_file.py"] ``` +### `filterwarnings` + +You can configure how pytask handles warnings during a build with the `filterwarnings` +option. The value can be a string or a list of strings using the same colon-separated +warning filter syntax as Python's +[`warnings`](https://docs.python.org/3/library/warnings.html#the-warnings-filter) +module. + +```toml +filterwarnings = [ + "error", + "ignore::UserWarning", +] +``` + +The [capture warnings](../how_to_guides/capture_warnings.md) guide explains the +available fields and shows more examples. + ### `markers` pytask uses markers to attach additional information to task functions. To see which diff --git a/docs/source/reference_guides/glossary.md b/docs/source/reference_guides/glossary.md index d71ef620..34524d55 100644 --- a/docs/source/reference_guides/glossary.md +++ b/docs/source/reference_guides/glossary.md @@ -28,7 +28,7 @@ message sent by the host at an entry-point. A plugin can consist of one or more ## Hooking { #hooking } -See the reference guide on [pluggy](explanations/pluggy.md) or the more general +See the reference guide on [pluggy](../explanations/pluggy.md) or the more general explanation on [Wikipedia](https://en.wikipedia.org/wiki/Hooking). 
## Hook specification { #hook-specification } diff --git a/docs/source/reference_guides/hookspecs.md b/docs/source/reference_guides/hookspecs.md index 76a97e49..364fae48 100644 --- a/docs/source/reference_guides/hookspecs.md +++ b/docs/source/reference_guides/hookspecs.md @@ -1,7 +1,7 @@ # Hook Specifications -[Hook specifications](../glossary.md#hook-specification) are the -[entry-points](../glossary.md#entry-point) provided by pytask to change the behavior of +[Hook specifications](../reference_guides/glossary.md#hook-specification) are the +[entry-points](../reference_guides/glossary.md#entry-point) provided by pytask to change the behavior of the program. The names of hooks always start with `pytask_` by convention. If you encounter hooks diff --git a/docs/source/tutorials/defining_dependencies_products.md b/docs/source/tutorials/defining_dependencies_products.md index f43b5b6f..e1de8aa9 100644 --- a/docs/source/tutorials/defining_dependencies_products.md +++ b/docs/source/tutorials/defining_dependencies_products.md @@ -5,7 +5,7 @@ Define task dependencies and products to run your tasks. Defining dependencies and products also determines task execution order. This tutorial offers you different interfaces. For type annotations, see the `Annotated` -tabs. You find a tutorial on type hints [here](../type_hints.md). +tabs. You find a tutorial on type hints [here](../reference_guides/type_hints.md). If you want to avoid type annotations for now, look at the tab named `produces`. @@ -52,7 +52,7 @@ Let's revisit the task from the [previous tutorial](write_a_task.md) that we def --8<-- "docs_src/tutorials/defining_dependencies_products_products_py310.py" ``` - [`pytask.Product`](../api/utilities_and_typing.md#pytask.Product) allows marking an + [`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product) allows marking an argument as a product. After the task has finished, pytask will check whether the file exists. 
@@ -68,7 +68,7 @@ Let's revisit the task from the [previous tutorial](write_a_task.md) that we def !!! tip - If you do not know about [`pathlib`][] check out this guide by + If you do not know about [`pathlib`](https://docs.python.org/3/library/pathlib.html) check out this guide by [RealPython](https://realpython.com/python-pathlib/). The module is beneficial for handling paths conveniently and across platforms. @@ -86,7 +86,7 @@ we will define it in `task_plot_data.py`. any argument name (here `path_to_data`). pytask assumes that all function arguments that do not have a - [`pytask.Product`](../api/utilities_and_typing.md#pytask.Product) annotation are + [`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product) annotation are dependencies of the task. ```py hl_lines="12" @@ -173,7 +173,7 @@ pytask allows you to do that, but you lose features like access to paths, which defining dependencies explicitly is always preferred. There are two modes for it, and both use -[`@task(after=...)`](../api/nodes_and_tasks.md#pytask.task). +[`@task(after=...)`](../reference_guides/api/nodes_and_tasks.md#pytask.task). First, you can pass the task function or multiple task functions to the decorator. Applied to the tasks from before, we could have written `task_plot_data` as @@ -198,8 +198,8 @@ You will learn more about expressions in [selecting tasks](selecting_tasks.md). ## Further reading - There is an additional way to specify products by treating the returns of a task - function as a product. Read - [using task returns](../how_to_guides/using_task_returns.md) to learn more about it. + function as a product. Read + [using task returns](../how_to_guides/using_task_returns.md) to learn more about it. - An overview of all ways to specify dependencies and products and their strengths and - weaknesses can be found in - [interfaces for dependencies products](../how_to_guides/interfaces_for_dependencies_products.md). 
+ weaknesses can be found in + [interfaces for dependencies products](../how_to_guides/interfaces_for_dependencies_products.md). diff --git a/docs/source/tutorials/making_tasks_persist.md b/docs/source/tutorials/making_tasks_persist.md index 37247aca..e6c40412 100644 --- a/docs/source/tutorials/making_tasks_persist.md +++ b/docs/source/tutorials/making_tasks_persist.md @@ -5,8 +5,9 @@ Sometimes you want to skip the execution of a task and pretend nothing has chang A typical scenario is that you formatted the task's source files with [`ruff format`](https://docs.astral.sh/ruff/formatter/) which would rerun the task. -In this case, you can apply the [`@pytask.mark.persist`](../api/marks.md#pytask.mark) -decorator to the task, which will skip its execution as long as all products exist. +In this case, you can apply the +[`@pytask.mark.persist`](../reference_guides/api/marks.md#pytaskmarkpersist) decorator +to the task, which will skip its execution as long as all products exist. Internally, the state of the dependencies, the source file, and the products are updated in the lockfile such that the subsequent execution will skip the task successfully. @@ -43,13 +44,15 @@ Running pytask will execute the task since the product is missing. --8<-- "docs/source/_static/md/persist-executed.md" After that, we accidentally changed the task's source file by formatting the file with -Black. Without the [`@pytask.mark.persist`](../api/marks.md#pytask.mark) decorator, the -task would run again since the source has changed. With the decorator, a green p signals -that the execution is skipped. +Black. Without the +[`@pytask.mark.persist`](../reference_guides/api/marks.md#pytaskmarkpersist) decorator, +the task would run again since the source has changed. With the decorator, a green p +signals that the execution is skipped. 
--8<-- "docs/source/_static/md/persist-persisted.md" If we rerun the task, it is skipped because nothing has changed and not because it is -marked with [`@pytask.mark.persist`](../api/marks.md#pytask.mark). +marked with +[`@pytask.mark.persist`](../reference_guides/api/marks.md#pytaskmarkpersist). --8<-- "docs/source/_static/md/persist-skipped.md" diff --git a/docs/source/tutorials/plugins.md b/docs/source/tutorials/plugins.md index 025370e6..6418b59c 100644 --- a/docs/source/tutorials/plugins.md +++ b/docs/source/tutorials/plugins.md @@ -4,14 +4,15 @@ Users employ pytask in many different contexts, making it impossible for pytask' maintainers to cater to all possible use cases. Therefore, pytask uses [pluggy](https://github.com/pytest-dev/pluggy), a -[plugin](../glossary.md#plugin) framework, to allow users to extend pytask. +[plugin](../reference_guides/glossary.md#plugin) framework, to allow users to extend +pytask. ## How to extend pytask A quick method to extend pytask is explained in the [guide on extending pytask](../how_to_guides/extending_pytask.md). You will learn how to -add your own [hook implementations](../glossary.md#hook-implementation) or write your -[plugin](../glossary.md#plugin). +add your own [hook implementations](../reference_guides/glossary.md#hook-implementation) +or write your [plugin](../reference_guides/glossary.md#plugin). ## Where can I find plugins? diff --git a/docs/source/tutorials/repeating_tasks_with_different_inputs.md b/docs/source/tutorials/repeating_tasks_with_different_inputs.md index e1bb0867..ef5621ed 100644 --- a/docs/source/tutorials/repeating_tasks_with_different_inputs.md +++ b/docs/source/tutorials/repeating_tasks_with_different_inputs.md @@ -8,9 +8,9 @@ We reuse the task from the previous [tutorial](write_a_task.md), which generates data and repeat the same operation over several seeds to receive multiple, reproducible samples. 
-Apply the [`@task`](../api/nodes_and_tasks.md#pytask.task) decorator, loop over the -function and supply different seeds and output paths as default arguments of the -function. +Apply the [`@task`](../reference_guides/api/nodes_and_tasks.md#pytask.task) decorator, +loop over the function and supply different seeds and output paths as default arguments +of the function. === "Annotated" @@ -100,8 +100,8 @@ task_data_preparation.py::task_create_random_data[seed1] ### User-defined ids -The [`@task`](../api/nodes_and_tasks.md#pytask.task) decorator has an `id` keyword, -allowing the user to set a unique name for the iteration. +The [`@task`](../reference_guides/api/nodes_and_tasks.md#pytask.task) decorator has an +`id` keyword, allowing the user to set a unique name for the iteration. === "Annotated" @@ -131,9 +131,8 @@ and arguments. Here are three tips to organize the repetitions. **Dataclass** - [`dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass) - is a useful container to organize the arguments of the parametrizations. It also - works well with type checkers. + [`dataclasses.dataclass`][] is a useful container to organize the arguments of the + parametrizations. It also works well with type checkers. ```python from dataclasses import dataclass @@ -152,8 +151,9 @@ and arguments. Here are three tips to organize the repetitions. } ``` -1. [`@task`](../api/nodes_and_tasks.md#pytask.task) has a `kwargs` argument that allows - you inject arguments to the function instead of adding them as default arguments. +1. [`@task`](../reference_guides/api/nodes_and_tasks.md#pytask.task) has a `kwargs` + argument that allows you to inject arguments to the function instead of adding them as + default arguments. 1. If the generation of arguments for the task function is complex, we should use a function. @@ -173,8 +173,8 @@ Following these three tips, the parametrization becomes ``` Unpacking all the arguments can become tedious. 
Instead, use the `kwargs` argument of -the [`@task`](../api/nodes_and_tasks.md#pytask.task) decorator to pass keyword arguments -to the task. +the [`@task`](../reference_guides/api/nodes_and_tasks.md#pytask.task) decorator to pass +keyword arguments to the task. ```python for id_, kwargs in ID_TO_KWARGS.items(): diff --git a/docs/source/tutorials/skipping_tasks.md b/docs/source/tutorials/skipping_tasks.md index c725fef2..df4929cc 100644 --- a/docs/source/tutorials/skipping_tasks.md +++ b/docs/source/tutorials/skipping_tasks.md @@ -6,10 +6,12 @@ but less dynamic than selecting tasks via [markers](selecting_tasks.md#markers) In contrast to tasks in ignored files, ignored with [`ignore`](../reference_guides/configuration.md#ignore), pytask will still check whether -skipped tasks are consistent with the [DAG](../glossary.md#dag) of the project. +skipped tasks are consistent with the [DAG](../reference_guides/glossary.md#dag) of the +project. -For example, you can use the [`@pytask.mark.skip`](../api/marks.md#pytask.mark) -decorator to skip tasks during development that take too much time to compute right now. +For example, you can use the +[`@pytask.mark.skip`](../reference_guides/api/marks.md#pytaskmarkskip) decorator to skip +tasks during development that take too much time to compute right now. ```py --8<-- "docs_src/tutorials/skipping_tasks_example_1.py" @@ -23,7 +25,8 @@ Not only will this task be skipped, but all tasks depending on In large projects, you may have many long-running tasks that you only want to execute on a remote server, but not when you are not working locally. -In this case, use the [`@pytask.mark.skipif`](../api/marks.md#pytask.mark) decorator, +In this case, use the +[`@pytask.mark.skipif`](../reference_guides/api/marks.md#pytaskmarkskipif) decorator, which requires a condition and a reason as arguments. 
Place the condition variable in a module different from the task so you can change it diff --git a/docs/source/tutorials/using_a_data_catalog.md b/docs/source/tutorials/using_a_data_catalog.md index 5d7d86d2..214a7483 100644 --- a/docs/source/tutorials/using_a_data_catalog.md +++ b/docs/source/tutorials/using_a_data_catalog.md @@ -7,15 +7,15 @@ Two things will quickly become a nuisance in bigger projects. 1. We have to define the same paths again and again. 1. We have to define paths to files that we are not particularly interested in since - they are just intermediate representations. + they are just intermediate representations. As a solution, pytask offers a -[`pytask.DataCatalog`](../api/core_classes_and_exceptions.md#pytask.DataCatalog), which +[`pytask.DataCatalog`](../reference_guides/api/core_classes_and_exceptions.md#pytask.DataCatalog), which is a purely optional feature. The tutorial focuses on the main features. To learn about all the features, read the [how-to guide](../how_to_guides/the_data_catalog.md). Let us focus on the previous example and see how -[`pytask.DataCatalog`](../api/core_classes_and_exceptions.md#pytask.DataCatalog) helps +[`pytask.DataCatalog`](../reference_guides/api/core_classes_and_exceptions.md#pytask.DataCatalog) helps us. The project structure is the same as in the previous example except the `.pytask` folder @@ -56,7 +56,7 @@ Next, we look at the module `task_data_preparation.py` and its task be stored on the disk. In the previous tutorial, we learned to use -[`pathlib.Path`](https://docs.python.org/3/library/pathlib.html#pathlib.Path)s to define +[`pathlib.Path`][]s to define products of our tasks. Here we see again the signature of the task function. === "Annotated" @@ -74,7 +74,7 @@ products of our tasks. Here we see again the signature of the task function. When we want to use the data catalog, we replace `BLD / "data.pkl"` with an entry of the data catalog like `data_catalog["data"]`. 
If there is yet no entry with the name `"data"`, the data catalog will automatically create a -[`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode). The node allows you +[`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode). The node allows you to save any Python object to a `pickle` file. You probably noticed that we did not need to define a path. That is because the data @@ -87,9 +87,9 @@ The following tabs show you how to use the data catalog given the interface you === "Annotated" Use `data_catalog["data"]` as an default argument to access the - [`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) within the task. When + [`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) within the task. When you are done transforming your [`pandas.DataFrame`][], save it with - [`pytask.PNode.save`](../api/nodes_and_tasks.md#pytask.PNode.save). + [`pytask.PNode.save`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode.save). ```py hl_lines="11 22" title="task_data_preparation.py" --8<-- "docs_src/tutorials/using_a_data_catalog_2_py310.py" @@ -98,9 +98,9 @@ The following tabs show you how to use the data catalog given the interface you === "produces" Use `data_catalog["data"]` as an default argument to access the - [`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) within the task. When + [`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) within the task. When you are done transforming your [`pandas.DataFrame`][], save it with - [`pytask.PNode.save`](../api/nodes_and_tasks.md#pytask.PNode.save). + [`pytask.PNode.save`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode.save). 
```py hl_lines="7 17" title="task_data_preparation.py" --8<-- "docs_src/tutorials/using_a_data_catalog_2_produces.py" @@ -110,7 +110,7 @@ The following tabs show you how to use the data catalog given the interface you An elegant way to use the data catalog is via return type annotations. Add `data_catalog["data"]` to the annotated return and simply return the - [`pandas.DataFrame`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) + [`pandas.DataFrame`][] to store it. You can read more about return type annotations in @@ -139,7 +139,7 @@ Finally, let's execute the two tasks. In most projects, you have other data sets that you would like to access via the data catalog. To add them, call the -[`pytask.DataCatalog.add`](../api/core_classes_and_exceptions.md#pytask.DataCatalog.add) +[`pytask.DataCatalog.add`](../reference_guides/api/core_classes_and_exceptions.md#pytask.DataCatalog.add) method and supply a name and a path. Let's add `file.csv` with the name `"csv"` to the data catalog and use it to create @@ -181,9 +181,9 @@ You can now use the data catalog as in the previous example and use the Note that the value of `data_catalog["csv"]` inside the task becomes a [`pathlib.Path`][]. It is because a [`pathlib.Path`][] in - [`pytask.DataCatalog.add`](../api/core_classes_and_exceptions.md#pytask.DataCatalog.add) - is not parsed to a [`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) - but a [`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode). + [`pytask.DataCatalog.add`](../reference_guides/api/core_classes_and_exceptions.md#pytask.DataCatalog.add) + is not parsed to a [`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) + but a [`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode). Read [writing custom nodes](../how_to_guides/writing_custom_nodes.md) for more information about different node types which is not relevant now. @@ -207,7 +207,7 @@ interpreter. 
This can be super helpful when you develop tasks interactively in a Notebook. Simply import the data catalog, select a node and call -[`pytask.PNode.load`](../api/nodes_and_tasks.md#pytask.PNode.load) to access its value. +[`pytask.PNode.load`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode.load) to access its value. Here is an example with a terminal. @@ -222,7 +222,7 @@ WindowsPath('C:\Users\pytask-dev\git\my_project\file.csv') ``` `data_catalog["data"]` was stored with a -[`pytask.PickleNode`](../api/nodes_and_tasks.md#pytask.PickleNode) and returns the +[`pytask.PickleNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PickleNode) and returns the [`pandas.DataFrame`][] whereas `data_catalog["csv"]` becomes a -[`pytask.PathNode`](../api/nodes_and_tasks.md#pytask.PathNode) and -[`pytask.PNode.load`](../api/nodes_and_tasks.md#pytask.PNode.load) returns the path. +[`pytask.PathNode`](../reference_guides/api/nodes_and_tasks.md#pytask.PathNode) and +[`pytask.PNode.load`](../reference_guides/api/nodes_and_tasks.md#pytask.PNode.load) returns the path. diff --git a/docs/source/tutorials/visualizing_the_dag.md b/docs/source/tutorials/visualizing_the_dag.md index cd119971..8f1b03ba 100644 --- a/docs/source/tutorials/visualizing_the_dag.md +++ b/docs/source/tutorials/visualizing_the_dag.md @@ -1,7 +1,7 @@ # Visualizing the DAG -To visualize the [DAG](../glossary.md#dag) of the project, first, install -[networkx](https://networkx.org/), +To visualize the [DAG](../reference_guides/glossary.md#dag) of the project, first, +install [networkx](https://networkx.org/), [pygraphviz](https://github.com/pygraphviz/pygraphviz), and [graphviz](https://graphviz.org/). @@ -49,9 +49,11 @@ layouts, which are listed [here](https://graphviz.org/docs/layouts/). The programmatic and interactive interface allows for customizing the figure. 
-Similar to [`pytask.build`](../api/functional_interfaces.md#pytask.build), there exists -[`pytask.build_dag`](../api/functional_interfaces.md#pytask.build_dag) which returns the -DAG as a \[`networkx.DiGraph`\][]. +Similar to +[`pytask.build`](../reference_guides/api/functional_interfaces.md#pytask.build), there +exists +[`pytask.build_dag`](../reference_guides/api/functional_interfaces.md#pytask.build_dag) +which returns the DAG as a [`networkx.DiGraph`][]. Create an executable script that you can execute with `python script.py`. @@ -59,10 +61,8 @@ Create an executable script that you can execute with `python script.py`. --8<-- "docs_src/tutorials/visualizing_the_dag.py" ``` -Customization works best on the -[`networkx.DiGraph`](https://networkx.org/documentation/stable/reference/classes/digraph.html). -For example, here, we set the shape of all nodes to hexagons by adding the property to -the node attributes. +Customization works best on the [`networkx.DiGraph`][]. For example, here, we set the +shape of all nodes to hexagons by adding the property to the node attributes. For drawing, you better switch to pygraphviz since the matplotlib backend handles shapes with texts poorly. Here we store the graph as a `.svg`. diff --git a/docs/source/tutorials/write_a_task.md b/docs/source/tutorials/write_a_task.md index 8d41f3c8..21a2149e 100644 --- a/docs/source/tutorials/write_a_task.md +++ b/docs/source/tutorials/write_a_task.md @@ -48,10 +48,10 @@ necessary for pytask to correctly run a workflow. The interfaces are ordered fro The type hint `Annotated[Path, Product]` uses [`typing.Annotated`](https://docs.python.org/3/library/typing.html#typing.Annotated) - syntax. The first entry specifies the argument type - ([`pathlib.Path`](https://docs.python.org/3/library/pathlib.html#pathlib.Path)), and the - second entry ([`pytask.Product`](../api/utilities_and_typing.md#pytask.Product)) marks - this argument as a product. + syntax. 
The first entry specifies the argument type ([`pathlib.Path`][]), and the second + entry + ([`pytask.Product`](../reference_guides/api/utilities_and_typing.md#pytask.Product)) + marks this argument as a product. ```{ .python .annotate hl_lines="2 12" title="task_data_preparation.py" } --8<-- "docs_src/tutorials/write_a_task_py310.py" @@ -62,7 +62,7 @@ necessary for pytask to correctly run a workflow. The interfaces are ordered fro !!! tip If you want to refresh your knowledge about type hints, read - [this guide](../type_hints.md). + [this guide](../reference_guides/type_hints.md). === "produces" @@ -87,9 +87,9 @@ file under version control so later builds can detect unchanged tasks. ## Customize task names -Use the [`@task`](../api/nodes_and_tasks.md#pytask.task) decorator to mark a function as -a task regardless of its function name. You can optionally pass a new name for the task. -Otherwise, pytask uses the function name. +Use the [`@task`](../reference_guides/api/nodes_and_tasks.md#pytask.task) decorator to +mark a function as a task regardless of its function name. You can optionally pass a new +name for the task. Otherwise, pytask uses the function name. ```python from pytask import task diff --git a/justfile b/justfile index 9efe56dd..45e65c07 100644 --- a/justfile +++ b/justfile @@ -29,7 +29,7 @@ check: lint typing test docs *FLAGS: uv run python scripts/generate_cli_reference.py uv run --group plugin-list python scripts/update_plugin_list.py - uv run --group docs zensical build {{FLAGS}} + uv run --group docs zensical build --strict {{FLAGS}} # Serve documentation with auto-reload docs-serve *FLAGS: diff --git a/mkdocs.yml b/mkdocs.yml index 8d64e016..9fa1dba7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,6 +5,9 @@ repo_name: pytask-dev/pytask site_description: pytask is a workflow management system that facilitates reproducible data analyses. 
use_directory_urls: false +validation: + # https://github.com/zensical/zensical/issues/600 + unresolved_references: false docs_dir: docs/source site_dir: docs/build exclude_docs: | @@ -64,14 +67,14 @@ nav: - Reference Guides: - Overview: reference_guides/index.md - API: - - Overview: api/index.md - - CLI Imports: api/cli_and_programmatic.md - - Functional Interfaces: api/functional_interfaces.md - - Core Classes And Exceptions: api/core_classes_and_exceptions.md - - Marks: api/marks.md - - Nodes And Tasks: api/nodes_and_tasks.md - - Outcomes, Reports, And Warnings: api/outcomes_reports_and_warnings.md - - Utilities And Typing: api/utilities_and_typing.md + - Overview: reference_guides/api/index.md + - CLI Imports: reference_guides/api/cli_and_programmatic.md + - Functional Interfaces: reference_guides/api/functional_interfaces.md + - Core Classes And Exceptions: reference_guides/api/core_classes_and_exceptions.md + - Marks: reference_guides/api/marks.md + - Nodes And Tasks: reference_guides/api/nodes_and_tasks.md + - Outcomes, Reports, And Warnings: reference_guides/api/outcomes_reports_and_warnings.md + - Utilities And Typing: reference_guides/api/utilities_and_typing.md - Commands: reference_guides/commands.md - Configuration: reference_guides/configuration.md - Hook Specifications: reference_guides/hookspecs.md diff --git a/src/_pytask/build.py b/src/_pytask/build.py index 4195ee32..4e16d36b 100644 --- a/src/_pytask/build.py +++ b/src/_pytask/build.py @@ -200,7 +200,7 @@ def build( # noqa: PLR0913 strict_markers : bool, default=False Raise errors for unknown markers. tasks : Callable[..., Any] | PTask | Iterable[Callable[..., Any] | PTask] - A task or collection of tasks as callables or [pytask.PTask][] instances. + A task or collection of tasks as callables or [`pytask.PTask`][] instances. task_files : Iterable[str], default=("task_*.py",) A pattern to describe modules that contain tasks. 
trace : bool, default=False diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index ecbfeab0..7200c8d4 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -407,7 +407,7 @@ def pytask_collect_task( def pytask_collect_node( # noqa: C901, PLR0912 session: Session, path: Path, node_info: NodeInfo ) -> PNode | PProvisionalNode: - """Collect a node of a task as a [pytask.PNode][]. + """Collect a node of a task as a [`pytask.PNode`][]. Strings are assumed to be paths. This might be a strict assumption, but since this hook is executed at last and possible errors will be shown, it seems reasonable and diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py index 9ad4cde0..1629da88 100644 --- a/src/_pytask/data_catalog.py +++ b/src/_pytask/data_catalog.py @@ -53,7 +53,7 @@ class DataCatalog: ---------- default_node A default node for loading and saving values. By default, - [pytask.PickleNode][] is used to serialize any Python object with the + [`pytask.PickleNode`][] is used to serialize any Python object with the `pickle` module. name The name of the data catalog which can only contain letters, numbers, hyphens diff --git a/src/_pytask/node_protocols.py b/src/_pytask/node_protocols.py index 7c22b879..27fadd30 100644 --- a/src/_pytask/node_protocols.py +++ b/src/_pytask/node_protocols.py @@ -108,7 +108,7 @@ class PProvisionalNode(Protocol): """A protocol for provisional nodes. This type of nodes is provisional since it resolves to actual nodes, - [pytask.PNode][], right before a task is executed as a dependency and after the + [`pytask.PNode`][], right before a task is executed as a dependency and after the task is executed as a product. Provisional nodes are nodes that define how the actual nodes look like. They can be @@ -132,7 +132,7 @@ def load(self, is_product: bool = False) -> Any: # pragma: no cover It is possible to load a provisional node as a dependency so that it can inject basic information about it in the task. 
For example, - [pytask.DirectoryNode.load][] injects the root directory. + [`pytask.DirectoryNode.load`][] injects the root directory. """ if is_product: diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 5d128d4c..8910d289 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -236,7 +236,7 @@ class PythonNode(PNode): Examples -------- - To allow a [pytask.PythonNode][] to hash a dictionary, you need to pass your + To allow a [`pytask.PythonNode`][] to hash a dictionary, you need to pass your own hashing function. For example, from the `deepdiff` library. >>> from deepdiff import DeepHash @@ -284,7 +284,8 @@ def state(self) -> str | None: """Calculate state of the node. If ``hash = False``, the function returns ``"0"``, a constant hash value, so the - [pytask.PythonNode][] is ignored when checking for a changed state of the task. + [`pytask.PythonNode`][] is ignored when checking for a changed state of the + task. If ``hash`` is a callable, then use this function to calculate a hash expecting an integer or string. @@ -297,8 +298,8 @@ def state(self) -> str | None: ``hash("asd")`` returns a different value every invocation since the hash of strings is salted with a random integer and it would confuse users. See [`object.__hash__`]( - https://docs.python.org/3/reference/datamodel.html#object.__hash__ - ) for more information. + https://docs.python.org/3/reference/datamodel.html#object.__hash__ ) for more + information. """ if self.value is no_default: diff --git a/src/_pytask/pluginmanager.py b/src/_pytask/pluginmanager.py index 025f8698..57b2ecb8 100644 --- a/src/_pytask/pluginmanager.py +++ b/src/_pytask/pluginmanager.py @@ -92,7 +92,7 @@ class _PluginManagerStorage: Afterwards, it needs to be accessed in the different commands. 
When pytask is called from the API, the plugin manager needs to be created inside - the function, for example, [pytask.build][] to ensure each call can start from + the function, for example, [`pytask.build`][] to ensure each call can start from a blank slate and is able to register any plugins. """ diff --git a/src/_pytask/provisional_utils.py b/src/_pytask/provisional_utils.py index 604da3a8..32271016 100644 --- a/src/_pytask/provisional_utils.py +++ b/src/_pytask/provisional_utils.py @@ -32,7 +32,7 @@ def collect_provisional_nodes( ) -> PyTree[PNode | PProvisionalNode]: """Collect provisional nodes. - 1. Call the [pytask.PProvisionalNode.collect][] to receive the raw nodes. + 1. Call the [`pytask.PProvisionalNode.collect`][] to receive the raw nodes. 2. Collect the raw nodes as usual. """ diff --git a/src/_pytask/scheduler.py b/src/_pytask/scheduler.py index 9ef81a6b..73d85c8e 100644 --- a/src/_pytask/scheduler.py +++ b/src/_pytask/scheduler.py @@ -107,11 +107,11 @@ def rebuild(self, dag: DAG) -> SimpleScheduler: def _extract_priorities_from_tasks(tasks: list[PTask]) -> dict[str, int]: """Extract priorities from tasks. - Priorities are set via the [pytask.mark.try_first][] and [pytask.mark.try_last][] - markers. We recode these markers to numeric values to sort all available by - priorities. ``try_first`` is assigned the highest value such that it has the - rightmost position in the list. Then, we can simply call `list.pop` on the - list which is far more efficient than ``list.pop(0)``. + Priorities are set via the [`pytask.mark.try_first`][] and + [`pytask.mark.try_last`][] markers. We recode these markers to numeric values to + sort all available tasks by priorities. ``try_first`` is assigned the highest value such + that it has the rightmost position in the list. Then, we can simply call `list.pop` + on the list which is far more efficient than ``list.pop(0)``. """ priorities = {