-
Notifications
You must be signed in to change notification settings - Fork 17.4k
Add deferrable mode to ExternalTaskSensor #29260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cb5a7b7
f88e363
4f6040f
bee2d6c
2d32bc6
99bf460
9bc8d5c
226ff69
d2987bd
994aa43
e801b7d
9c314d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,8 @@ | |
| from __future__ import annotations | ||
|
|
||
| import asyncio | ||
| import datetime | ||
| import typing | ||
| from datetime import datetime | ||
|
|
||
| from asgiref.sync import sync_to_async | ||
| from sqlalchemy import func | ||
|
|
@@ -27,7 +27,8 @@ | |
| from airflow.models import DagRun, TaskInstance | ||
| from airflow.triggers.base import BaseTrigger, TriggerEvent | ||
| from airflow.utils.session import NEW_SESSION, provide_session | ||
| from airflow.utils.state import DagRunState | ||
| from airflow.utils.state import DagRunState, TaskInstanceState | ||
| from airflow.utils.timezone import utcnow | ||
|
|
||
|
|
||
| class TaskStateTrigger(BaseTrigger): | ||
|
|
@@ -36,27 +37,36 @@ class TaskStateTrigger(BaseTrigger): | |
|
|
||
| :param dag_id: The dag_id that contains the task you want to wait for | ||
| :param task_id: The task_id that contains the task you want to | ||
| wait for. If ``None`` (default value) the sensor waits for the DAG | ||
| wait for. | ||
| :param states: allowed states, default is ``['success']`` | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While having the default value be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently, for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This default is set in the sensor though, in the trigger itself this behavior is not implemented. If someone now wanted to use the trigger seperately, the documentation for it is incorrect. |
||
| :param execution_dates: | ||
| :param execution_dates: task execution time interval | ||
| :param poll_interval: The time interval in seconds to check the state. | ||
| The default value is 5 sec. | ||
| :param trigger_start_time: time in Datetime format when the trigger was started. Is used | ||
| to control the execution of trigger to prevent infinite loop in case if specified name | ||
| of the dag does not exist in database. It will wait period of time equals _timeout_sec parameter | ||
| from the time, when the trigger was started and if the execution lasts more time than expected, | ||
| the trigger will terminate with 'timeout' status. | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
| dag_id: str, | ||
| task_id: str, | ||
| states: list[str], | ||
| execution_dates: list[datetime.datetime], | ||
| poll_interval: float = 5.0, | ||
| execution_dates: list[datetime], | ||
| trigger_start_time: datetime, | ||
| states: list[str] | None = None, | ||
| task_id: str | None = None, | ||
| poll_interval: float = 2.0, | ||
| ): | ||
| super().__init__() | ||
| self.dag_id = dag_id | ||
| self.task_id = task_id | ||
| self.states = states | ||
| self.execution_dates = execution_dates | ||
| self.poll_interval = poll_interval | ||
| self.trigger_start_time = trigger_start_time | ||
| self.states = states if states else [TaskInstanceState.SUCCESS.value] | ||
| self._timeout_sec = 60 | ||
|
|
||
| def serialize(self) -> tuple[str, dict[str, typing.Any]]: | ||
| """Serializes TaskStateTrigger arguments and classpath.""" | ||
|
|
@@ -68,17 +78,52 @@ def serialize(self) -> tuple[str, dict[str, typing.Any]]: | |
| "states": self.states, | ||
| "execution_dates": self.execution_dates, | ||
| "poll_interval": self.poll_interval, | ||
| "trigger_start_time": self.trigger_start_time, | ||
| }, | ||
| ) | ||
|
|
||
| async def run(self) -> typing.AsyncIterator[TriggerEvent]: | ||
| """Checks periodically in the database to see if the task exists and has hit one of the states.""" | ||
| """ | ||
| Checks periodically in the database to see if the dag exists and is in the running state. If found, | ||
| wait until the task specified will reach one of the expected states. If dag with specified name was | ||
| not in the running state after _timeout_sec seconds after starting execution process of the trigger, | ||
| terminate with status 'timeout'. | ||
| """ | ||
| while True: | ||
| # mypy confuses typing here | ||
| num_tasks = await self.count_tasks() # type: ignore[call-arg] | ||
| if num_tasks == len(self.execution_dates): | ||
| yield TriggerEvent(True) | ||
| await asyncio.sleep(self.poll_interval) | ||
| try: | ||
| delta = utcnow() - self.trigger_start_time | ||
| if delta.total_seconds() < self._timeout_sec: | ||
| # mypy confuses typing here | ||
| if await self.count_running_dags() == 0: # type: ignore[call-arg] | ||
| self.log.info("Waiting for DAG to start execution...") | ||
| await asyncio.sleep(self.poll_interval) | ||
| else: | ||
| yield TriggerEvent({"status": "timeout"}) | ||
| return | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't think we need return here
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there is no return statement then trigger logic won't work in python 3.7. Trigger will do it's logic of checking task status infinitely and won't stop even after yielding the event. Also source code of the base class says that it is required to return None after yield statement. |
||
| # mypy confuses typing here | ||
| if await self.count_tasks() == len(self.execution_dates): # type: ignore[call-arg] | ||
| yield TriggerEvent({"status": "success"}) | ||
| return | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
| self.log.info("Task is still running, sleeping for %s seconds...", self.poll_interval) | ||
| await asyncio.sleep(self.poll_interval) | ||
| except Exception: | ||
| yield TriggerEvent({"status": "failed"}) | ||
| return | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
|
|
||
| @sync_to_async | ||
| @provide_session | ||
| def count_running_dags(self, session: Session): | ||
| """Count how many dag instances in running state in the database.""" | ||
| dags = ( | ||
| session.query(func.count("*")) | ||
| .filter( | ||
| TaskInstance.dag_id == self.dag_id, | ||
| TaskInstance.execution_date.in_(self.execution_dates), | ||
| TaskInstance.state.in_(["running", "success"]), | ||
| ) | ||
| .scalar() | ||
| ) | ||
| return dags | ||
|
|
||
| @sync_to_async | ||
| @provide_session | ||
|
|
@@ -112,7 +157,7 @@ def __init__( | |
| self, | ||
| dag_id: str, | ||
| states: list[DagRunState], | ||
| execution_dates: list[datetime.datetime], | ||
| execution_dates: list[datetime], | ||
| poll_interval: float = 5.0, | ||
| ): | ||
| super().__init__() | ||
|
|
@@ -134,7 +179,10 @@ def serialize(self) -> tuple[str, dict[str, typing.Any]]: | |
| ) | ||
|
|
||
| async def run(self) -> typing.AsyncIterator[TriggerEvent]: | ||
| """Checks periodically in the database to see if the dag run exists and has hit one of the states.""" | ||
| """ | ||
| Checks periodically in the database to see if the dag run exists, and has | ||
| hit one of the states yet, or not. | ||
| """ | ||
| while True: | ||
| # mypy confuses typing here | ||
| num_dags = await self.count_dags() # type: ignore[call-arg] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have added Triggers for
external_task, can you please rebase on Main.For sensors, I would be grateful if you can help the community by maintaining compatibility with astronomer-providers -- There are community members as well as our users using it https://pypistats.org/packages/astronomer-providers
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for great implementation os the trigger, i have updated the sensor to use it.
However, after some testing i have found some moments that could be improved in the trigger: