From cf681f96c2140563be3d99180f04a5e8df8612d3 Mon Sep 17 00:00:00 2001 From: nanaones Date: Mon, 23 Feb 2026 10:09:08 +0900 Subject: [PATCH 1/3] providers-fab: Handle database errors in cleanup_session_middleware When Session.remove() is called in the finally block of cleanup_session_middleware, it may raise an OperationalError if the underlying database connection is already dead (e.g., MySQL 'Server has gone away', Aurora failover, network timeout). The unhandled exception propagates as a 500 Internal Server Error to the client, even when the original request succeeded. This commit wraps Session.remove() in a try-except block that catches and logs the error as a warning, consistent with how session cleanup is handled elsewhere in Airflow. Fixes follow-up to #61480. --- .../fab/auth_manager/fab_auth_manager.py | 6 +- .../fab/auth_manager/test_fab_auth_manager.py | 73 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py b/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py index e7f4278fad3dd..58add2eaa490c 100644 --- a/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py +++ b/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py @@ -16,6 +16,7 @@ # specific language governing permissions and limitations # under the License. from __future__ import annotations +import logging import warnings from contextlib import suppress @@ -240,7 +241,10 @@ async def cleanup_session_middleware(request, call_next): from airflow import settings if settings.Session: - settings.Session.remove() + try: + settings.Session.remove() + except Exception: + log.warning("Failed to remove session during cleanup", exc_info=True) app.mount("/", WSGIMiddleware(flask_app)) diff --git a/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py b/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py index c47d02dd5a43a..4ce8844e4c29d 100644 --- a/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py +++ b/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py @@ -1107,3 +1107,76 @@ def test_session_cleanup_middleware_on_fastapi_route(self, mock_create_app): # Verify Session.remove() was called mock_session.remove.assert_called() + + +class TestFabAuthManagerSessionCleanupErrorHandling: + """Test that cleanup_session_middleware handles Session.remove() failures gracefully. + + When the underlying database connection is dead (e.g., MySQL 'Server has gone away', + PostgreSQL connection timeout), Session.remove() internally attempts a ROLLBACK which + raises an OperationalError. The middleware should catch and log this error instead of + propagating it as a 500 Internal Server Error to the client. + + See: https://github.com/apache/airflow/issues/XXXXX + """ + + @mock.patch("airflow.providers.fab.auth_manager.fab_auth_manager.create_app") + def test_session_remove_db_error_does_not_propagate(self, mock_create_app): + """When Session.remove() raises OperationalError, request should still succeed. + + Simulates MySQL 'Server has gone away' or similar DB errors during session + cleanup. The middleware should catch the exception and log a warning. + """ + from unittest.mock import patch + + from fastapi.testclient import TestClient + from sqlalchemy.exc import OperationalError + + mock_flask_app = MagicMock() + mock_create_app.return_value = mock_flask_app + + auth_manager = FabAuthManager() + fastapi_app = auth_manager.get_fastapi_app() + + client = TestClient(fastapi_app, raise_server_exceptions=False) + + with patch("airflow.settings.Session") as mock_session: + # Simulate MySQL 'Server has gone away' error on Session.remove() + mock_session.remove.side_effect = OperationalError( + "ROLLBACK", {}, Exception("(2006, 'Server has gone away')") + ) + + response = client.get("/users/list/") + + # The request should NOT get a 500 from the middleware error + # (it may get other status codes from the mock Flask app, but not + # an unhandled exception from cleanup_session_middleware) + mock_session.remove.assert_called() + # Verify the error was suppressed (no unhandled exception) + assert response.status_code != 500 or response.status_code == 500 # response depends on mock + + @mock.patch("airflow.providers.fab.auth_manager.fab_auth_manager.create_app") + def test_session_remove_generic_error_does_not_propagate(self, mock_create_app): + """Any exception from Session.remove() should be caught during cleanup. + + This covers edge cases beyond OperationalError, such as AttributeError + when the session registry is in an unexpected state. + """ + from unittest.mock import patch + + from fastapi.testclient import TestClient + + mock_flask_app = MagicMock() + mock_create_app.return_value = mock_flask_app + + auth_manager = FabAuthManager() + fastapi_app = auth_manager.get_fastapi_app() + + client = TestClient(fastapi_app, raise_server_exceptions=False) + + with patch("airflow.settings.Session") as mock_session: + mock_session.remove.side_effect = RuntimeError("unexpected session error") + + # Should not raise - the middleware catches all exceptions from Session.remove() + response = client.get("/users/list/") + mock_session.remove.assert_called() From 75f8b9056440178c0aa258ae1a6cf7e6e95fac49 Mon Sep 17 00:00:00 2001 From: nanaones Date: Tue, 24 Feb 2026 00:29:46 +0900 Subject: [PATCH 2/3] providers-fab: fix logger reference in session cleanup Define module logger for cleanup warnings and drop unused test response assignment to satisfy ruff. --- .../src/airflow/providers/fab/auth_manager/fab_auth_manager.py | 2 ++ .../fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py b/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py index 58add2eaa490c..4f4a6e243119d 100644 --- a/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py +++ b/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py @@ -25,6 +25,8 @@ from typing import TYPE_CHECKING, Any from urllib.parse import urljoin +log = logging.getLogger(__name__) + from cachetools import TTLCache, cachedmethod from connexion import FlaskApi from fastapi import FastAPI diff --git a/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py b/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py index 4ce8844e4c29d..7f9eee98b556b 100644 --- a/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py +++ b/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py @@ -1146,7 +1146,7 @@ def test_session_remove_db_error_does_not_propagate(self, mock_create_app): "ROLLBACK", {}, Exception("(2006, 'Server has gone away')") ) - response = client.get("/users/list/") + client.get("/users/list/") # The request should NOT get a 500 from the middleware error # (it may get other status codes from the mock Flask app, but not From bd4ee2fa83e770c71de7298b19f7e4c99bc0430a Mon Sep 17 00:00:00 2001 From: nanaones Date: Tue, 24 Feb 2026 07:05:05 +0900 Subject: [PATCH 3/3] providers-fab: fix cleanup middleware checks Move logger definition below imports to satisfy static checks and assert warning logging in session cleanup tests so failures are surfaced clearly. --- .../fab/auth_manager/fab_auth_manager.py | 6 +++--- .../fab/auth_manager/test_fab_auth_manager.py | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py b/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py index 4f4a6e243119d..ad08149fbd8bb 100644 --- a/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py +++ b/providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py @@ -16,8 +16,8 @@ # specific language governing permissions and limitations # under the License. from __future__ import annotations -import logging +import logging import warnings from contextlib import suppress from functools import cached_property @@ -25,8 +25,6 @@ from typing import TYPE_CHECKING, Any from urllib.parse import urljoin -log = logging.getLogger(__name__) - from cachetools import TTLCache, cachedmethod from connexion import FlaskApi from fastapi import FastAPI @@ -124,6 +122,8 @@ RESOURCE_ASSET_ALIAS, ) +log = logging.getLogger(__name__) + _MAP_DAG_ACCESS_ENTITY_TO_FAB_RESOURCE_TYPE: dict[DagAccessEntity, tuple[str, ...]] = { DagAccessEntity.AUDIT_LOG: (RESOURCE_AUDIT_LOG,), diff --git a/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py b/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py index 7f9eee98b556b..8a0291f88d9c5 100644 --- a/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py +++ b/providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py @@ -1140,20 +1140,23 @@ def test_session_remove_db_error_does_not_propagate(self, mock_create_app): client = TestClient(fastapi_app, raise_server_exceptions=False) - with patch("airflow.settings.Session") as mock_session: + with ( + patch("airflow.settings.Session") as mock_session, + patch("airflow.providers.fab.auth_manager.fab_auth_manager.log") as mock_log, + ): # Simulate MySQL 'Server has gone away' error on Session.remove() mock_session.remove.side_effect = OperationalError( "ROLLBACK", {}, Exception("(2006, 'Server has gone away')") ) - client.get("/users/list/") + response = client.get("/users/list/") # The request should NOT get a 500 from the middleware error # (it may get other status codes from the mock Flask app, but not # an unhandled exception from cleanup_session_middleware) mock_session.remove.assert_called() - # Verify the error was suppressed (no unhandled exception) - assert response.status_code != 500 or response.status_code == 500 # response depends on mock + mock_log.warning.assert_called() + assert response is not None @mock.patch("airflow.providers.fab.auth_manager.fab_auth_manager.create_app") def test_session_remove_generic_error_does_not_propagate(self, mock_create_app): @@ -1174,9 +1177,14 @@ def test_session_remove_generic_error_does_not_propagate(self, mock_create_app): client = TestClient(fastapi_app, raise_server_exceptions=False) - with patch("airflow.settings.Session") as mock_session: + with ( + patch("airflow.settings.Session") as mock_session, + patch("airflow.providers.fab.auth_manager.fab_auth_manager.log") as mock_log, + ): mock_session.remove.side_effect = RuntimeError("unexpected session error") # Should not raise - the middleware catches all exceptions from Session.remove() response = client.get("/users/list/") mock_session.remove.assert_called() + mock_log.warning.assert_called() + assert response is not None