From 559e9ec4bc2e6063cfafe09713ae14b25ba1e4d0 Mon Sep 17 00:00:00 2001 From: blag Date: Mon, 18 Oct 2021 14:36:51 -0700 Subject: [PATCH 01/12] Use implicit concatenation --- airflow/models/dagcode.py | 2 +- airflow/models/taskinstance.py | 4 +- .../operators/google_api_to_s3_transfer.py | 2 +- .../alibaba/cloud/sensors/oss_key.py | 4 +- airflow/providers/amazon/aws/hooks/s3.py | 8 +- .../providers/amazon/aws/sensors/s3_key.py | 4 +- airflow/providers/yandex/hooks/yandex.py | 2 +- .../prepare_provider_packages.py | 2 +- .../pre_commit_check_extras_have_providers.py | 2 +- .../apache/spark/hooks/test_spark_submit.py | 108 +++++++++--------- 10 files changed, 70 insertions(+), 68 deletions(-) diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index fa5f7e121d393..7e26e0c558c6c 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -96,7 +96,7 @@ def bulk_sync_to_db(cls, filelocs: Iterable[str], session=None): for fileloc in conflicting_filelocs: message += ( "Filename '{}' causes a hash collision in the " - + "database with '{}'. Please rename the file." + "database with '{}'. Please rename the file." ).format(hashes_to_filelocs[DagCode.dag_fileloc_hash(fileloc)], fileloc) raise AirflowException(message) diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index c3f3004a5762f..0ece17b226602 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -1267,8 +1267,8 @@ def _date_or_empty(self, attr: str): def _log_state(self, lead_msg: str = ''): self.log.info( '%sMarking task as %s.' - + ' dag_id=%s, task_id=%s,' - + ' execution_date=%s, start_date=%s, end_date=%s', + ' dag_id=%s, task_id=%s,' + ' execution_date=%s, start_date=%s, end_date=%s', lead_msg, self.state.upper(), self.dag_id, diff --git a/airflow/operators/google_api_to_s3_transfer.py b/airflow/operators/google_api_to_s3_transfer.py index b08ba752a13c6..8062b59bf978d 100644 --- a/airflow/operators/google_api_to_s3_transfer.py +++ b/airflow/operators/google_api_to_s3_transfer.py @@ -43,7 +43,7 @@ def __init__(self, **kwargs): """This class is deprecated. Please use `airflow.providers.amazon.aws.transfers.""" - + "google_api_to_s3_transfer.GoogleApiToS3Operator`.", + "google_api_to_s3_transfer.GoogleApiToS3Operator`.", DeprecationWarning, stacklevel=3, ) diff --git a/airflow/providers/alibaba/cloud/sensors/oss_key.py b/airflow/providers/alibaba/cloud/sensors/oss_key.py index 22bd8e9ee99a7..9949aff8bcf1b 100644 --- a/airflow/providers/alibaba/cloud/sensors/oss_key.py +++ b/airflow/providers/alibaba/cloud/sensors/oss_key.py @@ -76,8 +76,8 @@ def poke(self, context): if parsed_url.scheme != '' or parsed_url.netloc != '': raise AirflowException( 'If bucket_name is provided, bucket_key' - + ' should be relative path from root' - + ' level, rather than a full oss:// url' + ' should be relative path from root' + ' level, rather than a full oss:// url' ) self.log.info('Poking for key : oss://%s/%s', self.bucket_name, self.bucket_key) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 84f379474b707..4a2a15b7a9393 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -716,8 +716,8 @@ def copy_object( if parsed_url.scheme != '' or parsed_url.netloc != '': raise AirflowException( 'If dest_bucket_name is provided, ' - + 'dest_bucket_key should be relative path ' - + 'from root level, rather than a full s3:// url' + 'dest_bucket_key should be relative path ' + 'from root level, rather than a full s3:// url' ) if source_bucket_name is None: @@ -727,8 +727,8 @@ def copy_object( if parsed_url.scheme != '' or parsed_url.netloc != '': raise AirflowException( 'If source_bucket_name is provided, ' - + 'source_bucket_key should be relative path ' - + 'from root level, rather than a full s3:// url' + 'source_bucket_key should be relative path ' + 'from root level, rather than a full s3:// url' ) copy_source = {'Bucket': source_bucket_name, 'Key': source_bucket_key, 'VersionId': source_version_id} diff --git a/airflow/providers/amazon/aws/sensors/s3_key.py b/airflow/providers/amazon/aws/sensors/s3_key.py index 9a7af08394c61..57cc0df581aa0 100644 --- a/airflow/providers/amazon/aws/sensors/s3_key.py +++ b/airflow/providers/amazon/aws/sensors/s3_key.py @@ -89,8 +89,8 @@ def poke(self, context): if parsed_url.scheme != '' or parsed_url.netloc != '': raise AirflowException( 'If bucket_name is provided, bucket_key' - + ' should be relative path from root' - + ' level, rather than a full s3:// url' + ' should be relative path from root' + ' level, rather than a full s3:// url' ) self.log.info('Poking for key : s3://%s/%s', self.bucket_name, self.bucket_key) diff --git a/airflow/providers/yandex/hooks/yandex.py b/airflow/providers/yandex/hooks/yandex.py index ee1ae0dffe5ab..06049e52edeae 100644 --- a/airflow/providers/yandex/hooks/yandex.py +++ b/airflow/providers/yandex/hooks/yandex.py @@ -119,7 +119,7 @@ def _get_credentials(self) -> Dict[str, Any]: if not (service_account_json or oauth_token or service_account_json_path): raise AirflowException( 'No credentials are found in connection. Specify either service account ' - + 'authentication JSON or user OAuth token in Yandex.Cloud connection' + 'authentication JSON or user OAuth token in Yandex.Cloud connection' ) if service_account_json_path: with open(service_account_json_path) as infile: diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py index a56b5299cf106..c3c755490894e 100755 --- a/dev/provider_packages/prepare_provider_packages.py +++ b/dev/provider_packages/prepare_provider_packages.py @@ -1238,7 +1238,7 @@ def validate_provider_info_with_runtime_schema(provider_info: Dict[str, Any]) -> console.print("[red]Provider info not validated against runtime schema[/]") raise Exception( "Error when validating schema. The schema must be compatible with " - + "airflow/provider_info.schema.json.", + "airflow/provider_info.schema.json.", ex, ) diff --git a/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py b/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py index 20088b749ce65..7c1b7eb23a741 100755 --- a/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py +++ b/scripts/ci/pre_commit/pre_commit_check_extras_have_providers.py @@ -71,7 +71,7 @@ def check_all_providers_are_listed_in_setup_py() -> None: if provider_name not in ALL_PROVIDERS: errors.append( f"The provider {provider_name} is missing in setup.py " - + "[bold]PROVIDERS_REQUIREMENTS[/]: [red]NOK[/]" + "[bold]PROVIDERS_REQUIREMENTS[/]: [red]NOK[/]" ) diff --git a/tests/providers/apache/spark/hooks/test_spark_submit.py b/tests/providers/apache/spark/hooks/test_spark_submit.py index 070719a9f40f0..30ce92c649dd7 100644 --- a/tests/providers/apache/spark/hooks/test_spark_submit.py +++ b/tests/providers/apache/spark/hooks/test_spark_submit.py @@ -88,9 +88,11 @@ def setUp(self): conn_id='spark_k8s_cluster', conn_type='spark', host='k8s://https://k8s-master', - extra='{"spark-home": "/opt/spark", ' - + '"deploy-mode": "cluster", ' - + '"namespace": "mynamespace"}', + extra=( + '{"spark-home": "/opt/spark", ' + ' "deploy-mode": "cluster", ' + ' "namespace": "mynamespace"}' + ), ) ) db.merge_conn( @@ -120,7 +122,7 @@ def setUp(self): conn_id='spark_binary_and_home_set', conn_type='spark', host='yarn', - extra='{"spark-home": "/path/to/spark_home", ' + '"spark-binary": "custom-spark-submit"}', + extra='{"spark-home": "/path/to/spark_home", "spark-binary": "custom-spark-submit"}', ) ) db.merge_conn( @@ -628,11 +630,11 @@ def test_process_spark_submit_log_yarn(self): log_lines = [ 'SPARK_MAJOR_VERSION is set to 2, using Spark2', 'WARN NativeCodeLoader: Unable to load native-hadoop library for your ' - + 'platform... using builtin-java classes where applicable', + 'platform... using builtin-java classes where applicable', 'WARN DomainSocketFactory: The short-circuit local reads feature cannot ' 'be used because libhadoop cannot be loaded.', 'INFO Client: Requesting a new application from cluster with 10 NodeManagers', - 'INFO Client: Submitting application application_1486558679801_1820 ' + 'to ResourceManager', + 'INFO Client: Submitting application application_1486558679801_1820 to ResourceManager', ] # When hook._process_spark_submit_log(log_lines) @@ -646,25 +648,25 @@ def test_process_spark_submit_log_k8s(self): hook = SparkSubmitHook(conn_id='spark_k8s_cluster') log_lines = [ 'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' - + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' - + 'namespace: default' - + 'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' - + 'spark-role -> driver' - + 'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' - + 'creation time: 2018-03-05T10:26:55Z' - + 'service account name: spark' - + 'volumes: spark-init-properties, download-jars-volume,' - + 'download-files-volume, spark-token-2vmlm' - + 'node name: N/A' - + 'start time: N/A' - + 'container images: N/A' - + 'phase: Pending' - + 'status: []' - + '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' - + ' new state:' - + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' - + 'namespace: default' - + 'Exit code: 999' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' + 'spark-role -> driver' + 'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' + 'creation time: 2018-03-05T10:26:55Z' + 'service account name: spark' + 'volumes: spark-init-properties, download-jars-volume,' + 'download-files-volume, spark-token-2vmlm' + 'node name: N/A' + 'start time: N/A' + 'container images: N/A' + 'phase: Pending' + 'status: []' + '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' + ' new state:' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'Exit code: 999' ] # When @@ -693,7 +695,7 @@ def test_process_spark_submit_log_standalone_cluster(self): '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request ' 'to launch an application in spark://spark-standalone-master:6066', '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' - + 'created as driver-20171128111415-0001. Polling submission state...', + 'created as driver-20171128111415-0001. Polling submission state...', ] # When hook._process_spark_submit_log(log_lines) @@ -707,9 +709,9 @@ def test_process_spark_driver_status_log(self): hook = SparkSubmitHook(conn_id='spark_standalone_cluster') log_lines = [ 'Submitting a request for the status of submission ' - + 'driver-20171128111415-0001 in spark://spark-standalone-master:6066', + 'driver-20171128111415-0001 in spark://spark-standalone-master:6066', '17/11/28 11:15:37 INFO RestSubmissionClient: Server responded with ' - + 'SubmissionStatusResponse:', + 'SubmissionStatusResponse:', '{', '"action" : "SubmissionStatusResponse",', '"driverState" : "RUNNING",', @@ -738,12 +740,12 @@ def test_yarn_process_on_kill(self, mock_popen, mock_renew_from_kt): log_lines = [ 'SPARK_MAJOR_VERSION is set to 2, using Spark2', 'WARN NativeCodeLoader: Unable to load native-hadoop library for your ' - + 'platform... using builtin-java classes where applicable', + 'platform... using builtin-java classes where applicable', 'WARN DomainSocketFactory: The short-circuit local reads feature cannot ' - + 'be used because libhadoop cannot be loaded.', + 'be used because libhadoop cannot be loaded.', 'INFO Client: Requesting a new application from cluster with 10 ' - + 'NodeManagerapplication_1486558679801_1820s', - 'INFO Client: Submitting application application_1486558679801_1820 ' + 'to ResourceManager', + 'NodeManagerapplication_1486558679801_1820s', + 'INFO Client: Submitting application application_1486558679801_1820 to ResourceManager', ] env = {"PATH": "hadoop/bin"} hook = SparkSubmitHook(conn_id='spark_yarn_cluster', env_vars=env) @@ -792,9 +794,9 @@ def test_standalone_cluster_process_on_kill(self): log_lines = [ 'Running Spark using the REST application submission protocol.', '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request ' - + 'to launch an application in spark://spark-standalone-master:6066', + 'to launch an application in spark://spark-standalone-master:6066', '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' - + 'created as driver-20171128111415-0001. Polling submission state...', + 'created as driver-20171128111415-0001. Polling submission state...', ] hook = SparkSubmitHook(conn_id='spark_standalone_cluster') hook._process_spark_submit_log(log_lines) @@ -821,25 +823,25 @@ def test_k8s_process_on_kill(self, mock_popen, mock_client_method): hook = SparkSubmitHook(conn_id='spark_k8s_cluster') log_lines = [ 'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' - + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' - + 'namespace: default' - + 'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' - + 'spark-role -> driver' - + 'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' - + 'creation time: 2018-03-05T10:26:55Z' - + 'service account name: spark' - + 'volumes: spark-init-properties, download-jars-volume,' - + 'download-files-volume, spark-token-2vmlm' - + 'node name: N/A' - + 'start time: N/A' - + 'container images: N/A' - + 'phase: Pending' - + 'status: []' - + '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' - + ' new state:' - + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' - + 'namespace: default' - + 'Exit code: 0' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' + 'spark-role -> driver' + 'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' + 'creation time: 2018-03-05T10:26:55Z' + 'service account name: spark' + 'volumes: spark-init-properties, download-jars-volume,' + 'download-files-volume, spark-token-2vmlm' + 'node name: N/A' + 'start time: N/A' + 'container images: N/A' + 'phase: Pending' + 'status: []' + '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' + ' new state:' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'Exit code: 0' ] hook._process_spark_submit_log(log_lines) hook.submit() From 7ef08e477e1f9e6732e0d3cfc0e1ff48d239e1bf Mon Sep 17 00:00:00 2001 From: blag Date: Mon, 18 Oct 2021 14:37:38 -0700 Subject: [PATCH 02/12] Remove useless string concatenations --- airflow/providers/databricks/hooks/databricks.py | 2 +- tests/providers/amazon/aws/utils/eks_test_constants.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/providers/databricks/hooks/databricks.py b/airflow/providers/databricks/hooks/databricks.py index 5e7fc35c6dd0f..a317cba085e77 100644 --- a/airflow/providers/databricks/hooks/databricks.py +++ b/airflow/providers/databricks/hooks/databricks.py @@ -216,7 +216,7 @@ def _do_api_call(self, endpoint_info, json): if attempt_num == self.retry_limit: raise AirflowException( - ('API requests to Databricks failed {} times. ' + 'Giving up.').format(self.retry_limit) + ('API requests to Databricks failed {} times. Giving up.').format(self.retry_limit) ) attempt_num += 1 diff --git a/tests/providers/amazon/aws/utils/eks_test_constants.py b/tests/providers/amazon/aws/utils/eks_test_constants.py index 1a5985c6fb104..f338702b22fc6 100644 --- a/tests/providers/amazon/aws/utils/eks_test_constants.py +++ b/tests/providers/amazon/aws/utils/eks_test_constants.py @@ -229,7 +229,7 @@ class RegExTemplates: + NODEGROUP_UUID_PATTERN ) NODEGROUP_ASG_NAME_PATTERN: Pattern = re.compile("eks-" + NODEGROUP_UUID_PATTERN) - NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile("sg-" + "([-0-9a-z]{17})") + NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile("sg-([-0-9a-z]{17})") class MethodNames: From 61ef0b3892f86810fc746055445d89a3c7173843 Mon Sep 17 00:00:00 2001 From: blag Date: Mon, 18 Oct 2021 14:37:59 -0700 Subject: [PATCH 03/12] Use f-strings --- airflow/sensors/smart_sensor.py | 2 +- tests/providers/amazon/aws/utils/eks_test_constants.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/sensors/smart_sensor.py b/airflow/sensors/smart_sensor.py index 1e8c827e87d9c..e042c61b2875c 100644 --- a/airflow/sensors/smart_sensor.py +++ b/airflow/sensors/smart_sensor.py @@ -125,7 +125,7 @@ def _get_sensor_logger(self, si): log_id = "-".join( [si.dag_id, si.task_id, si.execution_date.strftime("%Y_%m_%dT%H_%M_%S_%f"), str(si.try_number)] ) - logger = logging.getLogger('airflow.task' + '.' + log_id) + logger = logging.getLogger(f'airflow.task.{log_id}') if len(logger.handlers) == 0: handler = self.create_new_task_handler() diff --git a/tests/providers/amazon/aws/utils/eks_test_constants.py b/tests/providers/amazon/aws/utils/eks_test_constants.py index f338702b22fc6..cdd0622e77ae2 100644 --- a/tests/providers/amazon/aws/utils/eks_test_constants.py +++ b/tests/providers/amazon/aws/utils/eks_test_constants.py @@ -228,7 +228,7 @@ class RegExTemplates: + "(?P.+)/" + NODEGROUP_UUID_PATTERN ) - NODEGROUP_ASG_NAME_PATTERN: Pattern = re.compile("eks-" + NODEGROUP_UUID_PATTERN) + NODEGROUP_ASG_NAME_PATTERN: Pattern = re.compile(f"eks-{NODEGROUP_UUID_PATTERN}") NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile("sg-([-0-9a-z]{17})") From 9558e82ee78a2661dad34dbf7512411ef18d6ff0 Mon Sep 17 00:00:00 2001 From: blag Date: Mon, 18 Oct 2021 14:38:13 -0700 Subject: [PATCH 04/12] Fix overuse of f-strings --- .../providers/docker/example_dags/example_docker_copy_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/providers/docker/example_dags/example_docker_copy_data.py b/airflow/providers/docker/example_dags/example_docker_copy_data.py index f03c4beb32a47..5ce78d02cd201 100644 --- a/airflow/providers/docker/example_dags/example_docker_copy_data.py +++ b/airflow/providers/docker/example_dags/example_docker_copy_data.py @@ -75,7 +75,7 @@ "/bin/bash", "-c", "/bin/sleep 30; " - "/bin/mv {{ params.source_location }}/" + f"{t_view.output}" + " {{ params.target_location }};" + "/bin/mv {{ params.source_location }}/" + str(t_view.output) + " {{ params.target_location }};" "/bin/echo '{{ params.target_location }}/" + f"{t_view.output}';", ], task_id="move_data", From f830cb7735b63ab066919651e9190c86aae1fb0c Mon Sep 17 00:00:00 2001 From: blag Date: Mon, 18 Oct 2021 14:40:20 -0700 Subject: [PATCH 05/12] Use re.VERBOSE to allow for multi-line regex specifications --- .../amazon/aws/utils/eks_test_constants.py | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/tests/providers/amazon/aws/utils/eks_test_constants.py b/tests/providers/amazon/aws/utils/eks_test_constants.py index cdd0622e77ae2..640a99c648b23 100644 --- a/tests/providers/amazon/aws/utils/eks_test_constants.py +++ b/tests/providers/amazon/aws/utils/eks_test_constants.py @@ -198,35 +198,38 @@ class RegExTemplates: """The compiled RegEx patterns used in testing.""" CLUSTER_ARN: Pattern = re.compile( - "arn:" - + "(?P.+):" - + "eks:" - + "(?P[-0-9a-zA-Z]+):" - + "(?P[0-9]{12}):" - + "cluster/" - + "(?P.+)" + r"""arn: + (?P.+): + eks: + (?P[-0-9a-zA-Z]+): + (?P[0-9]{12}): + cluster/ + (?P.+)""", + re.VERBOSE ) FARGATE_PROFILE_ARN: Pattern = re.compile( - "arn:" - + "(?P.+):" - + "eks:" - + "(?P[-0-9a-zA-Z]+):" - + "(?P[0-9]{12}):" - + "fargateprofile/" - + "(?P.+)/" - + "(?P.+)/" - + FARGATE_PROFILE_UUID_PATTERN + r"""arn: + (?P.+): + eks: + (?P[-0-9a-zA-Z]+): + (?P[0-9]{12}): + fargateprofile/ + (?P.+)/ + (?P.+)/""" + + FARGATE_PROFILE_UUID_PATTERN, + re.VERBOSE ) NODEGROUP_ARN: Pattern = re.compile( - "arn:" - + "(?P.+):" - + "eks:" - + "(?P[-0-9a-zA-Z]+):" - + "(?P[0-9]{12}):" - + "nodegroup/" - + "(?P.+)/" - + "(?P.+)/" - + NODEGROUP_UUID_PATTERN + r"""arn: + (?P.+): + eks: + (?P[-0-9a-zA-Z]+): + (?P[0-9]{12}): + nodegroup/ + (?P.+)/ + (?P.+)/""" + + NODEGROUP_UUID_PATTERN, + re.VERBOSE ) NODEGROUP_ASG_NAME_PATTERN: Pattern = re.compile(f"eks-{NODEGROUP_UUID_PATTERN}") NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile("sg-([-0-9a-z]{17})") From ede23ad84d6fad6b2a59c9a2fe1c1770cc3422cd Mon Sep 17 00:00:00 2001 From: blag Date: Mon, 18 Oct 2021 14:40:52 -0700 Subject: [PATCH 06/12] Reformat some SQL to remove useless concatenations and increase readability --- .../apache/hive/operators/test_hive_stats.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/providers/apache/hive/operators/test_hive_stats.py b/tests/providers/apache/hive/operators/test_hive_stats.py index 02dbdd8c4d50b..7787d3feef828 100644 --- a/tests/providers/apache/hive/operators/test_hive_stats.py +++ b/tests/providers/apache/hive/operators/test_hive_stats.py @@ -276,10 +276,9 @@ def test_execute_delete_previous_runs_rows( sql = f""" DELETE FROM hive_stats - WHERE - table_name='{hive_stats_collection_operator.table}' AND - partition_repr='{mock_json_dumps.return_value}' AND - dttm='{hive_stats_collection_operator.dttm}'; + WHERE table_name='{hive_stats_collection_operator.table}' + AND partition_repr='{mock_json_dumps.return_value}' + AND dttm='{hive_stats_collection_operator.dttm}'; """ mock_mysql_hook.return_value.run.assert_called_once_with(sql) @@ -308,17 +307,19 @@ def test_runs_for_hive_stats(self, mock_hive_metastore_hook): op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) select_count_query = ( - "SELECT COUNT(*) AS __count FROM airflow." - + "static_babynames_partitioned WHERE ds = '2015-01-01';" + "SELECT COUNT(*) AS __count " + "FROM airflow.static_babynames_partitioned " + "WHERE ds = '2015-01-01';" ) mock_presto_hook.get_first.assert_called_with(hql=select_count_query) expected_stats_select_query = ( - "SELECT 1 FROM hive_stats WHERE table_name='airflow." - + "static_babynames_partitioned' AND " - + "partition_repr='{\"ds\": \"2015-01-01\"}' AND " - + "dttm='2015-01-01T00:00:00+00:00' " - + "LIMIT 1;" + "SELECT 1 " + "FROM hive_stats " + "WHERE table_name='airflow.static_babynames_partitioned' " + " AND partition_repr='{\"ds\": \"2015-01-01\"}' " + " AND dttm='2015-01-01T00:00:00+00:00' " + "LIMIT 1;" ) raw_stats_select_query = mock_mysql_hook.get_records.call_args_list[0][0][0] From 4c9c792a7d35b44f3aad46cc26077f41b7f81a48 Mon Sep 17 00:00:00 2001 From: blag Date: Tue, 19 Oct 2021 11:31:13 -0700 Subject: [PATCH 07/12] Update dependency to include python-tabulate fix for Python 3.10 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e652f9551bf58..72a5ec6c8bed4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -563,7 +563,7 @@ repos: additional_dependencies: - 'PyYAML==5.3.1' - 'jsonschema==3.2.0' - - 'tabulate==0.8.7' + - 'tabulate==0.8.8' - 'jsonpath-ng==1.5.3' - 'rich==10.9.0' - id: mermaid From 00be5ade8e813f393e4b72bd4331f636886a0112 Mon Sep 17 00:00:00 2001 From: blag Date: Tue, 19 Oct 2021 11:32:03 -0700 Subject: [PATCH 08/12] Formatting fixups to make black formatter happy --- tests/providers/amazon/aws/utils/eks_test_constants.py | 6 +++--- tests/providers/apache/spark/hooks/test_spark_submit.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/providers/amazon/aws/utils/eks_test_constants.py b/tests/providers/amazon/aws/utils/eks_test_constants.py index 640a99c648b23..e8babd00a32d0 100644 --- a/tests/providers/amazon/aws/utils/eks_test_constants.py +++ b/tests/providers/amazon/aws/utils/eks_test_constants.py @@ -205,7 +205,7 @@ class RegExTemplates: (?P[0-9]{12}): cluster/ (?P.+)""", - re.VERBOSE + re.VERBOSE, ) FARGATE_PROFILE_ARN: Pattern = re.compile( r"""arn: @@ -217,7 +217,7 @@ class RegExTemplates: (?P.+)/ (?P.+)/""" + FARGATE_PROFILE_UUID_PATTERN, - re.VERBOSE + re.VERBOSE, ) NODEGROUP_ARN: Pattern = re.compile( r"""arn: @@ -229,7 +229,7 @@ class RegExTemplates: (?P.+)/ (?P.+)/""" + NODEGROUP_UUID_PATTERN, - re.VERBOSE + re.VERBOSE, ) NODEGROUP_ASG_NAME_PATTERN: Pattern = re.compile(f"eks-{NODEGROUP_UUID_PATTERN}") NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile("sg-([-0-9a-z]{17})") diff --git a/tests/providers/apache/spark/hooks/test_spark_submit.py b/tests/providers/apache/spark/hooks/test_spark_submit.py index 30ce92c649dd7..be47bfb8a3fd6 100644 --- a/tests/providers/apache/spark/hooks/test_spark_submit.py +++ b/tests/providers/apache/spark/hooks/test_spark_submit.py @@ -710,8 +710,7 @@ def test_process_spark_driver_status_log(self): log_lines = [ 'Submitting a request for the status of submission ' 'driver-20171128111415-0001 in spark://spark-standalone-master:6066', - '17/11/28 11:15:37 INFO RestSubmissionClient: Server responded with ' - 'SubmissionStatusResponse:', + '17/11/28 11:15:37 INFO RestSubmissionClient: Server responded with SubmissionStatusResponse:', '{', '"action" : "SubmissionStatusResponse",', '"driverState" : "RUNNING",', From f73769695278a670133f12de2d0be312a2195e05 Mon Sep 17 00:00:00 2001 From: blag Date: Wed, 20 Oct 2021 10:45:59 -0700 Subject: [PATCH 09/12] Use f-strings more --- airflow/models/dagcode.py | 7 ++++--- airflow/providers/databricks/hooks/databricks.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index 7e26e0c558c6c..7322ba92fb76a 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -94,10 +94,11 @@ def bulk_sync_to_db(cls, filelocs: Iterable[str], session=None): hashes_to_filelocs = {DagCode.dag_fileloc_hash(fileloc): fileloc for fileloc in filelocs} message = "" for fileloc in conflicting_filelocs: + filename = hashes_to_filelocs[DagCode.dag_fileloc_hash(fileloc)] message += ( - "Filename '{}' causes a hash collision in the " - "database with '{}'. Please rename the file." - ).format(hashes_to_filelocs[DagCode.dag_fileloc_hash(fileloc)], fileloc) + f"Filename '{filename}' causes a hash collision in the " + f"database with '{fileloc}'. Please rename the file." + ) raise AirflowException(message) existing_filelocs = {dag_code.fileloc for dag_code in existing_orm_dag_codes} diff --git a/airflow/providers/databricks/hooks/databricks.py b/airflow/providers/databricks/hooks/databricks.py index a317cba085e77..5e843a4bdcf9c 100644 --- a/airflow/providers/databricks/hooks/databricks.py +++ b/airflow/providers/databricks/hooks/databricks.py @@ -216,7 +216,7 @@ def _do_api_call(self, endpoint_info, json): if attempt_num == self.retry_limit: raise AirflowException( - ('API requests to Databricks failed {} times. Giving up.').format(self.retry_limit) + f'API requests to Databricks failed {self.retry_limit} times. Giving up.' ) attempt_num += 1 From 75666218ea632243126ce2e83a92911a2612c005 Mon Sep 17 00:00:00 2001 From: blag Date: Wed, 20 Oct 2021 11:37:37 -0700 Subject: [PATCH 10/12] Use normal strings instead of a multi-line string --- airflow/operators/google_api_to_s3_transfer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/operators/google_api_to_s3_transfer.py b/airflow/operators/google_api_to_s3_transfer.py index 8062b59bf978d..9566cddc77641 100644 --- a/airflow/operators/google_api_to_s3_transfer.py +++ b/airflow/operators/google_api_to_s3_transfer.py @@ -40,9 +40,9 @@ class GoogleApiToS3Transfer(GoogleApiToS3Operator): def __init__(self, **kwargs): warnings.warn( - """This class is deprecated. - Please use - `airflow.providers.amazon.aws.transfers.""" + "This class is deprecated. " + "Please use " + "`airflow.providers.amazon.aws.transfers." "google_api_to_s3_transfer.GoogleApiToS3Operator`.", DeprecationWarning, stacklevel=3, From 788f1f076a23f8035720ed511fb1d63c7298f94d Mon Sep 17 00:00:00 2001 From: blag Date: Wed, 20 Oct 2021 11:42:12 -0700 Subject: [PATCH 11/12] Make better use of raw strings --- tests/providers/amazon/aws/utils/eks_test_constants.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/providers/amazon/aws/utils/eks_test_constants.py b/tests/providers/amazon/aws/utils/eks_test_constants.py index e8babd00a32d0..4624cd40f809d 100644 --- a/tests/providers/amazon/aws/utils/eks_test_constants.py +++ b/tests/providers/amazon/aws/utils/eks_test_constants.py @@ -187,10 +187,10 @@ class PageCount: FARGATE_PROFILE_UUID_PATTERN: str = ( - "(?P[-0-9a-z]{8}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{12})" + r"(?P[-0-9a-z]{8}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{12})" ) NODEGROUP_UUID_PATTERN: str = ( - "(?P[-0-9a-z]{8}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{12})" + r"(?P[-0-9a-z]{8}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{4}-[-0-9a-z]{12})" ) @@ -232,7 +232,7 @@ class RegExTemplates: re.VERBOSE, ) NODEGROUP_ASG_NAME_PATTERN: Pattern = re.compile(f"eks-{NODEGROUP_UUID_PATTERN}") - NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile("sg-([-0-9a-z]{17})") + NODEGROUP_SECURITY_GROUP_NAME_PATTERN: Pattern = re.compile(r"sg-([-0-9a-z]{17})") class MethodNames: From c78746f7949e073f46c247b2b1218e4e20d31c56 Mon Sep 17 00:00:00 2001 From: blag Date: Thu, 21 Oct 2021 13:13:14 -0700 Subject: [PATCH 12/12] Undo some of the SQL formatting to fix tests --- tests/providers/apache/hive/operators/test_hive_stats.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/providers/apache/hive/operators/test_hive_stats.py b/tests/providers/apache/hive/operators/test_hive_stats.py index 7787d3feef828..b51420b4950bf 100644 --- a/tests/providers/apache/hive/operators/test_hive_stats.py +++ b/tests/providers/apache/hive/operators/test_hive_stats.py @@ -276,9 +276,10 @@ def test_execute_delete_previous_runs_rows( sql = f""" DELETE FROM hive_stats - WHERE table_name='{hive_stats_collection_operator.table}' - AND partition_repr='{mock_json_dumps.return_value}' - AND dttm='{hive_stats_collection_operator.dttm}'; + WHERE + table_name='{hive_stats_collection_operator.table}' AND + partition_repr='{mock_json_dumps.return_value}' AND + dttm='{hive_stats_collection_operator.dttm}'; """ mock_mysql_hook.return_value.run.assert_called_once_with(sql)