From 6215fdccfe00a66431d5e713a39f716266b0f324 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Mon, 11 Dec 2023 11:21:53 -0500 Subject: [PATCH 01/10] Add HPA to worker --- chart/templates/workers/worker-hpa.yaml | 49 +++++++++++++++++++++++++ chart/values.schema.json | 33 +++++++++++++++++ chart/values.yaml | 22 +++++++++++ 3 files changed, 104 insertions(+) create mode 100644 chart/templates/workers/worker-hpa.yaml diff --git a/chart/templates/workers/worker-hpa.yaml b/chart/templates/workers/worker-hpa.yaml new file mode 100644 index 0000000000000..b30b89a45696f --- /dev/null +++ b/chart/templates/workers/worker-hpa.yaml @@ -0,0 +1,49 @@ +{{/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/}} + +################################ +## Airflow Worker HPA +################################# +{{- if and (and (not .Values.workers.keda.enabled) .Values.workers.hpa.enabled) (has .Values.executor (list "CeleryExecutor" "CeleryKubernetesExecutor")) }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "airflow.fullname" . 
}}-worker + labels: + tier: airflow + component: worker-horizontalpodautoscaler + release: {{ .Release.Name }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service }} + deploymentName: {{ .Release.Name }}-worker + {{- if or (.Values.labels) (.Values.workers.labels) }} + {{- mustMerge .Values.workers.labels .Values.labels | toYaml | nindent 4 }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: {{ ternary "StatefulSet" "Deployment" .Values.workers.persistence.enabled }} + name: {{ include "airflow.fullname" . }}-worker + minReplicas: {{ .Values.workers.hpa.minReplicaCount }} + maxReplicas: {{ .Values.workers.hpa.maxReplicaCount }} + metrics: {{- tpl (default "[]" .Values.workers.hpa.metrics) . | nindent 4 }} + {{- with .Values.workers.hpa.behavior }} + behavior: {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/chart/values.schema.json b/chart/values.schema.json index e031376b8145d..28095807ec159 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -1531,6 +1531,39 @@ } } }, + "hpa": { + "description": "HPA configuration.", + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "description": "Allow HPA autoscaling (KEDA must be disabled).", + "type": "boolean", + "default": false + }, + "minReplicaCount": { + "description": "Minimum number of workers created by KEDA.", + "type": "integer", + "default": 0 + }, + "maxReplicaCount": { + "description": "Maximum number of workers created by KEDA.", + "type": "integer", + "default": 10 + }, + "metrics": { + "description": "Specifications for which to use to calculate the desired replica count.", + "type": "string", + "default": "See values.yaml" + }, + "behavior": { + "description": "HorizontalPodAutoscalerBehavior configures the scaling behavior of the target.", + "type": "object", + "default": {}, + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.HorizontalPodAutoscalerBehavior" + } + } + }, 
"persistence": { "description": "Persistence configuration.", "type": "object", diff --git a/chart/values.yaml b/chart/values.yaml index 4e62390473d5f..b8bddf9043673 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -592,6 +592,28 @@ workers: # This configuration will be ignored if PGBouncer is not enabled usePgbouncer: true + # Allow HPA (KEDA must be disabled). + hpa: + enabled: false + + # Minimum number of workers created by HPA + minReplicaCount: 0 + + # Maximum number of workers created by HPA + maxReplicaCount: 10 + + # Specifications for which to use to calculate the desired replica count + metrics: |- + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 80 + + # Scaling behavior of the target in both Up and Down directions + behavior: {} + persistence: # Enable persistent volumes enabled: true From 485b7b42c8dc35aabb8adbfd5192c459ef99ed3e Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Mon, 11 Dec 2023 13:41:19 -0500 Subject: [PATCH 02/10] Add tests for native HPA --- .../templates/workers/worker-deployment.yaml | 3 +- helm_tests/airflow_core/test_worker.py | 99 +++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/chart/templates/workers/worker-deployment.yaml b/chart/templates/workers/worker-deployment.yaml index 33fb357aae6d2..55e89edfbb04e 100644 --- a/chart/templates/workers/worker-deployment.yaml +++ b/chart/templates/workers/worker-deployment.yaml @@ -22,6 +22,7 @@ ################################# {{- $persistence := .Values.workers.persistence.enabled }} {{- $keda := .Values.workers.keda.enabled }} +{{- $hpa := and .Values.workers.hpa.enabled (not .Values.workers.keda.enabled) }} {{- if or (eq .Values.executor "CeleryExecutor") (eq .Values.executor "CeleryKubernetesExecutor") }} {{- $nodeSelector := or .Values.workers.nodeSelector .Values.nodeSelector }} {{- $affinity := or .Values.workers.affinity .Values.affinity }} @@ -59,7 +60,7 @@ spec: {{- if $persistence }} 
serviceName: {{ include "airflow.fullname" . }}-worker {{- end }} - {{- if not $keda }} + {{- if and (not $keda) (not $hpa) }} replicas: {{ .Values.workers.replicas }} {{- end }} {{- if $revisionHistoryLimit }} diff --git a/helm_tests/airflow_core/test_worker.py b/helm_tests/airflow_core/test_worker.py index a65db04a5dbcf..9447215e7c362 100644 --- a/helm_tests/airflow_core/test_worker.py +++ b/helm_tests/airflow_core/test_worker.py @@ -977,6 +977,105 @@ def test_should_use_keda_query(self, query, executor, expected_query): assert expected_query == jmespath.search("spec.triggers[0].metadata.query", docs[0]) +class TestWorkerHPAAutoScaler: + """Tests worker HPA auto scaler.""" + + def test_should_be_disabled_on_keda_enabled(self): + docs = render_chart( + values={ + "executor": "CeleryExecutor", + "workers": { + "keda": {"enabled": True}, + "hpa": {"enabled": True}, + "labels": {"test_label": "test_label_value"}, + }, + }, + show_only=[ + "templates/workers/worker-kedaautoscaler.yaml", + "templates/workers/worker-hpa.yaml", + ], + ) + assert "test_label" in jmespath.search("metadata.labels", docs[0]) + assert jmespath.search("metadata.labels", docs[0])["test_label"] == "test_label_value" + assert len(docs) == 1 + + def test_should_add_component_specific_labels(self): + docs = render_chart( + values={ + "executor": "CeleryExecutor", + "workers": { + "hpa": {"enabled": True}, + "labels": {"test_label": "test_label_value"}, + }, + }, + show_only=["templates/workers/worker-hpa.yaml"], + ) + + assert "test_label" in jmespath.search("metadata.labels", docs[0]) + assert jmespath.search("metadata.labels", docs[0])["test_label"] == "test_label_value" + + def test_should_remove_replicas_field(self): + docs = render_chart( + values={ + "executor": "CeleryExecutor", + "workers": { + "hpa": {"enabled": True}, + }, + }, + show_only=["templates/workers/worker-deployment.yaml"], + ) + assert "replicas" not in jmespath.search("spec", docs[0]) + + @pytest.mark.parametrize( + "metrics, 
executor, expected_metrics", + [ + # default metrics + ( + None, + "CeleryExecutor", + { + "type": "Resource", + "resource": { + "name": "cpu", + "target": { + "type": "Utilization", + "averageUtilization": 80 + } + } + }, + ), + # custom metric + ( + "[{\"type\":\"Pods\",\"pods\":{\"metric\":{\"name\":\"custom_prometheus\"},\"target\":{\"type\":\"AverageValue\",\"averageValue\":\"20\"}}}]", + "CeleryKubernetesExecutor", + { + "type": "Pods", + "pods": { + "metric": { + "name": "custom_prometheus" + }, + "target": { + "type": "AverageValue", + "averageValue": "20" + } + } + }, + ) + ], + ) + def test_should_use_hpa_metrics(self, metrics, executor, expected_metrics): + docs = render_chart( + values={ + "executor": executor, + "workers": { + "hpa": {"enabled": True, **({"metrics": metrics} if metrics else {})}, + }, + }, + show_only=["templates/workers/worker-hpa.yaml"], + ) + assert expected_metrics == jmespath.search("spec.metrics[0]", docs[0]) + + class TestWorkerNetworkPolicy: """Tests worker network policy.""" From 7e445fb47cc1276a100e4c085ba7223ccb2a68d4 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Mon, 11 Dec 2023 13:52:45 -0500 Subject: [PATCH 03/10] Add tests for native HPA --- helm_tests/other/test_hpa.py | 97 ++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 helm_tests/other/test_hpa.py diff --git a/helm_tests/other/test_hpa.py b/helm_tests/other/test_hpa.py new file mode 100644 index 0000000000000..83121d5431e69 --- /dev/null +++ b/helm_tests/other/test_hpa.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import jmespath +import pytest + +from tests.charts.helm_template_generator import render_chart + + +class TestHPA: + """Tests HPA.""" + + def test_hpa_disabled_by_default(self): + """Disabled by default.""" + docs = render_chart( + values={}, + show_only=["templates/workers/worker-hpa.yaml"], + ) + assert docs == [] + + @pytest.mark.parametrize( + "executor, is_created", + [ + ("CeleryExecutor", True), + ("CeleryKubernetesExecutor", True), + ], + ) + def test_hpa_enabled(self, executor, is_created): + """HPA should only be created when enabled and executor is Celery or CeleryKubernetes.""" + docs = render_chart( + values={ + "workers": {"hpa": {"enabled": True}, "persistence": {"enabled": False}}, + "executor": executor, + }, + show_only=["templates/workers/worker-hpa.yaml"], + ) + if is_created: + assert jmespath.search("metadata.name", docs[0]) == "release-name-worker" + else: + assert docs == [] + + @pytest.mark.parametrize("executor", ["CeleryExecutor", "CeleryKubernetesExecutor"]) + def test_hpa_behavior(self, executor): + """Verify HPA behavior.""" + expected_behavior = { + "scaleDown": { + "stabilizationWindowSeconds": 300, + "policies": [{"type": "Percent", "value": 100, "periodSeconds": 15}], + } + } + docs = render_chart( + values={ + "workers": { + "hpa": { + "enabled": True, + "behavior": expected_behavior, + }, + }, + "executor": executor, + }, + show_only=["templates/workers/worker-hpa.yaml"], + ) + assert jmespath.search("spec.behavior", docs[0]) == expected_behavior + + @pytest.mark.parametrize( + 
"enabled, kind", + [ + ("enabled", "StatefulSet"), + ("not_enabled", "Deployment"), + ], + ) + def test_persistence(self, enabled, kind): + """If worker persistence is enabled, scaleTargetRef should be StatefulSet else Deployment.""" + is_enabled = enabled == "enabled" + docs = render_chart( + values={ + "workers": {"hpa": {"enabled": True}, "persistence": {"enabled": is_enabled}}, + "executor": "CeleryExecutor", + }, + show_only=["templates/workers/worker-hpa.yaml"], + ) + assert jmespath.search("spec.scaleTargetRef.kind", docs[0]) == kind From df8a03188470de6690496f798124336519260c21 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Mon, 11 Dec 2023 15:01:50 -0500 Subject: [PATCH 04/10] Fix lint issues --- helm_tests/airflow_core/test_worker.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/helm_tests/airflow_core/test_worker.py b/helm_tests/airflow_core/test_worker.py index 9447215e7c362..487a3305ede51 100644 --- a/helm_tests/airflow_core/test_worker.py +++ b/helm_tests/airflow_core/test_worker.py @@ -1035,32 +1035,21 @@ def test_should_remove_replicas_field(self): "CeleryExecutor", { "type": "Resource", - "resource": { - "name": "cpu", - "target": { - "type": "Utilization", - "averageUtilization": 80 - } - } + "resource": {"name": "cpu", "target": {"type": "Utilization", "averageUtilization": 80}}, }, ), # custom metric ( - "[{\"type\":\"Pods\",\"pods\":{\"metric\":{\"name\":\"custom_prometheus\"},\"target\":{\"type\":\"AverageValue\",\"averageValue\":\"20\"}}}]", + '[{"type":"Pods","pods":{"metric":{"name":"custom_prometheus"},"target":{"type":"AverageValue","averageValue":"20"}}}]', "CeleryKubernetesExecutor", { "type": "Pods", "pods": { - "metric": { - "name": "custom_prometheus" - }, - "target": { - "type": "AverageValue", - "averageValue": "20" - } - } + "metric": {"name": "custom_prometheus"}, + "target": {"type": "AverageValue", "averageValue": "20"}, + }, }, - ) + ), ], ) def test_should_use_hpa_metrics(self, 
metrics, executor, expected_metrics): From 428bf63f595c8d5ea5a105c9cb1d6d38113efefb Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Fri, 22 Dec 2023 10:40:11 -0500 Subject: [PATCH 05/10] Add test for min and max replicas --- helm_tests/other/test_hpa.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/helm_tests/other/test_hpa.py b/helm_tests/other/test_hpa.py index 83121d5431e69..638e32299e890 100644 --- a/helm_tests/other/test_hpa.py +++ b/helm_tests/other/test_hpa.py @@ -54,6 +54,30 @@ def test_hpa_enabled(self, executor, is_created): else: assert docs == [] + @pytest.mark.parametrize( + "min_replicas, max_replicas", + [ + (None, None), + (2, 8), + ], + ) + def test_min_max_replicas(self, min_replicas, max_replicas): + """Verify minimum and maximum replicas.""" + docs = render_chart( + values={ + "workers": { + "hpa": { + "enabled": True, + "minReplicaCount": min_replicas, + "maxReplicaCount": max_replicas, + }, + }, + }, + show_only=["templates/workers/worker-hpa.yaml"], + ) + assert jmespath.search("spec.minReplicas", docs[0]) == 1 if min_replicas is None else min_replicas + assert jmespath.search("spec.maxReplicas", docs[0]) == 5 if max_replicas is None else max_replicas + @pytest.mark.parametrize("executor", ["CeleryExecutor", "CeleryKubernetesExecutor"]) def test_hpa_behavior(self, executor): """Verify HPA behavior.""" From 49cd836d107c3882209f78bd74fad3225182bc05 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Fri, 22 Dec 2023 13:15:35 -0500 Subject: [PATCH 06/10] Fix minimum replicas --- chart/values.schema.json | 2 +- chart/values.yaml | 2 +- helm_tests/other/test_hpa.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/chart/values.schema.json b/chart/values.schema.json index afd3504eb7d52..7594c56506b5e 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -1549,7 +1549,7 @@ "maxReplicaCount": { "description": "Maximum number of workers created by KEDA.", "type": "integer", -
"default": 10 + "default": 5 }, "metrics": { "description": "Specifications for which to use to calculate the desired replica count.", diff --git a/chart/values.yaml b/chart/values.yaml index fd81000deddc2..446a4f1088210 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -600,7 +600,7 @@ workers: minReplicaCount: 0 # Maximum number of workers created by HPA - maxReplicaCount: 10 + maxReplicaCount: 5 # Specifications for which to use to calculate the desired replica count metrics: |- diff --git a/helm_tests/other/test_hpa.py b/helm_tests/other/test_hpa.py index 638e32299e890..dd71a970b12bd 100644 --- a/helm_tests/other/test_hpa.py +++ b/helm_tests/other/test_hpa.py @@ -68,14 +68,14 @@ def test_min_max_replicas(self, min_replicas, max_replicas): "workers": { "hpa": { "enabled": True, - "minReplicaCount": min_replicas, - "maxReplicaCount": max_replicas, - }, + **({"minReplicaCount": min_replicas} if min_replicas else {}), + **({"maxReplicaCount": max_replicas} if max_replicas else {}), + } }, }, show_only=["templates/workers/worker-hpa.yaml"], ) - assert jmespath.search("spec.minReplicas", docs[0]) == 1 if min_replicas is None else min_replicas - assert jmespath.search("spec.maxReplicas", docs[0]) == 5 if max_replicas is None else max_replicas + assert jmespath.search("spec.minReplicas", docs[0]) == (0 if min_replicas is None else min_replicas) + assert jmespath.search("spec.maxReplicas", docs[0]) == (5 if max_replicas is None else max_replicas) @pytest.mark.parametrize("executor", ["CeleryExecutor", "CeleryKubernetesExecutor"]) From 736c16469488a65f771fd6e68f964365ecb6b1f2 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Wed, 3 Jan 2024 14:06:53 -0500 Subject: [PATCH 07/10] Define metrics as array --- chart/templates/workers/worker-hpa.yaml | 2 +- chart/values.schema.json | 193 +++++++++++++++++++++++- chart/values.yaml | 2 +- helm_tests/airflow_core/test_worker.py | 14 +- 4 files changed, 204 insertions(+), 7 deletions(-) diff --git a/chart/templates/workers/worker-hpa.yaml b/chart/templates/workers/worker-hpa.yaml index b30b89a45696f..e6b1a9ae523ea 100644 ---
a/chart/templates/workers/worker-hpa.yaml +++ b/chart/templates/workers/worker-hpa.yaml @@ -42,7 +42,7 @@ spec: name: {{ include "airflow.fullname" . }}-worker minReplicas: {{ .Values.workers.hpa.minReplicaCount }} maxReplicas: {{ .Values.workers.hpa.maxReplicaCount }} - metrics: {{- tpl (default "[]" .Values.workers.hpa.metrics) . | nindent 4 }} + metrics: {{- toYaml .Values.workers.hpa.metrics | nindent 4 }} {{- with .Values.workers.hpa.behavior }} behavior: {{- toYaml . | nindent 4 }} {{- end }} diff --git a/chart/values.schema.json b/chart/values.schema.json index 7594c56506b5e..30bc0a2849d80 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -1553,8 +1553,11 @@ }, "metrics": { "description": "Specifications for which to use to calculate the desired replica count.", - "type": "string", - "default": "See values.yaml" + "type": "array", + "default": [], + "items": { + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetrics" + } }, "behavior": { "description": "HorizontalPodAutoscalerBehavior configures the scaling behavior of the target.", @@ -7506,6 +7509,192 @@ "type": "object", "additionalProperties": false }, + "io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetrics": { + "description": "HorizontalPodAutoscalerMetrics contains the specifications for which to use to calculate the desired replica count (the maximum replica count across all metrics will be used)", + "properties": { + "external": { + "description": "external refers to a global metric that is not associated with any Kubernetes object. 
It allows autoscaling based on information coming from components running outside of cluster (for example length of queue in cloud messaging service, or QPS from loadbalancer running outside of cluster).", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.ExternalMetricStatus" + }, + "object": { + "description": "object refers to a metric describing a single kubernetes object (for example, hits-per-second on an Ingress object).", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.ObjectMetricStatus" + }, + "pods": { + "description": "pods refers to a metric describing each pod in the current scale target (for example, transactions-processed-per-second). The values will be averaged together before being compared to the target value.", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.PodsMetricStatus" + }, + "resource": { + "description": "resource refers to a resource metric (such as those specified in requests and limits) known to Kubernetes describing each pod in the current scale target (e.g. CPU or memory). Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source.", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.ResourceMetricStatus" + }, + "type": { + "description": "type is the type of metric source. 
It will be one of \"Object\", \"Pods\" or \"Resource\", each corresponds to a matching field in the object.", + "type": [ + "string", + "null" + ] + } + }, + "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2.ExternalMetricStatus": { + "description": "ExternalMetricStatus indicates the current value of a global metric not associated with any Kubernetes object.", + "required": [ + "metric", + "target" + ], + "type": "object", + "properties": { + "metric": { + "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier" + }, + "target": { + "description": "target specifies the target value for the given metric", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + } + } + }, + "io.k8s.api.autoscaling.v2.ObjectMetricStatus": { + "description": "ObjectMetricStatus indicates the current value of a metric describing a kubernetes object (for example, hits-per-second on an Ingress object).", + "required": [ + "describedObject", + "metric", + "target" + ], + "type": "object", + "properties": { + "describedObject": { + "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.CrossVersionObjectReference" + }, + "metric": { + "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier" + }, + "target": { + "description": "target specifies the target value for the given metric", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + } + } + }, + "io.k8s.api.autoscaling.v2.PodsMetricStatus": { + "description": "PodsMetricStatus indicates the current value of a metric describing each pod in the current scale target (for 
example, transactions-processed-per-second).", + "required": [ + "metric", + "target" + ], + "type": "object", + "properties": { + "metric": { + "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier" + }, + "target": { + "description": "target specifies the target value for the given metric", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + } + } + }, + "io.k8s.api.autoscaling.v2.ResourceMetricStatus": { + "description": "ResourceMetricStatus indicates the current value of a resource metric known to Kubernetes, as specified in requests and limits, describing each pod in the current scale target (e.g. CPU or memory). Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source.", + "required": [ + "name", + "target" + ], + "type": "object", + "properties": { + "name": { + "description": "name is the name of the resource in question.", + "type": [ + "string", + "null" + ] + }, + "target": { + "description": "target specifies the target value for the given metric", + "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + } + } + }, + "io.k8s.api.autoscaling.v2.CrossVersionObjectReference": { + "description": "CrossVersionObjectReference contains enough information to let you identify the referred resource.", + "type": "object", + "required": [ + "kind", + "name" + ], + "properties": { + "apiVersion": { + "description": "API version of the referent", + "type": [ + "string", + "null" + ] + }, + "kind": { + "description": "Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds\"", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "Name of the referent; More info: 
http://kubernetes.io/docs/user-guide/identifiers#names", + "type": [ + "string", + "null" + ] + } + } + }, + "io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier": { + "description": "metric identifies the target metric by name and selector", + "required": [ + "name" + ], + "properties": { + "name": { + "description": "name is the name of the given metric", + "type": "string" + }, + "selector": { + "description": "selector is the string-encoded form of a standard kubernetes label selector for the given metric", + "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.LabelSelector" + } + } + }, + "io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget": { + "description": "target specifies the target value for the given metric", + "required": [ + "type" + ], + "properties": { + "type": { + "description": "type represents whether the metric type is Utilization, Value, or AverageValue", + "type": "string", + "enum": [ + "Utilization", + "Value", + "AverageValue" + ] + }, + "averageUtilization": { + "description": "averageUtilization is the target value of the average of the resource metric across all relevant pods, represented as a percentage of the requested value of the resource for the pods", + "type": "integer" + }, + "averageValue": { + "description": "averageValue is the target value of the average of the metric across all relevant pods (as a quantity)", + "$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity" + }, + "value": { + "description": "value is the target value of the metric (as a quantity).", + "$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity" + } + } + }, "io.k8s.api.core.v1.AWSElasticBlockStoreVolumeSource": { "description": "Represents a Persistent Disk resource in AWS.\n\nAn AWS EBS disk must exist before mounting to a container. The disk must also be in the same AWS zone as the kubelet. An AWS EBS disk can only be mounted as read/write once. 
AWS EBS volumes support ownership management and SELinux relabeling.", "properties": { diff --git a/chart/values.yaml b/chart/values.yaml index 446a4f1088210..784f7071574c7 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -603,7 +603,7 @@ workers: maxReplicaCount: 5 # Specifications for which to use to calculate the desired replica count - metrics: |- + metrics: - type: Resource resource: name: cpu diff --git a/helm_tests/airflow_core/test_worker.py b/helm_tests/airflow_core/test_worker.py index 487a3305ede51..2a5dab71f04c3 100644 --- a/helm_tests/airflow_core/test_worker.py +++ b/helm_tests/airflow_core/test_worker.py @@ -1040,13 +1040,21 @@ def test_should_remove_replicas_field(self): ), # custom metric ( - '[{"type":"Pods","pods":{"metric":{"name":"custom_prometheus"},"target":{"type":"AverageValue","averageValue":"20"}}}]', + [ + { + "type": "Pods", + "pods": { + "metric": {"name": "custom"}, + "target": {"type": "Utilization", "averageUtilization": 80}, + }, + } + ], "CeleryKubernetesExecutor", { "type": "Pods", "pods": { - "metric": {"name": "custom_prometheus"}, - "target": {"type": "AverageValue", "averageValue": "20"}, + "metric": {"name": "custom"}, + "target": {"type": "Utilization", "averageUtilization": 80}, }, }, ), From 41b1fd5339e5f1724d80f388e40181253a19ef89 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Wed, 3 Jan 2024 15:59:43 -0500 Subject: [PATCH 08/10] Fix default value in json schema --- chart/values.schema.json | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/chart/values.schema.json b/chart/values.schema.json index 30bc0a2849d80..1cd1adec84180 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -1554,7 +1554,18 @@ "metrics": { "description": "Specifications for which to use to calculate the desired replica count.", "type": "array", - "default": [], + "default": [ + { + "type": "Resource", + "resource": { + "name": "cpu", + "target": { + "type": "Utilization", + 
"averageUtilization": 80 + } + } + } + ], "items": { "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetrics" } From c4ca280a14f25de2c4308735783eb56798235328 Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Wed, 3 Jan 2024 16:20:25 -0500 Subject: [PATCH 09/10] Update json schemas --- Dockerfile | 1608 -------------------------------------- chart/values.schema.json | 296 +++---- 2 files changed, 158 insertions(+), 1746 deletions(-) diff --git a/Dockerfile b/Dockerfile index cbdbd168f5f12..e69de29bb2d1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,1608 +0,0 @@ -# syntax=docker/dockerfile:1.4 -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# THIS DOCKERFILE IS INTENDED FOR PRODUCTION USE AND DEPLOYMENT. -# NOTE! IT IS ALPHA-QUALITY FOR NOW - WE ARE IN A PROCESS OF TESTING IT -# -# -# This is a multi-segmented image. It actually contains two images: -# -# airflow-build-image - there all airflow dependencies can be installed (and -# built - for those dependencies that require -# build essentials). 
Airflow is installed there with -# --user switch so that all the dependencies are -# installed to ${HOME}/.local -# -# main - this is the actual production image that is much -# smaller because it does not contain all the build -# essentials. Instead the ${HOME}/.local folder -# is copied from the build-image - this way we have -# only result of installation and we do not need -# all the build essentials. This makes the image -# much smaller. -# -# Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,common.io,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" -ARG ADDITIONAL_AIRFLOW_EXTRAS="" -ARG ADDITIONAL_PYTHON_DEPS="" - -ARG AIRFLOW_HOME=/opt/airflow -ARG AIRFLOW_UID="50000" -ARG AIRFLOW_USER_HOME_DIR=/home/airflow - -# latest released version here -ARG AIRFLOW_VERSION="2.8.0" - -ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" - -ARG AIRFLOW_PIP_VERSION=23.3.2 -ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" -ARG AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" - -# By default latest released version of airflow is installed (when empty) but this value can be overridden -# and we can install version according to specification (For example ==2.0.2 or <3.0.0). -ARG AIRFLOW_VERSION_SPECIFICATION="" - -# By default PIP has progress bar but you can disable it. 
-ARG PIP_PROGRESS_BAR="on" - -############################################################################################## -# This is the script image where we keep all inlined bash scripts needed in other segments -############################################################################################## -FROM scratch as scripts - -############################################################################################## -# Please DO NOT modify the inlined scripts manually. The content of those files will be -# replaced by pre-commit automatically from the "scripts/docker/" folder. -# This is done in order to avoid problems with caching and file permissions and in order to -# make the PROD Dockerfile standalone -############################################################################################## - -# The content below is automatically copied from scripts/docker/install_os_dependencies.sh -COPY <<"EOF" /install_os_dependencies.sh -#!/usr/bin/env bash -set -euo pipefail - -DOCKER_CLI_VERSION=24.0.6 - -if [[ "$#" != 1 ]]; then - echo "ERROR! There should be 'runtime' or 'dev' parameter passed as argument.". - exit 1 -fi - -if [[ "${1}" == "runtime" ]]; then - INSTALLATION_TYPE="RUNTIME" -elif [[ "${1}" == "dev" ]]; then - INSTALLATION_TYPE="dev" -else - echo "ERROR! Wrong argument. Passed ${1} and it should be one of 'runtime' or 'dev'.". 
- exit 1 -fi - -function get_dev_apt_deps() { - if [[ "${DEV_APT_DEPS=}" == "" ]]; then - DEV_APT_DEPS="apt-transport-https apt-utils build-essential ca-certificates dirmngr \ -freetds-bin freetds-dev git gosu graphviz graphviz-dev krb5-user ldap-utils libffi-dev libgeos-dev \ -libkrb5-dev libldap2-dev libleveldb1d libleveldb-dev libsasl2-2 libsasl2-dev libsasl2-modules \ -libssl-dev libxmlsec1 libxmlsec1-dev locales lsb-release openssh-client pkgconf sasl2-bin \ -software-properties-common sqlite3 sudo unixodbc unixodbc-dev" - export DEV_APT_DEPS - fi -} - -function get_runtime_apt_deps() { - local debian_version - local debian_version_apt_deps - # Get debian version without installing lsb_release - # shellcheck disable=SC1091 - debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";) - echo - echo "DEBIAN CODENAME: ${debian_version}" - echo - if [[ "${debian_version}" == "bullseye" ]]; then - debian_version_apt_deps="libffi7 libldap-2.4-2 libssl1.1 netcat" - else - debian_version_apt_deps="libffi8 libldap-2.5-0 libssl3 netcat-openbsd" - fi - echo - echo "APPLIED INSTALLATION CONFIGURATION FOR DEBIAN VERSION: ${debian_version}" - echo - if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then - RUNTIME_APT_DEPS="apt-transport-https apt-utils ca-certificates \ -curl dumb-init freetds-bin gosu krb5-user libgeos-dev \ -ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales ${debian_version_apt_deps} \ -lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc" - export RUNTIME_APT_DEPS - fi -} - -function install_docker_cli() { - local platform - if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then - platform="aarch64" - else - platform="x86_64" - fi - curl --silent \ - "https://download.docker.com/linux/static/stable/${platform}/docker-${DOCKER_CLI_VERSION}.tgz" \ - | tar -C /usr/bin --strip-components=1 -xvzf - docker/docker -} - -function install_debian_dev_dependencies() { - apt-get update - apt-get install 
--no-install-recommends -yqq apt-utils >/dev/null 2>&1 - apt-get install -y --no-install-recommends curl gnupg2 lsb-release - # shellcheck disable=SC2086 - export ${ADDITIONAL_DEV_APT_ENV?} - if [[ ${DEV_APT_COMMAND} != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}" - fi - if [[ ${ADDITIONAL_DEV_APT_COMMAND} != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}" - fi - apt-get update - local debian_version - local debian_version_apt_deps - # Get debian version without installing lsb_release - # shellcheck disable=SC1091 - debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";) - echo - echo "DEBIAN CODENAME: ${debian_version}" - echo - if [[ "${debian_version}" == "bullseye" ]]; then - echo - echo "Bullseye detected - replacing dependencies in additional dev apt deps" - echo - # Replace dependencies in additional dev apt deps to be compatible with Bullseye - ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS//libgcc-11-dev/libgcc-10-dev} - ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS//netcat-openbsd/netcat} - echo - echo "Replaced bullseye dev apt dependencies" - echo "${ADDITIONAL_DEV_APT_COMMAND}" - echo - fi - - # shellcheck disable=SC2086 - apt-get install -y --no-install-recommends ${DEV_APT_DEPS} ${ADDITIONAL_DEV_APT_DEPS} -} - -function install_debian_runtime_dependencies() { - apt-get update - apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1 - apt-get install -y --no-install-recommends curl gnupg2 lsb-release - # shellcheck disable=SC2086 - export ${ADDITIONAL_RUNTIME_APT_ENV?} - if [[ "${RUNTIME_APT_COMMAND}" != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}" - fi - if [[ "${ADDITIONAL_RUNTIME_APT_COMMAND}" != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}" - fi - apt-get update - # shellcheck disable=SC2086 - apt-get install -y 
--no-install-recommends ${RUNTIME_APT_DEPS} ${ADDITIONAL_RUNTIME_APT_DEPS} - apt-get autoremove -yqq --purge - apt-get clean - rm -rf /var/lib/apt/lists/* /var/log/* -} - -if [[ "${INSTALLATION_TYPE}" == "RUNTIME" ]]; then - get_runtime_apt_deps - install_debian_runtime_dependencies - install_docker_cli - -else - get_dev_apt_deps - install_debian_dev_dependencies - install_docker_cli -fi -EOF - -# The content below is automatically copied from scripts/docker/install_mysql.sh -COPY <<"EOF" /install_mysql.sh -#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -set -euo pipefail - -common::get_colors -declare -a packages - -readonly MYSQL_LTS_VERSION="8.0" -readonly MARIADB_LTS_VERSION="10.11" - -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_MYSQL_CLIENT_TYPE:-mariadb}" - -install_mysql_client() { - if [[ "${1}" == "dev" ]]; then - packages=("libmysqlclient-dev" "mysql-client") - elif [[ "${1}" == "prod" ]]; then - # `libmysqlclientXX` where XX is number, and it should be increased every new GA MySQL release, for example - # 18 - MySQL 5.6.48 - # 20 - MySQL 5.7.42 - # 21 - MySQL 8.0.34 - # 22 - MySQL 8.1 - packages=("libmysqlclient21" "mysql-client") - else - echo - echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}" - echo - exit 1 - fi - - common::import_trusted_gpg "B7B3B788A8D3785C" "mysql" - - echo - echo "${COLOR_BLUE}Installing Oracle MySQL client version ${MYSQL_LTS_VERSION}: ${1}${COLOR_RESET}" - echo - - echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_LTS_VERSION}" > \ - /etc/apt/sources.list.d/mysql.list - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" - apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* - - # Remove mysql repository from sources.list.d as MySQL repos have a basic flaw that they put expiry - # date on their GPG signing keys and they sign their repo with those keys. 
This means that after a - # certain date, the GPG key becomes invalid and if you have the repository added in your sources.list - # then you will not be able to install anything from any other repository. This is unlike any other - # repository we have seen (for example Postgres, MariaDB, MsSQL - all have non-expiring signing keys) - rm /etc/apt/sources.list.d/mysql.list -} - -install_mariadb_client() { - # List of compatible packages Oracle MySQL -> MariaDB: - # `mysql-client` -> `mariadb-client` or `mariadb-client-compat` (11+) - # `libmysqlclientXX` (where XX is a number) -> `libmariadb3-compat` - # `libmysqlclient-dev` -> `libmariadb-dev-compat` - # - # Naming differs from the Debian repo which we used before, in - # that some packages might contain the `-compat` suffix, Debian repo -> MariaDB repo: - # `libmariadb-dev` -> `libmariadb-dev-compat` - # `mariadb-client-core` -> `mariadb-client` or `mariadb-client-compat` (11+) - if [[ "${1}" == "dev" ]]; then - packages=("libmariadb-dev-compat" "mariadb-client") - elif [[ "${1}" == "prod" ]]; then - packages=("libmariadb3-compat" "mariadb-client") - else - echo - echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}" - echo - exit 1 - fi - - common::import_trusted_gpg "0xF1656F24C74CD1D8" "mariadb" - - echo - echo "${COLOR_BLUE}Installing MariaDB client version ${MARIADB_LTS_VERSION}: ${1}${COLOR_RESET}" - echo "${COLOR_YELLOW}MariaDB client protocol-compatible with MySQL client.${COLOR_RESET}" - echo - - curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - - echo "deb [arch=amd64,arm64] https://archive.mariadb.org/mariadb-${MARIADB_LTS_VERSION}/repo/debian/ $(lsb_release -cs) main" > \ - /etc/apt/sources.list.d/mariadb.list - # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies - printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" > /etc/apt/preferences.d/mariadb - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" -
apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* -} - -if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then - if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then - INSTALL_MYSQL_CLIENT_TYPE="mariadb" - echo - echo "${COLOR_YELLOW}Client forced to mariadb for ARM${COLOR_RESET}" - echo - fi - - if [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mysql" ]]; then - install_mysql_client "${@}" - elif [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mariadb" ]]; then - install_mariadb_client "${@}" - else - echo - echo "${COLOR_RED}Specify either mysql or mariadb, got ${INSTALL_MYSQL_CLIENT_TYPE}${COLOR_RESET}" - echo - exit 1 - fi -fi -EOF - -# The content below is automatically copied from scripts/docker/install_mssql.sh -COPY <<"EOF" /install_mssql.sh -#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -set -euo pipefail - -common::get_colors -declare -a packages - -: "${AIRFLOW_PIP_VERSION:?Should be set}" -: "${INSTALL_MSSQL_CLIENT:?Should be true or false}" - - -function install_mssql_client() { - # Install MsSQL client from Microsoft repositories - if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then - echo - echo "${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}" - echo - return - fi - packages=("msodbcsql18") - - common::import_trusted_gpg "EB3E94ADBE1229CF" "microsoft" - - echo - echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}" - echo - - echo "deb [arch=amd64,arm64] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod $(lsb_release -cs) main" > \ - /etc/apt/sources.list.d/mssql-release.list - apt-get update -yqq - apt-get upgrade -yqq - ACCEPT_EULA=Y apt-get -yqq install --no-install-recommends "${packages[@]}" - rm -rf /var/lib/apt/lists/* - apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* -} - -install_mssql_client "${@}" -EOF - -# The content below is automatically copied from scripts/docker/install_postgres.sh -COPY <<"EOF" /install_postgres.sh 
-#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" -set -euo pipefail - -common::get_colors -declare -a packages - -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" - -install_postgres_client() { - echo - echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}" - echo - - if [[ "${1}" == "dev" ]]; then - packages=("libpq-dev" "postgresql-client") - elif [[ "${1}" == "prod" ]]; then - packages=("postgresql-client") - else - echo - echo "Specify either prod or dev" - echo - exit 1 - fi - - common::import_trusted_gpg "7FCC7D46ACCC4CF8" "postgres" - - echo "deb [arch=amd64,arm64] https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > \ - /etc/apt/sources.list.d/pgdg.list - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" - apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* -} - -if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then - install_postgres_client "${@}" -fi -EOF - -# The content below is automatically copied from scripts/docker/install_pip_version.sh -COPY <<"EOF" /install_pip_version.sh -#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::show_pip_version_and_location - -common::install_pip_version -EOF - -# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh -COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh -#!/usr/bin/env bash - -. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_REPO:?Should be set}" -: "${AIRFLOW_BRANCH:?Should be set}" -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -function install_airflow_dependencies_from_branch_tip() { - echo - echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" - echo - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - # Install latest set of dependencies using constraints. In case constraints were upgraded and there - # are conflicts, this might fail, but it should be fixed in the following installation steps - set -x - pip install --root-user-action ignore \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true - common::install_pip_version - pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true - set +x - echo - echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. 
Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" - echo - pip uninstall --yes apache-airflow || true -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_airflow_dependencies_from_branch_tip -EOF - -# The content below is automatically copied from scripts/docker/common.sh -COPY <<"EOF" /common.sh -#!/usr/bin/env bash -set -euo pipefail - -function common::get_colors() { - COLOR_BLUE=$'\e[34m' - COLOR_GREEN=$'\e[32m' - COLOR_RED=$'\e[31m' - COLOR_RESET=$'\e[0m' - COLOR_YELLOW=$'\e[33m' - export COLOR_BLUE - export COLOR_GREEN - export COLOR_RED - export COLOR_RESET - export COLOR_YELLOW -} - - -function common::get_airflow_version_specification() { - if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=} - && -n ${AIRFLOW_VERSION} - && ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then - AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}" - fi -} - -function common::override_pip_version_if_needed() { - if [[ -n ${AIRFLOW_VERSION} ]]; then - if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then - export AIRFLOW_PIP_VERSION="23.3.2" - fi - fi -} - -function common::get_constraints_location() { - # auto-detect Airflow-constraint reference and location - if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then - if [[ ${AIRFLOW_VERSION} =~ v?2.* && ! ${AIRFLOW_VERSION} =~ .*dev.* ]]; then - AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION} - else - AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH} - fi - fi - - if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then - local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}" - local python_version - python_version="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." 
-f 1-2)" - AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt" - fi -} - -function common::show_pip_version_and_location() { - echo "PATH=${PATH}" - echo "pip on path: $(which pip)" - echo "Using pip: $(pip --version)" -} - -function common::install_pip_version() { - echo - echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" - echo - if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then - pip install --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" - else - pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" - fi - mkdir -p "${HOME}/.local/bin" -} - -function common::import_trusted_gpg() { - common::get_colors - - local key=${1:?${COLOR_RED}First argument expects OpenPGP Key ID${COLOR_RESET}} - local name=${2:?${COLOR_RED}Second argument expected trust storage name${COLOR_RESET}} - # Please note that not all servers can be used to retrieve keys - # sks-keyservers.net: Unmaintained and DNS taken down due to GDPR requests. - # keys.openpgp.org: User ID Mandatory, not suitable for APT repositories - # keyring.debian.org: Only accepts keys in Debian keyring. - # pgp.mit.edu: High response time.
- local keyservers=( - "hkps://keyserver.ubuntu.com" - "hkps://pgp.surf.nl" - ) - - GNUPGHOME="$(mktemp -d)" - export GNUPGHOME - set +e - for keyserver in $(shuf -e "${keyservers[@]}"); do - echo "${COLOR_BLUE}Try to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" - gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break - echo "${COLOR_YELLOW}Unable to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" - done - set -e - gpg --export "${key}" > "/etc/apt/trusted.gpg.d/${name}.gpg" - gpgconf --kill all - rm -rf "${GNUPGHOME}" - unset GNUPGHOME -} -EOF - -# The content below is automatically copied from scripts/docker/pip -COPY <<"EOF" /pip -#!/usr/bin/env bash -COLOR_RED=$'\e[31m' -COLOR_RESET=$'\e[0m' -COLOR_YELLOW=$'\e[33m' - -if [[ $(id -u) == "0" ]]; then - echo - echo "${COLOR_RED}You are running pip as root. Please use 'airflow' user to run pip!${COLOR_RESET}" - echo - echo "${COLOR_YELLOW}See: https://airflow.apache.org/docs/docker-stack/build.html#adding-a-new-pypi-package${COLOR_RESET}" - echo - exit 1 -fi -exec "${HOME}"/.local/bin/pip "${@}" -EOF - -# The content below is automatically copied from scripts/docker/install_from_docker_context_files.sh -COPY <<"EOF" /install_from_docker_context_files.sh - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -function install_airflow_and_providers_from_docker_context_files(){ - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - - if [[ ! 
-d /docker-context-files ]]; then - echo - echo "${COLOR_RED}You must provide a folder via --build-arg DOCKER_CONTEXT_FILES= and you missed it!${COLOR_RESET}" - echo - exit 1 - fi - - # shellcheck disable=SC2206 - local pip_flags=( - # Don't quote this -- if it is empty we don't want it to create an - # empty array element - --find-links="file:///docker-context-files" - ) - - # Find Apache Airflow packages in docker-context files - local reinstalling_apache_airflow_package - reinstalling_apache_airflow_package=$(ls \ - /docker-context-files/apache?airflow?[0-9]*.{whl,tar.gz} 2>/dev/null || true) - # Add extras when installing airflow - if [[ -n "${reinstalling_apache_airflow_package}" ]]; then - # When a provider depends on a dev version of Airflow, we need to - # specify `apache-airflow==$VER`, otherwise pip will look for it on - # PyPI, and fail to find it - - # This will work as long as the wheel file is correctly named, which it - # will be if it was built by wheel tooling - local ver - ver=$(basename "$reinstalling_apache_airflow_package" | cut -d "-" -f 2) - reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==$ver" - fi - - if [[ -z "${reinstalling_apache_airflow_package}" && ${AIRFLOW_VERSION=} != "" ]]; then - # When we install only provider packages from docker-context files, we need to still - # install airflow from PyPI when AIRFLOW_VERSION is set.
This handles the case where - # pre-release dockerhub image of airflow is built, but we want to install some providers from - # docker-context files - reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}" - fi - # Find Apache Airflow packages in docker-context files - local reinstalling_apache_airflow_providers_packages - reinstalling_apache_airflow_providers_packages=$(ls \ - /docker-context-files/apache?airflow?providers*.{whl,tar.gz} 2>/dev/null || true) - if [[ -z "${reinstalling_apache_airflow_package}" && \ - -z "${reinstalling_apache_airflow_providers_packages}" ]]; then - return - fi - - if [[ ${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=} == "true" ]]; then - local python_version - python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') - local local_constraints_file=/docker-context-files/constraints-"${python_version}"/${AIRFLOW_CONSTRAINTS_MODE}-"${python_version}".txt - - if [[ -f "${local_constraints_file}" ]]; then - echo - echo "${COLOR_BLUE}Installing docker-context-files packages with constraints found in ${local_constraints_file}${COLOR_RESET}" - echo - # force reinstall all airflow + provider packages with constraints found in - set -x - pip install "${pip_flags[@]}" --root-user-action ignore --upgrade \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint "${local_constraints_file}" \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} - set +x - else - echo - echo "${COLOR_BLUE}Installing docker-context-files packages with constraints from GitHub${COLOR_RESET}" - echo - set -x - pip install "${pip_flags[@]}" --root-user-action ignore \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} - set +x - fi - else - echo - echo "${COLOR_BLUE}Installing docker-context-files packages without constraints${COLOR_RESET}" - 
echo - set -x - pip install "${pip_flags[@]}" --root-user-action ignore \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} - set +x - fi - common::install_pip_version - pip check -} - -function install_all_other_packages_from_docker_context_files() { - - echo - echo "${COLOR_BLUE}Force re-installing all other package from local files without dependencies${COLOR_RESET}" - echo - local reinstalling_other_packages - # shellcheck disable=SC2010 - reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \ - grep -v apache_airflow | grep -v apache-airflow || true) - if [[ -n "${reinstalling_other_packages}" ]]; then - set -x - pip install ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --root-user-action ignore --force-reinstall --no-deps --no-index ${reinstalling_other_packages} - common::install_pip_version - set +x - fi -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_airflow_and_providers_from_docker_context_files - -common::show_pip_version_and_location -install_all_other_packages_from_docker_context_files -EOF - -# The content below is automatically copied from scripts/docker/install_airflow.sh -COPY <<"EOF" /install_airflow.sh -#!/usr/bin/env bash - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -function install_airflow() { - # Coherence check for editable installation mode. - if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then - echo - echo "${COLOR_RED}ERROR! 
You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" - echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}" - exit 1 - fi - # Remove mysql from extras if client is not going to be installed - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}" - fi - # Remove postgres from extras if client is not going to be installed - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}" - fi - if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then - echo - echo "${COLOR_BLUE}Installing all packages with eager upgrade${COLOR_RESET}" - echo - # eager upgrade - pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} - if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then - # Remove airflow and reinstall it using editable flag - # We can only do it when we install airflow from sources - set -x - pip uninstall apache-airflow --yes - pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" - set +x - fi - - common::install_pip_version - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - else \ - echo - echo "${COLOR_BLUE}Installing all packages with constraints and upgrade if needed${COLOR_RESET}" - echo - set -x - pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - 
${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" - common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py - pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" - common::install_pip_version - set +x - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - fi - -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_airflow -EOF - -# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh -COPY <<"EOF" /install_additional_dependencies.sh -#!/usr/bin/env bash -set -euo pipefail - -: "${UPGRADE_TO_NEWER_DEPENDENCIES:?Should be true or false}" -: "${ADDITIONAL_PYTHON_DEPS:?Should be set}" -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -function install_additional_dependencies() { - if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then - echo - echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" - echo - set -x - pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} - common::install_pip_version - set +x - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - else - echo - echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" - echo - set -x - pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${ADDITIONAL_PYTHON_DEPS} - common::install_pip_version - set +x - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - fi -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_additional_dependencies -EOF - - -# The content below is automatically copied from scripts/docker/entrypoint_prod.sh -COPY <<"EOF" /entrypoint_prod.sh -#!/usr/bin/env bash -AIRFLOW_COMMAND="${1:-}" - -set -euo pipefail - -LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" -export LD_PRELOAD - -function run_check_with_retries { - local cmd - cmd="${1}" - local countdown - countdown="${CONNECTION_CHECK_MAX_COUNT}" - - while true - do - set +e - local last_check_result - local res - last_check_result=$(eval "${cmd} 2>&1") - res=$? - set -e - if [[ ${res} == 0 ]]; then - echo - break - else - echo -n "." - countdown=$((countdown-1)) - fi - if [[ ${countdown} == 0 ]]; then - echo - echo "ERROR! Maximum number of retries (${CONNECTION_CHECK_MAX_COUNT}) reached." 
- echo - echo "Last check result:" - echo "$ ${cmd}" - echo "${last_check_result}" - echo - exit 1 - else - sleep "${CONNECTION_CHECK_SLEEP_TIME}" - fi - done -} - -function run_nc() { - # Checks if it is possible to connect to the host using netcat. - # - # We want to avoid misleading messages and perform only forward lookup of the service IP address. - # Netcat when run without -n performs both forward and reverse lookup and fails if the reverse - # lookup name does not match the original name even if the host is reachable via IP. This happens - # randomly with docker-compose in GitHub Actions. - # Since we are not using reverse lookup elsewhere, we can perform forward lookup in python - # And use the IP in NC and add '-n' switch to disable any DNS use. - # Even if this message might be harmless, it might hide the real reason for the problem - # Which is the long time needed to start some services, seeing this message might be totally misleading - # when you try to analyse the problem, that's why it's best to avoid it, - local host="${1}" - local port="${2}" - local ip - ip=$(python -c "import socket; print(socket.gethostbyname('${host}'))") - nc -zvvn "${ip}" "${port}" -} - - -function wait_for_connection { - # Waits for Connection to the backend specified via URL passed as first parameter - # Detects backend type depending on the URL schema and assigns - # default port numbers if not specified in the URL. 
- # Then it loops until connection to the host/port specified can be established - # It tries `CONNECTION_CHECK_MAX_COUNT` times and sleeps `CONNECTION_CHECK_SLEEP_TIME` between checks - local connection_url - connection_url="${1}" - local detected_backend - detected_backend=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).scheme)" "${connection_url}") - local detected_host - detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname or '')" "${connection_url}") - local detected_port - detected_port=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).port or '')" "${connection_url}") - - echo BACKEND="${BACKEND:=${detected_backend}}" - readonly BACKEND - - if [[ -z "${detected_port=}" ]]; then - if [[ ${BACKEND} == "postgres"* ]]; then - detected_port=5432 - elif [[ ${BACKEND} == "mysql"* ]]; then - detected_port=3306 - elif [[ ${BACKEND} == "mssql"* ]]; then - detected_port=1433 - elif [[ ${BACKEND} == "redis"* ]]; then - detected_port=6379 - elif [[ ${BACKEND} == "amqp"* ]]; then - detected_port=5672 - fi - fi - - detected_host=${detected_host:="localhost"} - - # Allow the DB parameters to be overridden by environment variable - echo DB_HOST="${DB_HOST:=${detected_host}}" - readonly DB_HOST - - echo DB_PORT="${DB_PORT:=${detected_port}}" - readonly DB_PORT - if [[ -n "${DB_HOST=}" ]] && [[ -n "${DB_PORT=}" ]]; then - run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}" - else - >&2 echo "The connection details to the broker could not be determined. Connectivity checks were skipped." - fi -} - -function create_www_user() { - local local_password="" - # Warning: command environment variables (*_CMD) have priority over usual configuration variables - # for configuration parameters that require sensitive information. This is the case for the SQL database - # and the broker backend in this entrypoint script. 
- if [[ -n "${_AIRFLOW_WWW_USER_PASSWORD_CMD=}" ]]; then - local_password=$(eval "${_AIRFLOW_WWW_USER_PASSWORD_CMD}") - unset _AIRFLOW_WWW_USER_PASSWORD_CMD - elif [[ -n "${_AIRFLOW_WWW_USER_PASSWORD=}" ]]; then - local_password="${_AIRFLOW_WWW_USER_PASSWORD}" - unset _AIRFLOW_WWW_USER_PASSWORD - fi - if [[ -z ${local_password} ]]; then - echo - echo "ERROR! Airflow Admin password not set via _AIRFLOW_WWW_USER_PASSWORD or _AIRFLOW_WWW_USER_PASSWORD_CMD variables!" - echo - exit 1 - fi - - airflow users create \ - --username "${_AIRFLOW_WWW_USER_USERNAME="admin"}" \ - --firstname "${_AIRFLOW_WWW_USER_FIRSTNAME="Airflow"}" \ - --lastname "${_AIRFLOW_WWW_USER_LASTNAME="Admin"}" \ - --email "${_AIRFLOW_WWW_USER_EMAIL="airflowadmin@example.com"}" \ - --role "${_AIRFLOW_WWW_USER_ROLE="Admin"}" \ - --password "${local_password}" || true -} - -function create_system_user_if_missing() { - # This is needed in case of OpenShift-compatible container execution. In case of OpenShift random - # User id is used when starting the image, however group 0 is kept as the user group. Our production - # Image is OpenShift compatible, so all permissions on all folders are set so that 0 group can exercise - # the same privileges as the default "airflow" user, this code checks if the user is already - # present in /etc/passwd and will create the system user dynamically, including setting its - # HOME directory to the /home/airflow so that (for example) the ${HOME}/.local folder where airflow is - # Installed can be automatically added to PYTHONPATH - if ! whoami &> /dev/null; then - if [[ -w /etc/passwd ]]; then - echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${AIRFLOW_USER_HOME_DIR}:/sbin/nologin" \ - >> /etc/passwd - fi - export HOME="${AIRFLOW_USER_HOME_DIR}" - fi -} - -function set_pythonpath_for_root_user() { - # Airflow is installed as a local user application which means that if the container is running as root - # the application is not available. 
because Python then only load system-wide applications. - # Now also adds applications installed as local user "airflow". - if [[ $UID == "0" ]]; then - local python_major_minor - python_major_minor="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" - export PYTHONPATH="${AIRFLOW_USER_HOME_DIR}/.local/lib/python${python_major_minor}/site-packages:${PYTHONPATH:-}" - >&2 echo "The container is run as root user. For security, consider using a regular user account." - fi -} - -function wait_for_airflow_db() { - # Wait for the command to run successfully to validate the database connection. - run_check_with_retries "airflow db check" -} - -function migrate_db() { - # Runs airflow db migrate - airflow db migrate || true -} - -function wait_for_celery_broker() { - # Verifies connection to Celery Broker - local executor - executor="$(airflow config get-value core executor)" - if [[ "${executor}" == "CeleryExecutor" ]]; then - local connection_url - connection_url="$(airflow config get-value celery broker_url)" - wait_for_connection "${connection_url}" - fi -} - -function exec_to_bash_or_python_command_if_specified() { - # If one of the commands: 'bash', 'python' is used, either run appropriate - # command with exec - if [[ ${AIRFLOW_COMMAND} == "bash" ]]; then - shift - exec "/bin/bash" "${@}" - elif [[ ${AIRFLOW_COMMAND} == "python" ]]; then - shift - exec "python" "${@}" - fi -} - -function check_uid_gid() { - if [[ $(id -g) == "0" ]]; then - return - fi - if [[ $(id -u) == "50000" ]]; then - >&2 echo - >&2 echo "WARNING! You should run the image with GID (Group ID) set to 0" - >&2 echo " even if you use 'airflow' user (UID=50000)" - >&2 echo - >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" - >&2 echo - >&2 echo " This is to make sure you can run the image with an arbitrary UID in the future." 
- >&2 echo - >&2 echo " See more about it in the Airflow's docker image documentation" - >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" - >&2 echo - # We still allow the image to run with `airflow` user. - return - else - >&2 echo - >&2 echo "ERROR! You should run the image with GID=0" - >&2 echo - >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" - >&2 echo - >&2 echo "The image should always be run with GID (Group ID) set to 0 regardless of the UID used." - >&2 echo " This is to make sure you can run the image with an arbitrary UID." - >&2 echo - >&2 echo " See more about it in the Airflow's docker image documentation" - >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" - # This will not work so we fail hard - exit 1 - fi -} - -unset PIP_USER - -check_uid_gid - -umask 0002 - -CONNECTION_CHECK_MAX_COUNT=${CONNECTION_CHECK_MAX_COUNT:=20} -readonly CONNECTION_CHECK_MAX_COUNT - -CONNECTION_CHECK_SLEEP_TIME=${CONNECTION_CHECK_SLEEP_TIME:=3} -readonly CONNECTION_CHECK_SLEEP_TIME - -create_system_user_if_missing -set_pythonpath_for_root_user -if [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then - wait_for_airflow_db -fi - -if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] || [[ -n "${_AIRFLOW_DB_MIGRATE=}" ]] ; then - migrate_db -fi - -if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] ; then - >&2 echo "WARNING: Environment variable '_AIRFLOW_DB_UPGRADE' is deprecated please use '_AIRFLOW_DB_MIGRATE' instead" -fi - -if [[ -n "${_AIRFLOW_WWW_USER_CREATE=}" ]] ; then - create_www_user -fi - -if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then - >&2 echo - >&2 echo "!!!!! Installing additional requirements: '${_PIP_ADDITIONAL_REQUIREMENTS}' !!!!!!!!!!!!" - >&2 echo - >&2 echo "WARNING: This is a development/test feature only. NEVER use it in production!" 
- >&2 echo " Instead, build a custom image as described in" - >&2 echo - >&2 echo " https://airflow.apache.org/docs/docker-stack/build.html" - >&2 echo - >&2 echo " Adding requirements at container startup is fragile and is done every time" - >&2 echo " the container starts, so it is only useful for testing and trying out" - >&2 echo " of adding dependencies." - >&2 echo - pip install --root-user-action ignore ${_PIP_ADDITIONAL_REQUIREMENTS} -fi - - -exec_to_bash_or_python_command_if_specified "${@}" - -if [[ ${AIRFLOW_COMMAND} == "airflow" ]]; then - AIRFLOW_COMMAND="${2:-}" - shift -fi - -if [[ ${AIRFLOW_COMMAND} =~ ^(scheduler|celery)$ ]] \ - && [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then - wait_for_celery_broker -fi - -exec "airflow" "${@}" -EOF - -# The content below is automatically copied from scripts/docker/clean-logs.sh -COPY <<"EOF" /clean-logs.sh -#!/usr/bin/env bash - - -set -euo pipefail - -readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}" -readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}" - -trap "exit" INT TERM - -readonly EVERY=$((15*60)) - -echo "Cleaning logs every $EVERY seconds" - -while true; do - echo "Trimming airflow logs to ${RETENTION} days." - find "${DIRECTORY}"/logs \ - -type d -name 'lost+found' -prune -o \ - -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \ - xargs -0 rm -f - - find "${DIRECTORY}"/logs -type d -empty -delete || true - - seconds=$(( $(date -u +%s) % EVERY)) - (( seconds < 1 )) || sleep $((EVERY - seconds - 1)) - sleep 1 -done -EOF - -# The content below is automatically copied from scripts/docker/airflow-scheduler-autorestart.sh -COPY <<"EOF" /airflow-scheduler-autorestart.sh -#!/usr/bin/env bash - -while echo "Running"; do - airflow scheduler -n 5 - return_code=$? - if (( return_code != 0 )); then - echo "Scheduler crashed with exit code $return_code. Respawning.." 
>&2 - date >> /tmp/airflow_scheduler_errors.txt - fi - - sleep 1 -done -EOF - -############################################################################################## -# This is the build image where we build all dependencies -############################################################################################## -FROM ${PYTHON_BASE_IMAGE} as airflow-build-image - -# Nolog bash flag is currently ignored - but you can replace it with -# xtrace - to show commands executed) -SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] - -ARG PYTHON_BASE_IMAGE -ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ - DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ - PIP_CACHE_DIR=/tmp/.cache/pip - -ARG DEV_APT_DEPS="" -ARG ADDITIONAL_DEV_APT_DEPS="" -ARG DEV_APT_COMMAND="" -ARG ADDITIONAL_DEV_APT_COMMAND="" -ARG ADDITIONAL_DEV_APT_ENV="" - -ENV DEV_APT_DEPS=${DEV_APT_DEPS} \ - ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \ - DEV_APT_COMMAND=${DEV_APT_COMMAND} \ - ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND} \ - ADDITIONAL_DEV_APT_ENV=${ADDITIONAL_DEV_APT_ENV} - -COPY --from=scripts install_os_dependencies.sh /scripts/docker/ -RUN bash /scripts/docker/install_os_dependencies.sh dev - -ARG INSTALL_MYSQL_CLIENT="true" -ARG INSTALL_MYSQL_CLIENT_TYPE="mariadb" -ARG INSTALL_MSSQL_CLIENT="true" -ARG INSTALL_POSTGRES_CLIENT="true" -ARG AIRFLOW_PIP_VERSION - -ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ - INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ - INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \ - INSTALL_POSTGRES_CLIENT=${INSTALL_POSTGRES_CLIENT} - -COPY --from=scripts common.sh /scripts/docker/ - -# Only copy mysql/mssql installation scripts for now - so that changing the other -# scripts which are needed much later will not invalidate the docker layer here -COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh 
/scripts/docker/ - -RUN bash /scripts/docker/install_mysql.sh dev && \ - bash /scripts/docker/install_mssql.sh dev && \ - bash /scripts/docker/install_postgres.sh dev -ENV PATH=${PATH}:/opt/mssql-tools/bin - -# By default we do not install from docker context files but if we decide to install from docker context -# files, we should override those variables to "docker-context-files" -ARG DOCKER_CONTEXT_FILES="Dockerfile" - -COPY ${DOCKER_CONTEXT_FILES} /docker-context-files - -ARG AIRFLOW_HOME -ARG AIRFLOW_USER_HOME_DIR -ARG AIRFLOW_UID - -RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ - --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" && \ - mkdir -p ${AIRFLOW_HOME} && chown -R "airflow:0" "${AIRFLOW_USER_HOME_DIR}" ${AIRFLOW_HOME} - -USER airflow - -ARG AIRFLOW_REPO=apache/airflow -ARG AIRFLOW_BRANCH=main -ARG AIRFLOW_EXTRAS -ARG ADDITIONAL_AIRFLOW_EXTRAS="" -# Allows to override constraints source -ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow" -ARG AIRFLOW_CONSTRAINTS_MODE="constraints" -ARG AIRFLOW_CONSTRAINTS_REFERENCE="" -ARG AIRFLOW_CONSTRAINTS_LOCATION="" -ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" - -# By default PIP has progress bar but you can disable it. -ARG PIP_PROGRESS_BAR -# By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up -# builds in case setup.py/setup.cfg changed. This is pure optimisation of CI/Breeze builds. -ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" -# This is airflow version that is put in the label of the image build -ARG AIRFLOW_VERSION -# By default latest released version of airflow is installed (when empty) but this value can be overridden -# and we can install version according to specification (For example ==2.0.2 or <3.0.0). 
-ARG AIRFLOW_VERSION_SPECIFICATION -# By default we install providers from PyPI but in case of Breeze build we want to install providers -# from local sources without the need of preparing provider packages upfront. This value is -# automatically overridden by Breeze scripts. -ARG INSTALL_PROVIDERS_FROM_SOURCES="false" -# Determines the way airflow is installed. By default we install airflow from PyPI `apache-airflow` package -# But it also can be `.` from local installation or GitHub URL pointing to specific branch or tag -# Of Airflow. Note That for local source installation you need to have local sources of -# Airflow checked out together with the Dockerfile and AIRFLOW_SOURCES_FROM and AIRFLOW_SOURCES_TO -# set to "." and "/opt/airflow" respectively. -ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" -# By default we do not upgrade to latest dependencies -ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" -# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow -# but in case of breeze/CI builds we use latest sources and we override those -# those SOURCES_FROM/TO with "." 
and "/opt/airflow" respectively -ARG AIRFLOW_SOURCES_FROM="Dockerfile" -ARG AIRFLOW_SOURCES_TO="/Dockerfile" - - -RUN if [[ -f /docker-context-files/pip.conf ]]; then \ - mkdir -p ${AIRFLOW_USER_HOME_DIR}/.config/pip; \ - cp /docker-context-files/pip.conf "${AIRFLOW_USER_HOME_DIR}/.config/pip/pip.conf"; \ - fi; \ - if [[ -f /docker-context-files/.piprc ]]; then \ - cp /docker-context-files/.piprc "${AIRFLOW_USER_HOME_DIR}/.piprc"; \ - fi - -# Additional PIP flags passed to all pip install commands except reinstalling pip itself -ARG ADDITIONAL_PIP_INSTALL_FLAGS="" - -ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ - AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ - AIRFLOW_VERSION=${AIRFLOW_VERSION} \ - AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ - AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ - AIRFLOW_SOURCES_FROM=${AIRFLOW_SOURCES_FROM} \ - AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO} \ - AIRFLOW_REPO=${AIRFLOW_REPO} \ - AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \ - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \ - CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \ - AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \ - AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \ - AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ - DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ - PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \ - AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ - PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ - ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \ - AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ - AIRFLOW_HOME=${AIRFLOW_HOME} \ - AIRFLOW_UID=${AIRFLOW_UID} \ - AIRFLOW_INSTALL_EDITABLE_FLAG="" \ - UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \ - # By default PIP installs everything to ~/.local - 
PIP_USER="true" - -# Copy all scripts required for installation - changing any of those should lead to -# rebuilding from here -COPY --from=scripts common.sh install_pip_version.sh \ - install_airflow_dependencies_from_branch_tip.sh /scripts/docker/ - -# We can set this value to true in case we want to install .whl/.tar.gz packages placed in the -# docker-context-files folder. This can be done for both additional packages you want to install -# as well as Airflow and Provider packages (it will be automatically detected if airflow -# is installed from docker-context files rather than from PyPI) -ARG INSTALL_PACKAGES_FROM_CONTEXT="false" - -# Normally constraints are not used when context packages are build - because we might have packages -# that are conflicting with Airflow constraints, however there are cases when we want to use constraints -# for example in CI builds when we already have source-package constraints - either from github branch or -# from eager-upgraded constraints by the CI builds -ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" - -# In case of Production build image segment we want to pre-install main version of airflow -# dependencies from GitHub so that we do not have to always reinstall it from the scratch. -# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") -# are uninstalled, only dependencies remain -# the cache is only used when "upgrade to newer dependencies" is not set to automatically -# account for removed dependencies (we do not install them in the first place) and in case -# INSTALL_PACKAGES_FROM_CONTEXT is not set (because then caching it from main makes no sense). 
-RUN bash /scripts/docker/install_pip_version.sh; \ - if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ - ${INSTALL_PACKAGES_FROM_CONTEXT} == "false" && \ - ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \ - bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ - fi - -COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO} - -# Add extra python dependencies -ARG ADDITIONAL_PYTHON_DEPS="" - - -ARG VERSION_SUFFIX_FOR_PYPI="" - -ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ - INSTALL_PACKAGES_FROM_CONTEXT=${INSTALL_PACKAGES_FROM_CONTEXT} \ - USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES} \ - VERSION_SUFFIX_FOR_PYPI=${VERSION_SUFFIX_FOR_PYPI} - -WORKDIR ${AIRFLOW_HOME} - -COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ - install_additional_dependencies.sh /scripts/docker/ - -# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from -# an incorrect architecture. -ARG TARGETARCH -# Value to be able to easily change cache id and therefore use a bare new cache -ARG PIP_CACHE_EPOCH="0" - -# hadolint ignore=SC2086, SC2010, DL3042 -RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ - if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ - bash /scripts/docker/install_from_docker_context_files.sh; \ - fi; \ - if ! 
airflow version 2>/dev/null >/dev/null; then \ - bash /scripts/docker/install_airflow.sh; \ - fi; \ - if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ - bash /scripts/docker/install_additional_dependencies.sh; \ - fi; \ - find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0 rm -f || true ; \ - find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; \ - # make sure that all directories and files in .local are also group accessible - find "${AIRFLOW_USER_HOME_DIR}/.local" -executable -print0 | xargs --null chmod g+x; \ - find "${AIRFLOW_USER_HOME_DIR}/.local" -print0 | xargs --null chmod g+rw - -# In case there is a requirements.txt file in "docker-context-files" it will be installed -# during the build additionally to whatever has been installed so far. It is recommended that -# the requirements.txt contains only dependencies with == version specification -# hadolint ignore=DL3042 -RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ - if [[ -f /docker-context-files/requirements.txt ]]; then \ - pip install --user -r /docker-context-files/requirements.txt; \ - fi - -############################################################################################## -# This is the actual Airflow image - much smaller than the build one. We copy -# installed Airflow and all its dependencies from the build image to make it smaller. 
-############################################################################################## -FROM ${PYTHON_BASE_IMAGE} as main - -# Nolog bash flag is currently ignored - but you can replace it with other flags (for example -# xtrace - to show commands executed) -SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] - -ARG AIRFLOW_UID - -LABEL org.apache.airflow.distro="debian" \ - org.apache.airflow.module="airflow" \ - org.apache.airflow.component="airflow" \ - org.apache.airflow.image="airflow" \ - org.apache.airflow.uid="${AIRFLOW_UID}" - -ARG PYTHON_BASE_IMAGE -ARG AIRFLOW_PIP_VERSION - -ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ - # Make sure noninteractive debian install is used and language variables set - DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib \ - AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} - -ARG RUNTIME_APT_DEPS="" -ARG ADDITIONAL_RUNTIME_APT_DEPS="" -ARG RUNTIME_APT_COMMAND="echo" -ARG ADDITIONAL_RUNTIME_APT_COMMAND="" -ARG ADDITIONAL_RUNTIME_APT_ENV="" -ARG INSTALL_MYSQL_CLIENT="true" -ARG INSTALL_MYSQL_CLIENT_TYPE="mysql" -ARG INSTALL_MSSQL_CLIENT="true" -ARG INSTALL_POSTGRES_CLIENT="true" - -ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \ - ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS} \ - RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND} \ - ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND} \ - INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ - INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ - INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \ - INSTALL_POSTGRES_CLIENT=${INSTALL_POSTGRES_CLIENT} \ - GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm" \ - AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} - -COPY --from=scripts install_os_dependencies.sh /scripts/docker/ -RUN bash /scripts/docker/install_os_dependencies.sh runtime - -# Having the variable in final image allows to 
disable providers manager warnings when -# production image is prepared from sources rather than from package -ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" -ARG AIRFLOW_IMAGE_REPOSITORY -ARG AIRFLOW_IMAGE_README_URL -ARG AIRFLOW_USER_HOME_DIR -ARG AIRFLOW_HOME - -# By default PIP installs everything to ~/.local -ENV PATH="${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH}" \ - AIRFLOW_UID=${AIRFLOW_UID} \ - AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ - AIRFLOW_HOME=${AIRFLOW_HOME} - -# THE 3 LINES ARE ONLY NEEDED IN ORDER TO MAKE PYMSSQL BUILD WORK WITH LATEST CYTHON -# AND SHOULD BE REMOVED WHEN WORKAROUND IN install_mssql.sh IS REMOVED -ARG AIRFLOW_PIP_VERSION=23.3.2 -ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} -COPY --from=scripts common.sh /scripts/docker/ - -# Only copy mysql/mssql installation scripts for now - so that changing the other -# scripts which are needed much later will not invalidate the docker layer here. -COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/ -# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an -# unexpected result - the cache for Dockerfiles might get invalidated in case the host system -# had different umask set and group x bit was not set. In Azure the bit might be not set at all. -# That also protects against AUFS Docker backend problem where changing the executable bit required sync -RUN bash /scripts/docker/install_mysql.sh prod \ - && bash /scripts/docker/install_mssql.sh prod \ - && bash /scripts/docker/install_postgres.sh prod \ - && adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ - --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" \ -# Make Airflow files belong to the root group and are accessible. 
This is to accommodate the guidelines from -# OpenShift https://docs.openshift.com/enterprise/3.0/creating_images/guidelines.html - && mkdir -pv "${AIRFLOW_HOME}" \ - && mkdir -pv "${AIRFLOW_HOME}/dags" \ - && mkdir -pv "${AIRFLOW_HOME}/logs" \ - && chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ - && chmod -R g+rw "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ - && find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x \ - && find "${AIRFLOW_USER_HOME_DIR}" -executable -print0 | xargs --null chmod g+x - -COPY --from=airflow-build-image --chown=airflow:0 \ - "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local" -COPY --from=scripts entrypoint_prod.sh /entrypoint -COPY --from=scripts clean-logs.sh /clean-logs -COPY --from=scripts airflow-scheduler-autorestart.sh /airflow-scheduler-autorestart - -# Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift -# See https://github.com/apache/airflow/issues/9248 -# Set default groups for airflow and root user - -RUN chmod a+rx /entrypoint /clean-logs \ - && chmod g=u /etc/passwd \ - && chmod g+w "${AIRFLOW_USER_HOME_DIR}/.local" \ - && usermod -g 0 airflow -G 0 - -# make sure that the venv is activated for all users -# including plain sudo, sudo with --interactive flag -RUN sed --in-place=.bak "s/secure_path=\"/secure_path=\"\/.venv\/bin:/" /etc/sudoers - -ARG AIRFLOW_VERSION - -COPY --from=scripts install_pip_version.sh /scripts/docker/ -RUN bash /scripts/docker/install_pip_version.sh - -# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation -# to learn more about the way how signals are handled by the image -# Also set airflow as nice PROMPT message. 
-ENV DUMB_INIT_SETSID="1" \ - PS1="(airflow)" \ - AIRFLOW_VERSION=${AIRFLOW_VERSION} \ - AIRFLOW__CORE__LOAD_EXAMPLES="false" \ - PIP_USER="true" \ - PATH="/root/bin:${PATH}" - -# Add protection against running pip as root user -RUN mkdir -pv /root/bin -COPY --from=scripts pip /root/bin/pip -RUN chmod u+x /root/bin/pip - -WORKDIR ${AIRFLOW_HOME} - -EXPOSE 8080 - -USER ${AIRFLOW_UID} - -# Those should be set and used as late as possible as any change in commit/build otherwise invalidates the -# layers right after -ARG BUILD_ID -ARG COMMIT_SHA -ARG AIRFLOW_IMAGE_REPOSITORY -ARG AIRFLOW_IMAGE_DATE_CREATED - -ENV BUILD_ID=${BUILD_ID} COMMIT_SHA=${COMMIT_SHA} - -LABEL org.apache.airflow.distro="debian" \ - org.apache.airflow.module="airflow" \ - org.apache.airflow.component="airflow" \ - org.apache.airflow.image="airflow" \ - org.apache.airflow.version="${AIRFLOW_VERSION}" \ - org.apache.airflow.uid="${AIRFLOW_UID}" \ - org.apache.airflow.main-image.build-id="${BUILD_ID}" \ - org.apache.airflow.main-image.commit-sha="${COMMIT_SHA}" \ - org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \ - org.opencontainers.image.created=${AIRFLOW_IMAGE_DATE_CREATED} \ - org.opencontainers.image.authors="dev@airflow.apache.org" \ - org.opencontainers.image.url="https://airflow.apache.org" \ - org.opencontainers.image.documentation="https://airflow.apache.org/docs/docker-stack/index.html" \ - org.opencontainers.image.version="${AIRFLOW_VERSION}" \ - org.opencontainers.image.revision="${COMMIT_SHA}" \ - org.opencontainers.image.vendor="Apache Software Foundation" \ - org.opencontainers.image.licenses="Apache-2.0" \ - org.opencontainers.image.ref.name="airflow" \ - org.opencontainers.image.title="Production Airflow Image" \ - org.opencontainers.image.description="Reference, production-ready Apache Airflow image" -ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"] -CMD [] diff --git a/chart/values.schema.json b/chart/values.schema.json index 1cd1adec84180..7c5806ab48550 
100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -1567,7 +1567,7 @@ } ], "items": { - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetrics" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricSpec" } }, "behavior": { @@ -7456,6 +7456,72 @@ "type": "object", "additionalProperties": false }, + "io.k8s.api.autoscaling.v2beta2.ContainerResourceMetricSource": { + "description": "ContainerResourceMetricSource indicates how to scale on a resource metric known to Kubernetes, as specified in requests and limits, describing each pod in the current scale target (e.g. CPU or memory). The values will be averaged together before being compared to the target. Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source. Only one \"target\" type should be set.", + "properties": { + "container": { + "description": "container is the name of the container in the pods of the scaling target", + "type": "string" + }, + "name": { + "description": "name is the name of the resource in question.", + "type": "string" + }, + "target": { + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricTarget", + "description": "target specifies the target value for the given metric" + } + }, + "required": [ + "name", + "target", + "container" + ], + "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2beta2.CrossVersionObjectReference": { + "description": "CrossVersionObjectReference contains enough information to let you identify the referred resource.", + "properties": { + "apiVersion": { + "description": "API version of the referent", + "type": "string" + }, + "kind": { + "description": "Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds", + "type": "string" + }, + "name": { + "description": "Name of the referent; More info: 
http://kubernetes.io/docs/user-guide/identifiers#names", + "type": "string" + } + }, + "required": [ + "kind", + "name" + ], + "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2beta2.ExternalMetricSource": { + "description": "ExternalMetricSource indicates how to scale on a metric not associated with any Kubernetes object (for example length of queue in cloud messaging service, or QPS from loadbalancer running outside of cluster).", + "properties": { + "metric": { + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricIdentifier", + "description": "metric identifies the target metric by name and selector" + }, + "target": { + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricTarget", + "description": "target specifies the target value for the given metric" + } + }, + "required": [ + "metric", + "target" + ], + "type": "object", + "additionalProperties": false + }, "io.k8s.api.autoscaling.v2beta2.HPAScalingPolicy": { "description": "HPAScalingPolicy is a single policy which must hold true for a specified past interval.", "properties": { @@ -7520,191 +7586,145 @@ "type": "object", "additionalProperties": false }, - "io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetrics": { - "description": "HorizontalPodAutoscalerMetrics contains the specifications for which to use to calculate the desired replica count (the maximum replica count across all metrics will be used)", + "io.k8s.api.autoscaling.v2beta2.MetricIdentifier": { + "description": "MetricIdentifier defines the name and optionally selector for a metric", + "properties": { + "name": { + "description": "name is the name of the given metric", + "type": "string" + }, + "selector": { + "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.LabelSelector", + "description": "selector is the string-encoded form of a standard kubernetes label selector for the given metric. When set, it is passed as an additional parameter to the metrics server for more specific 
metrics scoping. When unset, just the metricName will be used to gather metrics." + } + }, + "required": [ + "name" + ], + "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2beta2.MetricSpec": { + "description": "MetricSpec specifies how to scale based on a single metric (only `type` and one other matching field should be set at once).", "properties": { + "containerResource": { + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.ContainerResourceMetricSource", + "description": "container resource refers to a resource metric (such as those specified in requests and limits) known to Kubernetes describing a single container in each pod of the current scale target (e.g. CPU or memory). Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source. This is an alpha feature and can be enabled by the HPAContainerMetrics feature flag." + }, "external": { - "description": "external refers to a global metric that is not associated with any Kubernetes object. It allows autoscaling based on information coming from components running outside of cluster (for example length of queue in cloud messaging service, or QPS from loadbalancer running outside of cluster).", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.ExternalMetricStatus" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.ExternalMetricSource", + "description": "external refers to a global metric that is not associated with any Kubernetes object. It allows autoscaling based on information coming from components running outside of cluster (for example length of queue in cloud messaging service, or QPS from loadbalancer running outside of cluster)." 
}, "object": { - "description": "object refers to a metric describing a single kubernetes object (for example, hits-per-second on an Ingress object).", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.ObjectMetricStatus" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.ObjectMetricSource", + "description": "object refers to a metric describing a single kubernetes object (for example, hits-per-second on an Ingress object)." }, "pods": { - "description": "pods refers to a metric describing each pod in the current scale target (for example, transactions-processed-per-second). The values will be averaged together before being compared to the target value.", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.PodsMetricStatus" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.PodsMetricSource", + "description": "pods refers to a metric describing each pod in the current scale target (for example, transactions-processed-per-second). The values will be averaged together before being compared to the target value." }, "resource": { - "description": "resource refers to a resource metric (such as those specified in requests and limits) known to Kubernetes describing each pod in the current scale target (e.g. CPU or memory). Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source.", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.ResourceMetricStatus" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.ResourceMetricSource", + "description": "resource refers to a resource metric (such as those specified in requests and limits) known to Kubernetes describing each pod in the current scale target (e.g. CPU or memory). Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source." }, "type": { - "description": "type is the type of metric source. 
It will be one of \"Object\", \"Pods\" or \"Resource\", each corresponds to a matching field in the object.", - "type": [ - "string", - "null" - ] + "description": "type is the type of metric source. It should be one of \"ContainerResource\", \"External\", \"Object\", \"Pods\" or \"Resource\", each mapping to a matching field in the object. Note: \"ContainerResource\" type is available on when the feature-gate HPAContainerMetrics is enabled", + "type": "string" } }, - "type": "object", - "additionalProperties": false - }, - "io.k8s.api.autoscaling.v2.ExternalMetricStatus": { - "description": "ExternalMetricStatus indicates the current value of a global metric not associated with any Kubernetes object.", "required": [ - "metric", - "target" + "type" ], "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2beta2.MetricTarget": { + "description": "MetricTarget defines the target value, average value, or average utilization of a specific metric", "properties": { - "metric": { - "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier" + "averageUtilization": { + "description": "averageUtilization is the target value of the average of the resource metric across all relevant pods, represented as a percentage of the requested value of the resource for the pods. 
Currently only valid for Resource metric source type", + "format": "int32", + "type": "integer" }, - "target": { - "description": "target specifies the target value for the given metric", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + "averageValue": { + "$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity", + "description": "averageValue is the target value of the average of the metric across all relevant pods (as a quantity)" + }, + "type": { + "description": "type represents whether the metric type is Utilization, Value, or AverageValue", + "type": "string" + }, + "value": { + "$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity", + "description": "value is the target value of the metric (as a quantity)." } - } - }, - "io.k8s.api.autoscaling.v2.ObjectMetricStatus": { - "description": "ObjectMetricStatus indicates the current value of a metric describing a kubernetes object (for example, hits-per-second on an Ingress object).", + }, "required": [ - "describedObject", - "metric", - "target" + "type" ], "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2beta2.ObjectMetricSource": { + "description": "ObjectMetricSource indicates how to scale on a metric describing a kubernetes object (for example, hits-per-second on an Ingress object).", "properties": { "describedObject": { - "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.CrossVersionObjectReference" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.CrossVersionObjectReference" }, "metric": { - "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricIdentifier", + "description": "metric identifies the target 
metric by name and selector" }, "target": { - "description": "target specifies the target value for the given metric", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricTarget", + "description": "target specifies the target value for the given metric" } - } - }, - "io.k8s.api.autoscaling.v2.PodsMetricStatus": { - "description": "PodsMetricStatus indicates the current value of a metric describing each pod in the current scale target (for example, transactions-processed-per-second).", + }, "required": [ - "metric", - "target" + "describedObject", + "target", + "metric" ], "type": "object", + "additionalProperties": false + }, + "io.k8s.api.autoscaling.v2beta2.PodsMetricSource": { + "description": "PodsMetricSource indicates how to scale on a metric describing each pod in the current scale target (for example, transactions-processed-per-second). The values will be averaged together before being compared to the target value.", "properties": { "metric": { - "description": "currentAverageValue is the current value of metric averaged over autoscaled pods.", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricIdentifier", + "description": "metric identifies the target metric by name and selector" }, "target": { - "description": "target specifies the target value for the given metric", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricTarget", + "description": "target specifies the target value for the given metric" } - } - }, - "io.k8s.api.autoscaling.v2.ResourceMetricStatus": { - "description": "ResourceMetricStatus indicates the current value of a resource metric known to Kubernetes, as specified in requests and limits, describing each pod in the current scale target (e.g. 
CPU or memory). Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source.", + }, "required": [ - "name", + "metric", "target" ], "type": "object", - "properties": { - "name": { - "description": "name is the name of the resource in question.", - "type": [ - "string", - "null" - ] - }, - "target": { - "description": "target specifies the target value for the given metric", - "$ref": "#/definitions/io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget" - } - } - }, - "io.k8s.api.autoscaling.v2.CrossVersionObjectReference": { - "description": "CrossVersionObjectReference contains enough information to let you identify the referred resource.", - "type": "object", - "required": [ - "kind", - "name" - ], - "properties": { - "apiVersion": { - "description": "API version of the referent", - "type": [ - "string", - "null" - ] - }, - "kind": { - "description": "Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds\"", - "type": [ - "string", - "null" - ] - }, - "name": { - "description": "Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names", - "type": [ - "string", - "null" - ] - } - } + "additionalProperties": false }, - "io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricIdentifier": { - "description": "metric identifies the target metric by name and selector", - "required": [ - "name" - ], + "io.k8s.api.autoscaling.v2beta2.ResourceMetricSource": { + "description": "ResourceMetricSource indicates how to scale on a resource metric known to Kubernetes, as specified in requests and limits, describing each pod in the current scale target (e.g. CPU or memory). The values will be averaged together before being compared to the target. 
Such metrics are built in to Kubernetes, and have special scaling options on top of those available to normal per-pod metrics using the \"pods\" source. Only one \"target\" type should be set.", "properties": { "name": { - "description": "name is the name of the given metric", + "description": "name is the name of the resource in question.", "type": "string" }, - "selector": { - "description": "selector is the string-encoded form of a standard kubernetes label selector for the given metric", - "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.LabelSelector" + "target": { + "$ref": "#/definitions/io.k8s.api.autoscaling.v2beta2.MetricTarget", + "description": "target specifies the target value for the given metric" } - } - }, - "io.k8s.api.autoscaling.v2.HorizontalPodAutoscalerMetricTarget": { - "description": "target specifies the target value for the given metric", + }, "required": [ - "type" + "name", + "target" ], - "properties": { - "type": { - "description": "type represents whether the metric type is Utilization, Value, or AverageValue", - "type": "string", - "enum": [ - "Utilization", - "Value", - "AverageValue" - ] - }, - "averageUtilization": { - "description": "averageUtilization is the target value of the average of the resource metric across all relevant pods, represented as a percentage of the requested value of the resource for the pods", - "type": "integer" - }, - "averageValue": { - "description": "averageValue is the target value of the average of the metric across all relevant pods (as a quantity)", - "$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity" - }, - "value": { - "description": "value is the target value of the metric (as a quantity).", - "$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity" - } - } + "type": "object", + "additionalProperties": false }, "io.k8s.api.core.v1.AWSElasticBlockStoreVolumeSource": { "description": "Represents a Persistent Disk resource in AWS.\n\nAn AWS EBS disk must 
exist before mounting to a container. The disk must also be in the same AWS zone as the kubelet. An AWS EBS disk can only be mounted as read/write once. AWS EBS volumes support ownership management and SELinux relabeling.", From 1c4ecc18ff8d4928823d959c1b25cacccb9b7fbd Mon Sep 17 00:00:00 2001 From: Pedro Miranda Date: Wed, 3 Jan 2024 16:36:35 -0500 Subject: [PATCH 10/10] Resolve conflict --- Dockerfile | 3216 ++++++++++++++++++++++++++-------------------------- 1 file changed, 1608 insertions(+), 1608 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5ce0652aa794b..c1ed35a61ebd8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,1608 +1,1608 @@ -# syntax=docker/dockerfile:1.4 -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# THIS DOCKERFILE IS INTENDED FOR PRODUCTION USE AND DEPLOYMENT. -# NOTE! IT IS ALPHA-QUALITY FOR NOW - WE ARE IN A PROCESS OF TESTING IT -# -# -# This is a multi-segmented image. It actually contains two images: -# -# airflow-build-image - there all airflow dependencies can be installed (and -# built - for those dependencies that require -# build essentials). 
Airflow is installed there with -# --user switch so that all the dependencies are -# installed to ${HOME}/.local -# -# main - this is the actual production image that is much -# smaller because it does not contain all the build -# essentials. Instead the ${HOME}/.local folder -# is copied from the build-image - this way we have -# only result of installation and we do not need -# all the build essentials. This makes the image -# much smaller. -# -# Use the same builder frontend version for everyone -ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,common.io,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" -ARG ADDITIONAL_AIRFLOW_EXTRAS="" -ARG ADDITIONAL_PYTHON_DEPS="" - -ARG AIRFLOW_HOME=/opt/airflow -ARG AIRFLOW_UID="50000" -ARG AIRFLOW_USER_HOME_DIR=/home/airflow - -# latest released version here -ARG AIRFLOW_VERSION="2.8.0" - -ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" - -ARG AIRFLOW_PIP_VERSION=23.3.2 -ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" -ARG AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" - -# By default latest released version of airflow is installed (when empty) but this value can be overridden -# and we can install version according to specification (For example ==2.0.2 or <3.0.0). -ARG AIRFLOW_VERSION_SPECIFICATION="" - -# By default PIP has progress bar but you can disable it. 
-ARG PIP_PROGRESS_BAR="on" - -############################################################################################## -# This is the script image where we keep all inlined bash scripts needed in other segments -############################################################################################## -FROM scratch as scripts - -############################################################################################## -# Please DO NOT modify the inlined scripts manually. The content of those files will be -# replaced by pre-commit automatically from the "scripts/docker/" folder. -# This is done in order to avoid problems with caching and file permissions and in order to -# make the PROD Dockerfile standalone -############################################################################################## - -# The content below is automatically copied from scripts/docker/install_os_dependencies.sh -COPY <<"EOF" /install_os_dependencies.sh -#!/usr/bin/env bash -set -euo pipefail - -DOCKER_CLI_VERSION=24.0.6 - -if [[ "$#" != 1 ]]; then - echo "ERROR! There should be 'runtime' or 'dev' parameter passed as argument.". - exit 1 -fi - -if [[ "${1}" == "runtime" ]]; then - INSTALLATION_TYPE="RUNTIME" -elif [[ "${1}" == "dev" ]]; then - INSTALLATION_TYPE="dev" -else - echo "ERROR! Wrong argument. Passed ${1} and it should be one of 'runtime' or 'dev'.". 
- exit 1 -fi - -function get_dev_apt_deps() { - if [[ "${DEV_APT_DEPS=}" == "" ]]; then - DEV_APT_DEPS="apt-transport-https apt-utils build-essential ca-certificates dirmngr \ -freetds-bin freetds-dev git gosu graphviz graphviz-dev krb5-user ldap-utils libffi-dev libgeos-dev \ -libkrb5-dev libldap2-dev libleveldb1d libleveldb-dev libsasl2-2 libsasl2-dev libsasl2-modules \ -libssl-dev libxmlsec1 libxmlsec1-dev locales lsb-release openssh-client pkgconf sasl2-bin \ -software-properties-common sqlite3 sudo unixodbc unixodbc-dev zlib1g-dev" - export DEV_APT_DEPS - fi -} - -function get_runtime_apt_deps() { - local debian_version - local debian_version_apt_deps - # Get debian version without installing lsb_release - # shellcheck disable=SC1091 - debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";) - echo - echo "DEBIAN CODENAME: ${debian_version}" - echo - if [[ "${debian_version}" == "bullseye" ]]; then - debian_version_apt_deps="libffi7 libldap-2.4-2 libssl1.1 netcat" - else - debian_version_apt_deps="libffi8 libldap-2.5-0 libssl3 netcat-openbsd" - fi - echo - echo "APPLIED INSTALLATION CONFIGURATION FOR DEBIAN VERSION: ${debian_version}" - echo - if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then - RUNTIME_APT_DEPS="apt-transport-https apt-utils ca-certificates \ -curl dumb-init freetds-bin gosu krb5-user libgeos-dev \ -ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales ${debian_version_apt_deps} \ -lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc" - export RUNTIME_APT_DEPS - fi -} - -function install_docker_cli() { - local platform - if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then - platform="aarch64" - else - platform="x86_64" - fi - curl --silent \ - "https://download.docker.com/linux/static/stable/${platform}/docker-${DOCKER_CLI_VERSION}.tgz" \ - | tar -C /usr/bin --strip-components=1 -xvzf - docker/docker -} - -function install_debian_dev_dependencies() { - apt-get update - apt-get install 
--no-install-recommends -yqq apt-utils >/dev/null 2>&1 - apt-get install -y --no-install-recommends curl gnupg2 lsb-release - # shellcheck disable=SC2086 - export ${ADDITIONAL_DEV_APT_ENV?} - if [[ ${DEV_APT_COMMAND} != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}" - fi - if [[ ${ADDITIONAL_DEV_APT_COMMAND} != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}" - fi - apt-get update - local debian_version - local debian_version_apt_deps - # Get debian version without installing lsb_release - # shellcheck disable=SC1091 - debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";) - echo - echo "DEBIAN CODENAME: ${debian_version}" - echo - if [[ "${debian_version}" == "bullseye" ]]; then - echo - echo "Bullseye detected - replacing dependencies in additional dev apt deps" - echo - # Replace dependencies in additional dev apt deps to be compatible with Bullseye - ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS//libgcc-11-dev/libgcc-10-dev} - ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS//netcat-openbsd/netcat} - echo - echo "Replaced bullseye dev apt dependencies" - echo "${ADDITIONAL_DEV_APT_COMMAND}" - echo - fi - - # shellcheck disable=SC2086 - apt-get install -y --no-install-recommends ${DEV_APT_DEPS} ${ADDITIONAL_DEV_APT_DEPS} -} - -function install_debian_runtime_dependencies() { - apt-get update - apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1 - apt-get install -y --no-install-recommends curl gnupg2 lsb-release - # shellcheck disable=SC2086 - export ${ADDITIONAL_RUNTIME_APT_ENV?} - if [[ "${RUNTIME_APT_COMMAND}" != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}" - fi - if [[ "${ADDITIONAL_RUNTIME_APT_COMMAND}" != "" ]]; then - bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}" - fi - apt-get update - # shellcheck disable=SC2086 - apt-get install -y 
--no-install-recommends ${RUNTIME_APT_DEPS} ${ADDITIONAL_RUNTIME_APT_DEPS} - apt-get autoremove -yqq --purge - apt-get clean - rm -rf /var/lib/apt/lists/* /var/log/* -} - -if [[ "${INSTALLATION_TYPE}" == "RUNTIME" ]]; then - get_runtime_apt_deps - install_debian_runtime_dependencies - install_docker_cli - -else - get_dev_apt_deps - install_debian_dev_dependencies - install_docker_cli -fi -EOF - -# The content below is automatically copied from scripts/docker/install_mysql.sh -COPY <<"EOF" /install_mysql.sh -#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -set -euo pipefail - -common::get_colors -declare -a packages - -readonly MYSQL_LTS_VERSION="8.0" -readonly MARIADB_LTS_VERSION="10.11" - -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_MYSQL_CLIENT_TYPE:-mariadb}" - -install_mysql_client() { - if [[ "${1}" == "dev" ]]; then - packages=("libmysqlclient-dev" "mysql-client") - elif [[ "${1}" == "prod" ]]; then - # `libmysqlclientXX` where XX is number, and it should be increased every new GA MySQL release, for example - # 18 - MySQL 5.6.48 - # 20 - MySQL 5.7.42 - # 21 - MySQL 8.0.34 - # 22 - MySQL 8.1 - packages=("libmysqlclient21" "mysql-client") - else - echo - echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}" - echo - exit 1 - fi - - common::import_trusted_gpg "B7B3B788A8D3785C" "mysql" - - echo - echo "${COLOR_BLUE}Installing Oracle MySQL client version ${MYSQL_LTS_VERSION}: ${1}${COLOR_RESET}" - echo - - echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_LTS_VERSION}" > \ - /etc/apt/sources.list.d/mysql.list - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" - apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* - - # Remove mysql repository from sources.list.d as MySQL repos have a basic flaw that they put expiry - # date on their GPG signing keys and they sign their repo with those keys. 
This means that after a - # certain date, the GPG key becomes invalid and if you have the repository added in your sources.list - # then you will not be able to install anything from any other repository. This id unlike any other - # repository we have seen (for example Postgres, MariaDB, MsSQL - all have non-expiring signing keys) - rm /etc/apt/sources.list.d/mysql.list -} - -install_mariadb_client() { - # List of compatible package Oracle MySQL -> MariaDB: - # `mysql-client` -> `mariadb-client` or `mariadb-client-compat` (11+) - # `libmysqlclientXX` (where XX is a number) -> `libmariadb3-compat` - # `libmysqlclient-dev` -> `libmariadb-dev-compat` - # - # Different naming against Debian repo which we used before - # that some of packages might contains `-compat` suffix, Debian repo -> MariaDB repo: - # `libmariadb-dev` -> `libmariadb-dev-compat` - # `mariadb-client-core` -> `mariadb-client` or `mariadb-client-compat` (11+) - if [[ "${1}" == "dev" ]]; then - packages=("libmariadb-dev-compat" "mariadb-client") - elif [[ "${1}" == "prod" ]]; then - packages=("libmariadb3-compat" "mariadb-client") - else - echo - echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}" - echo - exit 1 - fi - - common::import_trusted_gpg "0xF1656F24C74CD1D8" "mariadb" - - echo - echo "${COLOR_BLUE}Installing MariaDB client version ${MARIADB_LTS_VERSION}: ${1}${COLOR_RESET}" - echo "${COLOR_YELLOW}MariaDB client protocol-compatible with MySQL client.${COLOR_RESET}" - echo - - curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - - echo "deb [arch=amd64,arm64] https://archive.mariadb.org/mariadb-${MARIADB_LTS_VERSION}/repo/debian/ $(lsb_release -cs) main" > \ - /etc/apt/sources.list.d/mariadb.list - # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies - printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" > /etc/apt/preferences.d/mariadb - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" - 
apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* -} - -if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then - if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then - INSTALL_MYSQL_CLIENT_TYPE="mariadb" - echo - echo "${COLOR_YELLOW}Client forced to mariadb for ARM${COLOR_RESET}" - echo - fi - - if [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mysql" ]]; then - install_mysql_client "${@}" - elif [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mariadb" ]]; then - install_mariadb_client "${@}" - else - echo - echo "${COLOR_RED}Specify either mysql or mariadb, got ${INSTALL_MYSQL_CLIENT_TYPE}${COLOR_RESET}" - echo - exit 1 - fi -fi -EOF - -# The content below is automatically copied from scripts/docker/install_mssql.sh -COPY <<"EOF" /install_mssql.sh -#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -set -euo pipefail - -common::get_colors -declare -a packages - -: "${AIRFLOW_PIP_VERSION:?Should be set}" -: "${INSTALL_MSSQL_CLIENT:?Should be true or false}" - - -function install_mssql_client() { - # Install MsSQL client from Microsoft repositories - if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then - echo - echo "${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}" - echo - return - fi - packages=("msodbcsql18") - - common::import_trusted_gpg "EB3E94ADBE1229CF" "microsoft" - - echo - echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}" - echo - - echo "deb [arch=amd64,arm64] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod $(lsb_release -cs) main" > \ - /etc/apt/sources.list.d/mssql-release.list - apt-get update -yqq - apt-get upgrade -yqq - ACCEPT_EULA=Y apt-get -yqq install --no-install-recommends "${packages[@]}" - rm -rf /var/lib/apt/lists/* - apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* -} - -install_mssql_client "${@}" -EOF - -# The content below is automatically copied from scripts/docker/install_postgres.sh -COPY <<"EOF" /install_postgres.sh 
-#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" -set -euo pipefail - -common::get_colors -declare -a packages - -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" - -install_postgres_client() { - echo - echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}" - echo - - if [[ "${1}" == "dev" ]]; then - packages=("libpq-dev" "postgresql-client") - elif [[ "${1}" == "prod" ]]; then - packages=("postgresql-client") - else - echo - echo "Specify either prod or dev" - echo - exit 1 - fi - - common::import_trusted_gpg "7FCC7D46ACCC4CF8" "postgres" - - echo "deb [arch=amd64,arm64] https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > \ - /etc/apt/sources.list.d/pgdg.list - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" - apt-get autoremove -yqq --purge - apt-get clean && rm -rf /var/lib/apt/lists/* -} - -if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then - install_postgres_client "${@}" -fi -EOF - -# The content below is automatically copied from scripts/docker/install_pip_version.sh -COPY <<"EOF" /install_pip_version.sh -#!/usr/bin/env bash -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::show_pip_version_and_location - -common::install_pip_version -EOF - -# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh -COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh -#!/usr/bin/env bash - -. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_REPO:?Should be set}" -: "${AIRFLOW_BRANCH:?Should be set}" -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -function install_airflow_dependencies_from_branch_tip() { - echo - echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" - echo - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - # Install latest set of dependencies using constraints. In case constraints were upgraded and there - # are conflicts, this might fail, but it should be fixed in the following installation steps - set -x - pip install --root-user-action ignore \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true - common::install_pip_version - pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true - set +x - echo - echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. 
Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" - echo - pip uninstall --yes apache-airflow || true -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_airflow_dependencies_from_branch_tip -EOF - -# The content below is automatically copied from scripts/docker/common.sh -COPY <<"EOF" /common.sh -#!/usr/bin/env bash -set -euo pipefail - -function common::get_colors() { - COLOR_BLUE=$'\e[34m' - COLOR_GREEN=$'\e[32m' - COLOR_RED=$'\e[31m' - COLOR_RESET=$'\e[0m' - COLOR_YELLOW=$'\e[33m' - export COLOR_BLUE - export COLOR_GREEN - export COLOR_RED - export COLOR_RESET - export COLOR_YELLOW -} - - -function common::get_airflow_version_specification() { - if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=} - && -n ${AIRFLOW_VERSION} - && ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then - AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}" - fi -} - -function common::override_pip_version_if_needed() { - if [[ -n ${AIRFLOW_VERSION} ]]; then - if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then - export AIRFLOW_PIP_VERSION="23.3.2" - fi - fi -} - -function common::get_constraints_location() { - # auto-detect Airflow-constraint reference and location - if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then - if [[ ${AIRFLOW_VERSION} =~ v?2.* && ! ${AIRFLOW_VERSION} =~ .*dev.* ]]; then - AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION} - else - AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH} - fi - fi - - if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then - local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}" - local python_version - python_version="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." 
-f 1-2)" - AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt" - fi -} - -function common::show_pip_version_and_location() { - echo "PATH=${PATH}" - echo "pip on path: $(which pip)" - echo "Using pip: $(pip --version)" -} - -function common::install_pip_version() { - echo - echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" - echo - if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then - pip install --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" - else - pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" - fi - mkdir -p "${HOME}/.local/bin" -} - -function common::import_trusted_gpg() { - common::get_colors - - local key=${1:?${COLOR_RED}First argument expects OpenPGP Key ID${COLOR_RESET}} - local name=${2:?${COLOR_RED}Second argument expected trust storage name${COLOR_RESET}} - # Please note that not all servers could be used for retrieve keys - # sks-keyservers.net: Unmaintained and DNS taken down due to GDPR requests. - # keys.openpgp.org: User ID Mandatory, not suitable for APT repositories - # keyring.debian.org: Only accept keys in Debian keyring. - # pgp.mit.edu: High response time. 
- local keyservers=( - "hkps://keyserver.ubuntu.com" - "hkps://pgp.surf.nl" - ) - - GNUPGHOME="$(mktemp -d)" - export GNUPGHOME - set +e - for keyserver in $(shuf -e "${keyservers[@]}"); do - echo "${COLOR_BLUE}Try to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" - gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break - echo "${COLOR_YELLOW}Unable to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" - done - set -e - gpg --export "${key}" > "/etc/apt/trusted.gpg.d/${name}.gpg" - gpgconf --kill all - rm -rf "${GNUPGHOME}" - unset GNUPGHOME -} -EOF - -# The content below is automatically copied from scripts/docker/pip -COPY <<"EOF" /pip -#!/usr/bin/env bash -COLOR_RED=$'\e[31m' -COLOR_RESET=$'\e[0m' -COLOR_YELLOW=$'\e[33m' - -if [[ $(id -u) == "0" ]]; then - echo - echo "${COLOR_RED}You are running pip as root. Please use 'airflow' user to run pip!${COLOR_RESET}" - echo - echo "${COLOR_YELLOW}See: https://airflow.apache.org/docs/docker-stack/build.html#adding-a-new-pypi-package${COLOR_RESET}" - echo - exit 1 -fi -exec "${HOME}"/.local/bin/pip "${@}" -EOF - -# The content below is automatically copied from scripts/docker/install_from_docker_context_files.sh -COPY <<"EOF" /install_from_docker_context_files.sh - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -function install_airflow_and_providers_from_docker_context_files(){ - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - - if [[ ! 
-d /docker-context-files ]]; then - echo - echo "${COLOR_RED}You must provide a folder via --build-arg DOCKER_CONTEXT_FILES= and you missed it!${COLOR_RESET}" - echo - exit 1 - fi - - # shellcheck disable=SC2206 - local pip_flags=( - # Don't quote this -- if it is empty we don't want it to create an - # empty array element - --find-links="file:///docker-context-files" - ) - - # Find Apache Airflow packages in docker-context files - local reinstalling_apache_airflow_package - reinstalling_apache_airflow_package=$(ls \ - /docker-context-files/apache?airflow?[0-9]*.{whl,tar.gz} 2>/dev/null || true) - # Add extras when installing airflow - if [[ -n "${reinstalling_apache_airflow_package}" ]]; then - # When a provider depends on a dev version of Airflow, we need to - # specify `apache-airflow==$VER`, otherwise pip will look for it on - # pip, and fail to find it - - # This will work as long as the wheel file is correctly named, which it - # will be if it was build by wheel tooling - local ver - ver=$(basename "$reinstalling_apache_airflow_package" | cut -d "-" -f 2) - reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==$ver" - fi - - if [[ -z "${reinstalling_apache_airflow_package}" && ${AIRFLOW_VERSION=} != "" ]]; then - # When we install only provider packages from docker-context files, we need to still - # install airflow from PyPI when AIRFLOW_VERSION is set. 
This handles the case where - # pre-release dockerhub image of airflow is built, but we want to install some providers from - # docker-context files - reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}" - fi - # Find Apache Airflow packages in docker-context files - local reinstalling_apache_airflow_providers_packages - reinstalling_apache_airflow_providers_packages=$(ls \ - /docker-context-files/apache?airflow?providers*.{whl,tar.gz} 2>/dev/null || true) - if [[ -z "${reinstalling_apache_airflow_package}" && \ - -z "${reinstalling_apache_airflow_providers_packages}" ]]; then - return - fi - - if [[ ${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=} == "true" ]]; then - local python_version - python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') - local local_constraints_file=/docker-context-files/constraints-"${python_version}"/${AIRFLOW_CONSTRAINTS_MODE}-"${python_version}".txt - - if [[ -f "${local_constraints_file}" ]]; then - echo - echo "${COLOR_BLUE}Installing docker-context-files packages with constraints found in ${local_constraints_file}${COLOR_RESET}" - echo - # force reinstall all airflow + provider packages with constraints found in - set -x - pip install "${pip_flags[@]}" --root-user-action ignore --upgrade \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint "${local_constraints_file}" \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} - set +x - else - echo - echo "${COLOR_BLUE}Installing docker-context-files packages with constraints from GitHub${COLOR_RESET}" - echo - set -x - pip install "${pip_flags[@]}" --root-user-action ignore \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} - set +x - fi - else - echo - echo "${COLOR_BLUE}Installing docker-context-files packages without constraints${COLOR_RESET}" - 
echo - set -x - pip install "${pip_flags[@]}" --root-user-action ignore \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} - set +x - fi - common::install_pip_version - pip check -} - -function install_all_other_packages_from_docker_context_files() { - - echo - echo "${COLOR_BLUE}Force re-installing all other package from local files without dependencies${COLOR_RESET}" - echo - local reinstalling_other_packages - # shellcheck disable=SC2010 - reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \ - grep -v apache_airflow | grep -v apache-airflow || true) - if [[ -n "${reinstalling_other_packages}" ]]; then - set -x - pip install ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --root-user-action ignore --force-reinstall --no-deps --no-index ${reinstalling_other_packages} - common::install_pip_version - set +x - fi -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_airflow_and_providers_from_docker_context_files - -common::show_pip_version_and_location -install_all_other_packages_from_docker_context_files -EOF - -# The content below is automatically copied from scripts/docker/install_airflow.sh -COPY <<"EOF" /install_airflow.sh -#!/usr/bin/env bash - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -function install_airflow() { - # Coherence check for editable installation mode. - if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then - echo - echo "${COLOR_RED}ERROR! 
You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" - echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD} and should be '.'${COLOR_RESET}" - exit 1 - fi - # Remove mysql from extras if client is not going to be installed - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}" - fi - # Remove postgres from extras if client is not going to be installed - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}" - fi - if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then - echo - echo "${COLOR_BLUE}Installing all packages with eager upgrade${COLOR_RESET}" - echo - # eager upgrade - pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} - if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then - # Remove airflow and reinstall it using editable flag - # We can only do it when we install airflow from sources - set -x - pip uninstall apache-airflow --yes - pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" - set +x - fi - - common::install_pip_version - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - else \ - echo - echo "${COLOR_BLUE}Installing all packages with constraints and upgrade if needed${COLOR_RESET}" - echo - set -x - pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - 
${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" - common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py - pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ - "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" - common::install_pip_version - set +x - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - fi - -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_airflow -EOF - -# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh -COPY <<"EOF" /install_additional_dependencies.sh -#!/usr/bin/env bash -set -euo pipefail - -: "${UPGRADE_TO_NEWER_DEPENDENCIES:?Should be true or false}" -: "${ADDITIONAL_PYTHON_DEPS:?Should be set}" -: "${AIRFLOW_PIP_VERSION:?Should be set}" - -. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -function install_additional_dependencies() { - if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then - echo - echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" - echo - set -x - pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} - common::install_pip_version - set +x - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - else - echo - echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" - echo - set -x - pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - ${ADDITIONAL_PYTHON_DEPS} - common::install_pip_version - set +x - echo - echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" - echo - pip check - fi -} - -common::get_colors -common::get_airflow_version_specification -common::override_pip_version_if_needed -common::get_constraints_location -common::show_pip_version_and_location - -install_additional_dependencies -EOF - - -# The content below is automatically copied from scripts/docker/entrypoint_prod.sh -COPY <<"EOF" /entrypoint_prod.sh -#!/usr/bin/env bash -AIRFLOW_COMMAND="${1:-}" - -set -euo pipefail - -LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" -export LD_PRELOAD - -function run_check_with_retries { - local cmd - cmd="${1}" - local countdown - countdown="${CONNECTION_CHECK_MAX_COUNT}" - - while true - do - set +e - local last_check_result - local res - last_check_result=$(eval "${cmd} 2>&1") - res=$? - set -e - if [[ ${res} == 0 ]]; then - echo - break - else - echo -n "." - countdown=$((countdown-1)) - fi - if [[ ${countdown} == 0 ]]; then - echo - echo "ERROR! Maximum number of retries (${CONNECTION_CHECK_MAX_COUNT}) reached." 
- echo - echo "Last check result:" - echo "$ ${cmd}" - echo "${last_check_result}" - echo - exit 1 - else - sleep "${CONNECTION_CHECK_SLEEP_TIME}" - fi - done -} - -function run_nc() { - # Checks if it is possible to connect to the host using netcat. - # - # We want to avoid misleading messages and perform only forward lookup of the service IP address. - # Netcat when run without -n performs both forward and reverse lookup and fails if the reverse - # lookup name does not match the original name even if the host is reachable via IP. This happens - # randomly with docker-compose in GitHub Actions. - # Since we are not using reverse lookup elsewhere, we can perform forward lookup in python - # And use the IP in NC and add '-n' switch to disable any DNS use. - # Even if this message might be harmless, it might hide the real reason for the problem - # Which is the long time needed to start some services, seeing this message might be totally misleading - # when you try to analyse the problem, that's why it's best to avoid it, - local host="${1}" - local port="${2}" - local ip - ip=$(python -c "import socket; print(socket.gethostbyname('${host}'))") - nc -zvvn "${ip}" "${port}" -} - - -function wait_for_connection { - # Waits for Connection to the backend specified via URL passed as first parameter - # Detects backend type depending on the URL schema and assigns - # default port numbers if not specified in the URL. 
- # Then it loops until connection to the host/port specified can be established - # It tries `CONNECTION_CHECK_MAX_COUNT` times and sleeps `CONNECTION_CHECK_SLEEP_TIME` between checks - local connection_url - connection_url="${1}" - local detected_backend - detected_backend=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).scheme)" "${connection_url}") - local detected_host - detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname or '')" "${connection_url}") - local detected_port - detected_port=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).port or '')" "${connection_url}") - - echo BACKEND="${BACKEND:=${detected_backend}}" - readonly BACKEND - - if [[ -z "${detected_port=}" ]]; then - if [[ ${BACKEND} == "postgres"* ]]; then - detected_port=5432 - elif [[ ${BACKEND} == "mysql"* ]]; then - detected_port=3306 - elif [[ ${BACKEND} == "mssql"* ]]; then - detected_port=1433 - elif [[ ${BACKEND} == "redis"* ]]; then - detected_port=6379 - elif [[ ${BACKEND} == "amqp"* ]]; then - detected_port=5672 - fi - fi - - detected_host=${detected_host:="localhost"} - - # Allow the DB parameters to be overridden by environment variable - echo DB_HOST="${DB_HOST:=${detected_host}}" - readonly DB_HOST - - echo DB_PORT="${DB_PORT:=${detected_port}}" - readonly DB_PORT - if [[ -n "${DB_HOST=}" ]] && [[ -n "${DB_PORT=}" ]]; then - run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}" - else - >&2 echo "The connection details to the broker could not be determined. Connectivity checks were skipped." - fi -} - -function create_www_user() { - local local_password="" - # Warning: command environment variables (*_CMD) have priority over usual configuration variables - # for configuration parameters that require sensitive information. This is the case for the SQL database - # and the broker backend in this entrypoint script. 
- if [[ -n "${_AIRFLOW_WWW_USER_PASSWORD_CMD=}" ]]; then - local_password=$(eval "${_AIRFLOW_WWW_USER_PASSWORD_CMD}") - unset _AIRFLOW_WWW_USER_PASSWORD_CMD - elif [[ -n "${_AIRFLOW_WWW_USER_PASSWORD=}" ]]; then - local_password="${_AIRFLOW_WWW_USER_PASSWORD}" - unset _AIRFLOW_WWW_USER_PASSWORD - fi - if [[ -z ${local_password} ]]; then - echo - echo "ERROR! Airflow Admin password not set via _AIRFLOW_WWW_USER_PASSWORD or _AIRFLOW_WWW_USER_PASSWORD_CMD variables!" - echo - exit 1 - fi - - airflow users create \ - --username "${_AIRFLOW_WWW_USER_USERNAME="admin"}" \ - --firstname "${_AIRFLOW_WWW_USER_FIRSTNAME="Airflow"}" \ - --lastname "${_AIRFLOW_WWW_USER_LASTNAME="Admin"}" \ - --email "${_AIRFLOW_WWW_USER_EMAIL="airflowadmin@example.com"}" \ - --role "${_AIRFLOW_WWW_USER_ROLE="Admin"}" \ - --password "${local_password}" || true -} - -function create_system_user_if_missing() { - # This is needed in case of OpenShift-compatible container execution. In case of OpenShift random - # User id is used when starting the image, however group 0 is kept as the user group. Our production - # Image is OpenShift compatible, so all permissions on all folders are set so that 0 group can exercise - # the same privileges as the default "airflow" user, this code checks if the user is already - # present in /etc/passwd and will create the system user dynamically, including setting its - # HOME directory to the /home/airflow so that (for example) the ${HOME}/.local folder where airflow is - # Installed can be automatically added to PYTHONPATH - if ! whoami &> /dev/null; then - if [[ -w /etc/passwd ]]; then - echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${AIRFLOW_USER_HOME_DIR}:/sbin/nologin" \ - >> /etc/passwd - fi - export HOME="${AIRFLOW_USER_HOME_DIR}" - fi -} - -function set_pythonpath_for_root_user() { - # Airflow is installed as a local user application which means that if the container is running as root - # the application is not available. 
because Python then only load system-wide applications. - # Now also adds applications installed as local user "airflow". - if [[ $UID == "0" ]]; then - local python_major_minor - python_major_minor="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" - export PYTHONPATH="${AIRFLOW_USER_HOME_DIR}/.local/lib/python${python_major_minor}/site-packages:${PYTHONPATH:-}" - >&2 echo "The container is run as root user. For security, consider using a regular user account." - fi -} - -function wait_for_airflow_db() { - # Wait for the command to run successfully to validate the database connection. - run_check_with_retries "airflow db check" -} - -function migrate_db() { - # Runs airflow db migrate - airflow db migrate || true -} - -function wait_for_celery_broker() { - # Verifies connection to Celery Broker - local executor - executor="$(airflow config get-value core executor)" - if [[ "${executor}" == "CeleryExecutor" ]]; then - local connection_url - connection_url="$(airflow config get-value celery broker_url)" - wait_for_connection "${connection_url}" - fi -} - -function exec_to_bash_or_python_command_if_specified() { - # If one of the commands: 'bash', 'python' is used, either run appropriate - # command with exec - if [[ ${AIRFLOW_COMMAND} == "bash" ]]; then - shift - exec "/bin/bash" "${@}" - elif [[ ${AIRFLOW_COMMAND} == "python" ]]; then - shift - exec "python" "${@}" - fi -} - -function check_uid_gid() { - if [[ $(id -g) == "0" ]]; then - return - fi - if [[ $(id -u) == "50000" ]]; then - >&2 echo - >&2 echo "WARNING! You should run the image with GID (Group ID) set to 0" - >&2 echo " even if you use 'airflow' user (UID=50000)" - >&2 echo - >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" - >&2 echo - >&2 echo " This is to make sure you can run the image with an arbitrary UID in the future." 
- >&2 echo - >&2 echo " See more about it in the Airflow's docker image documentation" - >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" - >&2 echo - # We still allow the image to run with `airflow` user. - return - else - >&2 echo - >&2 echo "ERROR! You should run the image with GID=0" - >&2 echo - >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" - >&2 echo - >&2 echo "The image should always be run with GID (Group ID) set to 0 regardless of the UID used." - >&2 echo " This is to make sure you can run the image with an arbitrary UID." - >&2 echo - >&2 echo " See more about it in the Airflow's docker image documentation" - >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" - # This will not work so we fail hard - exit 1 - fi -} - -unset PIP_USER - -check_uid_gid - -umask 0002 - -CONNECTION_CHECK_MAX_COUNT=${CONNECTION_CHECK_MAX_COUNT:=20} -readonly CONNECTION_CHECK_MAX_COUNT - -CONNECTION_CHECK_SLEEP_TIME=${CONNECTION_CHECK_SLEEP_TIME:=3} -readonly CONNECTION_CHECK_SLEEP_TIME - -create_system_user_if_missing -set_pythonpath_for_root_user -if [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then - wait_for_airflow_db -fi - -if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] || [[ -n "${_AIRFLOW_DB_MIGRATE=}" ]] ; then - migrate_db -fi - -if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] ; then - >&2 echo "WARNING: Environment variable '_AIRFLOW_DB_UPGRADE' is deprecated please use '_AIRFLOW_DB_MIGRATE' instead" -fi - -if [[ -n "${_AIRFLOW_WWW_USER_CREATE=}" ]] ; then - create_www_user -fi - -if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then - >&2 echo - >&2 echo "!!!!! Installing additional requirements: '${_PIP_ADDITIONAL_REQUIREMENTS}' !!!!!!!!!!!!" - >&2 echo - >&2 echo "WARNING: This is a development/test feature only. NEVER use it in production!" 
- >&2 echo " Instead, build a custom image as described in" - >&2 echo - >&2 echo " https://airflow.apache.org/docs/docker-stack/build.html" - >&2 echo - >&2 echo " Adding requirements at container startup is fragile and is done every time" - >&2 echo " the container starts, so it is only useful for testing and trying out" - >&2 echo " of adding dependencies." - >&2 echo - pip install --root-user-action ignore ${_PIP_ADDITIONAL_REQUIREMENTS} -fi - - -exec_to_bash_or_python_command_if_specified "${@}" - -if [[ ${AIRFLOW_COMMAND} == "airflow" ]]; then - AIRFLOW_COMMAND="${2:-}" - shift -fi - -if [[ ${AIRFLOW_COMMAND} =~ ^(scheduler|celery)$ ]] \ - && [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then - wait_for_celery_broker -fi - -exec "airflow" "${@}" -EOF - -# The content below is automatically copied from scripts/docker/clean-logs.sh -COPY <<"EOF" /clean-logs.sh -#!/usr/bin/env bash - - -set -euo pipefail - -readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}" -readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}" - -trap "exit" INT TERM - -readonly EVERY=$((15*60)) - -echo "Cleaning logs every $EVERY seconds" - -while true; do - echo "Trimming airflow logs to ${RETENTION} days." - find "${DIRECTORY}"/logs \ - -type d -name 'lost+found' -prune -o \ - -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \ - xargs -0 rm -f - - find "${DIRECTORY}"/logs -type d -empty -delete || true - - seconds=$(( $(date -u +%s) % EVERY)) - (( seconds < 1 )) || sleep $((EVERY - seconds - 1)) - sleep 1 -done -EOF - -# The content below is automatically copied from scripts/docker/airflow-scheduler-autorestart.sh -COPY <<"EOF" /airflow-scheduler-autorestart.sh -#!/usr/bin/env bash - -while echo "Running"; do - airflow scheduler -n 5 - return_code=$? - if (( return_code != 0 )); then - echo "Scheduler crashed with exit code $return_code. Respawning.." 
>&2 - date >> /tmp/airflow_scheduler_errors.txt - fi - - sleep 1 -done -EOF - -############################################################################################## -# This is the build image where we build all dependencies -############################################################################################## -FROM ${PYTHON_BASE_IMAGE} as airflow-build-image - -# Nolog bash flag is currently ignored - but you can replace it with -# xtrace - to show commands executed) -SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] - -ARG PYTHON_BASE_IMAGE -ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ - DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ - PIP_CACHE_DIR=/tmp/.cache/pip - -ARG DEV_APT_DEPS="" -ARG ADDITIONAL_DEV_APT_DEPS="" -ARG DEV_APT_COMMAND="" -ARG ADDITIONAL_DEV_APT_COMMAND="" -ARG ADDITIONAL_DEV_APT_ENV="" - -ENV DEV_APT_DEPS=${DEV_APT_DEPS} \ - ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \ - DEV_APT_COMMAND=${DEV_APT_COMMAND} \ - ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND} \ - ADDITIONAL_DEV_APT_ENV=${ADDITIONAL_DEV_APT_ENV} - -COPY --from=scripts install_os_dependencies.sh /scripts/docker/ -RUN bash /scripts/docker/install_os_dependencies.sh dev - -ARG INSTALL_MYSQL_CLIENT="true" -ARG INSTALL_MYSQL_CLIENT_TYPE="mariadb" -ARG INSTALL_MSSQL_CLIENT="true" -ARG INSTALL_POSTGRES_CLIENT="true" -ARG AIRFLOW_PIP_VERSION - -ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ - INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ - INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \ - INSTALL_POSTGRES_CLIENT=${INSTALL_POSTGRES_CLIENT} - -COPY --from=scripts common.sh /scripts/docker/ - -# Only copy mysql/mssql installation scripts for now - so that changing the other -# scripts which are needed much later will not invalidate the docker layer here -COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh 
/scripts/docker/ - -RUN bash /scripts/docker/install_mysql.sh dev && \ - bash /scripts/docker/install_mssql.sh dev && \ - bash /scripts/docker/install_postgres.sh dev -ENV PATH=${PATH}:/opt/mssql-tools/bin - -# By default we do not install from docker context files but if we decide to install from docker context -# files, we should override those variables to "docker-context-files" -ARG DOCKER_CONTEXT_FILES="Dockerfile" - -COPY ${DOCKER_CONTEXT_FILES} /docker-context-files - -ARG AIRFLOW_HOME -ARG AIRFLOW_USER_HOME_DIR -ARG AIRFLOW_UID - -RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ - --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" && \ - mkdir -p ${AIRFLOW_HOME} && chown -R "airflow:0" "${AIRFLOW_USER_HOME_DIR}" ${AIRFLOW_HOME} - -USER airflow - -ARG AIRFLOW_REPO=apache/airflow -ARG AIRFLOW_BRANCH=main -ARG AIRFLOW_EXTRAS -ARG ADDITIONAL_AIRFLOW_EXTRAS="" -# Allows to override constraints source -ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow" -ARG AIRFLOW_CONSTRAINTS_MODE="constraints" -ARG AIRFLOW_CONSTRAINTS_REFERENCE="" -ARG AIRFLOW_CONSTRAINTS_LOCATION="" -ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" - -# By default PIP has progress bar but you can disable it. -ARG PIP_PROGRESS_BAR -# By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up -# builds in case setup.py/setup.cfg changed. This is pure optimisation of CI/Breeze builds. -ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" -# This is airflow version that is put in the label of the image build -ARG AIRFLOW_VERSION -# By default latest released version of airflow is installed (when empty) but this value can be overridden -# and we can install version according to specification (For example ==2.0.2 or <3.0.0). 
-ARG AIRFLOW_VERSION_SPECIFICATION -# By default we install providers from PyPI but in case of Breeze build we want to install providers -# from local sources without the need of preparing provider packages upfront. This value is -# automatically overridden by Breeze scripts. -ARG INSTALL_PROVIDERS_FROM_SOURCES="false" -# Determines the way airflow is installed. By default we install airflow from PyPI `apache-airflow` package -# But it also can be `.` from local installation or GitHub URL pointing to specific branch or tag -# Of Airflow. Note That for local source installation you need to have local sources of -# Airflow checked out together with the Dockerfile and AIRFLOW_SOURCES_FROM and AIRFLOW_SOURCES_TO -# set to "." and "/opt/airflow" respectively. -ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" -# By default we do not upgrade to latest dependencies -ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" -# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow -# but in case of breeze/CI builds we use latest sources and we override those -# those SOURCES_FROM/TO with "." 
and "/opt/airflow" respectively -ARG AIRFLOW_SOURCES_FROM="Dockerfile" -ARG AIRFLOW_SOURCES_TO="/Dockerfile" - - -RUN if [[ -f /docker-context-files/pip.conf ]]; then \ - mkdir -p ${AIRFLOW_USER_HOME_DIR}/.config/pip; \ - cp /docker-context-files/pip.conf "${AIRFLOW_USER_HOME_DIR}/.config/pip/pip.conf"; \ - fi; \ - if [[ -f /docker-context-files/.piprc ]]; then \ - cp /docker-context-files/.piprc "${AIRFLOW_USER_HOME_DIR}/.piprc"; \ - fi - -# Additional PIP flags passed to all pip install commands except reinstalling pip itself -ARG ADDITIONAL_PIP_INSTALL_FLAGS="" - -ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ - AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ - AIRFLOW_VERSION=${AIRFLOW_VERSION} \ - AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ - AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ - AIRFLOW_SOURCES_FROM=${AIRFLOW_SOURCES_FROM} \ - AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO} \ - AIRFLOW_REPO=${AIRFLOW_REPO} \ - AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \ - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \ - CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \ - AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \ - AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \ - AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ - DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ - PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \ - AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ - PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ - ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \ - AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ - AIRFLOW_HOME=${AIRFLOW_HOME} \ - AIRFLOW_UID=${AIRFLOW_UID} \ - AIRFLOW_INSTALL_EDITABLE_FLAG="" \ - UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \ - # By default PIP installs everything to ~/.local - 
PIP_USER="true" - -# Copy all scripts required for installation - changing any of those should lead to -# rebuilding from here -COPY --from=scripts common.sh install_pip_version.sh \ - install_airflow_dependencies_from_branch_tip.sh /scripts/docker/ - -# We can set this value to true in case we want to install .whl/.tar.gz packages placed in the -# docker-context-files folder. This can be done for both additional packages you want to install -# as well as Airflow and Provider packages (it will be automatically detected if airflow -# is installed from docker-context files rather than from PyPI) -ARG INSTALL_PACKAGES_FROM_CONTEXT="false" - -# Normally constraints are not used when context packages are build - because we might have packages -# that are conflicting with Airflow constraints, however there are cases when we want to use constraints -# for example in CI builds when we already have source-package constraints - either from github branch or -# from eager-upgraded constraints by the CI builds -ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" - -# In case of Production build image segment we want to pre-install main version of airflow -# dependencies from GitHub so that we do not have to always reinstall it from the scratch. -# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") -# are uninstalled, only dependencies remain -# the cache is only used when "upgrade to newer dependencies" is not set to automatically -# account for removed dependencies (we do not install them in the first place) and in case -# INSTALL_PACKAGES_FROM_CONTEXT is not set (because then caching it from main makes no sense). 
-RUN bash /scripts/docker/install_pip_version.sh; \ - if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ - ${INSTALL_PACKAGES_FROM_CONTEXT} == "false" && \ - ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \ - bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ - fi - -COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO} - -# Add extra python dependencies -ARG ADDITIONAL_PYTHON_DEPS="" - - -ARG VERSION_SUFFIX_FOR_PYPI="" - -ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ - INSTALL_PACKAGES_FROM_CONTEXT=${INSTALL_PACKAGES_FROM_CONTEXT} \ - USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES} \ - VERSION_SUFFIX_FOR_PYPI=${VERSION_SUFFIX_FOR_PYPI} - -WORKDIR ${AIRFLOW_HOME} - -COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ - install_additional_dependencies.sh /scripts/docker/ - -# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from -# an incorrect architecture. -ARG TARGETARCH -# Value to be able to easily change cache id and therefore use a bare new cache -ARG PIP_CACHE_EPOCH="0" - -# hadolint ignore=SC2086, SC2010, DL3042 -RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ - if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ - bash /scripts/docker/install_from_docker_context_files.sh; \ - fi; \ - if ! 
airflow version 2>/dev/null >/dev/null; then \ - bash /scripts/docker/install_airflow.sh; \ - fi; \ - if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ - bash /scripts/docker/install_additional_dependencies.sh; \ - fi; \ - find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0 rm -f || true ; \ - find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; \ - # make sure that all directories and files in .local are also group accessible - find "${AIRFLOW_USER_HOME_DIR}/.local" -executable -print0 | xargs --null chmod g+x; \ - find "${AIRFLOW_USER_HOME_DIR}/.local" -print0 | xargs --null chmod g+rw - -# In case there is a requirements.txt file in "docker-context-files" it will be installed -# during the build additionally to whatever has been installed so far. It is recommended that -# the requirements.txt contains only dependencies with == version specification -# hadolint ignore=DL3042 -RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ - if [[ -f /docker-context-files/requirements.txt ]]; then \ - pip install --user -r /docker-context-files/requirements.txt; \ - fi - -############################################################################################## -# This is the actual Airflow image - much smaller than the build one. We copy -# installed Airflow and all its dependencies from the build image to make it smaller. 
-############################################################################################## -FROM ${PYTHON_BASE_IMAGE} as main - -# Nolog bash flag is currently ignored - but you can replace it with other flags (for example -# xtrace - to show commands executed) -SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] - -ARG AIRFLOW_UID - -LABEL org.apache.airflow.distro="debian" \ - org.apache.airflow.module="airflow" \ - org.apache.airflow.component="airflow" \ - org.apache.airflow.image="airflow" \ - org.apache.airflow.uid="${AIRFLOW_UID}" - -ARG PYTHON_BASE_IMAGE -ARG AIRFLOW_PIP_VERSION - -ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ - # Make sure noninteractive debian install is used and language variables set - DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib \ - AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} - -ARG RUNTIME_APT_DEPS="" -ARG ADDITIONAL_RUNTIME_APT_DEPS="" -ARG RUNTIME_APT_COMMAND="echo" -ARG ADDITIONAL_RUNTIME_APT_COMMAND="" -ARG ADDITIONAL_RUNTIME_APT_ENV="" -ARG INSTALL_MYSQL_CLIENT="true" -ARG INSTALL_MYSQL_CLIENT_TYPE="mysql" -ARG INSTALL_MSSQL_CLIENT="true" -ARG INSTALL_POSTGRES_CLIENT="true" - -ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \ - ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS} \ - RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND} \ - ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND} \ - INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ - INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ - INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \ - INSTALL_POSTGRES_CLIENT=${INSTALL_POSTGRES_CLIENT} \ - GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm" \ - AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} - -COPY --from=scripts install_os_dependencies.sh /scripts/docker/ -RUN bash /scripts/docker/install_os_dependencies.sh runtime - -# Having the variable in final image allows to 
disable providers manager warnings when -# production image is prepared from sources rather than from package -ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" -ARG AIRFLOW_IMAGE_REPOSITORY -ARG AIRFLOW_IMAGE_README_URL -ARG AIRFLOW_USER_HOME_DIR -ARG AIRFLOW_HOME - -# By default PIP installs everything to ~/.local -ENV PATH="${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH}" \ - AIRFLOW_UID=${AIRFLOW_UID} \ - AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ - AIRFLOW_HOME=${AIRFLOW_HOME} - -# THE 3 LINES ARE ONLY NEEDED IN ORDER TO MAKE PYMSSQL BUILD WORK WITH LATEST CYTHON -# AND SHOULD BE REMOVED WHEN WORKAROUND IN install_mssql.sh IS REMOVED -ARG AIRFLOW_PIP_VERSION=23.3.2 -ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} -COPY --from=scripts common.sh /scripts/docker/ - -# Only copy mysql/mssql installation scripts for now - so that changing the other -# scripts which are needed much later will not invalidate the docker layer here. -COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/ -# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an -# unexpected result - the cache for Dockerfiles might get invalidated in case the host system -# had different umask set and group x bit was not set. In Azure the bit might be not set at all. -# That also protects against AUFS Docker backend problem where changing the executable bit required sync -RUN bash /scripts/docker/install_mysql.sh prod \ - && bash /scripts/docker/install_mssql.sh prod \ - && bash /scripts/docker/install_postgres.sh prod \ - && adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ - --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" \ -# Make Airflow files belong to the root group and are accessible. 
This is to accommodate the guidelines from -# OpenShift https://docs.openshift.com/enterprise/3.0/creating_images/guidelines.html - && mkdir -pv "${AIRFLOW_HOME}" \ - && mkdir -pv "${AIRFLOW_HOME}/dags" \ - && mkdir -pv "${AIRFLOW_HOME}/logs" \ - && chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ - && chmod -R g+rw "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ - && find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x \ - && find "${AIRFLOW_USER_HOME_DIR}" -executable -print0 | xargs --null chmod g+x - -COPY --from=airflow-build-image --chown=airflow:0 \ - "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local" -COPY --from=scripts entrypoint_prod.sh /entrypoint -COPY --from=scripts clean-logs.sh /clean-logs -COPY --from=scripts airflow-scheduler-autorestart.sh /airflow-scheduler-autorestart - -# Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift -# See https://github.com/apache/airflow/issues/9248 -# Set default groups for airflow and root user - -RUN chmod a+rx /entrypoint /clean-logs \ - && chmod g=u /etc/passwd \ - && chmod g+w "${AIRFLOW_USER_HOME_DIR}/.local" \ - && usermod -g 0 airflow -G 0 - -# make sure that the venv is activated for all users -# including plain sudo, sudo with --interactive flag -RUN sed --in-place=.bak "s/secure_path=\"/secure_path=\"\/.venv\/bin:/" /etc/sudoers - -ARG AIRFLOW_VERSION - -COPY --from=scripts install_pip_version.sh /scripts/docker/ -RUN bash /scripts/docker/install_pip_version.sh - -# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation -# to learn more about the way how signals are handled by the image -# Also set airflow as nice PROMPT message. 
-ENV DUMB_INIT_SETSID="1" \ - PS1="(airflow)" \ - AIRFLOW_VERSION=${AIRFLOW_VERSION} \ - AIRFLOW__CORE__LOAD_EXAMPLES="false" \ - PIP_USER="true" \ - PATH="/root/bin:${PATH}" - -# Add protection against running pip as root user -RUN mkdir -pv /root/bin -COPY --from=scripts pip /root/bin/pip -RUN chmod u+x /root/bin/pip - -WORKDIR ${AIRFLOW_HOME} - -EXPOSE 8080 - -USER ${AIRFLOW_UID} - -# Those should be set and used as late as possible as any change in commit/build otherwise invalidates the -# layers right after -ARG BUILD_ID -ARG COMMIT_SHA -ARG AIRFLOW_IMAGE_REPOSITORY -ARG AIRFLOW_IMAGE_DATE_CREATED - -ENV BUILD_ID=${BUILD_ID} COMMIT_SHA=${COMMIT_SHA} - -LABEL org.apache.airflow.distro="debian" \ - org.apache.airflow.module="airflow" \ - org.apache.airflow.component="airflow" \ - org.apache.airflow.image="airflow" \ - org.apache.airflow.version="${AIRFLOW_VERSION}" \ - org.apache.airflow.uid="${AIRFLOW_UID}" \ - org.apache.airflow.main-image.build-id="${BUILD_ID}" \ - org.apache.airflow.main-image.commit-sha="${COMMIT_SHA}" \ - org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \ - org.opencontainers.image.created=${AIRFLOW_IMAGE_DATE_CREATED} \ - org.opencontainers.image.authors="dev@airflow.apache.org" \ - org.opencontainers.image.url="https://airflow.apache.org" \ - org.opencontainers.image.documentation="https://airflow.apache.org/docs/docker-stack/index.html" \ - org.opencontainers.image.version="${AIRFLOW_VERSION}" \ - org.opencontainers.image.revision="${COMMIT_SHA}" \ - org.opencontainers.image.vendor="Apache Software Foundation" \ - org.opencontainers.image.licenses="Apache-2.0" \ - org.opencontainers.image.ref.name="airflow" \ - org.opencontainers.image.title="Production Airflow Image" \ - org.opencontainers.image.description="Reference, production-ready Apache Airflow image" -ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"] -CMD [] \ No newline at end of file +# syntax=docker/dockerfile:1.4 +# Licensed to the Apache Software 
Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# THIS DOCKERFILE IS INTENDED FOR PRODUCTION USE AND DEPLOYMENT. +# NOTE! IT IS ALPHA-QUALITY FOR NOW - WE ARE IN A PROCESS OF TESTING IT +# +# +# This is a multi-segmented image. It actually contains two images: +# +# airflow-build-image - there all airflow dependencies can be installed (and +# built - for those dependencies that require +# build essentials). Airflow is installed there with +# --user switch so that all the dependencies are +# installed to ${HOME}/.local +# +# main - this is the actual production image that is much +# smaller because it does not contain all the build +# essentials. Instead the ${HOME}/.local folder +# is copied from the build-image - this way we have +# only result of installation and we do not need +# all the build essentials. This makes the image +# much smaller. 
+# +# Use the same builder frontend version for everyone +ARG AIRFLOW_EXTRAS="aiobotocore,amazon,async,celery,cncf.kubernetes,common.io,docker,elasticsearch,ftp,google,google_auth,grpc,hashicorp,http,ldap,microsoft.azure,mysql,odbc,openlineage,pandas,postgres,redis,sendgrid,sftp,slack,snowflake,ssh,statsd,virtualenv" +ARG ADDITIONAL_AIRFLOW_EXTRAS="" +ARG ADDITIONAL_PYTHON_DEPS="" + +ARG AIRFLOW_HOME=/opt/airflow +ARG AIRFLOW_UID="50000" +ARG AIRFLOW_USER_HOME_DIR=/home/airflow + +# latest released version here +ARG AIRFLOW_VERSION="2.8.0" + +ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" + +ARG AIRFLOW_PIP_VERSION=23.3.2 +ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" +ARG AIRFLOW_IMAGE_README_URL="https://raw.githubusercontent.com/apache/airflow/main/docs/docker-stack/README.md" + +# By default latest released version of airflow is installed (when empty) but this value can be overridden +# and we can install version according to specification (For example ==2.0.2 or <3.0.0). +ARG AIRFLOW_VERSION_SPECIFICATION="" + +# By default PIP has progress bar but you can disable it. +ARG PIP_PROGRESS_BAR="on" + +############################################################################################## +# This is the script image where we keep all inlined bash scripts needed in other segments +############################################################################################## +FROM scratch as scripts + +############################################################################################## +# Please DO NOT modify the inlined scripts manually. The content of those files will be +# replaced by pre-commit automatically from the "scripts/docker/" folder. 
+# This is done in order to avoid problems with caching and file permissions and in order to
+# make the PROD Dockerfile standalone
+##############################################################################################
+
+# The content below is automatically copied from scripts/docker/install_os_dependencies.sh
+COPY <<"EOF" /install_os_dependencies.sh
+#!/usr/bin/env bash
+# Installs the OS-level (apt) dependencies plus the static Docker CLI.
+# Usage: install_os_dependencies.sh <runtime|dev>
+set -euo pipefail
+
+DOCKER_CLI_VERSION=24.0.6
+
+# Exactly one positional argument is accepted.
+if [[ "$#" != 1 ]]; then
+    echo "ERROR! There should be 'runtime' or 'dev' parameter passed as argument."
+    exit 1
+fi
+
+if [[ "${1}" == "runtime" ]]; then
+    INSTALLATION_TYPE="RUNTIME"
+elif [[ "${1}" == "dev" ]]; then
+    INSTALLATION_TYPE="dev"
+else
+    echo "ERROR! Wrong argument. Passed ${1} and it should be one of 'runtime' or 'dev'."
+    exit 1
+fi
+
+# Fills DEV_APT_DEPS with the default package list when it is unset or empty, then exports it.
+function get_dev_apt_deps() {
+    if [[ "${DEV_APT_DEPS=}" == "" ]]; then
+        DEV_APT_DEPS="apt-transport-https apt-utils build-essential ca-certificates dirmngr \
+freetds-bin freetds-dev git gosu graphviz graphviz-dev krb5-user ldap-utils libffi-dev libgeos-dev \
+libkrb5-dev libldap2-dev libleveldb1d libleveldb-dev libsasl2-2 libsasl2-dev libsasl2-modules \
+libssl-dev libxmlsec1 libxmlsec1-dev locales lsb-release openssh-client pkgconf sasl2-bin \
+software-properties-common sqlite3 sudo unixodbc unixodbc-dev zlib1g-dev"
+        export DEV_APT_DEPS
+    fi
+}
+
+# Fills RUNTIME_APT_DEPS with the default package list when it is unset or empty, then exports it.
+# Library package names differ between "bullseye" and any other Debian codename.
+function get_runtime_apt_deps() {
+    local debian_version
+    local debian_version_apt_deps
+    # Get debian version without installing lsb_release
+    # shellcheck disable=SC1091
+    debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";)
+    echo
+    echo "DEBIAN CODENAME: ${debian_version}"
+    echo
+    if [[ "${debian_version}" == "bullseye" ]]; then
+        debian_version_apt_deps="libffi7 libldap-2.4-2 libssl1.1 netcat"
+    else
+        debian_version_apt_deps="libffi8 libldap-2.5-0 libssl3 netcat-openbsd"
+    fi
+    echo
+    echo "APPLIED INSTALLATION CONFIGURATION FOR DEBIAN VERSION: ${debian_version}"
+    echo
+    if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then
+        RUNTIME_APT_DEPS="apt-transport-https apt-utils ca-certificates \
+curl dumb-init freetds-bin gosu krb5-user libgeos-dev \
+ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales ${debian_version_apt_deps} \
+lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc"
+        export RUNTIME_APT_DEPS
+    fi
+}
+
+# Downloads the static Docker CLI tarball matching the machine architecture and
+# unpacks only the docker/docker binary into /usr/bin.
+function install_docker_cli() {
+    local platform
+    if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then
+        platform="aarch64"
+    else
+        platform="x86_64"
+    fi
+    # --fail makes curl exit non-zero on an HTTP error instead of piping the error page to tar;
+    # --show-error keeps the reason visible despite --silent.
+    curl --silent --show-error --fail --location \
+        "https://download.docker.com/linux/static/stable/${platform}/docker-${DOCKER_CLI_VERSION}.tgz" \
+        | tar -C /usr/bin --strip-components=1 -xvzf - docker/docker
+}
+
+# Installs the "dev" apt dependencies: bootstrap tools first, then the optional
+# DEV_APT_COMMAND / ADDITIONAL_DEV_APT_COMMAND hooks, then the package lists.
+function install_debian_dev_dependencies() {
+    apt-get update
+    apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1
+    apt-get install -y --no-install-recommends curl gnupg2 lsb-release
+    # shellcheck disable=SC2086
+    export ${ADDITIONAL_DEV_APT_ENV?}
+    if [[ ${DEV_APT_COMMAND} != "" ]]; then
+        bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}"
+    fi
+    if [[ ${ADDITIONAL_DEV_APT_COMMAND} != "" ]]; then
+        bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}"
+    fi
+    apt-get update
+    local debian_version
+    local debian_version_apt_deps
+    # Get debian version without installing lsb_release
+    # shellcheck disable=SC1091
+    debian_version=$(.
/etc/os-release; printf '%s\n' "$VERSION_CODENAME";)
+    echo
+    echo "DEBIAN CODENAME: ${debian_version}"
+    echo
+    if [[ "${debian_version}" == "bullseye" ]]; then
+        echo
+        echo "Bullseye detected - replacing dependencies in additional dev apt deps"
+        echo
+        # Replace dependencies in additional dev apt deps to be compatible with Bullseye
+        ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS//libgcc-11-dev/libgcc-10-dev}
+        ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS//netcat-openbsd/netcat}
+        echo
+        echo "Replaced bullseye dev apt dependencies"
+        # Show the list that was just rewritten (not the unrelated apt command hook)
+        echo "${ADDITIONAL_DEV_APT_DEPS}"
+        echo
+    fi
+
+    # shellcheck disable=SC2086
+    apt-get install -y --no-install-recommends ${DEV_APT_DEPS} ${ADDITIONAL_DEV_APT_DEPS}
+}
+
+# Installs the "runtime" apt dependencies: bootstrap tools first, then the optional
+# RUNTIME_APT_COMMAND / ADDITIONAL_RUNTIME_APT_COMMAND hooks, then the package lists,
+# and finally removes apt caches and logs.
+function install_debian_runtime_dependencies() {
+    apt-get update
+    apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1
+    apt-get install -y --no-install-recommends curl gnupg2 lsb-release
+    # shellcheck disable=SC2086
+    export ${ADDITIONAL_RUNTIME_APT_ENV?}
+    if [[ "${RUNTIME_APT_COMMAND}" != "" ]]; then
+        bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}"
+    fi
+    if [[ "${ADDITIONAL_RUNTIME_APT_COMMAND}" != "" ]]; then
+        bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}"
+    fi
+    apt-get update
+    # shellcheck disable=SC2086
+    apt-get install -y --no-install-recommends ${RUNTIME_APT_DEPS} ${ADDITIONAL_RUNTIME_APT_DEPS}
+    apt-get autoremove -yqq --purge
+    apt-get clean
+    rm -rf /var/lib/apt/lists/* /var/log/*
+}
+
+if [[ "${INSTALLATION_TYPE}" == "RUNTIME" ]]; then
+    get_runtime_apt_deps
+    install_debian_runtime_dependencies
+    install_docker_cli
+
+else
+    get_dev_apt_deps
+    install_debian_dev_dependencies
+    install_docker_cli
+fi
+EOF
+
+# The content below is automatically copied from scripts/docker/install_mysql.sh
+COPY <<"EOF" /install_mysql.sh
+#!/usr/bin/env bash
+# Installs either the Oracle MySQL or the MariaDB client libraries.
+# Usage: install_mysql.sh <prod|dev>
+. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
+
+set -euo pipefail
+
+common::get_colors
+declare -a packages
+
+readonly MYSQL_LTS_VERSION="8.0"
+readonly MARIADB_LTS_VERSION="10.11"
+
+: "${INSTALL_MYSQL_CLIENT:?Should be true or false}"
+# ":=" assigns the default; ":-" would only expand it and leave the variable unset,
+# which aborts later reads under "set -u".
+: "${INSTALL_MYSQL_CLIENT_TYPE:=mariadb}"
+
+# Installs the Oracle MySQL client packages from repo.mysql.com.
+# $1 - "dev" (headers + client) or "prod" (runtime library + client)
+install_mysql_client() {
+    if [[ "${1}" == "dev" ]]; then
+        packages=("libmysqlclient-dev" "mysql-client")
+    elif [[ "${1}" == "prod" ]]; then
+        # `libmysqlclientXX` where XX is number, and it should be increased every new GA MySQL release, for example
+        # 18 - MySQL 5.6.48
+        # 20 - MySQL 5.7.42
+        # 21 - MySQL 8.0.34
+        # 22 - MySQL 8.1
+        packages=("libmysqlclient21" "mysql-client")
+    else
+        echo
+        echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}"
+        echo
+        exit 1
+    fi
+
+    common::import_trusted_gpg "B7B3B788A8D3785C" "mysql"
+
+    echo
+    echo "${COLOR_BLUE}Installing Oracle MySQL client version ${MYSQL_LTS_VERSION}: ${1}${COLOR_RESET}"
+    echo
+
+    echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_LTS_VERSION}" > \
+        /etc/apt/sources.list.d/mysql.list
+    apt-get update
+    apt-get install --no-install-recommends -y "${packages[@]}"
+    apt-get autoremove -yqq --purge
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+    # Remove mysql repository from sources.list.d as MySQL repos have a basic flaw that they put expiry
+    # date on their GPG signing keys and they sign their repo with those keys. This means that after a
+    # certain date, the GPG key becomes invalid and if you have the repository added in your sources.list
+    # then you will not be able to install anything from any other repository. This is unlike any other
+    # repository we have seen (for example Postgres, MariaDB, MsSQL - all have non-expiring signing keys)
+    rm /etc/apt/sources.list.d/mysql.list
+}
+
+# Installs the MariaDB client packages from archive.mariadb.org.
+# $1 - "dev" (headers + client) or "prod" (runtime library + client)
+install_mariadb_client() {
+    # List of compatible package Oracle MySQL -> MariaDB:
+    # `mysql-client` -> `mariadb-client` or `mariadb-client-compat` (11+)
+    # `libmysqlclientXX` (where XX is a number) -> `libmariadb3-compat`
+    # `libmysqlclient-dev` -> `libmariadb-dev-compat`
+    #
+    # Naming differs from the Debian repo which we used before:
+    # some packages might contain a `-compat` suffix, Debian repo -> MariaDB repo:
+    # `libmariadb-dev` -> `libmariadb-dev-compat`
+    # `mariadb-client-core` -> `mariadb-client` or `mariadb-client-compat` (11+)
+    if [[ "${1}" == "dev" ]]; then
+        packages=("libmariadb-dev-compat" "mariadb-client")
+    elif [[ "${1}" == "prod" ]]; then
+        packages=("libmariadb3-compat" "mariadb-client")
+    else
+        echo
+        echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}"
+        echo
+        exit 1
+    fi
+
+    common::import_trusted_gpg "0xF1656F24C74CD1D8" "mariadb"
+
+    echo
+    echo "${COLOR_BLUE}Installing MariaDB client version ${MARIADB_LTS_VERSION}: ${1}${COLOR_RESET}"
+    echo "${COLOR_YELLOW}MariaDB client protocol-compatible with MySQL client.${COLOR_RESET}"
+    echo
+
+    # NOTE(review): this pipes the PostgreSQL signing key into the deprecated `apt-key`;
+    # it looks unrelated to the MariaDB repository - confirm it is intentional.
+    curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
+    echo "deb [arch=amd64,arm64] https://archive.mariadb.org/mariadb-${MARIADB_LTS_VERSION}/repo/debian/ $(lsb_release -cs) main" > \
+        /etc/apt/sources.list.d/mariadb.list
+    # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies
+    printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" > /etc/apt/preferences.d/mariadb
+    apt-get update
+    apt-get install --no-install-recommends -y "${packages[@]}"
+    apt-get autoremove -yqq --purge
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+}
+
+if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then
+    if [[ $(uname -m) == "arm64" || $(uname -m) == "aarch64" ]]; then
+        INSTALL_MYSQL_CLIENT_TYPE="mariadb"
+        echo
+        echo "${COLOR_YELLOW}Client forced to mariadb for ARM${COLOR_RESET}"
+        echo
+    fi
+
+    if [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mysql" ]]; then
+        install_mysql_client "${@}"
+    elif [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mariadb" ]]; then
+        install_mariadb_client "${@}"
+    else
+        echo
+        echo "${COLOR_RED}Specify either mysql or mariadb, got ${INSTALL_MYSQL_CLIENT_TYPE}${COLOR_RESET}"
+        echo
+        exit 1
+    fi
+fi
+EOF
+
+# The content below is automatically copied from scripts/docker/install_mssql.sh
+COPY <<"EOF" /install_mssql.sh
+#!/usr/bin/env bash
+# Installs the Microsoft ODBC driver (msodbcsql18) unless INSTALL_MSSQL_CLIENT is not "true".
+. "$( dirname "${BASH_SOURCE[0]}" )/common.sh"
+
+set -euo pipefail
+
+common::get_colors
+declare -a packages
+
+: "${AIRFLOW_PIP_VERSION:?Should be set}"
+: "${INSTALL_MSSQL_CLIENT:?Should be true or false}"
+
+
+function install_mssql_client() {
+    # Install MsSQL client from Microsoft repositories
+    if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then
+        echo
+        echo "${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}"
+        echo
+        return
+    fi
+    packages=("msodbcsql18")
+
+    common::import_trusted_gpg "EB3E94ADBE1229CF" "microsoft"
+
+    echo
+    echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}"
+    echo
+
+    echo "deb [arch=amd64,arm64] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod $(lsb_release -cs) main" > \
+        /etc/apt/sources.list.d/mssql-release.list
+    apt-get update -yqq
+    apt-get upgrade -yqq
+    ACCEPT_EULA=Y apt-get -yqq install --no-install-recommends "${packages[@]}"
+    rm -rf /var/lib/apt/lists/*
+    apt-get autoremove -yqq --purge
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+}
+
+install_mssql_client "${@}"
+EOF
+
+# The content below is automatically copied from scripts/docker/install_postgres.sh
+COPY <<"EOF" /install_postgres.sh
+#!/usr/bin/env bash
+.
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" +set -euo pipefail + +common::get_colors +declare -a packages + +: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" + +install_postgres_client() { + echo + echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}" + echo + + if [[ "${1}" == "dev" ]]; then + packages=("libpq-dev" "postgresql-client") + elif [[ "${1}" == "prod" ]]; then + packages=("postgresql-client") + else + echo + echo "Specify either prod or dev" + echo + exit 1 + fi + + common::import_trusted_gpg "7FCC7D46ACCC4CF8" "postgres" + + echo "deb [arch=amd64,arm64] https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > \ + /etc/apt/sources.list.d/pgdg.list + apt-get update + apt-get install --no-install-recommends -y "${packages[@]}" + apt-get autoremove -yqq --purge + apt-get clean && rm -rf /var/lib/apt/lists/* +} + +if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then + install_postgres_client "${@}" +fi +EOF + +# The content below is automatically copied from scripts/docker/install_pip_version.sh +COPY <<"EOF" /install_pip_version.sh +#!/usr/bin/env bash +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::show_pip_version_and_location + +common::install_pip_version +EOF + +# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh +COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh +#!/usr/bin/env bash + +. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_REPO:?Should be set}" +: "${AIRFLOW_BRANCH:?Should be set}" +: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" +: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow_dependencies_from_branch_tip() { + echo + echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" + echo + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + fi + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + fi + # Install latest set of dependencies using constraints. In case constraints were upgraded and there + # are conflicts, this might fail, but it should be fixed in the following installation steps + set -x + pip install --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true + common::install_pip_version + pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true + set +x + echo + echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. 
Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" + echo + pip uninstall --yes apache-airflow || true +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow_dependencies_from_branch_tip +EOF + +# The content below is automatically copied from scripts/docker/common.sh +COPY <<"EOF" /common.sh +#!/usr/bin/env bash +set -euo pipefail + +function common::get_colors() { + COLOR_BLUE=$'\e[34m' + COLOR_GREEN=$'\e[32m' + COLOR_RED=$'\e[31m' + COLOR_RESET=$'\e[0m' + COLOR_YELLOW=$'\e[33m' + export COLOR_BLUE + export COLOR_GREEN + export COLOR_RED + export COLOR_RESET + export COLOR_YELLOW +} + + +function common::get_airflow_version_specification() { + if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=} + && -n ${AIRFLOW_VERSION} + && ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then + AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}" + fi +} + +function common::override_pip_version_if_needed() { + if [[ -n ${AIRFLOW_VERSION} ]]; then + if [[ ${AIRFLOW_VERSION} =~ ^2\.0.* || ${AIRFLOW_VERSION} =~ ^1\.* ]]; then + export AIRFLOW_PIP_VERSION="23.3.2" + fi + fi +} + +function common::get_constraints_location() { + # auto-detect Airflow-constraint reference and location + if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then + if [[ ${AIRFLOW_VERSION} =~ v?2.* && ! ${AIRFLOW_VERSION} =~ .*dev.* ]]; then + AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION} + else + AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH} + fi + fi + + if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then + local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}" + local python_version + python_version="$(python --version 2>/dev/stdout | cut -d " " -f 2 | cut -d "." 
-f 1-2)" + AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt" + fi +} + +function common::show_pip_version_and_location() { + echo "PATH=${PATH}" + echo "pip on path: $(which pip)" + echo "Using pip: $(pip --version)" +} + +function common::install_pip_version() { + echo + echo "${COLOR_BLUE}Installing pip version ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" + echo + if [[ ${AIRFLOW_PIP_VERSION} =~ .*https.* ]]; then + pip install --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" + else + pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" + fi + mkdir -p "${HOME}/.local/bin" +} + +function common::import_trusted_gpg() { + common::get_colors + + local key=${1:?${COLOR_RED}First argument expects OpenPGP Key ID${COLOR_RESET}} + local name=${2:?${COLOR_RED}Second argument expected trust storage name${COLOR_RESET}} + # Please note that not all servers could be used for retrieve keys + # sks-keyservers.net: Unmaintained and DNS taken down due to GDPR requests. + # keys.openpgp.org: User ID Mandatory, not suitable for APT repositories + # keyring.debian.org: Only accept keys in Debian keyring. + # pgp.mit.edu: High response time. 
+ local keyservers=( + "hkps://keyserver.ubuntu.com" + "hkps://pgp.surf.nl" + ) + + GNUPGHOME="$(mktemp -d)" + export GNUPGHOME + set +e + for keyserver in $(shuf -e "${keyservers[@]}"); do + echo "${COLOR_BLUE}Try to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" + gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break + echo "${COLOR_YELLOW}Unable to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" + done + set -e + gpg --export "${key}" > "/etc/apt/trusted.gpg.d/${name}.gpg" + gpgconf --kill all + rm -rf "${GNUPGHOME}" + unset GNUPGHOME +} +EOF + +# The content below is automatically copied from scripts/docker/pip +COPY <<"EOF" /pip +#!/usr/bin/env bash +COLOR_RED=$'\e[31m' +COLOR_RESET=$'\e[0m' +COLOR_YELLOW=$'\e[33m' + +if [[ $(id -u) == "0" ]]; then + echo + echo "${COLOR_RED}You are running pip as root. Please use 'airflow' user to run pip!${COLOR_RESET}" + echo + echo "${COLOR_YELLOW}See: https://airflow.apache.org/docs/docker-stack/build.html#adding-a-new-pypi-package${COLOR_RESET}" + echo + exit 1 +fi +exec "${HOME}"/.local/bin/pip "${@}" +EOF + +# The content below is automatically copied from scripts/docker/install_from_docker_context_files.sh +COPY <<"EOF" /install_from_docker_context_files.sh + +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow_and_providers_from_docker_context_files(){ + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + fi + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + fi + + if [[ ! 
-d /docker-context-files ]]; then + echo + echo "${COLOR_RED}You must provide a folder via --build-arg DOCKER_CONTEXT_FILES= and you missed it!${COLOR_RESET}" + echo + exit 1 + fi + + # shellcheck disable=SC2206 + local pip_flags=( + # Don't quote this -- if it is empty we don't want it to create an + # empty array element + --find-links="file:///docker-context-files" + ) + + # Find Apache Airflow packages in docker-context files + local reinstalling_apache_airflow_package + reinstalling_apache_airflow_package=$(ls \ + /docker-context-files/apache?airflow?[0-9]*.{whl,tar.gz} 2>/dev/null || true) + # Add extras when installing airflow + if [[ -n "${reinstalling_apache_airflow_package}" ]]; then + # When a provider depends on a dev version of Airflow, we need to + # specify `apache-airflow==$VER`, otherwise pip will look for it on + # pip, and fail to find it + + # This will work as long as the wheel file is correctly named, which it + # will be if it was build by wheel tooling + local ver + ver=$(basename "$reinstalling_apache_airflow_package" | cut -d "-" -f 2) + reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==$ver" + fi + + if [[ -z "${reinstalling_apache_airflow_package}" && ${AIRFLOW_VERSION=} != "" ]]; then + # When we install only provider packages from docker-context files, we need to still + # install airflow from PyPI when AIRFLOW_VERSION is set. 
This handles the case where + # pre-release dockerhub image of airflow is built, but we want to install some providers from + # docker-context files + reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}" + fi + # Find Apache Airflow packages in docker-context files + local reinstalling_apache_airflow_providers_packages + reinstalling_apache_airflow_providers_packages=$(ls \ + /docker-context-files/apache?airflow?providers*.{whl,tar.gz} 2>/dev/null || true) + if [[ -z "${reinstalling_apache_airflow_package}" && \ + -z "${reinstalling_apache_airflow_providers_packages}" ]]; then + return + fi + + if [[ ${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=} == "true" ]]; then + local python_version + python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') + local local_constraints_file=/docker-context-files/constraints-"${python_version}"/${AIRFLOW_CONSTRAINTS_MODE}-"${python_version}".txt + + if [[ -f "${local_constraints_file}" ]]; then + echo + echo "${COLOR_BLUE}Installing docker-context-files packages with constraints found in ${local_constraints_file}${COLOR_RESET}" + echo + # force reinstall all airflow + provider packages with constraints found in + set -x + pip install "${pip_flags[@]}" --root-user-action ignore --upgrade \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} --constraint "${local_constraints_file}" \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + else + echo + echo "${COLOR_BLUE}Installing docker-context-files packages with constraints from GitHub${COLOR_RESET}" + echo + set -x + pip install "${pip_flags[@]}" --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + fi + else + echo + echo "${COLOR_BLUE}Installing docker-context-files packages without constraints${COLOR_RESET}" + 
echo + set -x + pip install "${pip_flags[@]}" --root-user-action ignore \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + ${reinstalling_apache_airflow_package} ${reinstalling_apache_airflow_providers_packages} + set +x + fi + common::install_pip_version + pip check +} + +function install_all_other_packages_from_docker_context_files() { + + echo + echo "${COLOR_BLUE}Force re-installing all other package from local files without dependencies${COLOR_RESET}" + echo + local reinstalling_other_packages + # shellcheck disable=SC2010 + reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \ + grep -v apache_airflow | grep -v apache-airflow || true) + if [[ -n "${reinstalling_other_packages}" ]]; then + set -x + pip install ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --root-user-action ignore --force-reinstall --no-deps --no-index ${reinstalling_other_packages} + common::install_pip_version + set +x + fi +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow_and_providers_from_docker_context_files + +common::show_pip_version_and_location +install_all_other_packages_from_docker_context_files +EOF + +# The content below is automatically copied from scripts/docker/install_airflow.sh +COPY <<"EOF" /install_airflow.sh +#!/usr/bin/env bash + +. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +function install_airflow() { + # Coherence check for editable installation mode. + if [[ ${AIRFLOW_INSTALLATION_METHOD} != "." && \ + ${AIRFLOW_INSTALL_EDITABLE_FLAG} == "--editable" ]]; then + echo + echo "${COLOR_RED}ERROR! 
You can only use --editable flag when installing airflow from sources!${COLOR_RESET}" + echo "${COLOR_RED} Current installation method is '${AIRFLOW_INSTALLATION_METHOD}' and should be '.'${COLOR_RESET}" + exit 1 + fi + # Remove mysql from extras if client is not going to be installed + if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} + echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}" + fi + # Remove postgres from extras if client is not going to be installed + if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} + echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}" + fi + if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then + echo + echo "${COLOR_BLUE}Installing all packages with eager upgrade${COLOR_RESET}" + echo + # eager upgrade: installed without a --constraint file, using the eager upgrade strategy + pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ + ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} + if [[ -n "${AIRFLOW_INSTALL_EDITABLE_FLAG}" ]]; then + # Remove airflow and reinstall it using editable flag + # We can only do it when we install airflow from sources + set -x + pip uninstall apache-airflow --yes + pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + set +x + fi + + common::install_pip_version + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + else + echo + echo "${COLOR_BLUE}Installing all packages with constraints and upgrade if needed${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ +
${ADDITIONAL_PIP_INSTALL_FLAGS} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ + --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" + common::install_pip_version + # then upgrade if needed without using constraints to account for new limits in setup.py + pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ + "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + common::install_pip_version + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + fi + +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_airflow +EOF + +# The content below is automatically copied from scripts/docker/install_additional_dependencies.sh +COPY <<"EOF" /install_additional_dependencies.sh +#!/usr/bin/env bash +set -euo pipefail + +: "${UPGRADE_TO_NEWER_DEPENDENCIES:?Should be true or false}" +: "${ADDITIONAL_PYTHON_DEPS:?Should be set}" +: "${AIRFLOW_PIP_VERSION:?Should be set}" + +. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" + +function install_additional_dependencies() { + if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then + echo + echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore --upgrade --upgrade-strategy eager \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + ${ADDITIONAL_PYTHON_DEPS} ${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=} + common::install_pip_version + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + else + echo + echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" + echo + set -x + pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ + ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + ${ADDITIONAL_PYTHON_DEPS} + common::install_pip_version + set +x + echo + echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" + echo + pip check + fi +} + +common::get_colors +common::get_airflow_version_specification +common::override_pip_version_if_needed +common::get_constraints_location +common::show_pip_version_and_location + +install_additional_dependencies +EOF + + +# The content below is automatically copied from scripts/docker/entrypoint_prod.sh +COPY <<"EOF" /entrypoint_prod.sh +#!/usr/bin/env bash +AIRFLOW_COMMAND="${1:-}" + +set -euo pipefail + +LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" +export LD_PRELOAD + +function run_check_with_retries { + local cmd + cmd="${1}" + local countdown + countdown="${CONNECTION_CHECK_MAX_COUNT}" + + while true + do + set +e + local last_check_result + local res + last_check_result=$(eval "${cmd} 2>&1") + res=$? + set -e + if [[ ${res} == 0 ]]; then + echo + break + else + echo -n "." + countdown=$((countdown-1)) + fi + if [[ ${countdown} == 0 ]]; then + echo + echo "ERROR! Maximum number of retries (${CONNECTION_CHECK_MAX_COUNT}) reached." 
+ echo + echo "Last check result:" + echo "$ ${cmd}" + echo "${last_check_result}" + echo + exit 1 + else + sleep "${CONNECTION_CHECK_SLEEP_TIME}" + fi + done +} + +function run_nc() { + # Checks if it is possible to connect to the host using netcat. + # + # We want to avoid misleading messages and perform only forward lookup of the service IP address. + # Netcat when run without -n performs both forward and reverse lookup and fails if the reverse + # lookup name does not match the original name even if the host is reachable via IP. This happens + # randomly with docker-compose in GitHub Actions. + # Since we are not using reverse lookup elsewhere, we can perform forward lookup in python + # And use the IP in NC and add '-n' switch to disable any DNS use. + # Even if this message might be harmless, it might hide the real reason for the problem + # Which is the long time needed to start some services, seeing this message might be totally misleading + # when you try to analyse the problem, that's why it's best to avoid it, + local host="${1}" + local port="${2}" + local ip + ip=$(python -c "import socket; print(socket.gethostbyname('${host}'))") + nc -zvvn "${ip}" "${port}" +} + + +function wait_for_connection { + # Waits for Connection to the backend specified via URL passed as first parameter + # Detects backend type depending on the URL schema and assigns + # default port numbers if not specified in the URL. 
+ # Then it loops until connection to the host/port specified can be established + # It tries `CONNECTION_CHECK_MAX_COUNT` times and sleeps `CONNECTION_CHECK_SLEEP_TIME` between checks + local connection_url + connection_url="${1}" + local detected_backend + detected_backend=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).scheme)" "${connection_url}") + local detected_host + detected_host=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).hostname or '')" "${connection_url}") + local detected_port + detected_port=$(python -c "from urllib.parse import urlsplit; import sys; print(urlsplit(sys.argv[1]).port or '')" "${connection_url}") + + echo BACKEND="${BACKEND:=${detected_backend}}" + readonly BACKEND + + if [[ -z "${detected_port=}" ]]; then + if [[ ${BACKEND} == "postgres"* ]]; then + detected_port=5432 + elif [[ ${BACKEND} == "mysql"* ]]; then + detected_port=3306 + elif [[ ${BACKEND} == "mssql"* ]]; then + detected_port=1433 + elif [[ ${BACKEND} == "redis"* ]]; then + detected_port=6379 + elif [[ ${BACKEND} == "amqp"* ]]; then + detected_port=5672 + fi + fi + + detected_host=${detected_host:="localhost"} + + # Allow the DB parameters to be overridden by environment variable + echo DB_HOST="${DB_HOST:=${detected_host}}" + readonly DB_HOST + + echo DB_PORT="${DB_PORT:=${detected_port}}" + readonly DB_PORT + if [[ -n "${DB_HOST=}" ]] && [[ -n "${DB_PORT=}" ]]; then + run_check_with_retries "run_nc ${DB_HOST@Q} ${DB_PORT@Q}" + else + >&2 echo "The connection details to the broker could not be determined. Connectivity checks were skipped." + fi +} + +function create_www_user() { + local local_password="" + # Warning: command environment variables (*_CMD) have priority over usual configuration variables + # for configuration parameters that require sensitive information. This is the case for the SQL database + # and the broker backend in this entrypoint script. 
+ if [[ -n "${_AIRFLOW_WWW_USER_PASSWORD_CMD=}" ]]; then + local_password=$(eval "${_AIRFLOW_WWW_USER_PASSWORD_CMD}") + unset _AIRFLOW_WWW_USER_PASSWORD_CMD + elif [[ -n "${_AIRFLOW_WWW_USER_PASSWORD=}" ]]; then + local_password="${_AIRFLOW_WWW_USER_PASSWORD}" + unset _AIRFLOW_WWW_USER_PASSWORD + fi + if [[ -z ${local_password} ]]; then + echo + echo "ERROR! Airflow Admin password not set via _AIRFLOW_WWW_USER_PASSWORD or _AIRFLOW_WWW_USER_PASSWORD_CMD variables!" + echo + exit 1 + fi + + airflow users create \ + --username "${_AIRFLOW_WWW_USER_USERNAME="admin"}" \ + --firstname "${_AIRFLOW_WWW_USER_FIRSTNAME="Airflow"}" \ + --lastname "${_AIRFLOW_WWW_USER_LASTNAME="Admin"}" \ + --email "${_AIRFLOW_WWW_USER_EMAIL="airflowadmin@example.com"}" \ + --role "${_AIRFLOW_WWW_USER_ROLE="Admin"}" \ + --password "${local_password}" || true +} + +function create_system_user_if_missing() { + # This is needed in case of OpenShift-compatible container execution. In case of OpenShift random + # User id is used when starting the image, however group 0 is kept as the user group. Our production + # Image is OpenShift compatible, so all permissions on all folders are set so that 0 group can exercise + # the same privileges as the default "airflow" user, this code checks if the user is already + # present in /etc/passwd and will create the system user dynamically, including setting its + # HOME directory to the /home/airflow so that (for example) the ${HOME}/.local folder where airflow is + # Installed can be automatically added to PYTHONPATH + if ! whoami &> /dev/null; then + if [[ -w /etc/passwd ]]; then + echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${AIRFLOW_USER_HOME_DIR}:/sbin/nologin" \ + >> /etc/passwd + fi + export HOME="${AIRFLOW_USER_HOME_DIR}" + fi +} + +function set_pythonpath_for_root_user() { + # Airflow is installed as a local user application which means that if the container is running as root + # the application is not available. 
because Python then only load system-wide applications. + # Now also adds applications installed as local user "airflow". + if [[ $UID == "0" ]]; then + local python_major_minor + python_major_minor="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" + export PYTHONPATH="${AIRFLOW_USER_HOME_DIR}/.local/lib/python${python_major_minor}/site-packages:${PYTHONPATH:-}" + >&2 echo "The container is run as root user. For security, consider using a regular user account." + fi +} + +function wait_for_airflow_db() { + # Wait for the command to run successfully to validate the database connection. + run_check_with_retries "airflow db check" +} + +function migrate_db() { + # Runs airflow db migrate + airflow db migrate || true +} + +function wait_for_celery_broker() { + # Verifies connection to Celery Broker + local executor + executor="$(airflow config get-value core executor)" + if [[ "${executor}" == "CeleryExecutor" ]]; then + local connection_url + connection_url="$(airflow config get-value celery broker_url)" + wait_for_connection "${connection_url}" + fi +} + +function exec_to_bash_or_python_command_if_specified() { + # If one of the commands: 'bash', 'python' is used, either run appropriate + # command with exec + if [[ ${AIRFLOW_COMMAND} == "bash" ]]; then + shift + exec "/bin/bash" "${@}" + elif [[ ${AIRFLOW_COMMAND} == "python" ]]; then + shift + exec "python" "${@}" + fi +} + +function check_uid_gid() { + if [[ $(id -g) == "0" ]]; then + return + fi + if [[ $(id -u) == "50000" ]]; then + >&2 echo + >&2 echo "WARNING! You should run the image with GID (Group ID) set to 0" + >&2 echo " even if you use 'airflow' user (UID=50000)" + >&2 echo + >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" + >&2 echo + >&2 echo " This is to make sure you can run the image with an arbitrary UID in the future." 
+ >&2 echo + >&2 echo " See more about it in the Airflow's docker image documentation" + >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" + >&2 echo + # We still allow the image to run with `airflow` user. + return + else + >&2 echo + >&2 echo "ERROR! You should run the image with GID=0" + >&2 echo + >&2 echo " You started the image with UID=$(id -u) and GID=$(id -g)" + >&2 echo + >&2 echo "The image should always be run with GID (Group ID) set to 0 regardless of the UID used." + >&2 echo " This is to make sure you can run the image with an arbitrary UID." + >&2 echo + >&2 echo " See more about it in the Airflow's docker image documentation" + >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" + # This will not work so we fail hard + exit 1 + fi +} + +unset PIP_USER + +check_uid_gid + +umask 0002 + +CONNECTION_CHECK_MAX_COUNT=${CONNECTION_CHECK_MAX_COUNT:=20} +readonly CONNECTION_CHECK_MAX_COUNT + +CONNECTION_CHECK_SLEEP_TIME=${CONNECTION_CHECK_SLEEP_TIME:=3} +readonly CONNECTION_CHECK_SLEEP_TIME + +create_system_user_if_missing +set_pythonpath_for_root_user +if [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then + wait_for_airflow_db +fi + +if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] || [[ -n "${_AIRFLOW_DB_MIGRATE=}" ]] ; then + migrate_db +fi + +if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] ; then + >&2 echo "WARNING: Environment variable '_AIRFLOW_DB_UPGRADE' is deprecated please use '_AIRFLOW_DB_MIGRATE' instead" +fi + +if [[ -n "${_AIRFLOW_WWW_USER_CREATE=}" ]] ; then + create_www_user +fi + +if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then + >&2 echo + >&2 echo "!!!!! Installing additional requirements: '${_PIP_ADDITIONAL_REQUIREMENTS}' !!!!!!!!!!!!" + >&2 echo + >&2 echo "WARNING: This is a development/test feature only. NEVER use it in production!" 
+ >&2 echo " Instead, build a custom image as described in" + >&2 echo + >&2 echo " https://airflow.apache.org/docs/docker-stack/build.html" + >&2 echo + >&2 echo " Adding requirements at container startup is fragile and is done every time" + >&2 echo " the container starts, so it is only useful for testing and trying out" + >&2 echo " of adding dependencies." + >&2 echo + pip install --root-user-action ignore ${_PIP_ADDITIONAL_REQUIREMENTS} +fi + + +exec_to_bash_or_python_command_if_specified "${@}" + +if [[ ${AIRFLOW_COMMAND} == "airflow" ]]; then + AIRFLOW_COMMAND="${2:-}" + shift +fi + +if [[ ${AIRFLOW_COMMAND} =~ ^(scheduler|celery)$ ]] \ + && [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then + wait_for_celery_broker +fi + +exec "airflow" "${@}" +EOF + +# The content below is automatically copied from scripts/docker/clean-logs.sh +COPY <<"EOF" /clean-logs.sh +#!/usr/bin/env bash + + +set -euo pipefail + +readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}" +readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}" + +trap "exit" INT TERM + +readonly EVERY=$((15*60)) + +echo "Cleaning logs every $EVERY seconds" + +while true; do + echo "Trimming airflow logs to ${RETENTION} days." + find "${DIRECTORY}"/logs \ + -type d -name 'lost+found' -prune -o \ + -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \ + xargs -0 rm -f + + find "${DIRECTORY}"/logs -type d -empty -delete || true + + seconds=$(( $(date -u +%s) % EVERY)) + (( seconds < 1 )) || sleep $((EVERY - seconds - 1)) + sleep 1 +done +EOF + +# The content below is automatically copied from scripts/docker/airflow-scheduler-autorestart.sh +COPY <<"EOF" /airflow-scheduler-autorestart.sh +#!/usr/bin/env bash + +while echo "Running"; do + airflow scheduler -n 5 + return_code=$? + if (( return_code != 0 )); then + echo "Scheduler crashed with exit code $return_code. Respawning.." 
>&2 + date >> /tmp/airflow_scheduler_errors.txt + fi + + sleep 1 +done +EOF + +############################################################################################## +# This is the build image where we build all dependencies +############################################################################################## +FROM ${PYTHON_BASE_IMAGE} as airflow-build-image + +# Nolog bash flag is currently ignored - but you can replace it with other flags (for example +# xtrace - to show commands executed) +SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] + +ARG PYTHON_BASE_IMAGE +ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ + DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ + LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ + PIP_CACHE_DIR=/tmp/.cache/pip + +ARG DEV_APT_DEPS="" +ARG ADDITIONAL_DEV_APT_DEPS="" +ARG DEV_APT_COMMAND="" +ARG ADDITIONAL_DEV_APT_COMMAND="" +ARG ADDITIONAL_DEV_APT_ENV="" + +ENV DEV_APT_DEPS=${DEV_APT_DEPS} \ + ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \ + DEV_APT_COMMAND=${DEV_APT_COMMAND} \ + ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND} \ + ADDITIONAL_DEV_APT_ENV=${ADDITIONAL_DEV_APT_ENV} + +COPY --from=scripts install_os_dependencies.sh /scripts/docker/ +RUN bash /scripts/docker/install_os_dependencies.sh dev + +ARG INSTALL_MYSQL_CLIENT="true" +ARG INSTALL_MYSQL_CLIENT_TYPE="mariadb" +ARG INSTALL_MSSQL_CLIENT="true" +ARG INSTALL_POSTGRES_CLIENT="true" +ARG AIRFLOW_PIP_VERSION + +ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ + INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ + INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \ + INSTALL_POSTGRES_CLIENT=${INSTALL_POSTGRES_CLIENT} + +COPY --from=scripts common.sh /scripts/docker/ + +# Only copy mysql/mssql/postgres installation scripts for now - so that changing the other +# scripts which are needed much later will not invalidate the docker layer here +COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh
/scripts/docker/ + +RUN bash /scripts/docker/install_mysql.sh dev && \ + bash /scripts/docker/install_mssql.sh dev && \ + bash /scripts/docker/install_postgres.sh dev +ENV PATH=${PATH}:/opt/mssql-tools/bin + +# By default we do not install from docker context files but if we decide to install from docker context +# files, we should override those variables to "docker-context-files" +ARG DOCKER_CONTEXT_FILES="Dockerfile" + +COPY ${DOCKER_CONTEXT_FILES} /docker-context-files + +ARG AIRFLOW_HOME +ARG AIRFLOW_USER_HOME_DIR +ARG AIRFLOW_UID + +RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ + --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" && \ + mkdir -p ${AIRFLOW_HOME} && chown -R "airflow:0" "${AIRFLOW_USER_HOME_DIR}" ${AIRFLOW_HOME} + +USER airflow + +ARG AIRFLOW_REPO=apache/airflow +ARG AIRFLOW_BRANCH=main +ARG AIRFLOW_EXTRAS +ARG ADDITIONAL_AIRFLOW_EXTRAS="" +# Allows to override constraints source +ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow" +ARG AIRFLOW_CONSTRAINTS_MODE="constraints" +ARG AIRFLOW_CONSTRAINTS_REFERENCE="" +ARG AIRFLOW_CONSTRAINTS_LOCATION="" +ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" + +# By default PIP has progress bar but you can disable it. +ARG PIP_PROGRESS_BAR +# By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up +# builds in case setup.py/setup.cfg changed. This is pure optimisation of CI/Breeze builds. +ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" +# This is airflow version that is put in the label of the image build +ARG AIRFLOW_VERSION +# By default latest released version of airflow is installed (when empty) but this value can be overridden +# and we can install version according to specification (For example ==2.0.2 or <3.0.0). 
+ARG AIRFLOW_VERSION_SPECIFICATION +# By default we install providers from PyPI but in case of Breeze build we want to install providers +# from local sources without the need of preparing provider packages upfront. This value is +# automatically overridden by Breeze scripts. +ARG INSTALL_PROVIDERS_FROM_SOURCES="false" +# Determines the way airflow is installed. By default we install airflow from PyPI `apache-airflow` package +# but it can also be `.` from local installation or GitHub URL pointing to specific branch or tag +# of Airflow. Note that for local source installation you need to have local sources of +# Airflow checked out together with the Dockerfile and AIRFLOW_SOURCES_FROM and AIRFLOW_SOURCES_TO +# set to "." and "/opt/airflow" respectively. +ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" +# By default we do not upgrade to latest dependencies +ARG UPGRADE_TO_NEWER_DEPENDENCIES="false" +# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow +# but in case of breeze/CI builds we use latest sources and we override +# those SOURCES_FROM/TO with "."
and "/opt/airflow" respectively +ARG AIRFLOW_SOURCES_FROM="Dockerfile" +ARG AIRFLOW_SOURCES_TO="/Dockerfile" + + +RUN if [[ -f /docker-context-files/pip.conf ]]; then \ + mkdir -p ${AIRFLOW_USER_HOME_DIR}/.config/pip; \ + cp /docker-context-files/pip.conf "${AIRFLOW_USER_HOME_DIR}/.config/pip/pip.conf"; \ + fi; \ + if [[ -f /docker-context-files/.piprc ]]; then \ + cp /docker-context-files/.piprc "${AIRFLOW_USER_HOME_DIR}/.piprc"; \ + fi + +# Additional PIP flags passed to all pip install commands except reinstalling pip itself +ARG ADDITIONAL_PIP_INSTALL_FLAGS="" + +ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ + AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ + INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ + AIRFLOW_VERSION=${AIRFLOW_VERSION} \ + AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ + AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ + AIRFLOW_SOURCES_FROM=${AIRFLOW_SOURCES_FROM} \ + AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO} \ + AIRFLOW_REPO=${AIRFLOW_REPO} \ + AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \ + AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \ + CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \ + AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \ + AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \ + AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ + DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ + PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \ + AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ + PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ + ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \ + AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ + AIRFLOW_HOME=${AIRFLOW_HOME} \ + AIRFLOW_UID=${AIRFLOW_UID} \ + AIRFLOW_INSTALL_EDITABLE_FLAG="" \ + UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \ + # By default PIP installs everything to ~/.local + 
PIP_USER="true" + +# Copy all scripts required for installation - changing any of those should lead to +# rebuilding from here +COPY --from=scripts common.sh install_pip_version.sh \ + install_airflow_dependencies_from_branch_tip.sh /scripts/docker/ + +# We can set this value to true in case we want to install .whl/.tar.gz packages placed in the +# docker-context-files folder. This can be done for both additional packages you want to install +# as well as Airflow and Provider packages (it will be automatically detected if airflow +# is installed from docker-context files rather than from PyPI) +ARG INSTALL_PACKAGES_FROM_CONTEXT="false" + +# Normally constraints are not used when context packages are built - because we might have packages +# that are conflicting with Airflow constraints, however there are cases when we want to use constraints +# for example in CI builds when we already have source-package constraints - either from github branch or +# from eager-upgraded constraints by the CI builds +ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" + +# In case of Production build image segment we want to pre-install main version of airflow +# dependencies from GitHub so that we do not have to always reinstall them from scratch. +# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") +# are uninstalled, only dependencies remain. +# The cache is only used when "upgrade to newer dependencies" is not set to automatically +# account for removed dependencies (we do not install them in the first place) and in case +# INSTALL_PACKAGES_FROM_CONTEXT is not set (because then caching it from main makes no sense).
+RUN bash /scripts/docker/install_pip_version.sh; \ + if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ + ${INSTALL_PACKAGES_FROM_CONTEXT} == "false" && \ + ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \ + bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ + fi + +COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO} + +# Add extra python dependencies +ARG ADDITIONAL_PYTHON_DEPS="" + + +ARG VERSION_SUFFIX_FOR_PYPI="" + +ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \ + INSTALL_PACKAGES_FROM_CONTEXT=${INSTALL_PACKAGES_FROM_CONTEXT} \ + USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES=${USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES} \ + VERSION_SUFFIX_FOR_PYPI=${VERSION_SUFFIX_FOR_PYPI} + +WORKDIR ${AIRFLOW_HOME} + +COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ + install_additional_dependencies.sh /scripts/docker/ + +# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from +# an incorrect architecture. +ARG TARGETARCH +# Value to be able to easily change cache id and therefore use a bare new cache +ARG PIP_CACHE_EPOCH="0" + +# hadolint ignore=SC2086, SC2010, DL3042 +RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ + if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ + bash /scripts/docker/install_from_docker_context_files.sh; \ + fi; \ + if ! 
airflow version 2>/dev/null >/dev/null; then \ + bash /scripts/docker/install_airflow.sh; \ + fi; \ + if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ + bash /scripts/docker/install_additional_dependencies.sh; \ + fi; \ + find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0 rm -f || true ; \ + find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; \ + # make sure that all directories and files in .local are also group accessible + find "${AIRFLOW_USER_HOME_DIR}/.local" -executable -print0 | xargs --null chmod g+x; \ + find "${AIRFLOW_USER_HOME_DIR}/.local" -print0 | xargs --null chmod g+rw + +# In case there is a requirements.txt file in "docker-context-files" it will be installed +# during the build additionally to whatever has been installed so far. It is recommended that +# the requirements.txt contains only dependencies with == version specification +# hadolint ignore=DL3042 +RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ + if [[ -f /docker-context-files/requirements.txt ]]; then \ + pip install --user -r /docker-context-files/requirements.txt; \ + fi + +############################################################################################## +# This is the actual Airflow image - much smaller than the build one. We copy +# installed Airflow and all its dependencies from the build image to make it smaller. 
+############################################################################################## +FROM ${PYTHON_BASE_IMAGE} as main + +# Nolog bash flag is currently ignored - but you can replace it with other flags (for example +# xtrace - to show commands executed) +SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] + +ARG AIRFLOW_UID + +LABEL org.apache.airflow.distro="debian" \ + org.apache.airflow.module="airflow" \ + org.apache.airflow.component="airflow" \ + org.apache.airflow.image="airflow" \ + org.apache.airflow.uid="${AIRFLOW_UID}" + +ARG PYTHON_BASE_IMAGE +ARG AIRFLOW_PIP_VERSION + +ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ + # Make sure noninteractive debian install is used and language variables set + DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ + LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib \ + AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} + +ARG RUNTIME_APT_DEPS="" +ARG ADDITIONAL_RUNTIME_APT_DEPS="" +ARG RUNTIME_APT_COMMAND="echo" +ARG ADDITIONAL_RUNTIME_APT_COMMAND="" +ARG ADDITIONAL_RUNTIME_APT_ENV="" +ARG INSTALL_MYSQL_CLIENT="true" +ARG INSTALL_MYSQL_CLIENT_TYPE="mysql" +ARG INSTALL_MSSQL_CLIENT="true" +ARG INSTALL_POSTGRES_CLIENT="true" + +ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \ + ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS} \ + RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND} \ + ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND} \ + INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \ + INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \ + INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \ + INSTALL_POSTGRES_CLIENT=${INSTALL_POSTGRES_CLIENT} \ + GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm" \ + AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} + +COPY --from=scripts install_os_dependencies.sh /scripts/docker/ +RUN bash /scripts/docker/install_os_dependencies.sh runtime + +# Having the variable in final image allows to 
disable providers manager warnings when +# production image is prepared from sources rather than from package +ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow" +ARG AIRFLOW_IMAGE_REPOSITORY +ARG AIRFLOW_IMAGE_README_URL +ARG AIRFLOW_USER_HOME_DIR +ARG AIRFLOW_HOME + +# By default PIP installs everything to ~/.local +ENV PATH="${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH}" \ + AIRFLOW_UID=${AIRFLOW_UID} \ + AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \ + AIRFLOW_HOME=${AIRFLOW_HOME} + +# THE 3 LINES ARE ONLY NEEDED IN ORDER TO MAKE PYMSSQL BUILD WORK WITH LATEST CYTHON +# AND SHOULD BE REMOVED WHEN WORKAROUND IN install_mssql.sh IS REMOVED +ARG AIRFLOW_PIP_VERSION=23.3.2 +ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} +COPY --from=scripts common.sh /scripts/docker/ + +# Only copy mysql/mssql installation scripts for now - so that changing the other +# scripts which are needed much later will not invalidate the docker layer here. +COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/ +# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an +# unexpected result - the cache for Dockerfiles might get invalidated in case the host system +# had different umask set and group x bit was not set. In Azure the bit might be not set at all. +# That also protects against AUFS Docker backend problem where changing the executable bit required sync +RUN bash /scripts/docker/install_mysql.sh prod \ + && bash /scripts/docker/install_mssql.sh prod \ + && bash /scripts/docker/install_postgres.sh prod \ + && adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \ + --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" \ +# Make Airflow files belong to the root group and are accessible. 
This is to accommodate the guidelines from +# OpenShift https://docs.openshift.com/enterprise/3.0/creating_images/guidelines.html + && mkdir -pv "${AIRFLOW_HOME}" \ + && mkdir -pv "${AIRFLOW_HOME}/dags" \ + && mkdir -pv "${AIRFLOW_HOME}/logs" \ + && chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ + && chmod -R g+rw "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" \ + && find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x \ + && find "${AIRFLOW_USER_HOME_DIR}" -executable -print0 | xargs --null chmod g+x + +COPY --from=airflow-build-image --chown=airflow:0 \ + "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local" +COPY --from=scripts entrypoint_prod.sh /entrypoint +COPY --from=scripts clean-logs.sh /clean-logs +COPY --from=scripts airflow-scheduler-autorestart.sh /airflow-scheduler-autorestart + +# Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift +# See https://github.com/apache/airflow/issues/9248 +# Set default groups for airflow and root user + +RUN chmod a+rx /entrypoint /clean-logs \ + && chmod g=u /etc/passwd \ + && chmod g+w "${AIRFLOW_USER_HOME_DIR}/.local" \ + && usermod -g 0 airflow -G 0 + +# make sure that the venv is activated for all users +# including plain sudo, sudo with --interactive flag +RUN sed --in-place=.bak "s/secure_path=\"/secure_path=\"\/.venv\/bin:/" /etc/sudoers + +ARG AIRFLOW_VERSION + +COPY --from=scripts install_pip_version.sh /scripts/docker/ +RUN bash /scripts/docker/install_pip_version.sh + +# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation +# to learn more about the way how signals are handled by the image +# Also set airflow as nice PROMPT message. 
+ENV DUMB_INIT_SETSID="1" \ + PS1="(airflow)" \ + AIRFLOW_VERSION=${AIRFLOW_VERSION} \ + AIRFLOW__CORE__LOAD_EXAMPLES="false" \ + PIP_USER="true" \ + PATH="/root/bin:${PATH}" + +# Add protection against running pip as root user +RUN mkdir -pv /root/bin +COPY --from=scripts pip /root/bin/pip +RUN chmod u+x /root/bin/pip + +WORKDIR ${AIRFLOW_HOME} + +EXPOSE 8080 + +USER ${AIRFLOW_UID} + +# Those should be set and used as late as possible as any change in commit/build otherwise invalidates the +# layers right after +ARG BUILD_ID +ARG COMMIT_SHA +ARG AIRFLOW_IMAGE_REPOSITORY +ARG AIRFLOW_IMAGE_DATE_CREATED + +ENV BUILD_ID=${BUILD_ID} COMMIT_SHA=${COMMIT_SHA} + +LABEL org.apache.airflow.distro="debian" \ + org.apache.airflow.module="airflow" \ + org.apache.airflow.component="airflow" \ + org.apache.airflow.image="airflow" \ + org.apache.airflow.version="${AIRFLOW_VERSION}" \ + org.apache.airflow.uid="${AIRFLOW_UID}" \ + org.apache.airflow.main-image.build-id="${BUILD_ID}" \ + org.apache.airflow.main-image.commit-sha="${COMMIT_SHA}" \ + org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \ + org.opencontainers.image.created=${AIRFLOW_IMAGE_DATE_CREATED} \ + org.opencontainers.image.authors="dev@airflow.apache.org" \ + org.opencontainers.image.url="https://airflow.apache.org" \ + org.opencontainers.image.documentation="https://airflow.apache.org/docs/docker-stack/index.html" \ + org.opencontainers.image.version="${AIRFLOW_VERSION}" \ + org.opencontainers.image.revision="${COMMIT_SHA}" \ + org.opencontainers.image.vendor="Apache Software Foundation" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.ref.name="airflow" \ + org.opencontainers.image.title="Production Airflow Image" \ + org.opencontainers.image.description="Reference, production-ready Apache Airflow image" +ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"] +CMD []