From c766e4bd7f39f57708525987f4f7b31a28e1e7f5 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 28 Aug 2021 19:38:53 +0200 Subject: [PATCH] Stop using docker manifest to check for image presence We need to set the "experimental" flag in CI in order to use `docker manifest` command to check for presence of the images in ghcr.io. In order to use them we need to enable experimental features via ~/.docker/config.json. Sometimes, very rarely, we had the case that the config file got broken and the problem turned out to be that we tried to do this experimental replacement in parallel by several running "wait image" commands (:facepalm: here for myself) that were apparenlty overriding the same config.json at the same time in non-atomic way, which (very rarely) led to corrupted file. However for quite some time we pulled the image immediately after it was available, in order to verify the image, so rather than checking if the image is there via manifest, we can simply pull the image and effect will be the same - if it fails, the image is not there, if it has been pulled - we can immediately verify it. We do not need experimental flag at all for that so no messing around with .docker/config.json is needed at all. --- .../ci/images/ci_prepare_ci_image_on_ci.sh | 3 +- .../ci/images/ci_prepare_prod_image_on_ci.sh | 3 +- scripts/ci/images/ci_push_ci_images.sh | 2 + .../ci/images/ci_push_production_images.sh | 2 + .../ci_wait_for_and_verify_all_ci_images.sh | 5 +- .../ci_wait_for_and_verify_all_prod_images.sh | 4 +- .../images/ci_wait_for_and_verify_ci_image.sh | 15 +-- .../ci_wait_for_and_verify_prod_image.sh | 16 +--- scripts/ci/libraries/_build_images.sh | 95 ++++++------------- scripts/ci/libraries/_initialization.sh | 3 - .../ci/libraries/_push_pull_remove_images.sh | 38 ++++---- scripts/ci/tools/free_space.sh | 4 + 12 files changed, 67 insertions(+), 123 deletions(-) diff --git a/scripts/ci/images/ci_prepare_ci_image_on_ci.sh b/scripts/ci/images/ci_prepare_ci_image_on_ci.sh index d0d4aac38b36d..8fe92990e106c 100755 --- a/scripts/ci/images/ci_prepare_ci_image_on_ci.sh +++ b/scripts/ci/images/ci_prepare_ci_image_on_ci.sh @@ -31,7 +31,8 @@ function build_ci_image_on_ci() { # Pretend that the image was build. We already have image with the right sources baked in! # so all the checksums are assumed to be correct md5sum::calculate_md5sum_for_all_files - build_images::wait_for_image_tag "${AIRFLOW_CI_IMAGE}" ":${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + local image_name_with_tag="${AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + push_pull_remove_images::wait_for_image "${image_name_with_tag}" md5sum::update_all_md5_with_group else build_images::rebuild_ci_image_if_needed diff --git a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh index 8027d786786d4..0aea3c7fdfd7b 100755 --- a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh +++ b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh @@ -32,7 +32,8 @@ function build_prod_images_on_ci() { build_images::prepare_prod_build if [[ ${GITHUB_REGISTRY_WAIT_FOR_IMAGE} == "true" ]]; then - build_images::wait_for_image_tag "${AIRFLOW_PROD_IMAGE}" ":${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + local image_name_with_tag="${AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + push_pull_remove_images::wait_for_image "${image_name_with_tag}" else build_images::build_prod_images_from_locally_built_airflow_packages fi diff --git a/scripts/ci/images/ci_push_ci_images.sh b/scripts/ci/images/ci_push_ci_images.sh index 5bd5eb3c006f6..6e232fa7b8bf9 100755 --- a/scripts/ci/images/ci_push_ci_images.sh +++ b/scripts/ci/images/ci_push_ci_images.sh @@ -20,4 +20,6 @@ build_images::prepare_ci_build +build_images::login_to_docker_registry + push_pull_remove_images::push_ci_images_to_github diff --git a/scripts/ci/images/ci_push_production_images.sh b/scripts/ci/images/ci_push_production_images.sh index 43e14d3d927ab..7e0e0eae5f2a4 100755 --- a/scripts/ci/images/ci_push_production_images.sh +++ b/scripts/ci/images/ci_push_production_images.sh @@ -20,4 +20,6 @@ build_images::prepare_prod_build +build_images::login_to_docker_registry + push_pull_remove_images::push_prod_images_to_github diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh b/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh index 39dfe8f5db72c..cb45c1ff3f5da 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh +++ b/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh @@ -25,7 +25,7 @@ source "${LIBRARIES_DIR}/_all_libs.sh" initialization::set_output_color_variables -PARALLEL_TAIL_LENGTH=5 +export PARALLEL_TAIL_LENGTH=5 parallel::make_sure_gnu_parallel_is_installed @@ -35,11 +35,12 @@ echo echo "${COLOR_BLUE}Waiting for all CI images to appear${COLOR_RESET}" echo - parallel::initialize_monitoring parallel::monitor_progress +build_images::login_to_docker_registry + # shellcheck disable=SC2086 parallel --results "${PARALLEL_MONITORED_DIR}" \ "$( dirname "${BASH_SOURCE[0]}" )/ci_wait_for_and_verify_ci_image.sh" ::: \ diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh b/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh index 6786a3189b460..5df66a31ce81c 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh +++ b/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh @@ -25,7 +25,7 @@ source "${LIBRARIES_DIR}/_all_libs.sh" initialization::set_output_color_variables -PARALLEL_TAIL_LENGTH=5 +export PARALLEL_TAIL_LENGTH=5 parallel::make_sure_gnu_parallel_is_installed @@ -39,6 +39,8 @@ parallel::initialize_monitoring parallel::monitor_progress +build_images::login_to_docker_registry + # shellcheck disable=SC2086 parallel --results "${PARALLEL_MONITORED_DIR}" \ "$( dirname "${BASH_SOURCE[0]}" )/ci_wait_for_and_verify_prod_image.sh" ::: \ diff --git a/scripts/ci/images/ci_wait_for_and_verify_ci_image.sh b/scripts/ci/images/ci_wait_for_and_verify_ci_image.sh index 1d52f69cab2de..bc5034a58fabd 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_ci_image.sh +++ b/scripts/ci/images/ci_wait_for_and_verify_ci_image.sh @@ -30,22 +30,9 @@ shift image_name_with_tag="${AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" -function pull_ci_image() { - start_end::group_start "Pulling image: ${IMAGE_AVAILABLE}" - push_pull_remove_images::pull_image_if_not_present_or_forced "${IMAGE_AVAILABLE}" - start_end::group_end -} - -start_end::group_start "Configure Docker Registry" -build_images::configure_docker_registry -start_end::group_end - -start_end::group_start "Waiting for ${image_name_with_tag}" -push_pull_remove_images::wait_for_image "${image_name_with_tag}" build_images::prepare_ci_build -start_end::group_end -pull_ci_image +push_pull_remove_images::wait_for_image "${image_name_with_tag}" if [[ ${VERIFY_IMAGE=} != "false" ]]; then verify_image::verify_ci_image "${image_name_with_tag}" diff --git a/scripts/ci/images/ci_wait_for_and_verify_prod_image.sh b/scripts/ci/images/ci_wait_for_and_verify_prod_image.sh index aae05ae3e68e0..bc2f80f25ce99 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_prod_image.sh +++ b/scripts/ci/images/ci_wait_for_and_verify_prod_image.sh @@ -30,22 +30,8 @@ shift image_name_with_tag="${AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" -function pull_prod_image() { - start_end::group_start "Pulling image: ${IMAGE_AVAILABLE}" - push_pull_remove_images::pull_image_if_not_present_or_forced "${IMAGE_AVAILABLE}" - start_end::group_end -} - -start_end::group_start "Configure Docker Registry" -build_images::configure_docker_registry -start_end::group_end - -start_end::group_start "Waiting for ${image_name_with_tag}" -push_pull_remove_images::wait_for_image "${image_name_with_tag}" build_images::prepare_prod_build -start_end::group_end - -pull_prod_image +push_pull_remove_images::wait_for_image "${image_name_with_tag}" if [[ ${VERIFY_IMAGE=} != "false" ]]; then verify_image::verify_prod_image "${image_name_with_tag}" diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh index 270d71bc27196..4a7d49cdf1720 100644 --- a/scripts/ci/libraries/_build_images.sh +++ b/scripts/ci/libraries/_build_images.sh @@ -374,7 +374,7 @@ function build_images::get_docker_cache_image_names() { export PYTHON_BASE_IMAGE="python:${PYTHON_MAJOR_MINOR_VERSION}-slim-buster" local image_name - image_name="${GITHUB_REGISTRY}/$(build_images::get_github_container_registry_image_prefix)" + image_name="ghcr.io/$(build_images::get_github_container_registry_image_prefix)" # Example: # ghcr.io/apache/airflow/main/python:3.8-slim-buster @@ -412,36 +412,33 @@ function build_images::get_docker_cache_image_names() { } # If GitHub Registry is used, login to the registry using GITHUB_USERNAME and -# GITHUB_TOKEN. In case Personal Access token is not set, skip logging in -# Also enable experimental features of docker (we need `docker manifest` command) -function build_images::configure_docker_registry() { - local token="${GITHUB_TOKEN}" - if [[ -z "${token}" ]]; then - verbosity::print_info - verbosity::print_info "Skip logging in to GitHub Registry. No Token available!" - verbosity::print_info - elif [[ ${AIRFLOW_LOGIN_TO_GITHUB_REGISTRY=} != "true" ]]; then - verbosity::print_info - verbosity::print_info "Skip logging in to GitHub Registry. AIRFLOW_LOGIN_TO_GITHUB_REGISTRY != true" - verbosity::print_info - elif [[ -n "${token}" ]]; then - echo "${token}" | docker_v login \ - --username "${GITHUB_USERNAME:-apache}" \ - --password-stdin \ - "${GITHUB_REGISTRY}" - else - verbosity::print_info "Skip Login to GitHub Registry ${GITHUB_REGISTRY} as token is missing" - fi - if [[ ${CI} == "true" ]]; then - # we only need that on CI in order to run "docker manifest" when we check if remote image exists - # See function push_pull_remove_images::check_image_manifest() - local new_config - new_config=$(jq '.experimental = "enabled"' "${HOME}/.docker/config.json" || true) - if [[ ${new_config} != "" ]]; then - echo "${new_config}" > "${HOME}/.docker/config.json" +# GITHUB_TOKEN. We only need to login to docker registry on CI and only when we push +# images. All other images we pull from docker registry are public and we do not need +# to login there. +function build_images::login_to_docker_registry() { + if [[ "${CI}" == "true" ]]; then + start_end::group_start "Configure Docker Registry" + local token="${GITHUB_TOKEN}" + if [[ -z "${token}" ]]; then + verbosity::print_info + verbosity::print_info "Skip logging in to GitHub Registry. No Token available!" + verbosity::print_info + elif [[ ${AIRFLOW_LOGIN_TO_GITHUB_REGISTRY=} != "true" ]]; then + verbosity::print_info + verbosity::print_info "Skip logging in to GitHub Registry. AIRFLOW_LOGIN_TO_GITHUB_REGISTRY != true" + verbosity::print_info + elif [[ -n "${token}" ]]; then + # logout from the repository first - so that we do not keep us logged in if the token + # already expired (which can happen if we have a long build running) + docker_v logout "ghcr.io" + # The login might succeed or not - in some cases, when we pull public images in forked + # repos it might fail, but the pulls will continue to work + echo "${token}" | docker_v login \ + --username "${GITHUB_USERNAME:-apache}" \ + --password-stdin \ + "ghcr.io" || true else - echo "${COLOR_YELLOW}Could not set experimental flag in ${HOME}/.docker/config.json ${COLOR_RESET}" - echo "${COLOR_YELLOW}docker manifest command will likely not work${COLOR_RESET}" + verbosity::print_info "Skip Login to GitHub Container Registry as token is missing" fi fi } @@ -457,7 +454,6 @@ function build_images::prepare_ci_build() { export AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS:="${DEFAULT_CI_EXTRAS}"}" readonly AIRFLOW_EXTRAS - build_images::configure_docker_registry sanity_checks::go_to_airflow_sources permissions::fix_group_permissions } @@ -754,7 +750,6 @@ function build_images::prepare_prod_build() { export AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS:="${DEFAULT_PROD_EXTRAS}"}" readonly AIRFLOW_EXTRAS - build_images::configure_docker_registry AIRFLOW_BRANCH_FOR_PYPI_PRELOADING="${BRANCH_NAME}" sanity_checks::go_to_airflow_sources } @@ -895,42 +890,6 @@ function build_images::tag_image() { done } -# Waits for image tag to appear in GitHub Registry, pulls it and tags with the target tag -# Parameters: -# $1 - image name to wait for -# $2 - fallback image to wait for -# $3, $4, ... - target tags to tag the image with -function build_images::wait_for_image_tag() { - - local image_name="${1}" - local image_suffix="${2}" - shift 2 - - local image_to_wait_for="${image_name}${image_suffix}" - start_end::group_start "Wait for image tag ${image_to_wait_for}" - while true; do - set +e - echo "${COLOR_BLUE}Docker pull ${image_to_wait_for} ${COLOR_RESET}" >"${OUTPUT_LOG}" - docker_v pull "${image_to_wait_for}" >>"${OUTPUT_LOG}" 2>&1 - set -e - local image_hash - echo "${COLOR_BLUE} Docker images -q ${image_to_wait_for}${COLOR_RESET}" >>"${OUTPUT_LOG}" - image_hash="$(docker images -q "${image_to_wait_for}" 2>>"${OUTPUT_LOG}" || true)" - if [[ -z "${image_hash}" ]]; then - echo - echo "The image ${image_to_wait_for} is not yet available. No local hash for the image" - echo - echo "Last log:" - cat "${OUTPUT_LOG}" || true - echo - else - build_images::tag_image "${image_to_wait_for}" "${image_name}:latest" "${@}" - break - fi - done - start_end::group_end -} - # We use pulled docker image cache by default for CI images to speed up the builds # and local to speed up iteration on kerberos tests function build_images::determine_docker_cache_strategy() { diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index f9ad3c59569e1..114b25c9c5279 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -556,8 +556,6 @@ function initialization::initialize_git_variables() { } function initialization::initialize_github_variables() { - # Defaults for interacting with GitHub - export GITHUB_REGISTRY="ghcr.io" export GITHUB_REGISTRY_WAIT_FOR_IMAGE=${GITHUB_REGISTRY_WAIT_FOR_IMAGE:="false"} export GITHUB_REGISTRY_PULL_IMAGE_TAG=${GITHUB_REGISTRY_PULL_IMAGE_TAG:="latest"} export GITHUB_REGISTRY_PUSH_IMAGE_TAG=${GITHUB_REGISTRY_PUSH_IMAGE_TAG:="latest"} @@ -841,7 +839,6 @@ function initialization::make_constants_read_only() { readonly ADDITIONAL_RUNTIME_APT_DEPS readonly ADDITIONAL_RUNTIME_APT_ENV - readonly GITHUB_REGISTRY readonly GITHUB_REGISTRY_WAIT_FOR_IMAGE readonly GITHUB_REGISTRY_PULL_IMAGE_TAG readonly GITHUB_REGISTRY_PUSH_IMAGE_TAG diff --git a/scripts/ci/libraries/_push_pull_remove_images.sh b/scripts/ci/libraries/_push_pull_remove_images.sh index 750b2eb029e6a..51611ae235434 100644 --- a/scripts/ci/libraries/_push_pull_remove_images.sh +++ b/scripts/ci/libraries/_push_pull_remove_images.sh @@ -62,7 +62,7 @@ function push_pull_remove_images::pull_image_if_not_present_or_forced() { echo echo "Pulling the image ${image_to_pull}" echo - docker_v pull "${image_to_pull}" + docker pull "${image_to_pull}" fi } @@ -262,32 +262,34 @@ function push_pull_remove_images::push_prod_images_to_github () { fi } -# waits for an image to be available in GitHub Container Registry. Should be run with `set +e` -function push_pull_remove_images::check_image_manifest() { - local image_to_wait_for="${1}" - echo "GitHub Container Registry: checking for ${image_to_wait_for} via docker manifest inspect!" - docker_v manifest inspect "${image_to_wait_for}" - local res=$? - if [[ ${res} == "0" ]]; then - echo "Image: ${image_to_wait_for} found in Container Registry: ${COLOR_GREEN}OK.${COLOR_RESET}" - return 0 - else - echo "${COLOR_YELLOW}Still waiting. Not found!${COLOR_RESET}" - return 1 - fi -} # waits for an image to be available in the GitHub registry function push_pull_remove_images::wait_for_image() { + # Maximum number of tries 100 = we try for max. 100 minutes. + local MAX_TRIES=100 set +e echo " Waiting for github registry image: $1" + local count=0 while true do - if push_pull_remove_images::check_image_manifest "$1"; then - export IMAGE_AVAILABLE="$1" + if push_pull_remove_images::pull_image_if_not_present_or_forced "$1"; then break fi - sleep 30 + if [[ ${count} == "${MAX_TRIES}" ]]; then + echo "${COLOR_RED}Giving up after ${MAX_TRIES}!${COLOR_RESET}" + echo "If there were delays with building the image, maintainers could potentially restart the build when the images are ready!" + echo "Or you can run 'git commit --amend' and then push the PR again with 'git push --force-with-lease' to re-trigger the build." + return 1 + fi + echo "${COLOR_YELLOW}Failed to pull the image for ${count} time. Sleeping!${COLOR_RESET}" + sleep 60 + count=$((count + 1)) done set -e } + +function push_pull_remove_images::pull_image() { + start_end::group_start "Pulling image: $1" + push_pull_remove_images::pull_image_if_not_present_or_forced "$1" + start_end::group_end +} diff --git a/scripts/ci/tools/free_space.sh b/scripts/ci/tools/free_space.sh index d89172df0e5c9..4767d60643d3c 100755 --- a/scripts/ci/tools/free_space.sh +++ b/scripts/ci/tools/free_space.sh @@ -30,3 +30,7 @@ docker_v system prune --all --force --volumes echo "${COLOR_BLUE}Free disk space ${COLOR_RESET}" df -h + +# always logout from the docker registry - this is necessary as we can have an expired token from +# previous job!. +docker_v logout "ghcr.io"