Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions BREEZE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1931,6 +1931,21 @@ This bumps the constraint files to latest versions and stores hash of setup.py.
and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff
of changes vs the previous constraint files is printed.

Generating Providers Metadata
.............................

The release manager can generate providers metadata per provider version - information about provider versions
including the associated Airflow version for the provider version (i.e. the first Airflow version released after
the provider version has been released) and the date of the release of the provider version.

These are all of the available flags for the ``generate-providers-metadata`` command:

.. image:: ./images/breeze/output_release-management_generate-providers-metadata.svg
:target: https://raw.githubusercontent.com/apache/airflow/main/images/breeze/output_release-management_generate-providers-metadata.svg
:width: 100%
:alt: Breeze release management generate providers metadata


Releasing Production images
...........................

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import re
import shlex
import shutil
import sys
import textwrap
import time
Expand Down Expand Up @@ -57,6 +58,7 @@
option_debug_resources,
option_dry_run,
option_github_repository,
option_historical_python_version,
option_image_tag_for_running,
option_include_success_outputs,
option_install_selected_providers,
Expand All @@ -82,6 +84,7 @@
get_extra_docker_flags,
perform_environment_checks,
)
from airflow_breeze.utils.github import download_constraints_file, get_active_airflow_versions
from airflow_breeze.utils.parallel import (
GenericRegexpProgressMatcher,
SummarizeAfter,
Expand All @@ -90,10 +93,16 @@
)
from airflow_breeze.utils.path_utils import (
AIRFLOW_SOURCES_ROOT,
CONSTRAINTS_CACHE_DIR,
DIST_DIR,
PROVIDER_METADATA_JSON_FILE_PATH,
cleanup_python_generated_files,
)
from airflow_breeze.utils.provider_dependencies import DEPENDENCIES, get_related_providers
from airflow_breeze.utils.provider_dependencies import (
DEPENDENCIES,
generate_providers_metadata_for_package,
get_related_providers,
)
from airflow_breeze.utils.python_versions import get_python_version_list
from airflow_breeze.utils.run_utils import (
RunCommandResult,
Expand Down Expand Up @@ -1107,6 +1116,67 @@ class ProviderPRInfo(NamedTuple):
get_console().print(" ".join(users))


def get_all_constraint_files(refresh_constraints: bool, python_version: str) -> None:
    """
    Make sure constraint files for the requested Python version are present in the local cache.

    The files are stored in ``CONSTRAINTS_CACHE_DIR`` and are named
    ``constraints-<airflow_version>-python-<python_version>.txt``. The download is skipped when the
    cache already contains at least one file for the requested Python version.

    :param refresh_constraints: when True, the whole cache directory is removed first, which forces
        a fresh download
    :param python_version: Python version to download the constraint files for
    """
    if refresh_constraints:
        shutil.rmtree(CONSTRAINTS_CACHE_DIR, ignore_errors=True)
    # The cache directory is shared by all Python versions. Checking only that the directory exists
    # would skip the download for a Python version that has never been fetched, leaving the caller
    # with no constraints at all - so look for files of this particular Python version instead.
    already_cached = CONSTRAINTS_CACHE_DIR.exists() and any(
        CONSTRAINTS_CACHE_DIR.glob(f"constraints-*-python-{python_version}.txt")
    )
    if already_cached:
        return
    with ci_group(f"Downloading constraints for all Airflow versions for Python {python_version}"):
        CONSTRAINTS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        all_airflow_versions = get_active_airflow_versions(confirm=False)
        for airflow_version in all_airflow_versions:
            if not download_constraints_file(
                airflow_version=airflow_version,
                python_version=python_version,
                include_provider_dependencies=True,
                output_file=CONSTRAINTS_CACHE_DIR
                / f"constraints-{airflow_version}-python-{python_version}.txt",
            ):
                # A failed download is only reported - the remaining versions are still attempted.
                get_console().print(
                    "[warning]Could not download constraints for "
                    f"Airflow {airflow_version} and Python {python_version}[/]"
                )


# Parses names of cached constraint files: group 1 is the Airflow version, group 2 the Python version.
# The dot before "txt" is escaped so that only a literal ".txt" extension is accepted.
MATCH_CONSTRAINTS_FILE_REGEX = re.compile(r"constraints-(.*)-python-(.*)\.txt")


def load_constraints(python_version: str) -> dict[str, dict[str, str]]:
    """
    Load the cached constraint files for the given Python version.

    Only files in ``CONSTRAINTS_CACHE_DIR`` named
    ``constraints-<airflow_version>-python-<python_version>.txt`` are read.

    :param python_version: Python version whose cached constraint files should be loaded
    :return: dictionary keyed by Airflow version (taken from the file name); each value maps
        a package name to the version pinned for it in that constraint file
    """
    constraints: dict[str, dict[str, str]] = {}
    for constraints_file in CONSTRAINTS_CACHE_DIR.glob(f"constraints-*-python-{python_version}.txt"):
        filename_match = MATCH_CONSTRAINTS_FILE_REGEX.match(constraints_file.name)
        if not filename_match:
            continue
        pinned_versions: dict[str, str] = {}
        for raw_line in constraints_file.read_text().splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            package, separator, version = line.partition("==")
            if not separator:
                # Not a "package==version" pin - skip it rather than fail the whole
                # load with an unpacking error.
                continue
            pinned_versions[package.strip()] = version.strip()
        constraints[filename_match.group(1)] = pinned_versions
    return constraints


@release_management.command(name="generate-providers-metadata", help="Generates metadata for providers.")
@click.option(
    "--refresh-constraints",
    is_flag=True,
    help="Refresh constraints before generating metadata",
)
@option_historical_python_version
def generate_providers_metadata(refresh_constraints: bool, python: str | None):
    """
    Generate metadata for all providers and store it as JSON in ``PROVIDER_METADATA_JSON_FILE_PATH``.

    :param refresh_constraints: passed to ``get_all_constraint_files`` to control whether cached
        constraint files are refreshed before the metadata is generated
    :param python: Python version of the constraint files to use; when None,
        ``DEFAULT_PYTHON_MAJOR_MINOR_VERSION`` is used
    """
    import json

    python_version = DEFAULT_PYTHON_MAJOR_MINOR_VERSION if python is None else python
    get_all_constraint_files(refresh_constraints=refresh_constraints, python_version=python_version)
    constraints = load_constraints(python_version=python_version)
    all_metadata: dict[str, dict[str, dict[str, str]]] = {}
    for package_id in DEPENDENCIES:
        with ci_group(f"Generating metadata for {package_id}"):
            package_metadata = generate_providers_metadata_for_package(package_id, constraints)
            # Providers for which no metadata was produced are left out of the output file.
            if package_metadata:
                all_metadata[package_id] = package_metadata
    serialized_metadata = json.dumps(all_metadata, indent=4, sort_keys=True)
    PROVIDER_METADATA_JSON_FILE_PATH.write_text(serialized_metadata)


# AIRFLOW RELEASE COMMANDS
release_management.add_command(publish_release_candidate)
release_management.add_command(airflow_release)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@
],
}
],
"breeze release-management generate-providers-metadata": [
{"name": "Generate providers metadata flags", "options": ["--refresh-constraints", "--python"]}
],
"breeze release-management start-rc-process": [
{
"name": "Start RC process flags",
Expand Down
9 changes: 2 additions & 7 deletions dev/breeze/src/airflow_breeze/commands/sbom_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
option_answer,
option_debug_resources,
option_dry_run,
option_historical_python_version,
option_include_success_outputs,
option_parallelism,
option_run_in_parallel,
Expand Down Expand Up @@ -92,13 +93,7 @@ def sbom():
envvar="AIRFLOW_VERSION",
help="Version of airflow to update sbom from. (defaulted to all active airflow versions)",
)
@click.option(
"--python",
type=BetterChoice(ALL_HISTORICAL_PYTHON_VERSIONS),
required=False,
envvar="PYTHON_VERSION",
help="Python version to update sbom from. (defaults to all python versions)",
)
@option_historical_python_version
@click.option(
"--include-provider-dependencies",
is_flag=True,
Expand Down
4 changes: 2 additions & 2 deletions dev/breeze/src/airflow_breeze/global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from pathlib import Path

from airflow_breeze.utils.host_info_utils import Architecture
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, DEPENDENCIES_JSON_FILE_PATH
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, PROVIDER_DEPENDENCIES_JSON_FILE_PATH

RUNS_ON_PUBLIC_RUNNER = "ubuntu-22.04"
RUNS_ON_SELF_HOSTED_RUNNER = "self-hosted"
Expand Down Expand Up @@ -146,7 +146,7 @@ def all_helm_test_packages() -> list[str]:


def get_available_documentation_packages(short_version=False) -> list[str]:
provider_names: list[str] = list(json.loads(DEPENDENCIES_JSON_FILE_PATH.read_text()).keys())
provider_names: list[str] = list(json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text()).keys())
doc_provider_names = [provider_name.replace(".", "-") for provider_name in provider_names]
available_packages = [f"apache-airflow-providers-{doc_provider}" for doc_provider in doc_provider_names]
available_packages.extend(["apache-airflow", "docker-stack", "helm-chart"])
Expand Down
14 changes: 5 additions & 9 deletions dev/breeze/src/airflow_breeze/utils/cdxgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from airflow_breeze.global_constants import DEFAULT_PYTHON_MAJOR_MINOR_VERSION
from airflow_breeze.utils.console import Output, get_console
from airflow_breeze.utils.github import download_file_from_github
from airflow_breeze.utils.github import download_constraints_file, download_file_from_github
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, FILES_DIR
from airflow_breeze.utils.run_utils import run_command
from airflow_breeze.utils.shared_options import get_dry_run
Expand Down Expand Up @@ -243,18 +243,14 @@ def produce_sbom_for_application_via_cdxgen_server(
)
source_dir = job.application_root_path / job.airflow_version / job.python_version
source_dir.mkdir(parents=True, exist_ok=True)
constraints_tag = f"constraints-{job.airflow_version}"
lock_file_relative_path = "airflow/www/yarn.lock"
download_file_from_github(
tag=job.airflow_version, path=lock_file_relative_path, output_file=source_dir / "yarn.lock"
)
if job.include_provider_dependencies:
constraints_file_path = f"constraints-{job.python_version}.txt"
else:
constraints_file_path = f"constraints-no-providers-{job.python_version}.txt"
if not download_file_from_github(
tag=constraints_tag,
path=constraints_file_path,
if not download_constraints_file(
airflow_version=job.airflow_version,
python_version=job.python_version,
include_provider_dependencies=job.include_provider_dependencies,
output_file=source_dir / "requirements.txt",
):
get_console(output=output).print(
Expand Down
8 changes: 8 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from airflow_breeze.branch_defaults import DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH
from airflow_breeze.global_constants import (
ALL_HISTORICAL_PYTHON_VERSIONS,
ALLOWED_BACKENDS,
ALLOWED_BUILD_CACHE,
ALLOWED_CELERY_BROKERS,
Expand Down Expand Up @@ -561,3 +562,10 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option,
help="Do not use constraints when installing providers.",
envvar="SKIP_CONSTRAINTS",
)
# Shared ``--python`` option restricted to ALL_HISTORICAL_PYTHON_VERSIONS. It is optional and can
# also be provided through the PYTHON_VERSION environment variable.
# NOTE(review): the help text speaks only about SBOM, while the option looks intended to be shared
# with other commands as well - confirm whether the wording should be made command-neutral.
option_historical_python_version = click.option(
    "--python",
    type=BetterChoice(ALL_HISTORICAL_PYTHON_VERSIONS),
    required=False,
    envvar="PYTHON_VERSION",
    help="Python version to update sbom from. (defaults to all historical python versions)",
)
45 changes: 45 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -109,3 +110,47 @@ def get_active_airflow_versions(confirm: bool = True) -> list[str]:
get_console().print("[red]Aborting[/]")
sys.exit(1)
return airflow_versions


def download_constraints_file(
    airflow_version: str, python_version: str, include_provider_dependencies: bool, output_file: Path
) -> bool:
    """
    Fetch the constraints file of an Airflow version from the Apache Airflow GitHub repository.

    The file is taken from the ``constraints-<airflow_version>`` tag. Its name is
    ``constraints-<python_version>.txt`` when provider dependencies are included and
    ``constraints-no-providers-<python_version>.txt`` otherwise.

    :param airflow_version: airflow version
    :param python_version: python version
    :param include_provider_dependencies: whether to include provider dependencies
    :param output_file: the file where to store the constraint file
    :return: true if the file was successfully downloaded
    """
    file_prefix = "constraints" if include_provider_dependencies else "constraints-no-providers"
    return download_file_from_github(
        tag=f"constraints-{airflow_version}",
        path=f"{file_prefix}-{python_version}.txt",
        output_file=output_file,
    )


def get_tag_date(tag: str) -> str | None:
    """
    Returns the UTC timestamp of the tag in the repo in ISO 8601 format.

    :param tag: tag to get date for
    :return: date of the tag formatted as ``YYYY-MM-DDTHH:MM:SSZ``, or None when the tag is not
        found in the repository
    """
    from datetime import timezone

    from git import Repo

    repo = Repo(AIRFLOW_SOURCES_ROOT)
    try:
        tag_object = repo.tags[tag].object
    except IndexError:
        get_console().print(f"[warning]Tag {tag} not found in the repository")
        return None
    # Prefer ``committed_date`` when the tagged object exposes it and fall back to ``tagged_date``
    # otherwise (presumably commits for lightweight tags vs. tag objects for annotated ones).
    timestamp: int = (
        tag_object.committed_date if hasattr(tag_object, "committed_date") else tag_object.tagged_date
    )
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12; the timezone-aware conversion
    # below produces exactly the same formatted string.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
5 changes: 4 additions & 1 deletion dev/breeze/src/airflow_breeze/utils/path_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,10 @@ def find_airflow_sources_root_to_operate_on() -> Path:
SYSTEM_TESTS_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "tests" / "system" / "providers"
AIRFLOW_PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
BUILD_CACHE_DIR = AIRFLOW_SOURCES_ROOT / ".build"
DEPENDENCIES_JSON_FILE_PATH = AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json"
GENERATED_DIR = AIRFLOW_SOURCES_ROOT / "generated"
CONSTRAINTS_CACHE_DIR = BUILD_CACHE_DIR / "constraints"
PROVIDER_DEPENDENCIES_JSON_FILE_PATH = GENERATED_DIR / "provider_dependencies.json"
PROVIDER_METADATA_JSON_FILE_PATH = GENERATED_DIR / "provider_metadata.json"
WWW_CACHE_DIR = BUILD_CACHE_DIR / "www"
AIRFLOW_TMP_DIR_PATH = AIRFLOW_SOURCES_ROOT / "tmp"
WWW_ASSET_COMPILE_LOCK = WWW_CACHE_DIR / ".asset_compile.lock"
Expand Down
32 changes: 30 additions & 2 deletions dev/breeze/src/airflow_breeze/utils/provider_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@

import json

from airflow_breeze.utils.path_utils import DEPENDENCIES_JSON_FILE_PATH
import yaml

DEPENDENCIES = json.loads(DEPENDENCIES_JSON_FILE_PATH.read_text())
from airflow_breeze.utils.github import get_tag_date
from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, PROVIDER_DEPENDENCIES_JSON_FILE_PATH

DEPENDENCIES = json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text())


def get_related_providers(
Expand Down Expand Up @@ -50,3 +53,28 @@ def get_related_providers(
for dep_name in DEPENDENCIES[provider_to_check]["cross-providers-deps"]:
related_providers.add(dep_name)
return related_providers


def generate_providers_metadata_for_package(
    provider_id: str, constraints: dict[str, dict[str, str]]
) -> dict[str, dict[str, str]]:
    """
    Build per-version metadata for a single provider.

    Versions are read from the ``versions`` list of the provider's ``provider.yaml`` and processed
    in reversed order. For every version the Airflow version whose constraints pin exactly that
    provider version is looked up; when none matches, the value found for the previously processed
    version is kept (starting from "2.0.0"). Versions for which no release tag date can be
    determined are left out.

    :param provider_id: dotted provider id, used to locate ``provider.yaml`` and to derive the
        package and tag names
    :param constraints: mapping of Airflow version to a mapping of package name to pinned version
    :return: mapping of provider version to its ``associated_airflow_version`` and ``date_released``
    """
    dashed_provider_id = provider_id.replace(".", "-")
    package_name = "apache-airflow-providers-" + dashed_provider_id
    provider_yaml_path = AIRFLOW_PROVIDERS_ROOT.joinpath(*provider_id.split(".")) / "provider.yaml"
    provider_yaml_dict = yaml.safe_load(provider_yaml_path.read_text())
    provider_metadata: dict[str, dict[str, str]] = {}
    last_airflow_version = "2.0.0"
    for provider_version in reversed(provider_yaml_dict["versions"]):
        # Every Airflow version is checked; the last match in iteration order wins.
        for airflow_version, pinned_packages in constraints.items():
            if pinned_packages.get(package_name) == provider_version:
                last_airflow_version = airflow_version
        date_released = get_tag_date(tag="providers-" + dashed_provider_id + "/" + provider_version)
        if date_released is not None:
            provider_metadata[provider_version] = {
                "associated_airflow_version": last_airflow_version,
                "date_released": date_released,
            }
    return provider_metadata
5 changes: 5 additions & 0 deletions generated/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ You can read more about pre-commit hooks [here](../STATIC_CODE_CHECKS.rst#pre-co
* `provider_dependencies.json` - is generated based on `provider.yaml` files in `airflow/providers` and
based on the imports in the provider code. If you want to add new dependency to a provider, you
need to modify the corresponding `provider.yaml` file

* `provider_metadata.json` - is generated based on `provider.yaml` files, airflow constraints and tags for
  the providers. It contains historical metadata of the providers that were released - it is useful to generate
  information in release notes and it is used to generate SBOM information for the providers. It is manually
  regenerated using the `breeze release-management generate-providers-metadata` command.
Loading