From b98f3ba9db1511e8e814fea85116f58d1f979b6d Mon Sep 17 00:00:00 2001 From: Alex Oladele Date: Wed, 15 Apr 2026 23:17:07 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20GPG-signed=20commits=20via=20?= =?UTF-8?q?GitHub=20Git=20Data=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create backport commits through GitHub's Git Data REST API so they are automatically signed by GitHub's web-flow GPG key and show as "Verified". The cherry-pick still runs locally via git subprocess. Extract git subprocess operations into git_cli.py and GitHub API calls into git_api.py, following the existing one-module-per-API pattern (checks_api.py, comments_api.py, locking_api.py). Ref: sanitizers/patchback-github-app#1 Co-Authored-By: Claude Opus 4.6 (1M context) --- patchback/event_handlers.py | 215 ++++++++++-------------------------- patchback/git_api.py | 99 +++++++++++++++++ patchback/git_cli.py | 188 +++++++++++++++++++++++++++++++ 3 files changed, 345 insertions(+), 157 deletions(-) create mode 100644 patchback/git_api.py create mode 100644 patchback/git_cli.py diff --git a/patchback/event_handlers.py b/patchback/event_handlers.py index f67ce7f..e48ffd6 100644 --- a/patchback/event_handlers.py +++ b/patchback/event_handlers.py @@ -2,9 +2,6 @@ import http import logging -import pathlib -import tempfile -from subprocess import CalledProcessError, check_output, check_call from anyio import run_in_thread from gidgethub import BadRequest, ValidationError @@ -15,38 +12,15 @@ from .checks_api import ChecksAPI from .comments_api import CommentsAPI -from .locking_api import LockingAPI from .config import get_patchback_config +from .git_api import GitAPI +from .git_cli import cherry_pick_to_backport_branch from .github_reporter import PullRequestReporter +from .locking_api import LockingAPI logger = logging.getLogger(__name__) -spawn_proc = lambda *cmd: check_call(cmd, env={}) - - -# Refs: -# * 
https://github.community/t/github-actions-bot-email-address/17204/6 -# * https://github.com/actions/checkout/issues/13#issuecomment-724415212 -# * https://api.github.com/users/patchback%5Bbot%5D -# TODO: Figure out how to generate this automatically, on startup. -BOT_USER_GH_ID = 45432694 -GIT_USERNAME = 'patchback[bot]' -GIT_EMAIL = f'{BOT_USER_GH_ID:d}+{GIT_USERNAME!s}@users.noreply.github.com' - - -CMD_RUN_OUT_TMPL = """ -$ {cmd!s} - -[RETURN CODE]: {cmd_rc:d} - -[OUTPUT]: -{cmd_out!s} - -[STDERR]: -{cmd_err!s} -""" - MANUAL_BACKPORT_GUIDE_MD_TMPL = """ @@ -104,132 +78,6 @@ async def event_handler_wrapper(*, number, pull_request, **kwargs): return event_handler_wrapper -def backport_pr_sync( - pr_number: int, merge_commit_sha: str, target_branch: str, - backport_pr_branch: str, - repo_slug: str, repo_remote: str, installation_access_token: str, -) -> None: - """Returns a branch with backported PR pushed to GitHub. - - It clones the ``repo_remote`` using a GitHub App Installation token - ``installation_access_token`` to authenticate. Then, it cherry-picks - ``merge_commit_sha`` onto a new branch based on the - ``target_branch`` and pushes it back to ``repo_remote``. 
- """ - def sanitize_token_in_str(inp): - nonlocal installation_access_token - token_mask = '*' * len(installation_access_token) - return inp.replace( - installation_access_token, token_mask, - ) - - repo_remote_w_creds = repo_remote.replace( - # NOTE: this is a hack for auth to work - 'https://github.com/', - f'https://x-access-token:{installation_access_token}@github.com/', - 1, # count - ) - with tempfile.TemporaryDirectory( - prefix=f'{repo_slug.replace("/", "--")}---' - f'{target_branch.replace("/", "--")}---', - suffix=f'---PR-{pr_number}.git', - ) as tmp_dir: - logger.info('Created a temporary dir: `%s`', tmp_dir) - check_call(('git', 'init', tmp_dir), env={}) - git_cmd = ( - 'git', - '--git-dir', str(pathlib.Path(tmp_dir) / '.git'), - '--work-tree', tmp_dir, - '-c', f'user.email={GIT_EMAIL}', - '-c', f'user.name={GIT_USERNAME}', - '-c', 'diff.algorithm=histogram', - # '-c', 'protocol.version=2', # Needs Git 2.18+ - ) - spawn_proc(*git_cmd, 'remote', 'add', 'origin', repo_remote_w_creds) - try: - spawn_proc(*git_cmd, 'fetch', '--prune', 'origin') - except CalledProcessError as proc_err: - raise LookupError(f'Failed to fetch {repo_remote}') from proc_err - else: - logger.info('Fetched `%s`', repo_remote) - - try: - check_call( - ( - *git_cmd, 'checkout', - '-b', backport_pr_branch, f'origin/{target_branch}', - ), - ) - except CalledProcessError as proc_err: - raise LookupError( - f'Failed to find branch {target_branch}', - ) from proc_err - else: - logger.info('Checked out `%s`', backport_pr_branch) - - logger.info( - 'Cherry-picking `%s` into `%s`...', - merge_commit_sha, backport_pr_branch, - ) - merge_check_cmd = ( - *git_cmd, 'rev-list', - '--no-walk', '--count', '--merges', - merge_commit_sha, '--', - ) - is_merge_commit = int(check_output(merge_check_cmd, env={})) > 0 - logger.info( - '`%s` is%s a merge commit', - merge_commit_sha, ('' if is_merge_commit else ' not'), - ) - - try: - spawn_proc( - *git_cmd, 'cherry-pick', '-x', - 
'--strategy-option=diff-algorithm=histogram', - '--strategy-option=find-renames', - *(('--mainline', '1') if is_merge_commit else ()), - merge_commit_sha, - ) - except CalledProcessError as proc_err: - raise ValueError( - f'Failed to cleanly apply {merge_commit_sha} ' - f'on top of {backport_pr_branch}', - ) from proc_err - else: - logger.info('Backported the commit into `%s`', backport_pr_branch) - - logger.info('Pushing `%s` back to GitHub...', backport_pr_branch) - try: - spawn_proc( - *git_cmd, 'push', - # We manage the branch and thus don't care about rewrites: - '--force-with-lease', - 'origin', 'HEAD', - ) - except CalledProcessError as proc_err: - logger.error(sanitize_token_in_str(str(proc_err))) - - cmd_log = CMD_RUN_OUT_TMPL.format( - cmd=sanitize_token_in_str(' '.join(proc_err.cmd)), - cmd_out=sanitize_token_in_str(proc_err.stdout or ''), - cmd_err=sanitize_token_in_str(proc_err.stderr or ''), - cmd_rc=proc_err.returncode, - ) - - raise PermissionError( - 'Current GitHub App installation does not grant sufficient ' - f'privileges for pushing to {repo_remote}. 
Lacking ' - '`Contents: write` or `Workflows: write` permissions ' - 'are known to cause this.\n\n' - 'the underlying command output was:\n\n' - '```console\n' - f'{cmd_log}\n' - '```', - ) from proc_err - else: - logger.info('Push to GitHub succeeded...') - - @process_event_actions('pull_request', {'closed'}) @process_webhook_payload @ensure_pr_merged @@ -360,6 +208,7 @@ async def process_pr_backport_labels( api=gh_api, repo_slug=repo_slug, pr_number=pr_number, is_locked=pr_is_locked, lock_reason=pr_lock_reason, ) + git_data_api = GitAPI(api=gh_api, repo_slug=repo_slug) pr_reporter = PullRequestReporter( checks_api=checks_api, comments_api=comments_api, @@ -375,8 +224,8 @@ async def process_pr_backport_labels( ) manual_backport_guide = MANUAL_BACKPORT_GUIDE_MD_TMPL.format_map(locals()) try: - await run_in_thread( - backport_pr_sync, + backport = await run_in_thread( + cherry_pick_to_backport_branch, pr_number, pr_merge_commit, target_branch, @@ -427,6 +276,58 @@ async def process_pr_backport_labels( else: logger.info('Backport PR branch: `%s`', backport_pr_branch) + try: + parent_sha = await git_data_api.get_branch_head_sha(target_branch) + except PermissionError as perm_err: + logger.info( + 'Failed to read target branch `%s` for PR #%d backport', + target_branch, pr_number, + ) + await pr_reporter.finish_reporting( + subtitle=( + '💔 signed commit failed — could not read target branch' + ), + text=manual_backport_guide, + summary=f'❌ {perm_err!s}', + ) + return + + try: + commit_sha = await git_data_api.create_commit( + tree_sha=backport.tree_sha, + message=backport.commit_message, + parent_sha=parent_sha, + ) + except PermissionError as perm_err: + logger.info( + 'Failed to create signed commit for PR #%d backport to `%s`', + pr_number, target_branch, + ) + await pr_reporter.finish_reporting( + subtitle='💔 signed commit failed — could not create commit', + text=manual_backport_guide, + summary=f'❌ {perm_err!s}', + ) + return + logger.info('Created signed commit 
`%s`', commit_sha) + + try: + await git_data_api.create_branch( + branch_name=backport_pr_branch, sha=commit_sha, + ) + except PermissionError as perm_err: + logger.info( + 'Failed to create branch `%s` for PR #%d backport', + backport_pr_branch, pr_number, + ) + await pr_reporter.finish_reporting( + subtitle='💔 signed commit failed — could not create branch', + text=manual_backport_guide, + summary=f'❌ {perm_err!s}', + ) + return + logger.info('Created branch `%s`', backport_pr_branch) + backport_pr_branch_msg = f'Backport PR branch: `{backport_pr_branch}`' await pr_reporter.update_progress( subtitle='cherry-pick succeeded', diff --git a/patchback/git_api.py b/patchback/git_api.py new file mode 100644 index 0000000..343dcd4 --- /dev/null +++ b/patchback/git_api.py @@ -0,0 +1,99 @@ +"""GitHub Git Data REST API wrapper for signed commit creation.""" + +import http.client +import logging + +from gidgethub import BadRequest + + +logger = logging.getLogger(__name__) + + +def _handle_bad_request(bad_req_err: BadRequest) -> None: + """Re-raise as ``PermissionError`` if the request was denied.""" + if ( + bad_req_err.status_code != http.client.FORBIDDEN or + str(bad_req_err) != 'Resource not accessible by integration' + ): + raise + + raise PermissionError(str(bad_req_err)) from bad_req_err + + +class GitAPI: + """Git Data API for creating signed commits and branch refs.""" + + def __init__(self, *, api, repo_slug: str) -> None: + """Initialize a GitAPI instance for a given repo.""" + self._api = api + self._repo_slug = repo_slug + + async def get_branch_head_sha(self, branch_name: str) -> str: + """Return the HEAD commit SHA of a branch.""" + try: + ref = await self._api.getitem( + f'/repos/{self._repo_slug}/git/ref/heads/{branch_name}', + ) + except BadRequest as bad_req_err: + _handle_bad_request(bad_req_err) + return ref['object']['sha'] + + async def create_commit( + self, *, tree_sha: str, message: str, parent_sha: str, + ) -> str: + """Create a commit and return 
its SHA. + + Commits created through the Git Data API are automatically + signed by GitHub's web-flow GPG key. + """ + try: + resp = await self._api.post( + f'/repos/{self._repo_slug}/git/commits', + data={ + 'message': message, + 'tree': tree_sha, + 'parents': [parent_sha], + }, + ) + except BadRequest as bad_req_err: + _handle_bad_request(bad_req_err) + return resp['sha'] + + async def create_branch( + self, *, branch_name: str, sha: str, + ) -> None: + """Create a branch ref pointing to the given commit SHA.""" + try: + await self._api.post( + f'/repos/{self._repo_slug}/git/refs', + data={ + 'ref': f'refs/heads/{branch_name}', + 'sha': sha, + }, + ) + except BadRequest as bad_req_err: + _handle_bad_request(bad_req_err) + + async def create_signed_branch( + self, *, + tree_sha: str, + message: str, + parent_branch: str, + branch_name: str, + ) -> str: + """Create a signed commit on a new branch. + + Fetches the parent branch HEAD, creates a signed commit from + the given tree, and points a new branch at it. Returns the + signed commit SHA. 
+ """ + parent_sha = await self.get_branch_head_sha(parent_branch) + commit_sha = await self.create_commit( + tree_sha=tree_sha, + message=message, + parent_sha=parent_sha, + ) + logger.info('Created signed commit `%s`', commit_sha) + await self.create_branch(branch_name=branch_name, sha=commit_sha) + logger.info('Created branch `%s`', branch_name) + return commit_sha diff --git a/patchback/git_cli.py b/patchback/git_cli.py new file mode 100644 index 0000000..6b42929 --- /dev/null +++ b/patchback/git_cli.py @@ -0,0 +1,188 @@ +"""Git CLI subprocess wrapper for backport operations.""" + +import logging +import pathlib +import secrets +import tempfile +from subprocess import CalledProcessError, check_call, check_output + +import attr + + +logger = logging.getLogger(__name__) + +# Refs: +# * https://github.community/t/github-actions-bot-email-address/17204/6 +# * https://github.com/actions/checkout/issues/13#issuecomment-724415212 +# * https://api.github.com/users/patchback%5Bbot%5D +# TODO: Figure out how to generate this automatically, on startup. 
+BOT_USER_GH_ID = 45432694 +GIT_USERNAME = 'patchback[bot]' +GIT_EMAIL = f'{BOT_USER_GH_ID:d}+{GIT_USERNAME!s}@users.noreply.github.com' + +CMD_RUN_OUT_TMPL = """ +$ {cmd!s} + +[RETURN CODE]: {cmd_rc:d} + +[OUTPUT]: +{cmd_out!s} + +[STDERR]: +{cmd_err!s} +""" + + +@attr.dataclass +class BackportResult: + """Result of a successful cherry-pick with objects uploaded to GitHub.""" + + tree_sha: str = attr.ib() + """Tree SHA from the local cherry-pick.""" + + commit_message: str = attr.ib() + """Commit message including the cherry-pick trailer.""" + + +def _run(*cmd: str) -> None: + """Run a git command with an empty environment for isolation.""" + check_call(cmd, env={}) + + +def _run_output(*cmd: str) -> str: + """Run a git command and return its stripped decoded output.""" + return check_output(cmd, env={}).decode().strip() + + + +def cherry_pick_to_backport_branch( + pr_number: int, + merge_commit_sha: str, + target_branch: str, + backport_pr_branch: str, + repo_slug: str, + repo_remote: str, + installation_access_token: str, +) -> BackportResult: + """Clone a repo, cherry-pick a commit, and upload objects to GitHub. + + Returns a :class:`BackportResult` containing the tree SHA and + commit message needed for signed commit creation via the Git + Data API. The temporary ref used to upload the objects is + deleted before returning; a failed deletion is only logged.
+ + :raises LookupError: if the repo or target branch cannot be found + :raises ValueError: if the cherry-pick has conflicts + :raises PermissionError: if the push to the temporary ref fails + """ + token_mask = '*' * len(installation_access_token) + sanitize_token = lambda text: text.replace( + installation_access_token, token_mask, + ) + + repo_remote_w_creds = repo_remote.replace( + # NOTE: this is a hack for auth to work + 'https://github.com/', + f'https://x-access-token:{installation_access_token}@github.com/', + 1, # count + ) + with tempfile.TemporaryDirectory( + prefix=f'{repo_slug.replace("/", "--")}---' + f'{target_branch.replace("/", "--")}---', + suffix=f'---PR-{pr_number}.git', + ) as tmp_dir: + logger.info('Created a temporary dir: `%s`', tmp_dir) + _run('git', 'init', tmp_dir) + git_cmd = ( + 'git', + '--git-dir', str(pathlib.Path(tmp_dir) / '.git'), + '--work-tree', tmp_dir, + '-c', f'user.email={GIT_EMAIL}', + '-c', f'user.name={GIT_USERNAME}', + '-c', 'diff.algorithm=histogram', + ) + _run(*git_cmd, 'remote', 'add', 'origin', repo_remote_w_creds) + + try: + _run(*git_cmd, 'fetch', '--prune', 'origin') + except CalledProcessError as proc_err: + raise LookupError( + f'Failed to fetch {repo_remote}', + ) from proc_err + logger.info('Fetched `%s`', repo_remote) + + try: + _run( + *git_cmd, 'checkout', + '-b', backport_pr_branch, f'origin/{target_branch}', + ) + except CalledProcessError as proc_err: + raise LookupError( + f'Failed to find branch {target_branch}', + ) from proc_err + logger.info('Checked out `%s`', backport_pr_branch) + + logger.info( + 'Cherry-picking `%s` into `%s`...', + merge_commit_sha, backport_pr_branch, + ) + is_merge_commit = int(_run_output( + *git_cmd, 'rev-list', + '--no-walk', '--count', '--merges', + merge_commit_sha, '--', + )) > 0 + logger.info( + '`%s` is%s a merge commit', + merge_commit_sha, ('' if is_merge_commit else ' not'), + ) + + try: + _run( + *git_cmd, 'cherry-pick', '-x', + 
'--strategy-option=diff-algorithm=histogram', + '--strategy-option=find-renames', + *(('--mainline', '1') if is_merge_commit else ()), + merge_commit_sha, + ) + except CalledProcessError as proc_err: + raise ValueError( + f'Failed to cleanly apply {merge_commit_sha} ' + f'on top of {backport_pr_branch}', + ) from proc_err + logger.info('Backported the commit into `%s`', backport_pr_branch) + + tree_sha = _run_output(*git_cmd, 'log', '--format=%T', '-1') + commit_message = _run_output(*git_cmd, 'log', '--format=%B', '-1') + + temp_ref = f'{backport_pr_branch}/{secrets.token_hex(16)}' + logger.info('Uploading git objects via temp ref `%s`...', temp_ref) + try: + _run(*git_cmd, 'push', 'origin', f'HEAD:{temp_ref}') + except CalledProcessError as proc_err: + logger.error(sanitize_token(str(proc_err))) + cmd_log = CMD_RUN_OUT_TMPL.format( + cmd=sanitize_token(' '.join(proc_err.cmd)), + cmd_out=sanitize_token(proc_err.stdout or ''), + cmd_err=sanitize_token(proc_err.stderr or ''), + cmd_rc=proc_err.returncode, + ) + raise PermissionError( + f'Could not push temporary ref `{temp_ref}`. ' + 'This may be caused by branch protection rulesets ' + 'blocking pushes to this ref pattern, or by lacking ' + '`Contents: write` or `Workflows: write` permissions.' + '\n\nthe underlying command output was:\n\n' + f'```console\n{cmd_log}\n```', + ) from proc_err + + try: + _run(*git_cmd, 'push', '-d', 'origin', temp_ref) + except CalledProcessError: + logger.warning('Failed to delete temp ref `%s`', temp_ref) + else: + logger.info('Deleted temp ref `%s`', temp_ref) + + return BackportResult( + tree_sha=tree_sha, + commit_message=commit_message, + )