From 873fc6d355f78b924abe27272c331b3d95bba1a7 Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Tue, 7 May 2024 16:37:10 -0400 Subject: [PATCH 1/7] watchtower: introduce dockerignore and initify watcher The dockerignore will prevent host build artifacts from getting put into the container image by accident. By handling SIGTERM explicitly, we can avoid having to wait ten seconds for podman-compose to resort to force-killing the container with SIGKILL. Signed-off-by: Joel Savitz --- watchtower/.dockerignore | 2 ++ watchtower/watcher.c | 5 +++++ 2 files changed, 7 insertions(+) create mode 100644 watchtower/.dockerignore diff --git a/watchtower/.dockerignore b/watchtower/.dockerignore new file mode 100644 index 00000000..6b5c8c80 --- /dev/null +++ b/watchtower/.dockerignore @@ -0,0 +1,2 @@ +watcher +run-at diff --git a/watchtower/watcher.c b/watchtower/watcher.c index 469ba024..2cd47f2e 100644 --- a/watchtower/watcher.c +++ b/watchtower/watcher.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,10 @@ static struct inotify_event *get_event(void) int main(int argc, char **argv) { + // we need to explicitly handle sigterm because when running as PID 1 inside + // a container all signals without handlers (except SIG{KILL,STP}) are ignored + signal(SIGTERM, _Exit); + if (0 > (inotifyfd = inotify_init1(IN_CLOEXEC))) err(1, "inotify_init1"); From 5189f6295efa05c6059e99a79fdb26bfef3b8aa5 Mon Sep 17 00:00:00 2001 From: charliemirabile <46761267+charliemirabile@users.noreply.github.com> Date: Thu, 9 May 2024 16:11:24 -0400 Subject: [PATCH 2/7] watchtower: watcher: use `SA_NOCLDWAIT` instead of `SIG_IGN`ing `SIGCHLD` There is more than one way to not have to reap your children processes the simplest being explicitly setting `SIG_IGN` as the handler for `SIGCHLD`. However, unfortunately, doing this causes that setting to persist across `execve` which means that children inherit it and thus cannot reap their children (unless the explicitly reset it). This interferes with the behavior of functions like `system` or `popen`. Setting the `SA_NOCLDWAIT` flag on the handler involves using the more complicated `sigaction` interface, but the setting is properly reset during a call to `execve` so that children can properly manage their descendant processes. --- watchtower/watcher.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/watchtower/watcher.c b/watchtower/watcher.c index 2cd47f2e..90dccbd9 100644 --- a/watchtower/watcher.c +++ b/watchtower/watcher.c @@ -24,11 +24,24 @@ static struct inotify_event *get_event(void) return evt; } -int main(int argc, char **argv) +static void setup_signal_handler(void) { + struct sigaction child_act; + if(0 > sigaction(SIGCHLD, NULL, &child_act)) + err(1, "failed to get default signal action for SIGCHLD (this is a bug)"); + child_act.sa_flags |= SA_NOCLDWAIT; //avoid needing to reap children processes + if(0 > sigaction(SIGCHLD, &child_act, NULL)) + err(1, "failed to set signal action for SIGCHLD (this is a bug)"); // we need to explicitly handle sigterm because when running as PID 1 inside // a container all signals without handlers (except SIG{KILL,STP}) are ignored - signal(SIGTERM, _Exit); + if(SIG_ERR == signal(SIGTERM, _Exit)) + err(1, "failed to set handler for SIGTERM (this is a bug)"); +} + + +int main(int argc, char **argv) +{ + setup_signal_handler(); if (0 > (inotifyfd = inotify_init1(IN_CLOEXEC))) err(1, "inotify_init1"); @@ -41,8 +54,6 @@ int main(int argc, char **argv) if (0 > watch_desc) err(1, "failed to create watch for directory: '%s'", dir); - //avoid needing to reap children - signal(SIGCHLD, SIG_IGN); for (;;) { struct inotify_event *event = get_event(); if (!event) From 9259f6735140d1dd83bd353e0a92fb1a292792e3 Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Tue, 7 May 2024 16:35:21 -0400 Subject: [PATCH 3/7] denis: automate denis using container technology We can rebuild him. The future is now. The singularity is here. Signed-off-by: Joel Savitz --- container-compose.yml | 19 +++++++++++++++++++ denis/Containerfile | 34 ++++++++++++++++++++++++++++++++++ denis/db.py | 22 ++++++++++++++++++++++ denis/submit.py | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 denis/Containerfile create mode 100755 denis/db.py create mode 100755 denis/submit.py diff --git a/container-compose.yml b/container-compose.yml index 76f6eacf..b47eb1b7 100644 --- a/container-compose.yml +++ b/container-compose.yml @@ -101,13 +101,32 @@ services: networks: - submatrix - orbit + denis: + build: + context: denis + dockerfile: Containerfile + additional_contexts: + - watchtower_source=./watchtower + volumes: + - type: volume + source: email + target: /mnt/email_data + read_only: true + - type: volume + source: grades-db + target: /var/lib/denis + read_only: false + networks: + - denis networks: orbit: smtp: pop: submatrix: + denis: volumes: ssl-certs: email: orbit-db: + grades-db: submatrix-data: diff --git a/denis/Containerfile b/denis/Containerfile new file mode 100644 index 00000000..83486ece --- /dev/null +++ b/denis/Containerfile @@ -0,0 +1,34 @@ +FROM alpine:3.19 AS build +RUN apk add \ + clang \ + make \ + ; + +COPY --from=watchtower_source . /watchtower + +RUN make -C /watchtower CC='clang -static' watcher + +FROM alpine:3.19 AS denis + +RUN apk add \ + py3-peewee \ + ; + + +WORKDIR /usr/local/share/denis + +COPY . . + +RUN mkdir -p /var/lib/denis/ && \ + ./db.py \ + : + +RUN chown -R 100:100 /var/lib/denis + +COPY --from=build /watchtower/watcher /usr/local/bin/watcher + +VOLUME /mnt/email_data/ + +USER 100:100 + +ENTRYPOINT ["/usr/local/bin/watcher", "/mnt/email_data/logs", "submit.py"] diff --git a/denis/db.py b/denis/db.py new file mode 100755 index 00000000..c23a47fd --- /dev/null +++ b/denis/db.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +import peewee + +DB = peewee.SqliteDatabase("/var/lib/denis/grades.db") + + +class BaseModel(peewee.Model): + class Meta: + database = DB + strict_tables = True + + +class Submission(BaseModel): + submission_id = peewee.TextField(unique=True) + assignment = peewee.TextField() + timestamp = peewee.IntegerField() + user = peewee.TextField() + status = peewee.TextField() + + +if __name__ == '__main__': + DB.create_tables(BaseModel.__subclasses__()) diff --git a/denis/submit.py b/denis/submit.py new file mode 100755 index 00000000..a07a656c --- /dev/null +++ b/denis/submit.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +import db +from pathlib import Path +import sys + +ASSIGNMENT_LIST = ["introductions", + "exercise0", "exercise1", "exercise2", + "programming0", "programming1", "programming2", + "final0", "final1"] + + +# We assume inputs are correct as a precondition +# Otherwise we simply crash +def main(argv): + _, logdir, logfile = argv + with open(Path(logdir) / logfile) as log: + header, *email_lines = log.readlines() + timestamp, user = header.split() + emails = [line.split() for line in email_lines] + recpts = {email[0] for email in emails} + + if len(recpts) != 1: + return + (to,) = recpts + + if to not in ASSIGNMENT_LIST: + # TODO process peer review + return 0 + + # At this point, we know this is an assignment submission + db.Submission.create(submission_id=logfile, assignment=to, + timestamp=timestamp, user=user, status="new") + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) From 6c4457cbb6d1177e4b239578ebde8054368c5154 Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Tue, 7 May 2024 16:38:51 -0400 Subject: [PATCH 4/7] orbit: radius: dashboard: initial working draft Give orbit access to the grades.db where the submissions live. A simple query can retrieve all submissions for a given student, enabling a basic submission dashboard. Signed-off-by: Joel Savitz --- container-compose.yml | 5 +++++ orbit/Containerfile | 1 + orbit/header.html | 1 + orbit/radius.py | 21 ++++++++++++++++++++- 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/container-compose.yml b/container-compose.yml index b47eb1b7..af44b1e9 100644 --- a/container-compose.yml +++ b/container-compose.yml @@ -40,6 +40,7 @@ services: - orbit_singularity_git_dir=./.git - orbit_docs_source=./docs - orbit_repos_source=./cgit_repos + - denis_source=./denis target: orbit args: orbit_version_info: "singularity ${SINGULARITY_VERSION} ${SINGULARITY_DEPLOYMENT_STATUS} https://github.com/underground-software/singularity" @@ -48,6 +49,10 @@ services: source: orbit-db target: /var/orbit read_only: false + - type: volume + source: grades-db + target: /var/lib/denis + read_only: true networks: - orbit smtp: diff --git a/orbit/Containerfile b/orbit/Containerfile index a31ef9ff..9500af6d 100644 --- a/orbit/Containerfile +++ b/orbit/Containerfile @@ -34,6 +34,7 @@ WORKDIR /usr/local/share/orbit COPY --from=build /usr/local/share/orbit /usr/local/share/orbit COPY --from=orbit_docs_source . ./docs +COPY --from=denis_source . ./denis COPY --from=build /var/orbit /var/orbit COPY --from=orbit_singularity_git_dir . /var/git/singularity COPY --from=orbit_repos_source . /etc/cgit diff --git a/orbit/header.html b/orbit/header.html index 33fe1604..c235addf 100644 --- a/orbit/header.html +++ b/orbit/header.html @@ -19,6 +19,7 @@

Kernel Development Learning Pipeline


diff --git a/orbit/radius.py b/orbit/radius.py index 0f335408..08b5db9a 100644 --- a/orbit/radius.py +++ b/orbit/radius.py @@ -17,6 +17,7 @@ # === internal imports & constants === import config import db +import denis.db sec_per_min = 60 min_per_ses = config.minutes_each_session_token_is_valid @@ -381,7 +382,25 @@ def handle_stub(rocket, more=[]): def handle_dashboard(rocket): if not rocket.session: return rocket.raw_respond(HTTPStatus.UNAUTHORIZED) - return handle_stub(rocket, ['dashboard in development, check back later']) + + submissions = (denis.db.Submission.select() + .where(denis.db.Submission.user == rocket.session.username) + .order_by(- denis.db.Submission.timestamp)) + + def submission_fields(sub): + return (datetime.fromtimestamp(sub.timestamp).isoformat(), + sub.assignment, sub.submission_id, sub.status) + + # Split data from Submission table into values for HTML table + table_data = [[f'{val}' for val in submission_fields(sub)] + for sub in submissions] + table_content = '\n'.join(''.join(row) for row in table_data) + + return rocket.respond(f""" + +{table_content} +
TimestampAssignmentSubmission IDStatus
+""") def find_creds_for_registration(student_id): From dc2605fcc6f5fe6cc5ba8339e527e48131161cb2 Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Tue, 7 May 2024 21:05:23 -0400 Subject: [PATCH 5/7] denis: submit: implement basic patchset checks Check that the patches apply and check whether any of them contain whitespace errors. Signed-off-by: Joel Savitz --- denis/Containerfile | 1 + denis/submit.py | 63 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/denis/Containerfile b/denis/Containerfile index 83486ece..1aae31ab 100644 --- a/denis/Containerfile +++ b/denis/Containerfile @@ -12,6 +12,7 @@ FROM alpine:3.19 AS denis RUN apk add \ py3-peewee \ + py3-gitpython \ ; diff --git a/denis/submit.py b/denis/submit.py index a07a656c..1f892f78 100755 --- a/denis/submit.py +++ b/denis/submit.py @@ -3,12 +3,25 @@ import db from pathlib import Path import sys +import git +import tempfile ASSIGNMENT_LIST = ["introductions", "exercise0", "exercise1", "exercise2", "programming0", "programming1", "programming2", "final0", "final1"] +MAIL_DIR_ABSPATH = "/mnt/email_data/mail" + + +def try_or_false(do, exc): + try: + do() + return True + except exc as e: + print(e, file=sys.stderr) + return False + # We assume inputs are correct as a precondition # Otherwise we simply crash @@ -19,6 +32,7 @@ def main(argv): timestamp, user = header.split() emails = [line.split() for line in email_lines] recpts = {email[0] for email in emails} + email_files = [email[1] for email in emails] if len(recpts) != 1: return @@ -29,8 +43,53 @@ def main(argv): return 0 # At this point, we know this is an assignment submission - db.Submission.create(submission_id=logfile, assignment=to, - timestamp=timestamp, user=user, status="new") + sub = db.Submission.create(submission_id=logfile, assignment=to, + timestamp=timestamp, user=user, status="new") + + if len(email_files) < 2: + sub.status = "no cover letter" + sub.save() + return 0 + + coverletter_file, *patch_files = email_files + # TODO process cover letter + + with tempfile.TemporaryDirectory() as repo_path: + repo = git.Repo.init(repo_path) + maildir = Path(MAIL_DIR_ABSPATH) + author_args = ["-c", "user.name=Denis", "-c", + "user.email=daemon@denis.d"] + git_am_args = ["git", *author_args, "am"] + whitespace_errors = [] + for i, patch_file in enumerate(patch_files): + patch_abspath = str(maildir / patch_file) + + # Try and apply and fail if there are whitespace errors + def do_git_am(extra_args=[]): + repo.git.execute([*git_am_args, *extra_args, patch_abspath]), + + # If this fails, the patch may apply with whitespace errors + if try_or_false(lambda: do_git_am(['--whitespace=error-all']), + git.GitCommandError): + continue + + repo.git.execute(["git", *author_args, "am", "--abort"]) + + # Try again, if we succeed, count this patch as a whitespace error + if try_or_false(lambda: do_git_am(), git.GitCommandError): + whitespace_errors.append(str(i+1)) + continue + + # If we still fail, the patch does not apply + sub.status = f'patch #{i+1} failed to apply' + sub.save() + return 0 + + if whitespace_errors: + sub.status = f'whitespace error patche(s) {",".join(whitespace_errors)}' # NOQA: E501 + else: + sub.status = 'patchset applies' + sub.save() if __name__ == "__main__": From 77d822799605a14f60d13db6fe180231e249e549 Mon Sep 17 00:00:00 2001 From: charliemirabile <46761267+charliemirabile@users.noreply.github.com> Date: Thu, 9 May 2024 12:25:01 -0400 Subject: [PATCH 6/7] denis: submit: push tag with sucessfully applied patch If the patches apply, we try to push a tag with the patches to a git remote running on the host. --- denis/submit.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/denis/submit.py b/denis/submit.py index 1f892f78..b6fbcbcd 100755 --- a/denis/submit.py +++ b/denis/submit.py @@ -13,6 +13,8 @@ MAIL_DIR_ABSPATH = "/mnt/email_data/mail" +REMOTE_URL = "http://host.containers.internal:3366/cgi-bin/git-receive-pack/grading.git" # NOQA: E501 + def try_or_false(do, exc): try: @@ -91,6 +93,10 @@ def do_git_am(extra_args=[]): sub.status = 'patchset applies' sub.save() + repo.create_tag(logfile) + repo.create_remote("origin", REMOTE_URL) + repo.git.push("origin", tags=True) + if __name__ == "__main__": sys.exit(main(sys.argv)) From 14576e7e8e0b74dad74561c19cb65b43eb26fbc5 Mon Sep 17 00:00:00 2001 From: charliemirabile <46761267+charliemirabile@users.noreply.github.com> Date: Thu, 9 May 2024 16:20:00 -0400 Subject: [PATCH 7/7] denis: submit: refactor patchset processing logic to improve clarity The previous logic was a bit confusing. Explcitly enumerating the cases with explanatory comments hopefully makes this code easier to understand and maintain. No major functional changes, only maybe in some edge cases (like more than one email sent at once, but not addressed to a submission inbox). --- denis/submit.py | 59 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/denis/submit.py b/denis/submit.py index b6fbcbcd..2123dc4f 100755 --- a/denis/submit.py +++ b/denis/submit.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 +import collections import db from pathlib import Path import sys import git import tempfile +import time ASSIGNMENT_LIST = ["introductions", "exercise0", "exercise1", "exercise2", @@ -25,6 +27,14 @@ def try_or_false(do, exc): return False +Email = collections.namedtuple('Email', ['rcpt', 'msg_id']) + + +def email_from_log_line(line): + recipient, message_id = line.split() + return Email(rcpt=recipient, msg_id=message_id) + + # We assume inputs are correct as a precondition # Otherwise we simply crash def main(argv): @@ -32,29 +42,41 @@ def main(argv): with open(Path(logdir) / logfile) as log: header, *email_lines = log.readlines() timestamp, user = header.split() - emails = [line.split() for line in email_lines] - recpts = {email[0] for email in emails} - email_files = [email[1] for email in emails] - if len(recpts) != 1: - return - (to,) = recpts + emails = [email_from_log_line(line) for line in email_lines] + + # no emails in session, just logged in and didn't send anything + if not emails: + return 0 + + cover_letter, *patches = emails - if to not in ASSIGNMENT_LIST: + # if the 'cover letter' is not addressed to + # an assignment inbox, this email session + # isn't a patchset at all + if cover_letter.rcpt not in ASSIGNMENT_LIST: # TODO process peer review return 0 - # At this point, we know this is an assignment submission - sub = db.Submission.create(submission_id=logfile, assignment=to, - timestamp=timestamp, user=user, status="new") + sub = db.Submission(submission_id=logfile, assignment=cover_letter.rcpt, + timestamp=timestamp, user=user, status='new') - if len(email_files) < 2: - sub.status = "no cover letter" + # only one email + if not patches: + # only one patch, but addressed to an + # assignment inbox. This cannot be valid. + sub.status = 'no patches or no cover letter' sub.save() return 0 - coverletter_file, *patch_files = email_files - # TODO process cover letter + mis_addressed_patches = [str(i+1) for i, patch in enumerate(patches) + if patch.rcpt != cover_letter.rcpt] + + if mis_addressed_patches: + sub.status = (f'patch(es) {",".join(mis_addressed_patches)} ' + f'not addressed to {cover_letter.rcpt}') + sub.save() + return 0 with tempfile.TemporaryDirectory() as repo_path: repo = git.Repo.init(repo_path) @@ -63,8 +85,8 @@ def main(argv): "user.email=daemon@denis.d"] git_am_args = ["git", *author_args, "am"] whitespace_errors = [] - for i, patch_file in enumerate(patch_files): - patch_abspath = str(maildir / patch_file) + for i, patch in enumerate(patches): + patch_abspath = str(maildir / patch.msg_id) # Try and apply and fail if there are whitespace errors def do_git_am(extra_args=[]): @@ -83,12 +105,13 @@ def do_git_am(extra_args=[]): continue # If we still fail, the patch does not apply - sub.status = f'patch #{i+1} failed to apply' + sub.status = f'patch {i+1} failed to apply' sub.save() return 0 if whitespace_errors: - sub.status = f'whitespace error patche(s) {",".join(whitespace_errors)}' # NOQA: E501 + sub.status = ('whitespace error patch(es) ' + f'{",".join(whitespace_errors)}') else: sub.status = 'patchset applies' sub.save()