Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions aodncore/bin/logview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python

"""
Script to parse and view logs generated by pipelines.
"""

import argparse
import os
import re

# location of logs
from aodncore.util.logviewer import LOG_WATCH, LOGDIR_PROCESS, LogViewer


def find_log(input_file):
    """
    Given the name of an uploaded file, find the log file(s) from the pipeline process that handled it.

    Scans the watchservice log (LOG_WATCH) for task entries that mention the file,
    and maps each matching task name to its process log in LOGDIR_PROCESS.

    :param str input_file: Name of uploaded file
    :return: List of full paths to log files (empty if no match found)
    :raises ValueError: if input_file has no file name component

    """
    # first, if the input file name includes a task_id (uuid) appended at the end, strip it off
    filename = os.path.basename(input_file)
    if not filename:
        # raise instead of assert: assertions are stripped when running with -O
        raise ValueError('No input file name provided!')
    match = re.match(
        r"(.+?)([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$",
        filename
    )
    filename, task_id = match.groups()
    # escape the dot in 'tasks.' so it only matches a literal '.', and escape the
    # filename so regex metacharacters in file names can't break (or widen) the search
    task_name_pattern = re.compile(
        r"task_name='(tasks\.\w+)'.*pathname='.*{fn}'".format(fn=re.escape(filename))
    )

    logfiles = []
    # read LOG_WATCH file and find the file name
    with open(LOG_WATCH) as watchlog:
        for line in watchlog:
            match = task_name_pattern.search(line)
            if match:
                logfiles.append(
                    os.path.join(LOGDIR_PROCESS, '{}.log'.format(match.group(1)))
                )

    # bug fix: previously returned None, discarding the collected log files
    return logfiles


def parse_args():
    """Parse the command line.

    :return: argparse.Namespace with the parsed options, plus a derived
        ``levels`` attribute (tuple of level names to show, or None for all)
    """
    parser = argparse.ArgumentParser()
    # (flags, keyword arguments) for each option, registered in help order
    option_specs = (
        (('-l', '--logfile'), {'help': 'path to pipeline log file'}),
        (('-t', '--task_name'), {'help': 'log for pipeline task'}),
        (('-i', '--task_id'), {'help': 'filter by task_id', 'metavar': 'ID'}),
        (('-e', '--errors'), {'help': 'error lines only', 'action': 'store_true'}),
        (('-w', '--warnings'), {'help': 'warning & error lines only', 'action': 'store_true'}),
        (('-p', '--pattern'), {'help': 'lines matching regex pattern', 'metavar': 'REGEX'}),
        (('-f', '--file'), {'help': 'name of processed file'}),
    )
    for flags, kwargs in option_specs:
        parser.add_argument(*flags, **kwargs)

    args = parser.parse_args()

    # no explicit log file: derive it from the task name and/or processed file name
    if not args.logfile:
        if args.task_name:
            args.logfile = os.path.join(LOGDIR_PROCESS, 'tasks.{}.log'.format(args.task_name))
        if args.file:
            args.logfile = find_log(args.file)

    # -w takes precedence over -e when both are given (checked last)
    args.levels = None
    if args.errors:
        args.levels = ('ERROR', 'CRITICAL')
    if args.warnings:
        args.levels = ('WARNING', 'ERROR', 'CRITICAL')

    print('Args: {}\n'.format(args))

    return args


if __name__ == '__main__':
    args = parse_args()

    # TODO: filter by file name (parent or child)

    if not args.logfile:
        # bug fix: without this guard, LogViewer(None) fails with an opaque
        # TypeError from os.path.isfile when no log file is given or found
        raise SystemExit('No log file specified or found. See --help for usage.')

    # find_log returns a list of log files, while -l/-t provide a single path;
    # normalize to a list and show each one
    logfiles = args.logfile if isinstance(args.logfile, list) else [args.logfile]
    for logfile in logfiles:
        lv = LogViewer(logfile)
        lv.show(task_id=args.task_id, levels=args.levels, pattern=args.pattern)

    exit(0)
91 changes: 91 additions & 0 deletions aodncore/util/logviewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
import re
import sys
from collections import OrderedDict

# root directory of all pipeline logs
LOGDIR_BASE = '/sw/chef/src/tmp/p2_logs'
# log written by the watchservice that dispatches incoming files to pipeline tasks
LOG_WATCH = LOGDIR_BASE + '/watchservice/pipeline_watchservice-stderr.log'
# celery worker logs
LOGDIR_CELERY = LOGDIR_BASE + '/celery'
# per-task process logs (one file per pipeline task)
LOGDIR_PROCESS = LOGDIR_BASE + '/process'

# regular expressions to match log format and define fields extracted from log
LOG_FIELDS = OrderedDict([
    ('time', r"(?P<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+"),
    ('level', r"(?P<level>[A-Z]+)\s+"),
    # bug fix: escape the '.' so only a literal 'tasks.' prefix matches
    # (an unescaped dot would also match e.g. 'tasksX')
    ('task_name', r"tasks\.(?P<task_name>\w+)"),
    ('task_id', r"\[(?P<task_id>[0-9a-f-]+)\]\s+"),
    ('message', r"(?P<message>.*)")
])
# full-line pattern: concatenation of all field patterns, in declaration order
INPUT_REGEX = re.compile(''.join(LOG_FIELDS.values()))
# default output format for LogViewer.show (uses the named fields above)
DEFAULT_FORMAT = '{time:20} {level:>9} {message}\n'


class LogViewer(object):
    """
    Class to parse logs written by pipelines and output various filtered or summary views.
    """

    def __init__(self, logfile):
        # fail fast at construction time if the log file doesn't exist
        if not os.path.isfile(logfile):
            raise ValueError('{logfile}: no such file!'.format(logfile=logfile))
        self.logfile = logfile

    def log_entries(self):
        """Parse the log and yield a tuple (raw, data) for one log entry at a time,
        where raw is the full text of the line from the log and data is a dictionary
        of the fields extracted as per INPUT_REGEX.

        """
        # TODO: option to read from stdin
        with open(self.logfile) as log:
            for raw_line in log:
                raw_line = raw_line.strip()
                parsed = INPUT_REGEX.match(raw_line)
                if parsed is None:
                    # TODO: deal with unformatted lines
                    continue

                yield raw_line, parsed.groupdict()

    def filtered_entries(self, task_id=None, levels=None, pattern=None):
        """
        Yield only the tuples from log_entries that pass all of the specified filters.

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
        :param str pattern: only include log messages matching pattern (regular expression)
        :return: tuple (raw, data) as for log_entries

        """
        regex = re.compile(pattern) if pattern else None

        for raw, data in self.log_entries():
            wanted = (
                (not task_id or data['task_id'] == task_id) and
                (not levels or data['level'] in levels) and
                (regex is None or regex.search(data['message']))
            )
            # TODO: filter by handler step?
            if wanted:
                yield raw, data

    def show(self, task_id=None, levels=None, pattern=None, fmt=DEFAULT_FORMAT):
        """
        Print a filtered & re-formatted view of the log to stdout

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
        :param str pattern: only include log messages matching pattern (regular expression)
        :param str fmt: output format (fmt.format() applied to dict of LOG_FIELDS extracted from log)

        """
        for _raw, fields in self.filtered_entries(task_id=task_id, levels=levels, pattern=pattern):
            line_out = fmt.format(**fields)
            try:
                sys.stdout.write(line_out)
                sys.stdout.flush()
            except IOError:
                # this can happen if output is piped to `head` or `less`
                pass
22 changes: 22 additions & 0 deletions test_aodncore/util/test_logviewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
import unittest

from aodncore.testlib import BaseTestCase
from aodncore.util.logviewer import LogViewer

from .test_misc import get_nonexistent_path


# directory containing this test module (used to locate test fixtures)
TEST_ROOT = os.path.join(os.path.dirname(__file__))
# sample pipeline task log fixture expected alongside this module
LOG_FILE = os.path.join(TEST_ROOT, 'tasks.ANMN_SA.log')


class TestLogViewer(BaseTestCase):
    def test_init(self):
        # a valid path is stored unchanged on the instance
        viewer = LogViewer(LOG_FILE)
        self.assertEqual(LOG_FILE, viewer.logfile)
        # a nonexistent file is rejected at construction time
        with self.assertRaises(ValueError):
            LogViewer(get_nonexistent_path())


# allow running this test module directly (outside a test runner)
if __name__ == '__main__':
    unittest.main()