Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions aodncore/bin/logview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python

"""
Script to parse and view logs generated by pipelines.
"""

import argparse
import os
import re

# location of logs
from aodncore.util.logviewer import LOG_WATCH, LOGDIR_PROCESS, LogViewer


def find_log(input_file):
    """
    Given the name of an uploaded file, find the log file(s) from the pipeline process that handled it.

    Scans the watchservice log (LOG_WATCH) for task entries that mention the file,
    and maps each matching task name to its process log in LOGDIR_PROCESS.

    :param str input_file: Name of uploaded file
    :return: List of full paths to log files (empty if no match found)
    :raises ValueError: if input_file has no file name component

    """
    # first, if the input file name includes a task_id (uuid) appended at the end, strip it off
    filename = os.path.basename(input_file)
    if not filename:
        # raise instead of assert: assertions are stripped when running with -O
        raise ValueError('No input file name provided!')
    match = re.match(
        r"(.+?)([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$",
        filename
    )
    filename, task_id = match.groups()
    # escape the dot in 'tasks.' so it only matches a literal '.', and escape the
    # filename so regex metacharacters in file names can't break (or widen) the search
    task_name_pattern = re.compile(
        r"task_name='(tasks\.\w+)'.*pathname='.*{fn}'".format(fn=re.escape(filename))
    )

    logfiles = []
    # read LOG_WATCH file and find the file name
    with open(LOG_WATCH) as watchlog:
        for line in watchlog:
            match = task_name_pattern.search(line)
            if match:
                logfiles.append(
                    os.path.join(LOGDIR_PROCESS, '{}.log'.format(match.group(1)))
                )

    # bug fix: previously returned None, discarding the collected log files
    return logfiles


def parse_args():
    """Parse the command line.

    :return: argparse.Namespace with the parsed options, plus a derived
        ``levels`` attribute (tuple of level names to show, or None for all)
    """
    parser = argparse.ArgumentParser()
    # (flags, keyword arguments) for each option, registered in help order
    option_specs = (
        (('-l', '--logfile'), {'help': 'path to pipeline log file'}),
        (('-t', '--task_name'), {'help': 'log for pipeline task'}),
        (('-i', '--task_id'), {'help': 'filter by task_id', 'metavar': 'ID'}),
        (('-e', '--errors'), {'help': 'error lines only', 'action': 'store_true'}),
        (('-w', '--warnings'), {'help': 'warning & error lines only', 'action': 'store_true'}),
        (('-p', '--pattern'), {'help': 'lines matching regex pattern', 'metavar': 'REGEX'}),
        (('-f', '--file'), {'help': 'name of processed file'}),
    )
    for flags, kwargs in option_specs:
        parser.add_argument(*flags, **kwargs)

    args = parser.parse_args()

    # no explicit log file: derive it from the task name and/or processed file name
    if not args.logfile:
        if args.task_name:
            args.logfile = os.path.join(LOGDIR_PROCESS, 'tasks.{}.log'.format(args.task_name))
        if args.file:
            args.logfile = find_log(args.file)

    # -w takes precedence over -e when both are given (checked last)
    args.levels = None
    if args.errors:
        args.levels = ('ERROR', 'CRITICAL')
    if args.warnings:
        args.levels = ('WARNING', 'ERROR', 'CRITICAL')

    print('Args: {}\n'.format(args))

    return args


if __name__ == '__main__':
    args = parse_args()

    # TODO: filter by file name (parent or child)

    if not args.logfile:
        # bug fix: without this guard, LogViewer(None) fails with an opaque
        # TypeError from os.path.isfile when no log file is given or found
        raise SystemExit('No log file specified or found. See --help for usage.')

    # find_log returns a list of log files, while -l/-t provide a single path;
    # normalize to a list and show each one
    logfiles = args.logfile if isinstance(args.logfile, list) else [args.logfile]
    for logfile in logfiles:
        lv = LogViewer(logfile)
        lv.show(task_id=args.task_id, levels=args.levels, pattern=args.pattern)

    exit(0)
91 changes: 91 additions & 0 deletions aodncore/util/logviewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
import re
import sys
from collections import OrderedDict

# root directory of all pipeline logs
LOGDIR_BASE = '/sw/chef/src/tmp/p2_logs'
# log written by the watchservice that dispatches incoming files to pipeline tasks
LOG_WATCH = LOGDIR_BASE + '/watchservice/pipeline_watchservice-stderr.log'
# celery worker logs
LOGDIR_CELERY = LOGDIR_BASE + '/celery'
# per-task process logs (one file per pipeline task)
LOGDIR_PROCESS = LOGDIR_BASE + '/process'

# regular expressions to match log format and define fields extracted from log
LOG_FIELDS = OrderedDict([
    ('time', r"(?P<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+"),
    ('level', r"(?P<level>[A-Z]+)\s+"),
    # bug fix: escape the '.' so only a literal 'tasks.' prefix matches
    # (an unescaped dot would also match e.g. 'tasksX')
    ('task_name', r"tasks\.(?P<task_name>\w+)"),
    ('task_id', r"\[(?P<task_id>[0-9a-f-]+)\]\s+"),
    ('message', r"(?P<message>.*)")
])
# full-line pattern: concatenation of all field patterns, in declaration order
INPUT_REGEX = re.compile(''.join(LOG_FIELDS.values()))
# default output format for LogViewer.show (uses the named fields above)
DEFAULT_FORMAT = '{time:20} {level:>9} {message}\n'


class LogViewer(object):
    """
    Class to parse logs written by pipelines and output various filtered or summary views.
    """

    def __init__(self, logfile):
        # fail fast at construction time if the log file doesn't exist
        if not os.path.isfile(logfile):
            raise ValueError('{logfile}: no such file!'.format(logfile=logfile))
        self.logfile = logfile

    def log_entries(self):
        """Parse the log and yield a tuple (raw, data) for one log entry at a time,
        where raw is the full text of the line from the log and data is a dictionary
        of the fields extracted as per INPUT_REGEX.

        """
        # TODO: option to read from stdin
        with open(self.logfile) as log:
            for raw_line in log:
                raw_line = raw_line.strip()
                parsed = INPUT_REGEX.match(raw_line)
                if parsed is None:
                    # TODO: deal with unformatted lines
                    continue

                yield raw_line, parsed.groupdict()

    def filtered_entries(self, task_id=None, levels=None, pattern=None):
        """
        Yield only the tuples from log_entries that pass all of the specified filters.

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
        :param str pattern: only include log messages matching pattern (regular expression)
        :return: tuple (raw, data) as for log_entries

        """
        regex = re.compile(pattern) if pattern else None

        for raw, data in self.log_entries():
            wanted = (
                (not task_id or data['task_id'] == task_id) and
                (not levels or data['level'] in levels) and
                (regex is None or regex.search(data['message']))
            )
            # TODO: filter by handler step?
            if wanted:
                yield raw, data

    def show(self, task_id=None, levels=None, pattern=None, fmt=DEFAULT_FORMAT):
        """
        Print a filtered & re-formatted view of the log to stdout

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
        :param str pattern: only include log messages matching pattern (regular expression)
        :param str fmt: output format (fmt.format() applied to dict of LOG_FIELDS extracted from log)

        """
        for _raw, fields in self.filtered_entries(task_id=task_id, levels=levels, pattern=pattern):
            line_out = fmt.format(**fields)
            try:
                sys.stdout.write(line_out)
                sys.stdout.flush()
            except IOError:
                # this can happen if output is piped to `head` or `less`
                pass
22 changes: 22 additions & 0 deletions test_aodncore/util/test_logviewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
import unittest

from aodncore.testlib import BaseTestCase
from aodncore.util.logviewer import LogViewer

from .test_misc import get_nonexistent_path


# directory containing this test module (used to locate test fixtures)
TEST_ROOT = os.path.join(os.path.dirname(__file__))
# sample pipeline task log fixture expected alongside this module
LOG_FILE = os.path.join(TEST_ROOT, 'tasks.ANMN_SA.log')


class TestLogViewer(BaseTestCase):
    def test_init(self):
        # a valid path is stored unchanged on the instance
        viewer = LogViewer(LOG_FILE)
        self.assertEqual(LOG_FILE, viewer.logfile)
        # a nonexistent file is rejected at construction time
        with self.assertRaises(ValueError):
            LogViewer(get_nonexistent_path())


# allow running this test module directly (outside a test runner)
if __name__ == '__main__':
    unittest.main()