Source code for plaso.parsers.google_logging

# -*- coding: utf-8 -*-
"""Parser for Google-formatted log files."""

import re

from dfdatetime import time_elements as dfdatetime_time_elements

import pyparsing

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import errors
from plaso.lib import definitions
from plaso.parsers import manager
from plaso.parsers import text_parser

[docs]class GoogleLogEventData(events.EventData): """Google-formatted log file event data. See: This format is also used by Kubernetes, see Attributes: file_name (str): the name of the source file that logged the message. line_number (int): the line number in the source file where the logging statement is. message (str): the log message. priority (str): the priority of the message - I, W, E or F. These values represent messages logged at INFO, WARNING, ERROR or FATAL severities, respectively. thread_identifier (int): the identifier of the thread that recorded the message. """ DATA_TYPE = 'googlelog:log' def __init__(self, data_type=DATA_TYPE): """Initializes event data. Args: data_type (Optional[str]): event data type indicator. """ super(GoogleLogEventData, self).__init__(data_type=data_type) self.file_name = None self.line_number = None self.message = None self.priority = None self.thread_identifier = None
[docs]class GoogleLogParser(text_parser.PyparsingMultiLineTextParser): """Parser for Google-formatted log files.""" NAME = 'googlelog' DATA_FORMAT = 'Google-formatted log file' # PyParsing components used to construct grammars for parsing lines. _PYPARSING_COMPONENTS = { 'priority': ( pyparsing.oneOf(['I', 'W', 'E', 'F']).setResultsName('priority')), 'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName( 'year'), 'month_number': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'month_number'), 'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName( 'day'), 'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'hour'), 'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'minute'), 'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName( 'second'), 'microsecond': pyparsing.Word(pyparsing.nums, exact=6).setParseAction( text_parser.PyParseIntCast).setResultsName('microsecond'), 'thread_identifier': pyparsing.Word(pyparsing.nums).setResultsName( 'thread_identifier'), 'file_name': (pyparsing.Word(pyparsing.printables.replace(':', '')).setResultsName( 'file_name')), 'line_number': ( pyparsing.Word(pyparsing.nums).setResultsName('line_number')), 'message': ( pyparsing.Regex('.*?(?=($|\n[IWEF][0-9]{4}))', re.DOTALL). setResultsName('message'))} _PYPARSING_COMPONENTS['date'] = ( _PYPARSING_COMPONENTS['month_number'] + _PYPARSING_COMPONENTS['day'] + _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['second'] + pyparsing.Optional( pyparsing.Suppress('.') + _PYPARSING_COMPONENTS['microsecond'])) # Grammar for individual log event lines. _LOG_LINE = ( _PYPARSING_COMPONENTS['priority'] + _PYPARSING_COMPONENTS['date'] + _PYPARSING_COMPONENTS['thread_identifier'] + _PYPARSING_COMPONENTS['file_name'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['line_number'] + pyparsing.Suppress('] ') + _PYPARSING_COMPONENTS['message'] + pyparsing.lineEnd()) # Grammar for the log file greeting. _GREETING = ( _PYPARSING_COMPONENTS['year'] + pyparsing.Suppress('/') + _PYPARSING_COMPONENTS['month_number'] + pyparsing.Suppress('/') + _PYPARSING_COMPONENTS['day'] + _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['second'] + pyparsing.Regex('.*?(?=($|\n[IWEF][0-9]{4}))', re.DOTALL) + pyparsing.lineEnd()) _GREETING_START = 'Log file created at: ' # Our initial buffer length is the length of the string we verify with. _INITIAL_BUFFER_SIZE = len(_GREETING_START) # Once we're sure we're reading a valid file, we'll use a larger read buffer. _RUNNING_BUFFER_SIZE = 5120 # Order is important here, as the structures are checked against each line # sequentially, so we put the most common first, and the most expensive # last. LINE_STRUCTURES = [ ('log_entry', _LOG_LINE), ('greeting_start', pyparsing.Literal(_GREETING_START)), ('greeting', _GREETING)] _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES]) def __init__(self): """Initializes a Google log formatted file parser.""" super(GoogleLogParser, self).__init__() # Set the size of the file we need to read to verify it. self._buffer_size = self._INITIAL_BUFFER_SIZE self._maximum_year = 0 # The year to use for events. The initial year is stored in the log file # greeting. self._year = 0 # The month the last observed event occurred. self._last_month = 0 def _UpdateYear(self, mediator, month): """Updates the year to use for events, based on last observed month. Args: mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. month (int): month observed by the parser, where January is 1. """ if not self._year: self._year = mediator.GetEstimatedYear() if not self._maximum_year: self._maximum_year = mediator.GetLatestYear() if not self._last_month: self._last_month = month return # TODO: Check whether out of order events are possible if self._last_month > (month + 1): if self._year != self._maximum_year: self._year += 1 self._last_month = month def _ReadGreeting(self, structure): """Extract useful information from the logfile greeting. Args: structure (pyparsing.ParseResults): elements parsed from the file. """ self._year = self._GetValueFromStructure(structure, 'year') self._last_month = self._GetValueFromStructure(structure, 'month_number') def _ParseLine(self, parser_mediator, structure): """Process a single log line into a GoogleLogEvent. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. structure (pyparsing.ParseResults): elements parsed from the file. """ month = self._GetValueFromStructure(structure, 'month_number') if month != 0: self._UpdateYear(parser_mediator, month) day = self._GetValueFromStructure(structure, 'day') hours = self._GetValueFromStructure(structure, 'hour') minutes = self._GetValueFromStructure(structure, 'minute') seconds = self._GetValueFromStructure(structure, 'second') microseconds = self._GetValueFromStructure(structure, 'microsecond') time_elements_tuple = ( self._year, month, day, hours, minutes, seconds, microseconds) try: date_time = dfdatetime_time_elements.TimeElementsInMicroseconds( time_elements_tuple=time_elements_tuple) date_time.is_local_time = True except ValueError: parser_mediator.ProduceExtractionWarning( 'invalid date time value: {0!s}'.format(time_elements_tuple)) return event_data = GoogleLogEventData() event_data.priority = self._GetValueFromStructure(structure, 'priority') event_data.thread_identifier = self._GetValueFromStructure( structure, 'thread_identifier') event_data.file_name = self._GetValueFromStructure(structure, 'file_name') event_data.line_number = self._GetValueFromStructure( structure, 'line_number') event_data.message = self._GetValueFromStructure(structure, 'message') event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_WRITTEN) parser_mediator.ProduceEventWithEventData(event, event_data)
[docs] def ParseRecord(self, parser_mediator, key, structure): """Parses a matching entry. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. key (str): name of the parsed structure. structure (pyparsing.ParseResults): elements parsed from the file. Raises: ParseError: when the structure type is unknown. """ if key not in self._SUPPORTED_KEYS: raise errors.ParseError( 'Unable to parse record, unknown structure: {0:s}'.format(key)) if key == 'greeting': self._ReadGreeting(structure) elif key == 'log_entry': self._ParseLine(parser_mediator, structure)
[docs] def VerifyStructure(self, parser_mediator, lines): """Verifies that this is a google log-formatted file. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. lines (str): one or more lines from the text file. Returns: bool: True if this is the correct parser, False otherwise. """ if not lines.startswith(self._GREETING_START): return False # Now that we know this is a valid log, expand the read buffer to the # maximum size we expect a log event to be (which is quite large). self._buffer_size = self._RUNNING_BUFFER_SIZE self._year = parser_mediator.year return True