Source code for plaso.parsers.text_plugins.gdrive_synclog

# -*- coding: utf-8 -*-
"""Text parser plugin for Google Drive Sync log files."""

import pyparsing

from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface


class GoogleDriveSyncLogEventData(events.EventData):
  """Event data of a single Google Drive Sync log entry.

  Attributes:
    added_time (dfdatetime.DateTimeValues): date and time the log entry
        was added.
    level (str): logging level of the entry, for example "DEBUG", "WARN",
        "INFO" or "ERROR".
    message (str): message text of the entry.
    process_identifier (int): identifier of the process that logged
        the entry.
    source_code (str): source location that logged the entry, in the form
        filename:line_number.
    thread (str): thread that logged the entry, as a colon-separated
        "ID:name" value.
  """

  DATA_TYPE = 'google_drive_sync_log:entry'

  def __init__(self):
    """Initializes event data with all attributes unset."""
    super().__init__(data_type=self.DATA_TYPE)
    # Every value stays None until the plugin fills it in from a log line.
    self.added_time = None
    self.level = None
    self.message = None
    self.process_identifier = None
    self.source_code = None
    self.thread = None
class GoogleDriveSyncLogTextPlugin(interface.TextPluginWithLineContinuation):
  """Text parser plugin for Google Drive Sync log files.

  A log entry starts with a line that carries a date and time, logging level,
  process identifier, thread and source code location, followed by the message
  body. Lines that do not match this structure are treated as continuations
  of the message of the preceding entry.
  """

  NAME = 'gdrive_synclog'
  DATA_FORMAT = 'Google Drive Sync log file'

  ENCODING = 'utf-8'

  _INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(
      lambda tokens: int(tokens[0], 10))

  _TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).setParseAction(
      lambda tokens: int(tokens[0], 10))

  _THREE_DIGITS = pyparsing.Word(pyparsing.nums, exact=3).setParseAction(
      lambda tokens: int(tokens[0], 10))

  _FOUR_DIGITS = pyparsing.Word(pyparsing.nums, exact=4).setParseAction(
      lambda tokens: int(tokens[0], 10))

  # The fraction of second separator is either "." or ",".
  _FRACTION_OF_SECOND = pyparsing.Word('.,', exact=1).suppress() + _THREE_DIGITS

  # Grouped as: sign, hours, minutes.
  _TIME_ZONE_OFFSET = pyparsing.Group(
      pyparsing.Word('+-', exact=1) + _TWO_DIGITS + _TWO_DIGITS)

  # Grouped as: year, month, day of month, hours, minutes, seconds,
  # milliseconds, time zone offset group.
  _DATE_TIME = pyparsing.Group(
      _FOUR_DIGITS + pyparsing.Suppress('-') +
      _TWO_DIGITS + pyparsing.Suppress('-') + _TWO_DIGITS +
      _TWO_DIGITS + pyparsing.Suppress(':') +
      _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS +
      _FRACTION_OF_SECOND + _TIME_ZONE_OFFSET).setResultsName('date_time')

  _PROCESS_IDENTIFIER = (
      pyparsing.Suppress('pid=') +
      _INTEGER.setResultsName('process_identifier'))

  _THREAD = pyparsing.Combine(
      pyparsing.Word(pyparsing.nums) + pyparsing.Literal(':') +
      pyparsing.Word(pyparsing.printables))

  _END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())

  _LOG_LINE_START = (
      _DATE_TIME +
      pyparsing.Word(pyparsing.alphas).setResultsName('level') +
      _PROCESS_IDENTIFIER +
      # TODO: consider stripping thread identifier/cleaning up thread name?
      _THREAD.setResultsName('thread') +
      pyparsing.Word(pyparsing.printables).setResultsName('source_code'))

  _LOG_LINE = (
      _LOG_LINE_START + pyparsing.restOfLine().setResultsName('body') +
      _END_OF_LINE)

  _LINE_STRUCTURES = [('log_line', _LOG_LINE)]

  # Using a regular expression here is faster on non-match than the log line
  # grammar.
  VERIFICATION_GRAMMAR = pyparsing.Regex(
      r'(?P<date_time>[0-9]{4}-[0-9]{2}-[0-9]{2} '
      r'[0-9]{2}:[0-9]{2}:[0-9]{2}[,.][0-9]{3} [+-][0-9]{4}) '
      r'[A-Z]+ pid=[0-9]+ [0-9]+:\S+[ ]+\S+:[0-9]+ .*\n')

  VERIFICATION_LITERALS = [' ERROR ', ' FATAL ', ' INFO ', ' pid=', ' WARNING ']

  def __init__(self):
    """Initializes a text parser plugin."""
    super(GoogleDriveSyncLogTextPlugin, self).__init__()
    # Message lines and event data of the log entry currently being assembled.
    self._body_lines = None
    self._event_data = None

  def _ParseFinalize(self, parser_mediator):
    """Finalizes parsing.

    Produces the event data of the last log entry, which has not been
    flushed yet since no subsequent log line followed it.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
    """
    if self._event_data:
      self._event_data.message = ' '.join(self._body_lines)
      self._body_lines = None

      parser_mediator.ProduceEventData(self._event_data)
      self._event_data = None

  def _ParseLogline(self, structure):
    """Parses a log line.

    The resulting event data is kept as pending state, so that subsequent
    continuation lines can be appended to its message before it is produced.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    time_elements_structure = self._GetValueFromStructure(
        structure, 'date_time')

    body = self._GetValueFromStructure(structure, 'body', default_value='')
    body = body.strip()

    event_data = GoogleDriveSyncLogEventData()
    event_data.added_time = self._ParseTimeElements(time_elements_structure)
    event_data.level = self._GetValueFromStructure(structure, 'level')
    event_data.process_identifier = self._GetValueFromStructure(
        structure, 'process_identifier')
    event_data.thread = self._GetValueFromStructure(structure, 'thread')
    event_data.source_code = self._GetValueFromStructure(
        structure, 'source_code')

    self._event_data = event_data
    self._body_lines = [body]

  def _ParseRecord(self, parser_mediator, key, structure):
    """Parses a pyparsing structure.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults|str): tokens from a parsed log line,
          or the text of the line when key is "_line_continuation".
    """
    if key == '_line_continuation':
      body = structure.replace('\n', ' ').strip()
      # A continuation without a preceding log line has no entry to extend;
      # skip it instead of failing on an unset list.
      if self._body_lines is not None:
        self._body_lines.append(body)

    else:
      # A new log line completes the pending entry, if any.
      if self._event_data:
        self._event_data.message = ' '.join(self._body_lines)
        parser_mediator.ProduceEventData(self._event_data)

      self._ParseLogline(structure)

  def _ParseTimeElements(self, time_elements_structure):
    """Parses date and time elements of a log line.

    Args:
      time_elements_structure (pyparsing.ParseResults): date and time elements
          of a log line, as grouped by _DATE_TIME.

    Returns:
      dfdatetime.TimeElements: date and time value or None if a valid date and
          time value cannot be derived from the time elements.
    """
    # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
    # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
    # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
    # overriding __getattr__ with a function that returns an empty string when
    # named token does not exist.
    try:
      (year, month, day_of_month, hours, minutes, seconds, milliseconds,
       time_zone_group) = time_elements_structure

      time_zone_sign, time_zone_hours, time_zone_minutes = time_zone_group

      time_elements_tuple = (
          year, month, day_of_month, hours, minutes, seconds, milliseconds)

      # The time zone offset is expressed in minutes.
      time_zone_offset = (time_zone_hours * 60) + time_zone_minutes
      if time_zone_sign == '-':
        time_zone_offset *= -1

      return dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple,
          time_zone_offset=time_zone_offset)

    except (TypeError, ValueError):
      return None

  def _ResetState(self):
    """Resets stored values."""
    self._body_lines = None
    self._event_data = None

  def CheckRequiredFormat(self, parser_mediator, text_reader):
    """Check if the log record has the minimal structure required by the parser.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      text_reader (EncodedTextReader): text reader.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      structure = self._VerifyString(text_reader.lines)
    except errors.ParseError:
      return False

    # The verification grammar is a regular expression, hence the date and
    # time value is a string that needs to be parsed into its elements.
    date_time_string = self._GetValueFromStructure(structure, 'date_time')

    try:
      parsed_date_time = self._DATE_TIME.parseString(date_time_string)
    except pyparsing.ParseException:
      return False

    # _DATE_TIME is a pyparsing.Group, so the time elements are nested inside
    # the parse result under the "date_time" results name.
    time_elements_structure = self._GetValueFromStructure(
        parsed_date_time, 'date_time')

    # _ParseTimeElements returns None when the elements do not represent a
    # valid date and time value.
    if self._ParseTimeElements(time_elements_structure) is None:
      return False

    self._ResetState()

    return True
# Register the plugin with the text log parser when this module is imported.
text_parser.TextLogParser.RegisterPlugin(GoogleDriveSyncLogTextPlugin)