Source code for plaso.parsers.trendmicroav

# -*- coding: utf-8 -*-
"""Parser for Trend Micro Antivirus logs.

Trend Micro uses two log files to track the scans (both manual/scheduled and
real-time) and the web reputation (network scan/filtering).

Both log files are supported.
"""

from dfdatetime import definitions as dfdatetime_definitions
from dfdatetime import posix_time as dfdatetime_posix_time
from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.containers import time_events
from plaso.lib import definitions
from plaso.lib import errors
from plaso.parsers import dsv_parser
from plaso.parsers import manager


class TrendMicroAVEventData(events.EventData):
  """Trend Micro AV Log event data.

  Attributes:
    action (int): action.
    filename (str): filename.
    path (str): path.
    scan_type (int): scan type.
    threat (str): threat.
  """

  DATA_TYPE = 'av:trendmicro:scan'

  def __init__(self):
    """Initializes event data."""
    super(TrendMicroAVEventData, self).__init__(data_type=self.DATA_TYPE)
    self.action = None
    self.filename = None
    self.path = None
    self.scan_type = None
    self.threat = None


class TrendMicroBaseParser(dsv_parser.DSVParser):
  """Common code for parsing Trend Micro log files.

  The file format is reminiscent of CSV, but is not quite the same; the
  delimiter is a three-character sequence and there is no provision for
  quoting or escaping.
  """
  # pylint: disable=abstract-method

  DELIMITER = '<;>'

  # Subclasses must define an integer MIN_COLUMNS value.
  MIN_COLUMNS = None

  # Subclasses must define a list of field names.
  COLUMNS = ()

  def _CreateDictReader(self, line_reader):
    """Iterates over the log lines and provides a reader for the values.

    Args:
      line_reader (iter): yields each line in the log file.

    Yields:
      dict[str, str]: column values keyed by column header.

    Raises:
      UnableToParseFile: if a log line cannot be parsed.
    """
    for line in line_reader:
      stripped_line = line.strip()
      values = stripped_line.split(self.DELIMITER)
      number_of_values = len(values)
      number_of_columns = len(self.COLUMNS)

      if number_of_values < self.MIN_COLUMNS:
        raise errors.UnableToParseFile(
            'Expected at least {0:d} values, found {1:d}'.format(
                self.MIN_COLUMNS, number_of_values))

      if number_of_values > number_of_columns:
        raise errors.UnableToParseFile(
            'Expected at most {0:d} values, found {1:d}'.format(
                number_of_columns, number_of_values))

      yield dict(zip(self.COLUMNS, values))

  def _ParseTimestamp(self, parser_mediator, row):
    """Provides a timestamp for the given row.

    If the Trend Micro log comes from a version that provides a POSIX
    timestamp, use that directly; it provides the advantages of UTC and of
    second precision. Otherwise fall back onto the local-timezone date and
    time.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      dfdatetime.interface.DateTimeValues: date and time value or None if
          the date and time cannot be determined.
    """
    timestamp = row.get('timestamp', None)
    if timestamp is not None:
      try:
        timestamp = int(timestamp, 10)
      except (ValueError, TypeError):
        parser_mediator.ProduceExtractionWarning(
            'Unable to parse timestamp value: {0!s}'.format(timestamp))
        return None

      return dfdatetime_posix_time.PosixTime(timestamp=timestamp)

    # The timestamp is not available; parse the local date and time instead.
    try:
      return self._ConvertToTimestamp(row['date'], row['time'])
    except ValueError as exception:
      parser_mediator.ProduceExtractionWarning((
          'Unable to parse time string: "{0:s} {1:s}" with error: '
          '{2!s}').format(repr(row['date']), repr(row['time']), exception))
      return None

  def _ConvertToTimestamp(self, date, time):
    """Converts date and time strings into a timestamp.

    Recent versions of Office Scan write a log field with a Unix timestamp.
    Older versions may not write this field; their logs only provide a date
    and a time expressed in the local time zone. This function handles the
    latter case.

    Args:
      date (str): date as an 8-character string in the YYYYMMDD format.
      time (str): time as a 3 or 4-character string in the [H]HMM format.

    Returns:
      dfdatetime.time_elements.TimeElements: the parsed date and time,
          expressed in local time with minute precision.

    Raises:
      ValueError: if the date and time values cannot be parsed.
    """
    # Check that the strings have the correct length.
    if len(date) != 8:
      raise ValueError(
          'Unsupported length of date string: {0!s}'.format(repr(date)))

    if len(time) < 3 or len(time) > 4:
      raise ValueError(
          'Unsupported length of time string: {0!s}'.format(repr(time)))

    # Extract the date.
    try:
      year = int(date[:4], 10)
      month = int(date[4:6], 10)
      day = int(date[6:8], 10)
    except (TypeError, ValueError):
      raise ValueError('Unable to parse date string: {0!s}'.format(repr(date)))

    # Extract the time. Note that a single-digit hour value has no leading
    # zero.
    try:
      hour = int(time[:-2], 10)
      minutes = int(time[-2:], 10)
    except (TypeError, ValueError):
      raise ValueError('Unable to parse time string: {0!s}'.format(repr(time)))

    time_elements_tuple = (year, month, day, hour, minutes, 0)
    date_time = dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple)
    date_time.is_local_time = True
    # TODO: add functionality to dfdatetime to control precision.
    date_time._precision = dfdatetime_definitions.PRECISION_1_MINUTE  # pylint: disable=protected-access

    return date_time


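# Illustrative sketch, not part of the original module: shows how the '<;>'
# delimiter and the [H]HMM time format are handled. The sample column names
# and values below are made up; real logs follow the COLUMNS definitions of
# the concrete parsers further down.
def _ExampleSplitAndTimeHandling():
  """Demonstrates the delimiter split and the [H]HMM time extraction."""
  sample_columns = ('date', 'time', 'threat')  # hypothetical subset of COLUMNS
  sample_line = '20180203<;>809<;>Eicar_test_file'  # made-up log line

  # _CreateDictReader() splits on the three-character delimiter and zips the
  # values against COLUMNS; there is no quoting or escaping to handle.
  row = dict(zip(sample_columns, sample_line.strip().split(
      TrendMicroBaseParser.DELIMITER)))

  # _ConvertToTimestamp() reads the hour as everything before the last two
  # digits, so the single-digit hour in '809' yields hour 8, minutes 9.
  time_string = row['time']
  hour, minutes = int(time_string[:-2], 10), int(time_string[-2:], 10)
  return row, (hour, minutes)

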
class OfficeScanVirusDetectionParser(TrendMicroBaseParser):
  """Parses the Trend Micro Office Scan Virus Detection Log."""

  NAME = 'trendmicro_vd'
  DATA_FORMAT = 'Trend Micro Office Scan Virus Detection log file'

  COLUMNS = [
      'date', 'time', 'threat', 'action', 'scan_type', 'unused1',
      'path', 'filename', 'unused2', 'timestamp', 'unused3', 'unused4']
  MIN_COLUMNS = 8

  _SUPPORTED_SCAN_RESULTS = frozenset([
      0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 25])

  def ParseRow(self, parser_mediator, row_offset, row):
    """Parses a line of the log file and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row_offset (int): line number of the row.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
    """
    timestamp = self._ParseTimestamp(parser_mediator, row)
    if timestamp is None:
      return

    try:
      action = int(row['action'], 10)
    except (ValueError, TypeError):
      action = None

    try:
      scan_type = int(row['scan_type'], 10)
    except (ValueError, TypeError):
      scan_type = None

    event_data = TrendMicroAVEventData()
    event_data.action = action
    event_data.filename = row['filename']
    event_data.offset = row_offset
    event_data.path = row['path']
    event_data.scan_type = scan_type
    event_data.threat = row['threat']

    event = time_events.DateTimeValuesEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyRow(self, parser_mediator, row):
    """Verifies if a line of the file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if len(row) < self.MIN_COLUMNS:
      return False

    # Check the date format!
    # If it doesn't parse, then this isn't a Trend Micro AV log.
    try:
      timestamp = self._ConvertToTimestamp(row['date'], row['time'])
    except (ValueError, TypeError):
      return False

    if timestamp is None:
      return False

    # Check that the action value is plausible.
    try:
      action = int(row['action'], 10)
    except (ValueError, TypeError):
      return False

    return action in self._SUPPORTED_SCAN_RESULTS


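# Illustrative sketch, not part of the original module: a row dict shaped the
# way _CreateDictReader() would produce it for the virus detection log. Every
# value is made up; only the field names come from COLUMNS above.
def _ExampleVirusDetectionRow():
  """Returns a made-up row that VerifyRow() would consider plausible."""
  row = {
      'date': '20180203',         # YYYYMMDD, local time zone
      'time': '0809',             # [H]HMM, local time zone
      'threat': 'Eicar_test_file',
      'action': '10',             # value in _SUPPORTED_SCAN_RESULTS
      'scan_type': '0',
      'unused1': '',
      'path': 'C:\\temp\\',
      'filename': 'eicar.com',
      'timestamp': '1517645340'}  # POSIX timestamp; preferred when present
  supported = OfficeScanVirusDetectionParser._SUPPORTED_SCAN_RESULTS  # pylint: disable=protected-access
  assert int(row['action'], 10) in supported
  return row

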
class TrendMicroUrlEventData(events.EventData):
  """Trend Micro Web Reputation Log event data.

  Attributes:
    block_mode (int): operation mode.
    url (str): accessed URL.
    group_code (str): group code.
    group_name (str): group name.
    credibility_rating (int): credibility rating.
    credibility_score (int): credibility score.
    policy_identifier (int): policy identifier.
    application_name (str): application name.
    ip (str): IP address.
    threshold (int): threshold value.
  """

  DATA_TYPE = 'av:trendmicro:webrep'

  def __init__(self):
    """Initializes event data."""
    super(TrendMicroUrlEventData, self).__init__(data_type=self.DATA_TYPE)
    self.block_mode = None
    self.url = None
    self.group_code = None
    self.group_name = None
    self.credibility_rating = None
    self.credibility_score = None
    self.policy_identifier = None
    self.application_name = None
    self.ip = None
    self.threshold = None


class OfficeScanWebReputationParser(TrendMicroBaseParser):
  """Parses the Trend Micro Office Scan Web Reputation detection log."""

  NAME = 'trendmicro_url'
  DATA_FORMAT = 'Trend Micro Office Web Reputation log file'

  COLUMNS = (
      'date', 'time', 'block_mode', 'url', 'group_code', 'group_name',
      'credibility_rating', 'policy_identifier', 'application_name',
      'credibility_score', 'ip', 'threshold', 'timestamp', 'unused')

  MIN_COLUMNS = 12

  _SUPPORTED_BLOCK_MODES = frozenset([0, 1])

  def ParseRow(self, parser_mediator, row_offset, row):
    """Parses a line of the log file and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row_offset (int): line number of the row.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
    """
    timestamp = self._ParseTimestamp(parser_mediator, row)
    if timestamp is None:
      return

    event_data = TrendMicroUrlEventData()
    event_data.offset = row_offset

    # Convert and store integer values.
    for field in (
        'credibility_rating', 'credibility_score', 'policy_identifier',
        'threshold', 'block_mode'):
      try:
        value = int(row[field], 10)
      except (ValueError, TypeError):
        value = None
      setattr(event_data, field, value)

    # Store string values.
    for field in ('url', 'group_name', 'group_code', 'application_name', 'ip'):
      setattr(event_data, field, row[field])

    event = time_events.DateTimeValuesEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def VerifyRow(self, parser_mediator, row):
    """Verifies if a line of the file is in the expected format.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      row (dict[str, str]): fields of a single row, as specified in COLUMNS.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if len(row) < self.MIN_COLUMNS:
      return False

    # Check the date format!
    # If it doesn't parse, then this isn't a Trend Micro AV log.
    try:
      timestamp = self._ConvertToTimestamp(row['date'], row['time'])
    except ValueError:
      return False

    if timestamp is None:
      return False

    try:
      block_mode = int(row['block_mode'], 10)
    except (ValueError, TypeError):
      return False

    return block_mode in self._SUPPORTED_BLOCK_MODES


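# Illustrative sketch, not part of the original module: mirrors how the web
# reputation ParseRow() above splits row fields into integer-typed and
# string-typed attributes. All sample values are made up.
def _ExampleWebReputationFieldHandling():
  """Groups made-up web reputation values the way ParseRow() stores them."""
  integer_fields = {
      'credibility_rating': '2', 'credibility_score': '49',
      'policy_identifier': '1', 'threshold': '0', 'block_mode': '1'}
  string_fields = {
      'url': 'http://www.example.com/', 'group_name': 'Untested URL',
      'group_code': '71', 'application_name': 'iexplore.exe', 'ip': '10.0.0.1'}

  converted = {}
  for name, value in integer_fields.items():
    # As in ParseRow(): integer-typed fields fall back to None when the value
    # does not parse; string-typed fields are stored verbatim.
    try:
      converted[name] = int(value, 10)
    except (ValueError, TypeError):
      converted[name] = None
  return converted, string_fields

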
manager.ParsersManager.RegisterParsers([
    OfficeScanVirusDetectionParser, OfficeScanWebReputationParser])


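# Usage note (illustrative, not part of the original module): once registered,
# the parsers are selectable by their NAME values in a plaso parser filter
# expression, for example "trendmicro_vd,trendmicro_url" passed to
# log2timeline's --parsers option. The helper below only echoes the names and
# data formats defined in this file.
def _TrendMicroParserNames():
  """Returns (NAME, DATA_FORMAT) pairs for the parsers defined above."""
  return [
      (OfficeScanVirusDetectionParser.NAME,
       OfficeScanVirusDetectionParser.DATA_FORMAT),
      (OfficeScanWebReputationParser.NAME,
       OfficeScanWebReputationParser.DATA_FORMAT)]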