Source code for plaso.parsers.java_idx

# -*- coding: utf-8 -*-
"""Parser for Java Cache IDX files."""

# TODO:
#  * 6.02 files did not retain IP addresses. However, the
#    deploy_resource_codebase header field may contain the host IP.
#    This needs to be researched further, as that field may not always
#    be present. 6.02 files will currently return 'Unknown'.

import os

from dfdatetime import java_time as dfdatetime_java_time
from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.lib import dtfabric_helper
from plaso.lib import errors
from plaso.parsers import interface
from plaso.parsers import manager


[docs] class JavaIDXEventData(events.EventData): """Java IDX cache file event data. Attributes: downloaded_time (dfdatetime.DateTimeValues): date and time the content was downloaded. expiration_time (dfdatetime.DateTimeValues): date and time the cached download expires. idx_version (str): format version of IDX file. ip_address (str): IP address of the host in the URL. modification_time (dfdatetime.DateTimeValues): date and time the cached download expires. url (str): URL of the downloaded file. """ DATA_TYPE = 'java:download:idx'
[docs] def __init__(self): """Initializes event data.""" super(JavaIDXEventData, self).__init__(data_type=self.DATA_TYPE) self.downloaded_time = None self.expiration_time = None self.idx_version = None self.ip_address = None self.modification_time = None self.url = None
[docs] class JavaIDXParser(interface.FileObjectParser, dtfabric_helper.DtFabricHelper): """Parser for Java WebStart Cache IDX files. There are five structures defined. 6.02 files had one generic section that retained all data. From 6.03, the file went to a multi-section format where later sections were optional and had variable-lengths. 6.03, 6.04, and 6.05 files all have their main data section (#2) begin at offset 128. The short structure is because 6.05 files deviate after the 8th byte. So, grab the first 8 bytes to ensure it's valid, get the file version, then continue on with the correct structures. """ NAME = 'java_idx' DATA_FORMAT = 'Java WebStart Cache IDX file' _INITIAL_FILE_OFFSET = None _DEFINITION_FILE = os.path.join( os.path.dirname(__file__), 'java_idx.yaml') _SUPPORTED_FORMAT_VERSIONS = (602, 603, 604, 605)
[docs] def ParseFileObject(self, parser_mediator, file_object): """Parses a Java WebStart Cache IDX file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfvfs. file_object (dfvfs.FileIO): a file-like object to parse. Raises: WrongParser: when the file cannot be parsed. """ file_header_map = self._GetDataTypeMap('java_idx_file_header') try: file_header, file_offset = self._ReadStructureFromFileObject( file_object, 0, file_header_map) except (ValueError, errors.ParseError) as exception: raise errors.WrongParser( 'Unable to parse file header with error: {0!s}'.format( exception)) if not file_header.format_version in self._SUPPORTED_FORMAT_VERSIONS: raise errors.WrongParser('Unsupported format version.') if file_header.format_version == 602: section1_map = self._GetDataTypeMap('java_idx_602_section1') elif file_header.format_version in (603, 604): section1_map = self._GetDataTypeMap('java_idx_603_section1') elif file_header.format_version == 605: section1_map = self._GetDataTypeMap('java_idx_605_section1') try: section1, data_size = self._ReadStructureFromFileObject( file_object, file_offset, section1_map) except (ValueError, errors.ParseError) as exception: raise errors.WrongParser(( 'Unable to parse section 1 (format version: {0:d}) with error: ' '{1!s}').format(file_header.format_version, exception)) file_offset += data_size if file_header.format_version == 602: section2_map = self._GetDataTypeMap('java_idx_602_section2') elif file_header.format_version in (603, 604, 605): file_offset = 128 section2_map = self._GetDataTypeMap('java_idx_603_section2') try: section2, data_size = self._ReadStructureFromFileObject( file_object, file_offset, section2_map) except (ValueError, errors.ParseError) as exception: raise errors.WrongParser(( 'Unable to parse section 2 (format version: {0:d}) with error: ' '{1!s}').format(file_header.format_version, exception)) file_offset += data_size if not section2.url: raise errors.WrongParser('URL not found in file.') date_http_header = None for _ in range(section2.number_of_http_headers): http_header_map = self._GetDataTypeMap('java_idx_http_header') try: http_header, data_size = self._ReadStructureFromFileObject( file_object, file_offset, http_header_map) except (ValueError, errors.ParseError) as exception: parser_mediator.ProduceExtractionWarning( 'Unable to parse HTTP header value at offset: 0x{0:08x}'.format( file_offset)) break file_offset += data_size if http_header.name == 'date': date_http_header = http_header break if date_http_header: # A HTTP header date and time should be formatted according to RFC 1123. # The date "should" be in UTC or have associated time zone information # in the string itself. If that is not the case then there is no reliable # method for Plaso to determine the proper time zone, so the assumption # is that the date and time is in UTC. try: downloaded_time = dfdatetime_time_elements.TimeElements() downloaded_time.CopyFromStringRFC1123(date_http_header.value) except ValueError as exception: parser_mediator.ProduceExtractionWarning(( 'Unable to parse date HTTP header string: {0:s} with error: ' '{1!s}').format(date_http_header.value, exception)) event_data = JavaIDXEventData() event_data.downloaded_time = downloaded_time event_data.idx_version = file_header.format_version event_data.ip_address = getattr(section2, 'ip_address', None) event_data.modification_time = dfdatetime_java_time.JavaTime( timestamp=section1.modification_time) event_data.url = section2.url if section1.expiration_time: event_data.expiration_time = dfdatetime_java_time.JavaTime( timestamp=section1.expiration_time) parser_mediator.ProduceEventData(event_data)
manager.ParsersManager.RegisterParser(JavaIDXParser)