"""Parser for the Microsoft Internet Explorer WebCache ESE database.
The WebCache database (WebCacheV01.dat or WebCacheV24.dat) is used by MSIE as of
version 10.
"""
from dfdatetime import filetime as dfdatetime_filetime
from dfdatetime import semantic_time as dfdatetime_semantic_time
from plaso.containers import events
from plaso.parsers import esedb
from plaso.parsers.esedb_plugins import interface
class MsieWebCacheContainersEventData(events.EventData):
    """Event data for a record of the MSIE WebCache Containers table.

    Attributes:
        access_time (dfdatetime.DateTimeValues): date and time the container
            was last accessed.
        container_identifier (int): identifier of the container.
        directory (str): name of the cache directory.
        name (str): name of the cache container.
        scavenge_time (dfdatetime.DateTimeValues): date and time the container
            was last scavenged.
        set_identifier (int): identifier of the set.
    """

    DATA_TYPE = "msie:webcache:containers"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)
        self.access_time = None
        self.container_identifier = None
        self.directory = None
        self.name = None
        self.scavenge_time = None
        self.set_identifier = None
class MsieWebCacheContainerEventData(events.EventData):
    """Event data for a record of a MSIE WebCache Container_# table.

    Attributes:
        access_count (int): number of times the entry was accessed.
        access_time (dfdatetime.DateTimeValues): date and time of the last
            access.
        cached_filename (str): name of the cached file.
        cached_file_size (int): size of the cached file.
        cache_identifier (int): identifier of the cache.
        container_identifier (int): identifier of the container.
        creation_time (dfdatetime.DateTimeValues): date and time of creation.
        entry_identifier (int): identifier of the entry.
        expiration_time (dfdatetime.DateTimeValues): date and time of
            expiration.
        file_extension (str): extension of the file.
        modification_time (dfdatetime.DateTimeValues): date and time of the
            last modification.
        post_check_time (dfdatetime.DateTimeValues): post check date and time.
        redirect_url (str): URL from which the request was redirected.
        request_headers (str): headers of the request.
        response_headers (str): headers of the response.
        synchronization_count (int): number of synchronizations.
        synchronization_time (dfdatetime.DateTimeValues): date and time of
            synchronization.
        url (str): URL.
    """

    DATA_TYPE = "msie:webcache:container"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)
        self.access_count = None
        self.access_time = None
        self.cached_filename = None
        self.cached_file_size = None
        self.cache_identifier = None
        self.container_identifier = None
        self.creation_time = None
        self.entry_identifier = None
        self.expiration_time = None
        self.file_extension = None
        self.modification_time = None
        self.post_check_time = None
        self.redirect_url = None
        self.request_headers = None
        self.response_headers = None
        self.synchronization_count = None
        self.synchronization_time = None
        self.url = None
class MsieWebCacheCookieData(events.EventData):
    """Event data for a cookie record of a MSIE WebCache CookieEntryEx_# table.

    Attributes:
        container_identifier (int): identifier of the container.
        cookie_hash (str): hash of the cookie, as a hexadecimal string.
        cookie_name (str): name of the cookie.
        cookie_value_raw (str): raw value of the cookie, as a hexadecimal
            string.
        cookie_value (str): value of the cookie decoded as text.
        entry_identifier (int): identifier of the entry.
        expiration_time (dfdatetime.DateTimeValues): date and time of
            expiration.
        flags (int): a representation of the cookie flags.
        modification_time (dfdatetime.DateTimeValues): date and time of the
            last modification.
        request_domain (str): request domain for which the cookie was set.
    """

    DATA_TYPE = "msie:webcache:cookie"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)
        self.container_identifier = None
        self.cookie_hash = None
        self.cookie_name = None
        self.cookie_value = None
        self.cookie_value_raw = None
        self.entry_identifier = None
        self.expiration_time = None
        self.flags = None
        self.modification_time = None
        self.request_domain = None
class MsieWebCacheLeakFilesEventData(events.EventData):
    """Event data for a record of the MSIE WebCache LeakFiles table.

    Attributes:
        cached_filename (str): name of the cached file.
        creation_time (dfdatetime.DateTimeValues): date and time of creation.
        leak_identifier (int): identifier of the leak.
    """

    DATA_TYPE = "msie:webcache:leak_file"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cached_filename = None
        self.creation_time = None
        self.leak_identifier = None
class MsieWebCachePartitionsEventData(events.EventData):
    """Event data for a record of a MSIE WebCache Partitions(Ex) table.

    Attributes:
        directory (str): directory.
        partition_identifier (int): identifier of the partition.
        partition_type (int): type of the partition.
        scavenge_time (dfdatetime.DateTimeValues): date and time the partition
            was last scavenged.
        table_identifier (int): identifier of the table.
    """

    DATA_TYPE = "msie:webcache:partitions"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)
        self.directory = None
        self.partition_identifier = None
        self.partition_type = None
        self.scavenge_time = None
        self.table_identifier = None
class MsieWebCacheESEDBPlugin(interface.ESEDBPlugin):
    """Parses a MSIE WebCache ESE database file."""

    NAME = "msie_webcache"
    DATA_FORMAT = (
        "Internet Explorer WebCache ESE database (WebCacheV01.dat, "
        "WebCacheV24.dat) file"
    )

    # TODO: add support for AppCache_#, AppCacheEntry_#, DependencyEntry_#

    # Table name -> name of the method of this class that parses the table.
    REQUIRED_TABLES = {
        "Containers": "ParseContainersTable",
        "LeakFiles": "ParseLeakFilesTable",
    }

    # Table name -> name of the method of this class that parses the table.
    OPTIONAL_TABLES = {
        "Partitions": "ParsePartitionsTable",
        "PartitionsEx": "ParsePartitionsTable",
    }

    # Column name -> name of the conversion method; passed as value mappings
    # when reading records of the "Content" container.
    _CONTAINER_TABLE_VALUE_MAPPINGS = {
        "RequestHeaders": "_ConvertHeadersValues",
        "ResponseHeaders": "_ConvertHeadersValues",
    }

    # Containers whose Container_# records are turned into event data, next to
    # containers whose name starts with "MSHist".
    _SUPPORTED_CONTAINER_NAMES = frozenset(
        [
            "BackgroundTransferApi",
            "Content",
            "Cookies",
            "DOMStore",
            "History",
            "iedownload",
        ]
    )

    # Containers that are skipped with an extraction warning.
    _IGNORED_CONTAINER_NAMES = frozenset(
        [
            "MicrosoftEdge_DNTException",
            "MicrosoftEdge_EmieSiteList",
            "MicrosoftEdge_EmieUserList",
        ]
    )

    # FILETIME value that is reported as the semantic time "Infinite".
    _INFINITE_FILETIME = 0x7FFFFFFFFFFFFFFF

    def _ConvertHeadersValues(self, value):
        """Converts a headers value into a string.

        Args:
            value (bytes): binary data value containing the headers as an UTF-8
                encoded string or None.

        Returns:
            str: the non-empty header lines, stripped and joined by "; ", enclosed
                in square brackets, or None if the value is empty or None.

        Raises:
            UnicodeDecodeError: if the value cannot be decoded as UTF-8.
        """
        if not value:
            return None

        header_lines = value.decode("utf-8").split("\r\n")
        header_values = "; ".join(line.strip() for line in header_lines if line)
        return f"[{header_values:s}]"

    def _GetDateTimeValue(self, record_values, value_name):
        """Retrieves a date and time record value.

        Args:
            record_values (dict[str, object]): values per column name.
            value_name (str): name of the record value.

        Returns:
            dfdatetime.DateTimeValues: date and time or None if the value is
                missing, None or 0.
        """
        filetime = record_values.get(value_name)
        if not filetime:
            return None

        # TODO: add support for filetime == 1 and other edge cases.
        if filetime == self._INFINITE_FILETIME:
            return dfdatetime_semantic_time.SemanticTime(string="Infinite")

        return dfdatetime_filetime.Filetime(timestamp=filetime)

    def _GetStringValue(self, record_values, value_name):
        """Retrieves a record value only if it is a Unicode string.

        Args:
            record_values (dict[str, object]): values per column name.
            value_name (str): name of the record value.

        Returns:
            str: the record value or None if it is missing or not a string.
        """
        value = record_values.get(value_name)
        if isinstance(value, str):
            return value
        return None

    def _TryGetRecordValues(
        self, parser_mediator, table, record_index, esedb_record, value_mappings=None
    ):
        """Retrieves the values of a record, warning if they cannot be decoded.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            table (pyesedb.table): table the record belongs to.
            record_index (int): index of the record within the table.
            esedb_record (pyesedb.record): record.
            value_mappings (Optional[dict[str, str]]): conversion method name
                per column name.

        Returns:
            dict[str, object]: values per column name or None if retrieving the
                values raised UnicodeDecodeError, in which case an extraction
                warning was produced.
        """
        try:
            return self._GetRecordValues(
                parser_mediator,
                table.name,
                record_index,
                esedb_record,
                value_mappings=value_mappings,
            )
        except UnicodeDecodeError:
            parser_mediator.ProduceExtractionWarning(
                f"Unable to retrieve record values from record: {record_index:d} "
                f"in table: {table.name:s}"
            )
        return None

    def _ParseContainerTable(self, parser_mediator, table, container_name):
        """Parses a Container_# table.

        Records of containers that are not supported are still read, so that
        records that cannot be decoded are reported, but produce no event data.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            table (pyesedb.table): table.
            container_name (str): container name, which indicates the table type.
        """
        # Both values depend only on the container name, hence they are
        # determined once instead of for every record.
        value_mappings = None
        if container_name == "Content":
            value_mappings = self._CONTAINER_TABLE_VALUE_MAPPINGS

        # TODO: add support for:
        # wpnidm, iecompat, iecompatua, DNTException, DOMStore
        is_supported = (
            container_name in self._SUPPORTED_CONTAINER_NAMES
            or container_name.startswith("MSHist")
        )

        for record_index, esedb_record in enumerate(table.records):
            if parser_mediator.abort:
                break

            record_values = self._TryGetRecordValues(
                parser_mediator,
                table,
                record_index,
                esedb_record,
                value_mappings=value_mappings,
            )
            if record_values is None or not is_supported:
                continue

            url = record_values.get("Url")
            # Ignore a missing or empty URL, which would otherwise fail on
            # url[0], and a URL that starts with a binary value.
            if not url or ord(url[0]) < 0x20 or ord(url[0]) == 0x7F:
                url = None

            event_data = MsieWebCacheContainerEventData()
            event_data.access_count = record_values.get("AccessCount")
            event_data.access_time = self._GetDateTimeValue(
                record_values, "AccessedTime"
            )
            event_data.cached_filename = record_values.get("Filename")
            event_data.cached_file_size = record_values.get("FileSize")
            event_data.cache_identifier = record_values.get("CacheId")
            event_data.container_identifier = record_values.get("ContainerId")
            event_data.creation_time = self._GetDateTimeValue(
                record_values, "CreationTime"
            )
            event_data.entry_identifier = record_values.get("EntryId")
            event_data.expiration_time = self._GetDateTimeValue(
                record_values, "ExpiryTime"
            )
            event_data.file_extension = record_values.get("FileExtension")
            event_data.modification_time = self._GetDateTimeValue(
                record_values, "ModifiedTime"
            )
            event_data.post_check_time = self._GetDateTimeValue(
                record_values, "PostCheckTime"
            )
            event_data.redirect_url = record_values.get("RedirectUrl")
            # Ignore non-Unicode request and response headers values.
            event_data.request_headers = self._GetStringValue(
                record_values, "RequestHeaders"
            )
            event_data.response_headers = self._GetStringValue(
                record_values, "ResponseHeaders"
            )
            event_data.synchronization_count = record_values.get("SyncCount")
            event_data.synchronization_time = self._GetDateTimeValue(
                record_values, "SyncTime"
            )
            event_data.url = url

            parser_mediator.ProduceEventData(event_data)

    def _CookieHexToAscii(self, raw_cookie):
        """Translates a cookie from a binary string to a string.

        Args:
            raw_cookie (bytes): the raw binary string of a cookie field.

        Returns:
            str: the binary string decoded as UTF-8 with trailing NUL characters
                removed, or None if not available or not valid UTF-8.
        """
        if raw_cookie is None:
            return None

        try:
            string_value = raw_cookie.decode("utf-8")
        except UnicodeDecodeError:
            return None

        return string_value.rstrip("\x00")

    def GetRawCookieValue(self, record_values, value_name):
        """Retrieves the binary string as a hexadecimal formatted string.

        Args:
            record_values (dict[str, object]): values per column name.
            value_name (str): the name of the value we are converting.

        Returns:
            str: the hexadecimal formatted binary string or None if not
                available.
        """
        raw_value = record_values.get(value_name)
        if raw_value is None:
            return None
        return raw_value.hex()

    def _ParseCookieExTable(self, parser_mediator, table):
        """Parses a CookieEntryEx_# table.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            table (pyesedb.table): table.
        """
        for record_index, esedb_record in enumerate(table.records):
            if parser_mediator.abort:
                break

            record_values = self._TryGetRecordValues(
                parser_mediator, table, record_index, esedb_record
            )
            if record_values is None:
                continue

            event_data = MsieWebCacheCookieData()
            event_data.container_identifier = record_values.get("ContainerId")
            event_data.cookie_hash = self.GetRawCookieValue(
                record_values, "CookieHash"
            )
            event_data.cookie_name = self._CookieHexToAscii(
                record_values.get("Name")
            )
            # The value is stored both as hexadecimal and, if it can be
            # decoded, as text.
            event_data.cookie_value_raw = self.GetRawCookieValue(
                record_values, "Value"
            )
            event_data.cookie_value = self._CookieHexToAscii(
                record_values.get("Value")
            )
            event_data.entry_identifier = record_values.get("EntryId")
            event_data.flags = record_values.get("Flags")
            event_data.expiration_time = self._GetDateTimeValue(
                record_values, "Expires"
            )
            event_data.modification_time = self._GetDateTimeValue(
                record_values, "LastModified"
            )
            event_data.request_domain = record_values.get("RDomain")

            parser_mediator.ProduceEventData(event_data)

    def ParseContainersTable(
        self, parser_mediator, database=None, table=None, **unused_kwargs
    ):
        """Parses a Containers table.

        Produces event data for every record and, unless the container is
        ignored, parses the Container_# and CookieEntryEx_# tables that belong
        to the container.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            database (Optional[ESEDatabase]): ESE database.
            table (Optional[pyesedb.table]): table.

        Raises:
            ValueError: if the database or table value is missing.
        """
        if database is None:
            raise ValueError("Missing database value.")

        if table is None:
            raise ValueError("Missing table value.")

        for record_index, esedb_record in enumerate(table.records):
            if parser_mediator.abort:
                break

            record_values = self._GetRecordValues(
                parser_mediator, table.name, record_index, esedb_record
            )

            container_identifier = record_values.get("ContainerId")
            container_name = record_values.get("Name")

            event_data = MsieWebCacheContainersEventData()
            event_data.access_time = self._GetDateTimeValue(
                record_values, "LastAccessTime"
            )
            event_data.container_identifier = container_identifier
            event_data.directory = record_values.get("Directory")
            event_data.name = container_name
            event_data.scavenge_time = self._GetDateTimeValue(
                record_values, "LastScavengeTime"
            )
            event_data.set_identifier = record_values.get("SetId")

            parser_mediator.ProduceEventData(event_data)

            # Without both values the per-container tables cannot be located
            # or interpreted.
            if not container_identifier or not container_name:
                continue

            if container_name in self._IGNORED_CONTAINER_NAMES:
                parser_mediator.ProduceExtractionWarning(
                    f"Skipped container (ContainerId: {container_identifier:d}, Name: "
                    f"{container_name:s})"
                )
                continue

            container_table = database.GetTableByName(
                f"Container_{container_identifier:d}"
            )
            if container_table:
                self._ParseContainerTable(
                    parser_mediator, container_table, container_name
                )

            cookie_table = database.GetTableByName(
                f"CookieEntryEx_{container_identifier:d}"
            )
            if cookie_table:
                self._ParseCookieExTable(parser_mediator, cookie_table)

    def ParseLeakFilesTable(
        self, parser_mediator, database=None, table=None, **unused_kwargs
    ):
        """Parses a LeakFiles table.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            database (Optional[ESEDatabase]): ESE database, which is only
                checked for presence.
            table (Optional[pyesedb.table]): table.

        Raises:
            ValueError: if the database or table value is missing.
        """
        if database is None:
            raise ValueError("Missing database value.")

        if table is None:
            raise ValueError("Missing table value.")

        for record_index, esedb_record in enumerate(table.records):
            if parser_mediator.abort:
                break

            record_values = self._GetRecordValues(
                parser_mediator, table.name, record_index, esedb_record
            )

            event_data = MsieWebCacheLeakFilesEventData()
            event_data.cached_filename = record_values.get("Filename")
            event_data.creation_time = self._GetDateTimeValue(
                record_values, "CreationTime"
            )
            event_data.leak_identifier = record_values.get("LeakId")

            parser_mediator.ProduceEventData(event_data)

    def ParsePartitionsTable(
        self, parser_mediator, database=None, table=None, **unused_kwargs
    ):
        """Parses a Partitions or PartitionsEx table.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            database (Optional[ESEDatabase]): ESE database, which is only
                checked for presence.
            table (Optional[pyesedb.table]): table.

        Raises:
            ValueError: if the database or table value is missing.
        """
        if database is None:
            raise ValueError("Missing database value.")

        if table is None:
            raise ValueError("Missing table value.")

        for record_index, esedb_record in enumerate(table.records):
            if parser_mediator.abort:
                break

            record_values = self._GetRecordValues(
                parser_mediator, table.name, record_index, esedb_record
            )

            event_data = MsieWebCachePartitionsEventData()
            event_data.directory = record_values.get("Directory")
            event_data.partition_identifier = record_values.get("PartitionId")
            event_data.partition_type = record_values.get("PartitionType")
            event_data.scavenge_time = self._GetDateTimeValue(
                record_values, "LastScavengeTime"
            )
            event_data.table_identifier = record_values.get("TableId")

            parser_mediator.ProduceEventData(event_data)
# Register the WebCache plugin with the ESE database parser when this module is
# imported.
esedb.ESEDBParser.RegisterPlugin(MsieWebCacheESEDBPlugin)