Source code for plaso.parsers.olecf_plugins.summary
"""Plugin to parse the OLECF summary/document summary information items."""
from dfdatetime import filetime as dfdatetime_filetime
import pyolecf
from plaso.containers import events
from plaso.parsers import olecf
from plaso.parsers.olecf_plugins import interface
[docs]
class OLECFDocumentSummaryInformationEventData(events.EventData):
    """Event data for an OLECF document summary information property set.

    All attributes are initialized to None by the constructor.

    Attributes:
        application_version (str): application version.
        category (str): category of the document, such as memo or proposal.
        codepage (str): codepage of the document summary information.
        company (str): name of the company of the document.
        content_status (str): content status.
        content_type (str): content type.
        document_parts (list[str]): names of document parts.
        document_version (int): version of the document.
        item_creation_time (dfdatetime.DateTimeValues): creation date and time
            of the item.
        item_modification_time (dfdatetime.DateTimeValues): modification date
            and time of the item.
        language (str): language of the document.
        links_up_to_date (bool): True if the links are up to date.
        manager (str): name of the manager of the document.
        number_of_bytes (int): size of the document in bytes.
        number_of_characters_with_white_space (int): number of characters
            including spaces in the document.
        number_of_clips (int): number of multi-media clips in the document.
        number_of_hidden_slides (int): number of hidden slides in the document.
        number_of_lines (int): number of lines in the document.
        number_of_notes (int): number of notes in the document.
        number_of_paragraphs (int): number of paragraphs in the document.
        number_of_slides (int): number of slides in the document.
        presentation_format (str): target format for presentation, such as
            35mm, printer or video.
        scale (bool): True if scaling of the thumbnail is desired or False if
            cropping is desired.
        shared_document (bool): True if the document is shared.
    """

    DATA_TYPE = "olecf:document_summary_info"

    def __init__(self):
        """Initializes the event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)

        # Attributes are kept in alphabetical order, mirroring the docstring.
        self.application_version = None
        self.category = None
        self.codepage = None
        self.company = None
        self.content_status = None
        self.content_type = None
        self.document_parts = None
        self.document_version = None
        self.item_creation_time = None
        self.item_modification_time = None
        self.language = None
        self.links_up_to_date = None
        self.manager = None
        self.number_of_bytes = None
        self.number_of_characters_with_white_space = None
        self.number_of_clips = None
        self.number_of_hidden_slides = None
        self.number_of_lines = None
        self.number_of_notes = None
        self.number_of_paragraphs = None
        self.number_of_slides = None
        self.presentation_format = None
        self.scale = None
        self.shared_document = None
[docs]
class OLECFSummaryInformationEventData(events.EventData):
    """Event data for an OLECF summary information property set.

    All attributes are initialized to None by the constructor.

    Attributes:
        application (str): name of the application that created the document.
        author (str): author of the document.
        codepage (str): codepage of the summary information.
        comments (str): comments.
        creation_time (dfdatetime.DateTimeValues): creation date and time of
            the document.
        edit_duration (int): total editing time.
        item_creation_time (dfdatetime.DateTimeValues): creation date and time
            of the item.
        item_modification_time (dfdatetime.DateTimeValues): modification date
            and time of the item.
        keywords (str): keywords.
        last_printed_time (dfdatetime.DateTimeValues): date and time the
            document was last printed.
        last_saved_by (str): name of the user that last saved the document.
        last_save_time (dfdatetime.DateTimeValues): date and time the document
            was last saved.
        number_of_characters (int): number of characters without spaces in
            the document.
        number_of_pages (int): number of pages in the document.
        number_of_words (int): number of words in the document.
        revision_number (int): revision number.
        security_flags (int): security flags.
        subject (str): subject.
        template (str): name of the template used to create the document.
        title (str): title of the document.
    """

    DATA_TYPE = "olecf:summary_info"

    def __init__(self):
        """Initializes the event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)

        # Attributes are kept in alphabetical order, mirroring the docstring.
        self.application = None
        self.author = None
        self.codepage = None
        self.comments = None
        self.creation_time = None
        self.edit_duration = None
        self.item_creation_time = None
        self.item_modification_time = None
        self.keywords = None
        self.last_printed_time = None
        self.last_saved_by = None
        self.last_save_time = None
        self.number_of_characters = None
        self.number_of_pages = None
        self.number_of_words = None
        self.revision_number = None
        self.security_flags = None
        self.subject = None
        self.template = None
        self.title = None
[docs]
class OLECFPropertySetStream:
    """OLECF property set stream.

    Collects the property values of every section whose class identifier
    matches _CLASS_IDENTIFIER and stores them by property name. Subclasses
    provide _CLASS_IDENTIFIER, _PROPERTY_NAMES (identifier to name) and
    optionally _PROPERTY_VALUE_MAPPINGS (name to the name of a method that
    post-processes the value).

    Attributes:
        date_time_properties (dict[str, dfdatetime.DateTimeValues]): date and
            time properties and values. Initialized empty; nothing in this
            class adds entries to it.
    """

    _CLASS_IDENTIFIER = None

    _INTEGER_TYPES = frozenset(
        [
            pyolecf.value_types.INTEGER_16BIT_SIGNED,
            pyolecf.value_types.INTEGER_32BIT_SIGNED,
        ]
    )

    _STRING_TYPES = frozenset(
        [pyolecf.value_types.STRING_ASCII, pyolecf.value_types.STRING_UNICODE]
    )

    _PROPERTY_NAMES = None
    _PROPERTY_VALUE_MAPPINGS = None

    def __init__(self, olecf_item):
        """Initialize an OLECF property set stream.

        Args:
            olecf_item (pyolecf.property_set_stream): OLECF item. When falsy no
                properties are read.
        """
        super().__init__()
        self._properties = {}
        self.date_time_properties = {}

        if olecf_item:
            self._ReadPropertySet(olecf_item.set)

    def _GetValueAsObject(self, property_value):
        """Retrieves the property value as a Python object.

        Args:
            property_value (pyolecf.property_value): OLECF property value.

        Returns:
            object: property value as a Python object. A FILETIME of 0 yields
                None; a value of an unhandled type yields its raw data, or
                None when the data cannot be read.
        """
        value_type = property_value.type

        if value_type == pyolecf.value_types.BOOLEAN:
            return property_value.data_as_boolean

        if value_type == pyolecf.value_types.FILETIME:
            filetime = property_value.data_as_integer
            # A zero (or missing) FILETIME is treated as "not set".
            if not filetime:
                return None
            return dfdatetime_filetime.Filetime(timestamp=filetime)

        if value_type in self._INTEGER_TYPES:
            return property_value.data_as_integer

        if value_type in self._STRING_TYPES:
            return property_value.data_as_string

        # Unhandled value type: fall back to the raw data, best effort.
        try:
            return property_value.data
        except OSError:
            return None

    def _ReadPropertySet(self, property_set):
        """Reads properties from a property set.

        Args:
            property_set (pyolecf.property_set): OLECF property set.
        """
        # The lookup tables are class-level and loop invariant. The base class
        # leaves them as None, which is treated as "no entries".
        property_names = self._PROPERTY_NAMES or {}
        value_mappings = self._PROPERTY_VALUE_MAPPINGS or {}

        # Combine the values of multiple property sections
        # but do not override properties that are already set.
        for property_section in property_set.sections:
            if property_section.class_identifier != self._CLASS_IDENTIFIER:
                continue

            for property_value in property_section.properties:
                identifier = property_value.identifier

                property_name = property_names.get(identifier)
                if not property_name:
                    # Unmapped properties are named after their identifier in
                    # hexadecimal, so the digits agree with the "0x" prefix.
                    property_name = f"0x{identifier:04x}"

                value = self._GetValueAsObject(property_value)

                callback_name = value_mappings.get(property_name)
                if callback_name:
                    callback = getattr(self, callback_name, None)
                    if callback:
                        value = callback(value)

                # The first section that provides a property wins.
                self._properties.setdefault(property_name, value)

    def SetEventData(self, event_data):
        """Sets the properties as event data.

        Args:
            event_data (EventData): event data.
        """
        for property_name, property_value in self._properties.items():
            # Raw binary data is stored as its printable representation.
            if isinstance(property_value, bytes):
                property_value = repr(property_value)
            setattr(event_data, property_name, property_value)
[docs]
class OLECFDocumentSummaryInformation(OLECFPropertySetStream):
    """OLECF Document Summary information property set."""

    _CLASS_IDENTIFIER = "d5cdd502-2e9c-101b-9397-08002b2cf9ae"

    # Property identifier to event data attribute name.
    _PROPERTY_NAMES = {
        0x0001: "codepage",  # PIDDSI_CODEPAGE
        0x0002: "category",  # PIDDSI_CATEGORY
        0x0003: "presentation_format",  # PIDDSI_PRESFORMAT
        0x0004: "number_of_bytes",  # PIDDSI_BYTECOUNT
        0x0005: "number_of_lines",  # PIDDSI_LINECOUNT
        0x0006: "number_of_paragraphs",  # PIDDSI_PARCOUNT
        0x0007: "number_of_slides",  # PIDDSI_SLIDECOUNT
        0x0008: "number_of_notes",  # PIDDSI_NOTECOUNT
        0x0009: "number_of_hidden_slides",  # PIDDSI_HIDDENCOUNT
        0x000A: "number_of_clips",  # PIDDSI_MMCLIPCOUNT
        0x000B: "scale",  # PIDDSI_SCALE
        # 0x000c: 'heading_pair',  # PIDDSI_HEADINGPAIR
        0x000D: "document_parts",  # PIDDSI_DOCPARTS
        0x000E: "manager",  # PIDDSI_MANAGER
        0x000F: "company",  # PIDDSI_COMPANY
        0x0010: "links_up_to_date",  # PIDDSI_LINKSDIRTY
        0x0011: "number_of_characters_with_white_space",  # PIDDSI_CCHWITHSPACES
        0x0013: "shared_document",  # PIDDSI_SHAREDDOC
        0x0017: "application_version",  # PIDDSI_VERSION
        0x001A: "content_type",  # PIDDSI_CONTENTTYPE
        0x001B: "content_status",  # PIDDSI_CONTENTSTATUS
        0x001C: "language",  # PIDDSI_LANGUAGE
        0x001D: "document_version",  # PIDDSI_DOCVERSION
    }

    # Attribute name to the name of the method that post-processes its value.
    _PROPERTY_VALUE_MAPPINGS = {"application_version": "_FormatApplicationVersion"}

    def _FormatApplicationVersion(self, application_version):
        """Formats the application version.

        Args:
            application_version (int): application version.

        Returns:
            str: formatted application version as "major.minor". A value that
                is not an integer (for example None or raw bytes from a
                property with an unexpected type) is returned unchanged.
        """
        # The value can be None or raw bytes when the property was not stored
        # as one of the recognised integer types; shifting it would raise
        # TypeError and abort reading the remaining properties.
        if not isinstance(application_version, int):
            return application_version

        # The application version consists of 2 16-bit values that make up
        # the version number. Where the upper 16-bit is the major number
        # and the lower 16-bit the minor number.
        major_version = application_version >> 16
        minor_version = application_version & 0xFFFF
        return f"{major_version:d}.{minor_version:d}"
[docs]
class OLECFSummaryInformation(OLECFPropertySetStream):
    """Property set stream for the OLECF Summary information property set."""

    _CLASS_IDENTIFIER = "f29f85e0-4ff9-1068-ab91-08002b27b3d9"

    # Property identifier to event data attribute name.
    _PROPERTY_NAMES = {
        0x0001: "codepage",  # PIDSI_CODEPAGE
        0x0002: "title",  # PIDSI_TITLE
        0x0003: "subject",  # PIDSI_SUBJECT
        0x0004: "author",  # PIDSI_AUTHOR
        0x0005: "keywords",  # PIDSI_KEYWORDS
        0x0006: "comments",  # PIDSI_COMMENTS
        0x0007: "template",  # PIDSI_TEMPLATE
        0x0008: "last_saved_by",  # PIDSI_LASTAUTHOR
        0x0009: "revision_number",  # PIDSI_REVNUMBER
        0x000A: "edit_duration",  # PIDSI_EDITTIME
        0x000B: "last_printed_time",  # PIDSI_LASTPRINTED
        0x000C: "creation_time",  # PIDSI_CREATE_DTM
        0x000D: "last_save_time",  # PIDSI_LASTSAVE_DTM
        0x000E: "number_of_pages",  # PIDSI_PAGECOUNT
        0x000F: "number_of_words",  # PIDSI_WORDCOUNT
        0x0010: "number_of_characters",  # PIDSI_CHARCOUNT
        # 0x0011: 'thumbnail',  # PIDSI_THUMBNAIL
        0x0012: "application",  # PIDSI_APPNAME
        0x0013: "security_flags",  # PIDSI_SECURITY
    }
[docs]
class DocumentSummaryInformationOLECFPlugin(interface.OLECFPlugin):
    """OLECF plugin for the DocumentSummaryInformation item."""

    NAME = "olecf_document_summary"
    DATA_FORMAT = "Document summary information (\\0x05DocumentSummaryInformation)"

    # pylint: disable=anomalous-backslash-in-string
    REQUIRED_ITEMS = frozenset(["\005DocumentSummaryInformation"])

    def Process(self, parser_mediator, root_item=None, **kwargs):
        """Produces event data for the document summary information item.

        One event data object is produced for each required item found
        directly under the root item. The item creation and modification
        times are taken from the root item.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            root_item (Optional[pyolecf.item]): root item of the OLECF file.

        Raises:
            ValueError: If the root item is not set.
        """
        # The base implementation raises on unhandled keyword arguments.
        super().Process(parser_mediator, **kwargs)

        if not root_item:
            raise ValueError("Root item not set.")

        for required_name in self.REQUIRED_ITEMS:
            sub_item = root_item.get_sub_item_by_name(required_name)
            if not sub_item:
                continue

            document_summary_data = OLECFDocumentSummaryInformationEventData()
            document_summary_data.item_creation_time = self._GetCreationTime(
                root_item
            )
            document_summary_data.item_modification_time = (
                self._GetModificationTime(root_item)
            )

            property_stream = OLECFDocumentSummaryInformation(sub_item)
            property_stream.SetEventData(document_summary_data)

            parser_mediator.ProduceEventData(document_summary_data)
[docs]
class SummaryInformationOLECFPlugin(interface.OLECFPlugin):
    """OLECF plugin for the SummaryInformation item."""

    NAME = "olecf_summary"
    DATA_FORMAT = "Summary information (\\0x05SummaryInformation) (top-level only)"

    # pylint: disable=anomalous-backslash-in-string
    REQUIRED_ITEMS = frozenset(["\005SummaryInformation"])

    def Process(self, parser_mediator, root_item=None, **kwargs):
        """Produces event data for the summary information item.

        One event data object is produced for each required item found
        directly under the root item. The item creation and modification
        times are taken from the root item.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            root_item (Optional[pyolecf.item]): root item of the OLECF file.

        Raises:
            ValueError: If the root item is not set.
        """
        # The base implementation raises on unhandled keyword arguments.
        super().Process(parser_mediator, **kwargs)

        if not root_item:
            raise ValueError("Root item not set.")

        for required_name in self.REQUIRED_ITEMS:
            sub_item = root_item.get_sub_item_by_name(required_name)
            if not sub_item:
                continue

            summary_data = OLECFSummaryInformationEventData()
            summary_data.item_creation_time = self._GetCreationTime(root_item)
            summary_data.item_modification_time = self._GetModificationTime(
                root_item
            )

            property_stream = OLECFSummaryInformation(sub_item)
            property_stream.SetEventData(summary_data)

            parser_mediator.ProduceEventData(summary_data)
# Register both summary information plugins with the OLECF parser.
olecf.OLECFParser.RegisterPlugins(
[DocumentSummaryInformationOLECFPlugin, SummaryInformationOLECFPlugin]
)