Source code for plaso.parsers.opera

# -*- coding: utf-8 -*-
"""Parsers for Opera Browser history files."""

import os

from urllib import parse as urlparse

from defusedxml import ElementTree
from dfdatetime import posix_time as dfdatetime_posix_time
from dfdatetime import time_elements as dfdatetime_time_elements
from dfvfs.helpers import text_file

from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import interface
from plaso.parsers import manager


[docs] class OperaGlobalHistoryEventData(events.EventData): """Opera global history entry data. Attributes: description (str): description. last_visited_time (dfdatetime.DateTimeValues): date and time the URL was last visited. popularity_index (int): popularity index. title (str): title. url (str): URL. """ DATA_TYPE = 'opera:history:entry'
[docs] def __init__(self): """Initializes event data.""" super(OperaGlobalHistoryEventData, self).__init__(data_type=self.DATA_TYPE) self.description = None self.last_visited_time = None self.popularity_index = None self.title = None self.url = None
[docs] class OperaTypedHistoryEventData(events.EventData): """Opera typed history entry data. Attributes: entry_selection (str): information about whether the URL was directly typed in or the result of the user choosing from the auto complete. entry_type (str): information about whether the URL was directly typed in or the result of the user choosing from the auto complete. last_typed_time (dfdatetime.DateTimeValues): date and time the URL was last typed. url (str): typed URL or hostname. """ DATA_TYPE = 'opera:history:typed_entry'
[docs] def __init__(self): """Initializes event data.""" super(OperaTypedHistoryEventData, self).__init__(data_type=self.DATA_TYPE) self.entry_selection = None self.entry_type = None self.last_typed_time = None self.url = None
[docs] class OperaTypedHistoryParser(interface.FileObjectParser): """Parses the Opera typed_history.xml file.""" NAME = 'opera_typed_history' DATA_FORMAT = 'Opera typed history (typed_history.xml) file' _HEADER_READ_SIZE = 128
[docs] def ParseFileObject(self, parser_mediator, file_object): """Parses an Opera typed history file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. file_object (dfvfs.FileIO): file-like object. Raises: WrongParser: when the file cannot be parsed. """ data = file_object.read(self._HEADER_READ_SIZE) if not data.startswith(b'<?xml'): raise errors.WrongParser( 'Not an Opera typed history file [not a XML]') _, _, data = data.partition(b'\n') if not data.startswith(b'<typed_history'): raise errors.WrongParser( 'Not an Opera typed history file [wrong XML root key]') # For ElementTree to work we need to work on a file object seeked # to the beginning. file_object.seek(0, os.SEEK_SET) xml = ElementTree.parse(file_object) for history_item in xml.iterfind('typed_history_item'): last_typed_time = history_item.get('last_typed', None) if last_typed_time is None: parser_mediator.ProduceExtractionWarning('missing last typed time.') continue date_time = dfdatetime_time_elements.TimeElements() try: date_time.CopyFromStringISO8601(last_typed_time) except ValueError as exception: parser_mediator.ProduceExtractionWarning( 'unsupported last typed time: {0:s} with error: {1!s}.'.format( last_typed_time, exception)) continue event_data = OperaTypedHistoryEventData() event_data.entry_type = history_item.get('type', None) event_data.last_typed_time = date_time event_data.url = history_item.get('content', None) if event_data.entry_type == 'selected': event_data.entry_selection = 'Filled from autocomplete.' elif event_data.entry_type == 'text': event_data.entry_selection = 'Manually typed.' parser_mediator.ProduceEventData(event_data)
[docs] class OperaGlobalHistoryParser(interface.FileObjectParser): """Parses the Opera global_history.dat file.""" NAME = 'opera_global' DATA_FORMAT = 'Opera global history (global_history.dat) file' _ENCODING = 'utf-8' _MAXIMUM_LINE_SIZE = 512 _SUPPORTED_URL_SCHEMES = frozenset(['file', 'http', 'https', 'ftp']) def _IsValidUrl(self, url): """Checks if a URL is considered valid. Returns: bool: True if the URL is valid. """ parsed_url = urlparse.urlparse(url) return parsed_url.scheme in self._SUPPORTED_URL_SCHEMES def _ParseRecord(self, parser_mediator, text_file_object): """Parses an Opera global history record. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. text_file_object (dfvfs.TextFile): text file. Returns: bool: True if the record was successfully parsed. """ try: title = text_file_object.readline() except UnicodeDecodeError: parser_mediator.ProduceExtractionWarning( 'unable to read and decode title') return False if not title: return False try: url = text_file_object.readline() except UnicodeDecodeError: parser_mediator.ProduceExtractionWarning( 'unable to read and decode url') return False try: timestamp = text_file_object.readline() except UnicodeDecodeError: parser_mediator.ProduceExtractionWarning( 'unable to read and decode timestamp') return False try: popularity_index = text_file_object.readline() except UnicodeDecodeError: parser_mediator.ProduceExtractionWarning( 'unable to read and decode popularity index') return False title = title.strip() timestamp = timestamp.strip() try: timestamp = int(timestamp, 10) except ValueError: parser_mediator.ProduceExtractionWarning( 'unable to convert timestamp: {0:s}'.format(timestamp)) timestamp = None popularity_index = popularity_index.strip() try: popularity_index = int(popularity_index, 10) except ValueError: parser_mediator.ProduceExtractionWarning( 'unable to convert popularity index: {0:s}'.format(popularity_index)) popularity_index = None event_data = OperaGlobalHistoryEventData() event_data.popularity_index = popularity_index event_data.url = url.strip() if timestamp: event_data.last_visited_time = dfdatetime_posix_time.PosixTime( timestamp=timestamp) if title != event_data.url: event_data.title = title if event_data.popularity_index < 0: event_data.description = 'First and Only Visit' else: event_data.description = 'Last Visit' parser_mediator.ProduceEventData(event_data) return True def _ParseAndValidateRecord(self, parser_mediator, text_file_object): """Parses and validates an Opera global history record. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. text_file_object (dfvfs.TextFile): text file. Returns: bool: True if the record was successfully parsed. """ try: title = text_file_object.readline(size=self._MAXIMUM_LINE_SIZE) url = text_file_object.readline(size=self._MAXIMUM_LINE_SIZE) timestamp = text_file_object.readline(size=self._MAXIMUM_LINE_SIZE) popularity_index = text_file_object.readline(size=self._MAXIMUM_LINE_SIZE) except UnicodeDecodeError: return False if len(title) == self._MAXIMUM_LINE_SIZE and title[-1] != '\n': return False if len(url) == self._MAXIMUM_LINE_SIZE and url[-1] != '\n': return False if len(timestamp) == self._MAXIMUM_LINE_SIZE and timestamp[-1] != '\n': return False if (len(popularity_index) == self._MAXIMUM_LINE_SIZE and popularity_index[-1] != '\n'): return False title = title.strip() url = url.strip() timestamp = timestamp.strip() popularity_index = popularity_index.strip() if not title or not url or not timestamp or not popularity_index: return False if not self._IsValidUrl(url): return False try: timestamp = int(timestamp, 10) except (TypeError, ValueError): return False try: popularity_index = int(popularity_index, 10) except (TypeError, ValueError): return False event_data = OperaGlobalHistoryEventData() event_data.last_visited_time = dfdatetime_posix_time.PosixTime( timestamp=timestamp) event_data.popularity_index = popularity_index event_data.url = url if title != url: event_data.title = title if event_data.popularity_index < 0: event_data.description = 'First and Only Visit' else: event_data.description = 'Last Visit' parser_mediator.ProduceEventData(event_data) return True
[docs] def ParseFileObject(self, parser_mediator, file_object): """Parses an Opera global history file-like object. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. file_object (dfvfs.FileIO): file-like object. Raises: WrongParser: when the file cannot be parsed. """ encoding = self._ENCODING if not encoding: encoding = parser_mediator.GetCodePage() text_file_object = text_file.TextFile(file_object, encoding=encoding) if not self._ParseAndValidateRecord(parser_mediator, text_file_object): raise errors.WrongParser('Unable to parse as Opera global_history.dat.') while self._ParseRecord(parser_mediator, text_file_object): pass
manager.ParsersManager.RegisterParsers([ OperaTypedHistoryParser, OperaGlobalHistoryParser])