"""Parsers for Opera Browser history files."""
import os
from urllib import parse as urlparse
from defusedxml import ElementTree
from dfdatetime import posix_time as dfdatetime_posix_time
from dfdatetime import time_elements as dfdatetime_time_elements
from dfvfs.helpers import text_file
from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import interface
from plaso.parsers import manager
[docs]
class OperaGlobalHistoryEventData(events.EventData):
    """Event data of an entry in the Opera global history.

    Attributes:
      description (str): description of the visit, for example "Last Visit".
      last_visited_time (dfdatetime.DateTimeValues): date and time the URL was
          last visited.
      popularity_index (int): popularity index.
      title (str): title.
      url (str): URL.
    """

    DATA_TYPE = "opera:history:entry"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)

        # All values are filled in by the parser once a record was read.
        self.description = None
        self.last_visited_time = None
        self.popularity_index = None
        self.title = None
        self.url = None
[docs]
class OperaTypedHistoryEventData(events.EventData):
    """Event data of an entry in the Opera typed history.

    Attributes:
      entry_selection (str): human readable description of how the URL was
          entered, for example "Manually typed." or
          "Filled from autocomplete.".
      entry_type (str): type of the entry as stored in the file, which
          indicates whether the URL was directly typed in ("text") or the
          result of the user choosing from the auto complete ("selected").
      last_typed_time (dfdatetime.DateTimeValues): date and time the URL was
          last typed.
      url (str): typed URL or hostname.
    """

    DATA_TYPE = "opera:history:typed_entry"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)

        # All values are filled in by the parser once an item was read.
        self.entry_selection = None
        self.entry_type = None
        self.last_typed_time = None
        self.url = None
[docs]
class OperaTypedHistoryParser(interface.FileObjectParser):
    """Parser for the Opera typed_history.xml file."""

    NAME = "opera_typed_history"
    DATA_FORMAT = "Opera typed history (typed_history.xml) file"

    # Number of bytes read from the start of the file to check the signature.
    _HEADER_READ_SIZE = 128

    # Human readable entry selection per value of the "type" XML attribute.
    _ENTRY_SELECTION_PER_TYPE = {
        "selected": "Filled from autocomplete.",
        "text": "Manually typed.",
    }

    def ParseFileObject(self, parser_mediator, file_object):
        """Parses an Opera typed history file-like object.

        The first line of the file must be an XML declaration and the second
        line must start with the typed_history root element, after which every
        typed_history_item element is turned into event data.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfVFS.
          file_object (dfvfs.FileIO): file-like object.

        Raises:
          WrongParser: when the file cannot be parsed.
        """
        header = file_object.read(self._HEADER_READ_SIZE)
        if not header.startswith(b"<?xml"):
            raise errors.WrongParser("Not an Opera typed history file [not a XML]")

        # Everything after the first end-of-line is expected to start with the
        # root element.
        remainder = header.partition(b"\n")[2]
        if not remainder.startswith(b"<typed_history"):
            raise errors.WrongParser(
                "Not an Opera typed history file [wrong XML root key]"
            )

        # The header was consumed by the signature check, hence rewind so that
        # ElementTree sees the file from the start.
        file_object.seek(0, os.SEEK_SET)
        xml_tree = ElementTree.parse(file_object)

        for item in xml_tree.iterfind("typed_history_item"):
            last_typed = item.get("last_typed")
            if last_typed is None:
                parser_mediator.ProduceExtractionWarning("missing last typed time.")
                continue

            typed_date_time = dfdatetime_time_elements.TimeElements()
            try:
                typed_date_time.CopyFromStringISO8601(last_typed)
            except ValueError as error:
                parser_mediator.ProduceExtractionWarning(
                    f"unsupported last typed time: {last_typed!s} "
                    f"with error: {error!s}."
                )
                continue

            entry_type = item.get("type")

            event_data = OperaTypedHistoryEventData()
            event_data.entry_type = entry_type
            event_data.last_typed_time = typed_date_time
            event_data.url = item.get("content")

            # Unknown entry types keep the default (unset) entry selection.
            entry_selection = self._ENTRY_SELECTION_PER_TYPE.get(entry_type)
            if entry_selection is not None:
                event_data.entry_selection = entry_selection

            parser_mediator.ProduceEventData(event_data)
[docs]
class OperaGlobalHistoryParser(interface.FileObjectParser):
    """Parses the Opera global_history.dat file.

    The file is read as text where every record consists of 4 consecutive
    lines: title, URL, timestamp and popularity index.
    """

    NAME = "opera_global"
    DATA_FORMAT = "Opera global history (global_history.dat) file"

    _ENCODING = "utf-8"

    # Maximum number of characters read per line while validating the first
    # record.
    _MAXIMUM_LINE_SIZE = 512

    _SUPPORTED_URL_SCHEMES = frozenset(["file", "http", "https", "ftp"])

    def _GetDescription(self, popularity_index):
        """Determines the description of a record from its popularity index.

        Args:
          popularity_index (int): popularity index or None if not available.

        Returns:
          str: "First and Only Visit" for a negative popularity index,
              "Last Visit" otherwise, or None if the popularity index is not
              available.
        """
        # Comparing None with an integer raises TypeError, hence a missing
        # popularity index must be handled before the comparison.
        if popularity_index is None:
            return None

        if popularity_index < 0:
            return "First and Only Visit"

        return "Last Visit"

    def _IsValidUrl(self, url):
        """Checks if a URL is considered valid.

        Args:
          url (str): URL.

        Returns:
          bool: True if the scheme of the URL is one of the supported schemes.
        """
        parsed_url = urlparse.urlparse(url)
        return parsed_url.scheme in self._SUPPORTED_URL_SCHEMES

    def _ParseRecord(self, parser_mediator, text_file_object):
        """Parses an Opera global history record.

        Values that cannot be converted to an integer are reported as
        extraction warnings and left unset, the record itself is still
        produced.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfVFS.
          text_file_object (dfvfs.TextFile): text file.

        Returns:
          bool: True if the record was successfully parsed.
        """
        try:
            title = text_file_object.readline()
        except UnicodeDecodeError:
            parser_mediator.ProduceExtractionWarning("unable to read and decode title")
            return False

        # Nothing was read, presumably the end of the file was reached.
        if not title:
            return False

        try:
            url = text_file_object.readline()
        except UnicodeDecodeError:
            parser_mediator.ProduceExtractionWarning("unable to read and decode url")
            return False

        try:
            timestamp = text_file_object.readline()
        except UnicodeDecodeError:
            parser_mediator.ProduceExtractionWarning(
                "unable to read and decode timestamp"
            )
            return False

        try:
            popularity_index = text_file_object.readline()
        except UnicodeDecodeError:
            parser_mediator.ProduceExtractionWarning(
                "unable to read and decode popularity index"
            )
            return False

        title = title.strip()
        url = url.strip()

        timestamp = timestamp.strip()
        try:
            timestamp = int(timestamp, 10)
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                f"unable to convert timestamp: {timestamp!s}"
            )
            timestamp = None

        popularity_index = popularity_index.strip()
        try:
            popularity_index = int(popularity_index, 10)
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                f"unable to convert popularity index: {popularity_index!s}"
            )
            popularity_index = None

        event_data = OperaGlobalHistoryEventData()
        event_data.popularity_index = popularity_index
        event_data.url = url

        if timestamp:
            event_data.last_visited_time = dfdatetime_posix_time.PosixTime(
                timestamp=timestamp
            )

        # A title equal to the URL carries no additional information.
        if title != url:
            event_data.title = title

        # The description remains unset when the popularity index could not be
        # converted, instead of failing on the comparison with None.
        event_data.description = self._GetDescription(popularity_index)

        parser_mediator.ProduceEventData(event_data)
        return True

    def _ParseAndValidateRecord(self, parser_mediator, text_file_object):
        """Parses and validates an Opera global history record.

        In contrast to _ParseRecord no extraction warnings are produced, any
        value that does not match the expected format causes the record to be
        rejected.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfVFS.
          text_file_object (dfvfs.TextFile): text file.

        Returns:
          bool: True if the record was successfully parsed.
        """
        maximum_size = self._MAXIMUM_LINE_SIZE

        try:
            title = text_file_object.readline(size=maximum_size)
            url = text_file_object.readline(size=maximum_size)
            timestamp = text_file_object.readline(size=maximum_size)
            popularity_index = text_file_object.readline(size=maximum_size)
        except UnicodeDecodeError:
            return False

        # A line that fills the maximum size without ending in an end-of-line
        # character is longer than supported.
        for line in (title, url, timestamp, popularity_index):
            if len(line) == maximum_size and line[-1] != "\n":
                return False

        title = title.strip()
        url = url.strip()
        timestamp = timestamp.strip()
        popularity_index = popularity_index.strip()

        if not title or not url or not timestamp or not popularity_index:
            return False

        if not self._IsValidUrl(url):
            return False

        try:
            timestamp = int(timestamp, 10)
        except (TypeError, ValueError):
            return False

        try:
            popularity_index = int(popularity_index, 10)
        except (TypeError, ValueError):
            return False

        event_data = OperaGlobalHistoryEventData()
        event_data.last_visited_time = dfdatetime_posix_time.PosixTime(
            timestamp=timestamp
        )
        event_data.popularity_index = popularity_index
        event_data.url = url

        # A title equal to the URL carries no additional information.
        if title != url:
            event_data.title = title

        event_data.description = self._GetDescription(popularity_index)

        parser_mediator.ProduceEventData(event_data)
        return True

    def ParseFileObject(self, parser_mediator, file_object):
        """Parses an Opera global history file-like object.

        The first record is strictly validated to determine if the file is an
        Opera global history file, the remaining records are parsed leniently.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfVFS.
          file_object (dfvfs.FileIO): file-like object.

        Raises:
          WrongParser: when the file cannot be parsed.
        """
        encoding = self._ENCODING
        if not encoding:
            encoding = parser_mediator.GetCodePage()

        text_file_object = text_file.TextFile(file_object, encoding=encoding)

        if not self._ParseAndValidateRecord(parser_mediator, text_file_object):
            raise errors.WrongParser("Unable to parse as Opera global_history.dat.")

        # Keep reading records until one cannot be read.
        while self._ParseRecord(parser_mediator, text_file_object):
            pass
# Make both Opera history parsers known to the parsers manager.
manager.ParsersManager.RegisterParsers([OperaTypedHistoryParser, OperaGlobalHistoryParser])