# -*- coding: utf-8 -*-
"""This file contains a plugin for parsing Google Analytics cookies."""
from urllib import parse as urlparse
from plaso.containers import events
from plaso.parsers.cookie_plugins import interface
from plaso.parsers.cookie_plugins import manager
# TODO: determine whether __utmc is always 0.
class GoogleAnalyticsUtmaEventData(events.EventData):
    """Event data for a Google Analytics __utma cookie.

    Attributes:
        cookie_name (str): name of the cookie.
        domain_hash (str): domain hash.
        sessions (int): number of sessions.
        url (str): URL or path where the cookie got set.
        visited_times (list[dfdatetime.DateTimeValues]): dates and times the
            URL was visited.
        visitor_identifier (str): visitor identifier.
    """

    DATA_TYPE = 'cookie:google:analytics:utma'

    def __init__(self):
        """Initializes the event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cookie_name = None
        self.domain_hash = None
        self.sessions = None
        self.url = None
        self.visited_times = None
        self.visitor_identifier = None
class GoogleAnalyticsUtmbEventData(events.EventData):
    """Event data for a Google Analytics __utmb cookie.

    Attributes:
        cookie_name (str): name of the cookie.
        domain_hash (str): domain hash.
        last_visited_time (dfdatetime.DateTimeValues): date and time the URL
            was last visited.
        pages_viewed (int): number of pages viewed.
        url (str): URL or path where the cookie got set.
    """

    DATA_TYPE = 'cookie:google:analytics:utmb'

    def __init__(self):
        """Initializes the event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cookie_name = None
        self.domain_hash = None
        self.last_visited_time = None
        self.pages_viewed = None
        self.url = None
class GoogleAnalyticsUtmtEventData(events.EventData):
    """Event data for a Google Analytics __utmt cookie.

    Attributes:
        cookie_name (str): name of the cookie.
        last_visited_time (dfdatetime.DateTimeValues): date and time the URL
            was last visited.
        url (str): URL or path where the cookie got set.
    """

    DATA_TYPE = 'cookie:google:analytics:utmt'

    def __init__(self):
        """Initializes the event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cookie_name = None
        self.last_visited_time = None
        self.url = None
class GoogleAnalyticsUtmzEventData(events.EventData):
    """Event data for a Google Analytics __utmz cookie.

    Attributes:
        cookie_name (str): name of the cookie.
        domain_hash (str): domain hash.
        last_visited_time (dfdatetime.DateTimeValues): date and time the URL
            was last visited.
        sessions (int): number of sessions.
        sources (int): number of sources.
        url (str): URL or path where the cookie got set.
    """

    DATA_TYPE = 'cookie:google:analytics:utmz'

    def __init__(self):
        """Initializes the event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cookie_name = None
        self.domain_hash = None
        self.last_visited_time = None
        self.sessions = None
        self.sources = None
        self.url = None
class GoogleAnalyticsUtmaPlugin(interface.BaseCookiePlugin):
    """A browser cookie plugin for __utma Google Analytics cookies.

    The structure of the cookie data:
    <domain hash>.<visitor ID>.<first visit>.<previous visit>.<last visit>.
    <number of sessions>

    For example:
    137167072.1215918423.1383170166.1383170166.1383170166.1

    Or:
    <last visit>

    For example:
    13113225820000000
    """

    NAME = 'google_analytics_utma'
    DATA_FORMAT = 'Google Analytics __utma cookie'

    COOKIE_NAME = '__utma'

    def _ParseCookieData(
            self, parser_mediator, cookie_data=None, url=None, **kwargs):
        """Extracts events from cookie data.

        Cookie data with 1 or 6 dot-separated fields is supported; any other
        number of fields produces an extraction warning and no event data.

        Args:
            parser_mediator (ParserMediator): parser mediator.
            cookie_data (str): cookie data.
            url (str): URL or path where the cookie got set.
        """
        fields = cookie_data.split('.')
        number_of_fields = len(fields)

        if number_of_fields not in (1, 6):
            parser_mediator.ProduceExtractionWarning(
                'unsupported number of fields: {0:d} in cookie: {1:s}'.format(
                    number_of_fields, self.COOKIE_NAME))
            return

        # Only the 6-field layout carries these values.
        domain_hash = None
        visitor_identifier = None
        number_of_sessions = None

        # The guard above leaves exactly two layouts: 1 field or 6 fields.
        if number_of_fields == 1:
            parsed_times = [self._ParsePosixTimeIn100Nanoseconds(fields[0])]
        else:
            domain_hash = fields[0]
            visitor_identifier = fields[1]

            # TODO: Double check this time is stored in UTC and not local time.
            # Fields 2 through 4 are the first, previous and last visit.
            parsed_times = [
                self._ParsePosixTime(field) for field in fields[2:5]]

            number_of_sessions = self._ParseIntegerValue(fields[5])

        # Drop timestamps for which the parse helper returned a false value.
        visited_times = [
            date_time for date_time in parsed_times if date_time]

        event_data = GoogleAnalyticsUtmaEventData()
        event_data.cookie_name = self.COOKIE_NAME
        event_data.domain_hash = domain_hash
        event_data.sessions = number_of_sessions
        event_data.url = url
        # An empty list is stored as None.
        event_data.visited_times = visited_times or None
        event_data.visitor_identifier = visitor_identifier

        parser_mediator.ProduceEventData(event_data)
class GoogleAnalyticsUtmbPlugin(interface.BaseCookiePlugin):
    """A browser cookie plugin for __utmb Google Analytics cookies.

    The structure of the cookie data:
    <domain hash>.<pages viewed>.<unknown>.<last time>

    For example:
    137167072.1.10.1383170166
    173272373.6.8.1440489514899
    173272373.4.9.1373300660574

    Or:
    <last time>

    For example:
    13113225820000000
    """

    NAME = 'google_analytics_utmb'
    DATA_FORMAT = 'Google Analytics __utmb cookie'

    COOKIE_NAME = '__utmb'

    def _ParseCookieData(
            self, parser_mediator, cookie_data=None, url=None, **kwargs):
        """Extracts events from cookie data.

        Cookie data with 1 or 4 dot-separated fields is supported; any other
        number of fields produces an extraction warning and no event data.

        Args:
            parser_mediator (ParserMediator): parser mediator.
            cookie_data (str): cookie data.
            url (str): URL or path where the cookie got set.
        """
        fields = cookie_data.split('.')
        number_of_fields = len(fields)

        if number_of_fields not in (1, 4):
            parser_mediator.ProduceExtractionWarning(
                'unsupported number of fields: {0:d} in cookie: {1:s}'.format(
                    number_of_fields, self.COOKIE_NAME))
            return

        # Only the 4-field layout carries these values.
        domain_hash = None
        number_of_pages_viewed = None

        # The guard above leaves exactly two layouts: 1 field or 4 fields.
        if number_of_fields == 1:
            date_time = self._ParsePosixTimeIn100Nanoseconds(fields[0])
        else:
            domain_hash = fields[0]
            number_of_pages_viewed = self._ParseIntegerValue(fields[1])

            # The meaning of the third field is unknown. In the examples in
            # the class docstring the values 8 and 9 accompany a 13-digit
            # (millisecond) timestamp, while 10 accompanies a 10-digit
            # (second) timestamp.
            if fields[2] in ('8', '9'):
                date_time = self._ParsePosixTimeInMilliseconds(fields[3])
            else:
                date_time = self._ParsePosixTime(fields[3])

        event_data = GoogleAnalyticsUtmbEventData()
        event_data.cookie_name = self.COOKIE_NAME
        event_data.domain_hash = domain_hash
        event_data.last_visited_time = date_time
        event_data.pages_viewed = number_of_pages_viewed
        event_data.url = url

        parser_mediator.ProduceEventData(event_data)
class GoogleAnalyticsUtmtPlugin(interface.BaseCookiePlugin):
    """A browser cookie plugin for __utmt Google Analytics cookies.

    The structure of the cookie data:
    <last time>

    For example:
    13113215173000000
    """

    NAME = 'google_analytics_utmt'
    DATA_FORMAT = 'Google Analytics __utmt cookie'

    COOKIE_NAME = '__utmt'

    def _ParseCookieData(
            self, parser_mediator, cookie_data=None, url=None, **kwargs):
        """Extracts events from cookie data.

        Only cookie data consisting of a single field, meaning it contains
        no dot, is supported; anything else produces an extraction warning
        and no event data.

        Args:
            parser_mediator (ParserMediator): parser mediator.
            cookie_data (str): cookie data.
            url (str): URL or path where the cookie got set.
        """
        parts = cookie_data.split('.')
        part_count = len(parts)

        if part_count != 1:
            parser_mediator.ProduceExtractionWarning(
                'unsupported number of fields: {0:d} in cookie: {1:s}'.format(
                    part_count, self.COOKIE_NAME))
            return

        last_visited_time = self._ParsePosixTimeIn100Nanoseconds(parts[0])

        event_data = GoogleAnalyticsUtmtEventData()
        event_data.cookie_name = self.COOKIE_NAME
        event_data.last_visited_time = last_visited_time
        event_data.url = url

        parser_mediator.ProduceEventData(event_data)
class GoogleAnalyticsUtmzPlugin(interface.BaseCookiePlugin):
    """A browser cookie plugin for __utmz Google Analytics cookies.

    The structure of the cookie data:
    <domain hash>.<last time>.<sessions>.<sources>.<variables>

    For example:
    207318870.1383170190.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|
    utmctr=(not%20provided)

    Or:
    <last time>

    For example:
    13128990382000000
    """

    NAME = 'google_analytics_utmz'
    DATA_FORMAT = 'Google Analytics __utmz cookie'

    COOKIE_NAME = '__utmz'

    def _ParseCookieData(
            self, parser_mediator, cookie_data=None, url=None, **kwargs):
        """Extracts events from cookie data.

        Cookie data with 1 field, or with 5 or more dot-separated fields, is
        supported; everything from the fifth field onwards is treated as the
        variables. Any other number of fields produces an extraction warning
        and no event data.

        Each "key=value" variable is set as an attribute on the event data,
        with the value URL-unquoted. Variables with an empty name are skipped.
        Variables whose name matches an attribute the event data already has
        are skipped with an extraction warning, so that cookie content cannot
        overwrite parsed values such as url or cookie_name.

        Args:
            parser_mediator (ParserMediator): parser mediator.
            cookie_data (str): cookie data.
            url (str): URL or path where the cookie got set.
        """
        # At most 4 splits: dots inside the variables stay in the last field.
        fields = cookie_data.split('.', 4)
        number_of_fields = len(fields)

        if number_of_fields not in (1, 5):
            parser_mediator.ProduceExtractionWarning(
                'unsupported number of fields: {0:d} in cookie: {1:s}'.format(
                    number_of_fields, self.COOKIE_NAME))
            return

        # Only the 5-field layout carries these values.
        domain_hash = None
        number_of_sessions = None
        number_of_sources = None
        extra_attributes = {}

        # The guard above leaves exactly two layouts: 1 field or 5 fields.
        if number_of_fields == 1:
            date_time = self._ParsePosixTimeIn100Nanoseconds(fields[0])
        else:
            domain_hash = fields[0]
            date_time = self._ParsePosixTime(fields[1])
            number_of_sessions = self._ParseIntegerValue(fields[2])
            number_of_sources = self._ParseIntegerValue(fields[3])

            for variable in fields[4].split('|'):
                key, _, value = variable.partition('=')
                extra_attributes[key] = urlparse.unquote(value)

        event_data = GoogleAnalyticsUtmzEventData()
        event_data.cookie_name = self.COOKIE_NAME
        event_data.domain_hash = domain_hash
        event_data.last_visited_time = date_time
        event_data.sessions = number_of_sessions
        event_data.sources = number_of_sources
        event_data.url = url

        # TODO: explicitly define these as attributes of
        # GoogleAnalyticsUtmzEventData.
        for key, value in extra_attributes.items():
            if not key:
                # A variable without a name cannot be a usable attribute.
                continue

            if hasattr(event_data, key):
                # The name comes from the cookie; never let it replace an
                # attribute or method the event data already has.
                parser_mediator.ProduceExtractionWarning(
                    'unsupported variable: {0:s} in cookie: {1:s}'.format(
                        key, self.COOKIE_NAME))
                continue

            setattr(event_data, key, value)

        parser_mediator.ProduceEventData(event_data)
# Register every Google Analytics cookie plugin defined in this module with
# the cookie plugins manager.
manager.CookiePluginsManager.RegisterPlugins([
    GoogleAnalyticsUtmaPlugin,
    GoogleAnalyticsUtmbPlugin,
    GoogleAnalyticsUtmtPlugin,
    GoogleAnalyticsUtmzPlugin,
])