# -*- coding: utf-8 -*-
"""Text plugin for Confluence access log (conf_access_log[DATE].log) files.
Also see:
https://confluence.atlassian.com/doc/configure-access-logs-1044780567.html
https://confluence.atlassian.com/confkb/audit-confluence-using-the-tomcat-valve-component-223216846.html
"""
import pyparsing
from dfdatetime import time_elements as dfdatetime_time_elements
from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface
[docs]
class ConfluenceAccessEventData(events.EventData):
  """Event data describing one entry of a Confluence access log.

  Attributes:
    forwarded_for (str): value of the X-FORWARDED-FOR request header.
    http_request_method (str): method of the HTTP request.
    http_request_referer (str): referer header of the HTTP request.
    http_request_uri (str): URI of the HTTP request.
    http_request_user_agent (str): user agent header of the HTTP request.
    http_response_bytes (int): size of the HTTP response in bytes, not
        counting the headers.
    http_response_code (int): HTTP response code returned by the server.
    http_version (str): version of the HTTP request.
    process_duration (int): number of milliseconds taken to process the
        request.
    recorded_time (dfdatetime.DateTimeValues): date and time at which the
        log entry was recorded.
    remote_name (str): hostname or IP address of the remote system.
    thread_name (str): name of the thread that handled the request.
    user_name (str): value of the X-AUSERNAME response header.
  """

  DATA_TYPE = 'confluence:access'

  def __init__(self):
    """Initializes the event data with every attribute unset."""
    super().__init__(data_type=self.DATA_TYPE)
    # All values start out as None and are filled in by the text plugin.
    self.forwarded_for = None
    self.http_request_method = None
    self.http_request_referer = None
    self.http_request_uri = None
    self.http_request_user_agent = None
    self.http_response_bytes = None
    self.http_response_code = None
    self.http_version = None
    self.process_duration = None
    self.recorded_time = None
    self.remote_name = None
    self.thread_name = None
    self.user_name = None
[docs]
class ConfluenceAccessTextPlugin(interface.TextPlugin):
  """Text plugin for Confluence access log (conf_access_log[DATE].log) files.

  Two line layouts are supported: the default layout used before Confluence
  7.11 and the layout used after 7.11, which adds the X-Forwarded-For request
  header value directly after the date and time.
  """

  NAME = 'confluence_access'
  # NOTE(review): this description mentions access.log while the docstrings
  # name conf_access_log[DATE].log - confirm which one is intended.
  DATA_FORMAT = 'Confluence access log (access.log) file'

  _INTEGER = pyparsing.Word(pyparsing.nums).set_parse_action(
      lambda tokens: int(tokens[0], 10))

  _TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).set_parse_action(
      lambda tokens: int(tokens[0], 10))

  _FOUR_DIGITS = pyparsing.Word(pyparsing.nums, exact=4).set_parse_action(
      lambda tokens: int(tokens[0], 10))

  _THREE_LETTERS = pyparsing.Word(pyparsing.alphas, exact=3)

  # Maps a lower case month abbreviation to its month number.
  _MONTH_DICT = {
      'jan': 1,
      'feb': 2,
      'mar': 3,
      'apr': 4,
      'may': 5,
      'jun': 6,
      'jul': 7,
      'aug': 8,
      'sep': 9,
      'oct': 10,
      'nov': 11,
      'dec': 12}

  # Sign, hours and minutes, for example: -0400
  _TIME_ZONE_OFFSET = (
      pyparsing.Word('+-', exact=1) + _TWO_DIGITS + _TWO_DIGITS)

  # Date and time values are formatted as:
  # [18/Sep/2011:19:18:28 -0400]
  _DATE_TIME = pyparsing.Group(
      pyparsing.Suppress('[') + _TWO_DIGITS +
      pyparsing.Suppress('/') + _THREE_LETTERS +
      pyparsing.Suppress('/') + _FOUR_DIGITS +
      pyparsing.Suppress(':') + _TWO_DIGITS +
      pyparsing.Suppress(':') + _TWO_DIGITS +
      pyparsing.Suppress(':') + _TWO_DIGITS +
      _TIME_ZONE_OFFSET + pyparsing.Suppress(']')).set_results_name('date_time')

  _IP_ADDRESS = (
      pyparsing.pyparsing_common.ipv4_address |
      pyparsing.pyparsing_common.ipv6_address)

  # The response size is either an integer or a literal "-".
  _RESPONSE_BYTES = (
      pyparsing.Literal('-') | _INTEGER).set_results_name('response_bytes')

  _REFERER = pyparsing.Word(pyparsing.alphanums + '/-_.?=%&:+<>#~[]')

  _THREAD_NAME = (
      pyparsing.Word(pyparsing.alphanums + '-').set_results_name('thread_name'))

  _USER_AGENT = pyparsing.restOfLine().set_results_name('user_agent')

  # A user name is either a run of alphanumeric, "@" and "." characters,
  # which also covers email-style names, or a literal "-" (presumably logged
  # when the X-AUSERNAME header is not set). A separate alphanumeric-only
  # alternative is not needed since the first alternative already accepts
  # every such value.
  _USER_NAME = (
      pyparsing.Word(pyparsing.alphanums + '@.') |
      pyparsing.Literal('-')).set_results_name('user_name')

  _HTTP_METHOD = pyparsing.one_of([
      'CONNECT', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PATCH', 'POST', 'PUT',
      'TRACE'])

  _REMOTE_NAME = (
      _IP_ADDRESS |
      pyparsing.Word(pyparsing.alphanums + '-' + '.')).set_results_name(
          'remote_name')

  _HTTP_VERSION = pyparsing.Word(pyparsing.alphanums + '/.').set_results_name(
      'http_version')

  _REQUEST_URI = pyparsing.Word(pyparsing.alphanums + '/-_.?=%&:+<>#~[]')

  _END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())

  # Default (pre 7.11) format log line:
  # %t %{X-AUSERNAME}o %I %h %r %s %Dms %b %{Referer}i %{User-Agent}i
  _PRE_711_FORMAT_LOG_LINE = (
      _DATE_TIME +
      _USER_NAME +
      _THREAD_NAME +
      _REMOTE_NAME +
      _HTTP_METHOD.set_results_name('http_method') +
      _REQUEST_URI.set_results_name('request_url') +
      _HTTP_VERSION +
      _INTEGER.set_results_name('response_code') +
      _INTEGER.set_results_name('process_duration') +
      pyparsing.Literal('ms') +
      _RESPONSE_BYTES +
      _REFERER.set_results_name('referer') +
      _USER_AGENT +
      _END_OF_LINE)

  # Post 7.11 format log line:
  # %t %{X-Forwarded-For}i %{X-AUSERNAME}o %I %h %r %s %Dms %b %{Referer}i
  # %{User-Agent}i
  _POST_711_FORMAT_LOG_LINE = (
      _DATE_TIME +
      _IP_ADDRESS.set_results_name('forwarded_for') +
      _USER_NAME +
      _THREAD_NAME +
      _REMOTE_NAME +
      _HTTP_METHOD.set_results_name('http_method') +
      _REQUEST_URI.set_results_name('request_url') +
      _HTTP_VERSION +
      _INTEGER.set_results_name('response_code') +
      _INTEGER.set_results_name('process_duration') +
      pyparsing.Literal('ms') +
      _RESPONSE_BYTES +
      _REFERER.set_results_name('referer') +
      _USER_AGENT +
      _END_OF_LINE)

  # The key of each line structure is passed to _ParseRecord.
  _LINE_STRUCTURES = [
      ('pre_711_format', _PRE_711_FORMAT_LOG_LINE),
      ('post_711_format', _POST_711_FORMAT_LOG_LINE)]

  VERIFICATION_GRAMMAR = _PRE_711_FORMAT_LOG_LINE ^ _POST_711_FORMAT_LOG_LINE

  VERIFICATION_LITERALS = [
      ' CONNECT ', ' DELETE ', ' GET ', ' HEAD ', ' HTTP/', ' OPTIONS ',
      ' PATCH ', ' POST ', ' PUT ', ' TRACE ']

  def _ParseRecord(self, parser_mediator, key, structure):
    """Parses a pyparsing structure.

    Copies the named values of a parsed log line into Confluence access event
    data and hands the event data to the parser mediator.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): tokens from a parsed log line.

    Raises:
      ParseError: if the structure cannot be parsed.
    """
    time_elements_structure = self._GetValueFromStructure(
        structure, 'date_time')

    response_bytes = self._GetValueFromStructure(structure, 'response_bytes')
    # The access log valve writes "-" for %b when zero bytes were sent.
    # Normalize it to 0 so that http_response_bytes is always an integer.
    if response_bytes == '-':
      response_bytes = 0

    event_data = ConfluenceAccessEventData()
    event_data.http_request_method = self._GetValueFromStructure(
        structure, 'http_method')
    event_data.http_request_referer = self._GetValueFromStructure(
        structure, 'referer')
    event_data.http_request_uri = self._GetValueFromStructure(
        structure, 'request_url')
    event_data.http_request_user_agent = self._GetStringValueFromStructure(
        structure, 'user_agent')
    event_data.http_response_code = self._GetValueFromStructure(
        structure, 'response_code')
    event_data.http_response_bytes = response_bytes
    event_data.http_version = self._GetValueFromStructure(
        structure, 'http_version')
    event_data.process_duration = self._GetValueFromStructure(
        structure, 'process_duration')
    event_data.recorded_time = self._ParseTimeElements(time_elements_structure)
    event_data.remote_name = self._GetValueFromStructure(
        structure, 'remote_name')
    event_data.thread_name = self._GetValueFromStructure(
        structure, 'thread_name')
    event_data.user_name = self._GetValueFromStructure(
        structure, 'user_name')

    # Only the post 7.11 line structure defines the forwarded for value.
    if key == 'post_711_format':
      event_data.forwarded_for = self._GetValueFromStructure(
          structure, 'forwarded_for')

    parser_mediator.ProduceEventData(event_data)

  def _ParseTimeElements(self, time_elements_structure):
    """Parses date and time elements of a log line.

    Args:
      time_elements_structure (pyparsing.ParseResults): date and time elements
          of a log line.

    Returns:
      dfdatetime.TimeElements: date and time value.

    Raises:
      ParseError: if a valid date and time value cannot be derived from
          the time elements.
    """
    try:
      (day_of_month, month_string, year, hours, minutes, seconds,
       time_zone_sign, time_zone_hours, time_zone_minutes) = (
           time_elements_structure)

      # An unknown month abbreviation maps to 0, which is presumably rejected
      # as an invalid month when the time elements are created.
      month = self._MONTH_DICT.get(month_string.lower(), 0)

      # The time zone offset is expressed in minutes.
      time_zone_offset = (time_zone_hours * 60) + time_zone_minutes
      if time_zone_sign == '-':
        time_zone_offset *= -1

      time_elements_tuple = (year, month, day_of_month, hours, minutes, seconds)

      return dfdatetime_time_elements.TimeElements(
          time_elements_tuple=time_elements_tuple,
          time_zone_offset=time_zone_offset)

    except (TypeError, ValueError) as exception:
      # Chain explicitly so the original cause is preserved in the traceback.
      raise errors.ParseError(
          f'Unable to parse time elements with error: {exception!s}'
      ) from exception

  def CheckRequiredFormat(self, parser_mediator, text_reader):
    """Check if the log record has the minimal structure required by the plugin.

    The lines of the text reader must match the verification grammar and the
    date and time of the matched line must be parsable.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      text_reader (EncodedTextReader): text reader.

    Returns:
      bool: True if this is the correct plugin, False otherwise.
    """
    try:
      structure = self._VerifyString(text_reader.lines)
    except errors.ParseError:
      return False

    time_elements_structure = self._GetValueFromStructure(
        structure, 'date_time')

    try:
      self._ParseTimeElements(time_elements_structure)
    except errors.ParseError:
      return False

    return True
# Register the Confluence access plugin with the text log parser when this
# module is loaded.
text_parser.TextLogParser.RegisterPlugin(ConfluenceAccessTextPlugin)