# -*- coding: utf-8 -*-
"""Text parser plugin for AWS ELB access logs.
The AWS documentation is not clear about the meaning of the "target_port_list"
field. The assumption is that it refers to a list of possible backend instances'
IP addresses that could receive the client's request. This parser stores the
"target_port_list" data in the "destination_list" attribute of an EventData
object.
Also see:
https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html
https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/access-log-collection.html
https://docs.aws.amazon.com/elasticloadbalancing/latest/network/load-balancer-access-logs.html
"""
import pyparsing
from dfdatetime import time_elements as dfdatetime_time_elements
from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface
[docs]
class AWSELBEventData(events.EventData):
"""AWS Elastic Load Balancer access log event data.
Attributes:
actions_executed (str): The actions taken when processing the request.
alpn_back_end_protocol (str): The application protocol negotiated with
the target, in string format. If no ALPN policy is configured in
the TLS listener, no matching protocol is found, or no valid protocol
list is sent, this value is set to -.
(only for network load balancer logs)
alpn_client_preference_list (str): The value of the
application_layer_protocol_negotiation extension in the client hello
message. This value is URL-encoded. Each protocol is enclosed in double
quotes and protocols are separated by a comma. If no ALPN policy is
configured in the TLS listener, no valid client hello message is sent,
or the extension is not present, this value is set to -. The string is
truncated if it is longer than 256 bytes.
(only for network load balancer logs)
alpn_front_end_protocol (str): The application protocol negotiated with
the client, in string format. If no ALPN policy is configured in
the TLS listener, no matching protocol is found, or no valid protocol
list is sent, this value is set to -.
(only for network load balancer logs)
chosen_cert_arn (str): The ARN of the certificate
presented to the source.
chosen_cert_serial (str): Reserved for future use.
This value is always set to -.
(only for network load balancer logs)
classification (str): The classification for desync mitigation.
classification_reason (str): The classification reason code.
connection_duration (str): duration of the connection to complete, from
start to closure, in milliseconds.
(only for network load balancer logs)
destination_group_arn (str): The Amazon Resource Name (ARN) of the
destination group.
destination_ip_address (str): The IP address of the destination
that processed this request.
destination_list (str): A space-delimited list of IP addresses
and ports for the destinations that processed this request.
destination_port (int): The port of the destination that processed
this request.
destination_processing_duration (str): duration from the time the load
balancer sent the request to a destination until the destination
started to send the response headers.
destination_status_code (int): The status code of the response
from the destination.
destination_status_code_list (str): A space-delimited list of status codes.
domain_name (str): The SNI domain provided by the
source during the TLS handshake.
error_reason (str): The error reason code, enclosed in
double quotes.
handshake_duration (str): duration of the handshake to complete after
the TCP connection is established, including client-side delays,
in milliseconds. This time is included in the connection_duration field.
(only for network load balancer logs)
incoming_tls_alert (str): The integer value of TLS alerts received by the
load balancer from the client, if present.
(only for network load balancer logs)
listener (str): The resource ID of the TLS listener for the connection.
(only for network load balancer logs)
matched_rule_priority (int): The priority value of the rule that
matched the request.
received_bytes (int): The size of the request, in bytes, received from
the source.
redirect_url (str): The URL of the redirect destination.
request_processing_duration (str): total duration from the time the load
balancer received the request until the time it sent the request to
a destination.
request_time (dfdatetime.DateTimeValues): date and time a request
was sent.
request_type (str): The type of request or connection.
resource_identifier (str): The resource ID of the load balancer.
response_processing_duration (str): duration of processing a response.
response_time (dfdatetime.DateTimeValues): date and time a response
was sent.
sent_bytes (int): The size of the response, in bytes, sent to the source.
ssl_cipher (str): The SSL cipher of the HTTPS listener.
ssl_protocol (str): The SSL protocol of the HTTPS listener.
source_ip_address (str): The IP address of the requesting source.
source_port (int): The port of the requesting source.
tls_cipher (str): The cipher suite negotiated with the client, in OpenSSL
format. If TLS negotiation does not complete, this value is set to -.
(only for network load balancer logs)
tls_named_group (str): Reserved for future use.
This value is always set to -.
(only for network load balancer logs)
tls_protocol_version (str): The TLS protocol negotiated with the client,
in string format. If TLS negotiation does not complete,
this value is set to -.
(only for network load balancer logs)
trace_identifier (str): The contents of the X-Amzn-Trace-Id header.
user_agent (str): A User-Agent string.
version (str): The version of the log entry.
(only for network load balancer logs)
"""
DATA_TYPE = 'aws:elb:access'
[docs]
def __init__(self):
"""Initializes event data."""
super(AWSELBEventData, self).__init__(data_type=self.DATA_TYPE)
self.actions_executed = None
self.alpn_back_end_protocol = None
self.alpn_client_preference_list = None
self.alpn_front_end_protocol = None
self.chosen_cert_arn = None
self.chosen_cert_serial = None
self.classification = None
self.classification_reason = None
self.connection_duration = None
self.destination_group_arn = None
self.destination_ip_address = None
self.destination_list = None
self.destination_port = None
self.destination_processing_duration = None
self.destination_status_code = None
self.destination_status_code_list = None
self.domain_name = None
self.elb_status_code = None
self.error_reason = None
self.handshake_duration = None
self.incoming_tls_alert = None
self.listener = None
self.matched_rule_priority = None
self.received_bytes = None
self.redirect_url = None
self.request = None
self.request_processing_duration = None
self.request_time = None
self.request_type = None
self.resource_identifier = None
self.response_processing_duration = None
self.response_time = None
self.sent_bytes = None
self.source_ip_address = None
self.source_port = None
self.ssl_cipher = None
self.ssl_protocol = None
self.tls_cipher = None
self.tls_named_group = None
self.tls_protocol_version = None
self.trace_identifier = None
self.user_agent = None
self.version = None
[docs]
class AWSELBTextPlugin(interface.TextPlugin):
"""Text parser plugin for AWS ELB access log files."""
NAME = 'aws_elb_access'
DATA_FORMAT = 'AWS ELB Access log file'
ENCODING = 'utf-8'
_BLANK = pyparsing.Literal('"-"') | pyparsing.Literal('-')
_WORD = pyparsing.Word(pyparsing.printables) | _BLANK
_INTEGER = pyparsing.Word(pyparsing.nums).set_parse_action(
lambda tokens: int(tokens[0], 10))
_UNSIGNED_INTEGER = _INTEGER | _BLANK
_SIGNED_INTEGER = pyparsing.Word('-', pyparsing.nums) | _UNSIGNED_INTEGER
_FLOATING_POINT = (
pyparsing.Word(pyparsing.nums + '.') | pyparsing.Literal('-1'))
_IP_ADDRESS = (
pyparsing.pyparsing_common.ipv4_address |
pyparsing.pyparsing_common.ipv6_address)
_PORT = pyparsing.Word(pyparsing.nums, max=6).set_parse_action(
lambda tokens: int(tokens[0], 10)) | _BLANK
_SOURCE_IP_ADDRESS_AND_PORT = pyparsing.Group(
_IP_ADDRESS.set_results_name('source_ip_address') +
pyparsing.Suppress(':') + _PORT.set_results_name('source_port') | _BLANK)
_DESTINATION_IP_ADDRESS_AND_PORT = pyparsing.Group(
_IP_ADDRESS.set_results_name('destination_ip_address') +
pyparsing.Suppress(':') + _PORT.set_results_name('destination_port') |
_BLANK)
# Date and time values are formatted as:
# 2020-01-11T16:56:05.917294Z
_DATE_TIME_ISOFORMAT_STRING = pyparsing.Regex(
r'([0-9]{4}-[0-9]{2}-[0-9]{2}T'
r'[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{6})Z')
# Date and time values are formatted as:
# 2022-12-20T02:59:40
_DATE_TIME_ISOFORMAT_STRING_WITHOUT_TIMEZONE = pyparsing.Regex(
r'([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2})')
_REQUEST = pyparsing.quotedString.set_results_name(
'request').set_parse_action(pyparsing.removeQuotes)
_USER_AGENT = pyparsing.quotedString.set_results_name(
'user_agent').set_parse_action(pyparsing.removeQuotes)
_ALPN_CLIENT_PREFERENCE_LIST = pyparsing.quotedString.set_results_name(
'alpn_client_preference_list').set_parse_action(pyparsing.removeQuotes)
_END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())
# A log line is defined as in the AWS ELB documentation
_APPLICATION_LOG_LINE = (
_WORD.set_results_name('request_type') +
_DATE_TIME_ISOFORMAT_STRING.set_results_name('response_time') +
_WORD.set_results_name('resource_identifier') +
_SOURCE_IP_ADDRESS_AND_PORT.set_results_name('source_ip_port') +
_DESTINATION_IP_ADDRESS_AND_PORT.set_results_name('destination_ip_port') +
_FLOATING_POINT.set_results_name('request_processing_duration') +
_FLOATING_POINT.set_results_name('destination_processing_duration') +
_FLOATING_POINT.set_results_name('response_processing_duration') +
_UNSIGNED_INTEGER.set_results_name('elb_status_code') +
_UNSIGNED_INTEGER.set_results_name('destination_status_code') +
_UNSIGNED_INTEGER.set_results_name('received_bytes') +
_UNSIGNED_INTEGER.set_results_name('sent_bytes') +
_REQUEST +
_USER_AGENT +
_WORD.set_results_name('ssl_cipher') +
_WORD.set_results_name('ssl_protocol') +
_WORD.set_results_name('destination_group_arn') +
pyparsing.quotedString.set_results_name(
'trace_identifier').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'domain_name').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'chosen_cert_arn').set_parse_action(pyparsing.removeQuotes) +
_SIGNED_INTEGER.set_results_name('matched_rule_priority') +
_DATE_TIME_ISOFORMAT_STRING.set_results_name('request_time') +
pyparsing.quotedString.set_results_name(
'actions_executed').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'redirect_url').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'error_reason').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'destination_list').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'destination_status_code_list').set_parse_action(
pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'classification').set_parse_action(pyparsing.removeQuotes) +
pyparsing.quotedString.set_results_name(
'classification_reason').set_parse_action(pyparsing.removeQuotes) +
_END_OF_LINE)
_NETWORK_LOG_LINE = (
_WORD.set_results_name('request_type') +
_WORD.set_results_name('version') +
_DATE_TIME_ISOFORMAT_STRING_WITHOUT_TIMEZONE.set_results_name(
'response_time') +
_WORD.set_results_name('resource_identifier') +
_WORD.set_results_name('listener') +
_SOURCE_IP_ADDRESS_AND_PORT.set_results_name('source_ip_port') +
_DESTINATION_IP_ADDRESS_AND_PORT.set_results_name('destination_ip_port') +
_UNSIGNED_INTEGER.set_results_name('connection_duration') +
_UNSIGNED_INTEGER.set_results_name('handshake_duration') +
_UNSIGNED_INTEGER.set_results_name('received_bytes') +
_UNSIGNED_INTEGER.set_results_name('sent_bytes') +
_WORD.set_results_name('incoming_tls_alert') +
_WORD.set_results_name('chosen_cert_arn') +
_WORD.set_results_name('chosen_cert_serial') +
_WORD.set_results_name('tls_cipher') +
_WORD.set_results_name('tls_protocol_version') +
_WORD.set_results_name('tls_named_group') +
_WORD.set_results_name('domain_name') +
_WORD.set_results_name('alpn_front_end_protocol') +
_WORD.set_results_name('alpn_back_end_protocol') +
(_ALPN_CLIENT_PREFERENCE_LIST | pyparsing.Literal('-')) +
_END_OF_LINE)
_CLASSIC_LOG_LINE = (
_DATE_TIME_ISOFORMAT_STRING.set_results_name('response_time') +
_WORD.set_results_name('resource_identifier') +
_SOURCE_IP_ADDRESS_AND_PORT.set_results_name('source_ip_port') +
_DESTINATION_IP_ADDRESS_AND_PORT.set_results_name('destination_ip_port') +
_FLOATING_POINT.set_results_name('request_processing_duration') +
_FLOATING_POINT.set_results_name('destination_processing_duration') +
_FLOATING_POINT.set_results_name('response_processing_duration') +
_UNSIGNED_INTEGER.set_results_name('elb_status_code') +
_UNSIGNED_INTEGER.set_results_name('destination_status_code') +
_SIGNED_INTEGER.set_results_name('received_bytes') +
_SIGNED_INTEGER.set_results_name('sent_bytes') +
_REQUEST +
_USER_AGENT +
_WORD.set_results_name('ssl_cipher') +
_WORD.set_results_name('ssl_protocol') +
_END_OF_LINE)
_LINE_STRUCTURES = [
('elb_application_accesslog', _APPLICATION_LOG_LINE),
('elb_classic_accesslog', _CLASSIC_LOG_LINE),
('elb_network_accesslog', _NETWORK_LOG_LINE)]
VERIFICATION_GRAMMAR = (
_APPLICATION_LOG_LINE ^ _CLASSIC_LOG_LINE ^ _NETWORK_LOG_LINE)
def _GetValueFromGroup(self, structure, name, key_name):
"""Retrieves a value from a Pyparsing.Group structure.
Args:
structure (pyparsing.ParseResults): tokens from a parsed log line.
name (str): name of the token.
key_name (str): key name to retrieve the value of.
Returns:
object: value for the specified key or None if not available.
"""
structure_value = self._GetValueFromStructure(structure, name)
if structure_value is None:
return None
return structure_value.get(key_name)
def _ParseRecord(self, parser_mediator, key, structure):
"""Parses a pyparsing structure.
Args:
parser_mediator (ParserMediator): mediates interactions between parsers
and other components, such as storage and dfVFS.
key (str): name of the parsed structure.
structure (pyparsing.ParseResults): tokens from a parsed log line.
Raises:
ParseError: if the structure cannot be parsed.
"""
destination_list = self._GetValueFromStructure(
structure, 'destination_list')
if destination_list:
destination_list = destination_list.split()
chosen_cert_serial = self._GetValueFromStructure(
structure, 'chosen_cert_serial')
if chosen_cert_serial == '-':
chosen_cert_serial = None
classification = self._GetValueFromStructure(structure, 'classification')
if classification == '-':
classification = None
classification_reason = self._GetValueFromStructure(
structure, 'classification_reason')
if classification_reason == '-':
classification_reason = None
destination_status_code = self._GetValueFromStructure(
structure, 'destination_status_code')
if destination_status_code == '-':
destination_status_code = None
elb_status_code = self._GetValueFromStructure(structure, 'elb_status_code')
if elb_status_code == '-':
elb_status_code = None
error_reason = self._GetValueFromStructure(structure, 'error_reason')
if error_reason == '-':
error_reason = None
incoming_tls_alert = self._GetValueFromStructure(
structure, 'incoming_tls_alert')
if incoming_tls_alert == '-':
incoming_tls_alert = None
redirect_url = self._GetValueFromStructure(structure, 'redirect_url')
if redirect_url == '-':
redirect_url = None
ssl_cipher = self._GetValueFromStructure(structure, 'ssl_cipher')
if ssl_cipher == '-':
ssl_cipher = None
ssl_protocol = self._GetValueFromStructure(structure, 'ssl_protocol')
if ssl_protocol == '-':
ssl_protocol = None
tls_named_group = self._GetValueFromStructure(structure, 'tls_named_group')
if tls_named_group == '-':
tls_named_group = None
user_agent = self._GetValueFromStructure(structure, 'user_agent')
if user_agent == '-':
user_agent = None
event_data = AWSELBEventData()
event_data.request_type = self._GetValueFromStructure(
structure, 'request_type')
event_data.resource_identifier = self._GetValueFromStructure(
structure, 'resource_identifier')
event_data.source_ip_address = self._GetValueFromGroup(
structure, 'source_ip_port', 'source_ip_address')
event_data.source_port = self._GetValueFromGroup(
structure, 'source_ip_port', 'source_port')
event_data.destination_ip_address = self._GetValueFromGroup(
structure, 'destination_ip_port', 'destination_ip_address')
event_data.destination_port = self._GetValueFromGroup(
structure, 'destination_ip_port', 'destination_port')
event_data.request_processing_duration = self._GetValueFromStructure(
structure, 'request_processing_duration')
event_data.destination_processing_duration = self._GetValueFromStructure(
structure, 'destination_processing_duration')
event_data.response_processing_duration = self._GetValueFromStructure(
structure, 'response_processing_duration')
event_data.elb_status_code = elb_status_code
event_data.destination_status_code = destination_status_code
event_data.received_bytes = self._GetValueFromStructure(
structure, 'received_bytes')
event_data.sent_bytes = self._GetValueFromStructure(structure, 'sent_bytes')
event_data.request = self._GetValueFromStructure(structure, 'request')
event_data.user_agent = user_agent
event_data.ssl_cipher = ssl_cipher
event_data.ssl_protocol = ssl_protocol
event_data.destination_group_arn = self._GetValueFromStructure(
structure, 'destination_group_arn')
event_data.trace_identifier = self._GetValueFromStructure(
structure, 'trace_identifier')
event_data.domain_name = self._GetValueFromStructure(
structure, 'domain_name')
event_data.chosen_cert_arn = self._GetValueFromStructure(
structure, 'chosen_cert_arn')
event_data.matched_rule_priority = self._GetValueFromStructure(
structure, 'matched_rule_priority')
event_data.actions_executed = self._GetValueFromStructure(
structure, 'actions_executed')
event_data.redirect_url = redirect_url
event_data.error_reason = error_reason
event_data.destination_status_code_list = self._GetValueFromStructure(
structure, 'destination_status_code_list')
event_data.classification = classification
event_data.classification_reason = classification_reason
event_data.destination_list = destination_list
event_data.version = self._GetValueFromStructure(structure, 'version')
event_data.listener = self._GetValueFromStructure(structure, 'listener')
event_data.connection_duration = self._GetValueFromStructure(
structure, 'connection_duration')
event_data.handshake_duration = self._GetValueFromStructure(
structure, 'handshake_duration')
event_data.incoming_tls_alert = incoming_tls_alert
event_data.chosen_cert_serial = chosen_cert_serial
event_data.tls_named_group = tls_named_group
event_data.tls_cipher = self._GetValueFromStructure(structure, 'tls_cipher')
event_data.tls_protocol_version = self._GetValueFromStructure(
structure, 'tls_protocol_version')
event_data.alpn_front_end_protocol = self._GetValueFromStructure(
structure, 'alpn_front_end_protocol')
event_data.alpn_back_end_protocol = self._GetValueFromStructure(
structure, 'alpn_back_end_protocol')
event_data.alpn_client_preference_list = self._GetValueFromStructure(
structure, 'alpn_client_preference_list')
response_time_structure = self._GetValueFromStructure(
structure, 'response_time')
if response_time_structure:
event_data.response_time = self._ParseTimeElements(
response_time_structure)
request_time_structure = structure.get('request_time')
if request_time_structure:
event_data.request_time = self._ParseTimeElements(request_time_structure)
parser_mediator.ProduceEventData(event_data)
def _ParseTimeElements(self, time_elements_structure):
"""Parses date and time elements of a log line.
Args:
time_elements_structure (pyparsing.ParseResults): date and time elements
of a log line.
Returns:
dfdatetime.TimeElements: date and time value.
Raises:
ParseError: if a valid date and time value cannot be derived from
the time elements.
"""
try:
if len(time_elements_structure) == 27:
date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()
date_time.CopyFromStringISO8601(time_elements_structure)
else:
date_time = dfdatetime_time_elements.TimeElements()
date_time.CopyFromStringISO8601(time_elements_structure)
date_time.is_local_time = True
return date_time
except (TypeError, ValueError) as exception:
raise errors.ParseError(
f'Unable to parse time elements with error: {exception!s}')
[docs]
def CheckRequiredFormat(self, parser_mediator, text_reader):
"""Check if the log record has the minimal structure required by the plugin.
Args:
parser_mediator (ParserMediator): mediates interactions between parsers
and other components, such as storage and dfVFS.
text_reader (EncodedTextReader): text reader.
Returns:
bool: True if this is the correct plugin, False otherwise.
"""
try:
structure = self._VerifyString(text_reader.lines)
except errors.ParseError:
return False
time_elements_structure = self._GetValueFromStructure(
structure, 'response_time')
if time_elements_structure:
try:
self._ParseTimeElements(time_elements_structure)
except errors.ParseError:
return False
time_elements_structure = self._GetValueFromStructure(
structure, 'request_time')
if time_elements_structure:
try:
self._ParseTimeElements(time_elements_structure)
except errors.ParseError:
return False
return True
text_parser.TextLogParser.RegisterPlugin(AWSELBTextPlugin)