# Source code for plaso.parsers.text_plugins.bitbucket_audit

"""Text parser plugin for Atlassian Bitbucket audit log files.

This is for the atlassian-bitbucket-audit.log file, one of multiple log files
produced by a Bitbucket DC/Server installation.

The audit log is a pipe-delimited file with the following fields:
  ip_address | event_name | user | timestamp_ms | entity | details |
  request_identifier | session_identifier

The timestamp field is milliseconds since the Unix epoch (January 1, 1970).

Also see:
  https://support.atlassian.com/bitbucket-data-center/kb/how-to-read-the-bitbucket-data-center-log-formats/
"""

import pyparsing

from dfdatetime import posix_time as dfdatetime_posix_time

from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface


class BitbucketAuditEventData(events.EventData):
  """Event data of an Atlassian Bitbucket audit log entry.

  Attributes:
    details (str): JSON-encoded details of the affected entity.
    entity (str): affected entity in the format PRODUCT/key, such as
        BITBUCKET/bitbucket or PROJECT/myproject.
    event_name (str): name of the audited event, such as
        RestrictedRefAddedEvent.
    recorded_time (dfdatetime.DateTimeValues): date and time the audit event
        was recorded.
    remote_address (str): remote IP address(es), including X-Forwarded-For
        proxies. Multiple addresses are comma-separated; the first address is
        the originating IP and the last is the directly connected IP.
    request_identifier (str): unique identifier of the request that triggered
        the audit event, correlatable with the access log.
    session_identifier (str): session identifier, correlatable with the access
        log.
    user_name (str): name of the user who triggered the event.
  """

  DATA_TYPE = 'atlassian:bitbucket:audit'

  def __init__(self):
    """Initializes event data."""
    super().__init__(data_type=self.DATA_TYPE)
    # Every attribute starts out unset; the parser plugin fills them in.
    self.details = None
    self.entity = None
    self.event_name = None
    self.recorded_time = None
    self.remote_address = None
    self.request_identifier = None
    self.session_identifier = None
    self.user_name = None


class BitbucketAuditTextPlugin(interface.TextPlugin):
  """Text parser plugin for Atlassian Bitbucket audit log files."""

  NAME = 'bitbucket_audit'
  DATA_FORMAT = (
      'Atlassian Bitbucket audit log (atlassian-bitbucket-audit.log) file')

  ENCODING = 'utf-8'

  # Bounds used by CheckRequiredFormat to decide whether a value looks like a
  # plausible number of milliseconds since the Unix epoch:
  # 2000-01-01 00:00:00 UTC and 2100-01-01 00:00:00 UTC.
  _MINIMUM_TIMESTAMP_MS = 946684800000
  _MAXIMUM_TIMESTAMP_MS = 4102444800000

  # Pipe as field separator.
  _SEPARATOR = pyparsing.Suppress(pyparsing.Literal('|'))

  # Remote IP address field: one or more IPv4/IPv6 addresses separated by
  # commas, such as "63.246.22.199,172.16.1.187" or "0:0:0:0:0:0:0:1"
  _REMOTE_ADDRESS = pyparsing.Combine(
      pyparsing.Word(pyparsing.alphanums + '.:,')).set_results_name(
          'remote_address')

  # Event name: CamelCase Java event class name, such as
  # RestrictedRefAddedEvent
  _EVENT_NAME = pyparsing.Word(
      pyparsing.alphanums + '_').set_results_name('event_name')

  # User name: alphanumeric with dots/hyphens/underscores, or '-'. The
  # character set contains '-', so the placeholder needs no separate
  # alternative.
  _USER_NAME = pyparsing.Word(
      pyparsing.alphanums + '-_./@').set_results_name('user_name')

  # Timestamp: milliseconds since Unix epoch, such as 1400681361906
  _TIMESTAMP_MS = pyparsing.Word(pyparsing.nums).set_parse_action(
      lambda tokens: int(tokens[0], 10)).set_results_name('timestamp_ms')

  # Entity: PRODUCT/key format, such as BITBUCKET/bitbucket or
  # PROJECT/myproject. May also be '-' when not applicable, which the
  # character set already covers.
  _ENTITY = pyparsing.Word(
      pyparsing.alphanums + '/-_.').set_results_name('entity')

  # Request identifier, such as '@8KJQAGx969x538x0', '*1APC3V1x...' or '-'.
  _REQUEST_IDENTIFIER = pyparsing.Word(
      pyparsing.alphanums + '@*-_x').set_results_name('request_identifier')

  # Session identifier, short alphanumeric identifier or '-'.
  _SESSION_IDENTIFIER = pyparsing.Word(
      pyparsing.alphanums + '-_').set_results_name('session_identifier')

  _END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())

  # Details: JSON object or array, or '-' when not applicable.
  # The JSON text can itself contain '|', so instead of stopping at the first
  # pipe, skip up to the point where the remainder of the line consists of
  # exactly the two trailing fields (request and session identifier).
  # fail_on keeps the scan from running past the end of the current line into
  # the next log entry when the line is malformed.
  _DETAILS = pyparsing.SkipTo(
      _SEPARATOR + _REQUEST_IDENTIFIER + _SEPARATOR + _SESSION_IDENTIFIER +
      _END_OF_LINE, fail_on=pyparsing.LineEnd()).set_results_name('details')

  # Audit log line format (pipe-delimited):
  # ip_address | event_name | user | timestamp_ms | entity | details |
  # request_identifier | session_identifier
  _AUDIT_LOG_LINE = (
      _REMOTE_ADDRESS + _SEPARATOR +
      _EVENT_NAME + _SEPARATOR +
      _USER_NAME + _SEPARATOR +
      _TIMESTAMP_MS + _SEPARATOR +
      _ENTITY + _SEPARATOR +
      _DETAILS + _SEPARATOR +
      _REQUEST_IDENTIFIER + _SEPARATOR +
      _SESSION_IDENTIFIER +
      _END_OF_LINE)

  _LINE_STRUCTURES = [('audit_entry', _AUDIT_LOG_LINE)]

  VERIFICATION_GRAMMAR = _AUDIT_LOG_LINE

  def _GetDateTimeValue(self, structure, name):
    """Retrieves a date and time value from a Pyparsing structure.

    Args:
      structure (pyparsing.ParseResults): tokens from a parsed log line.
      name (str): name of the token.

    Returns:
      dfdatetime.PosixTimeInMilliseconds: date and time value or None if not
          available.
    """
    timestamp = self._GetValueFromStructure(structure, name)
    # Compare against None explicitly: a timestamp of 0 is a valid value and
    # must not be treated as missing.
    if timestamp is None:
      return None

    return dfdatetime_posix_time.PosixTimeInMilliseconds(timestamp=timestamp)

  def _GetStrippedValue(self, structure, name, default_value=None):
    """Retrieves a token value, treating '' and '-' as not set.

    Args:
      structure (pyparsing.ParseResults): tokens from a parsed log line.
      name (str): name of the token.
      default_value (Optional[object]): default value.

    Returns:
      object: value in the token or default value if the token is not
          available in the structure, is empty or is the '-' placeholder.
    """
    value = self._GetValueFromStructure(structure, name)
    if not value or value == '-':
      return default_value

    return value

  def _ParseRecord(self, parser_mediator, key, structure):
    """Parses a pyparsing structure.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): tokens from a parsed log line.

    Raises:
      ParseError: if the structure cannot be parsed.
    """
    if key != 'audit_entry':
      raise errors.ParseError(f'Unsupported structure: {key:s}')

    # The skipped-over details text keeps the whitespace that precedes the
    # next separator, hence the strip. Fall back to an empty string so that a
    # missing token cannot raise on the strip call.
    details = self._GetValueFromStructure(structure, 'details') or ''
    details = details.strip()
    if details in ('', '-'):
      details = None

    event_data = BitbucketAuditEventData()
    event_data.details = details
    event_data.entity = self._GetStrippedValue(structure, 'entity')
    event_data.event_name = self._GetValueFromStructure(
        structure, 'event_name')
    event_data.recorded_time = self._GetDateTimeValue(
        structure, 'timestamp_ms')
    event_data.remote_address = self._GetValueFromStructure(
        structure, 'remote_address')
    event_data.request_identifier = self._GetStrippedValue(
        structure, 'request_identifier')
    event_data.session_identifier = self._GetStrippedValue(
        structure, 'session_identifier')
    event_data.user_name = self._GetStrippedValue(structure, 'user_name')

    parser_mediator.ProduceEventData(event_data)

  def CheckRequiredFormat(self, parser_mediator, text_reader):
    """Check if the log record has the minimal structure required by the plugin.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      text_reader (EncodedTextReader): text reader.

    Returns:
      bool: True if this is the correct plugin, False otherwise.
    """
    try:
      structure = self._VerifyString(text_reader.lines)
    except errors.ParseError:
      return False

    timestamp_ms = self._GetValueFromStructure(structure, 'timestamp_ms')

    # The timestamp must look like a plausible millisecond epoch value.
    # Reject values that are too small (before year 2000) or too large
    # (after year 2100).
    if timestamp_ms is None:
      return False

    return (
        self._MINIMUM_TIMESTAMP_MS <= timestamp_ms <=
        self._MAXIMUM_TIMESTAMP_MS)


# Register the plugin class with the text log parser at import time.
text_parser.TextLogParser.RegisterPlugin(BitbucketAuditTextPlugin)