Source code for plaso.parsers.text_plugins.atlassian_bitbucket

"""Text parser plugin for Atlassian Bitbucket log files.

This is for the atlassian-bitbucket.log file, one of multiple log files
produced by a Bitbucket DC/Server installation.

The default log format (logback pattern: %date %-5level [%thread] %request
%logger{36} %m%n%eThrowable) produces lines of the form:

  YYYY-MM-DD HH:MM:SS,mmm LEVEL [thread] logger_class message

With the optional %request context fields between [thread] and logger_class:

  YYYY-MM-DD HH:MM:SS,mmm LEVEL [thread] user request_identifier
  session_identifier ip_address "action" logger_class message

All %request fields are optional and may be absent. In practice, lines often
omit some or all of these fields depending on the context of the log event.

Also see:
  https://support.atlassian.com/bitbucket-data-center/kb/how-to-read-the-bitbucket-data-center-log-formats/
  https://support.atlassian.com/bitbucket-data-center/kb/how-to-change-the-bitbucket-application-log-format/
"""

import pyparsing

from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface


[docs] class AtlassianBitbucketEventData(events.EventData): """Bitbucket application log event data. Attributes: body (str): the freeform body of the log line (the log message). ip_address (str): the client IP address associated with the request, if present in the log line. level (str): the logging level of the event, such as INFO, WARN or ERROR. logger_class (str): the abbreviated or full class name responsible for logging the event, such as c.a.b.m.r.DefaultRepositoryManager. request_action (str): the request action string, such as "TransactionService/Transact", if present. request_id (str): the unique request identifier, such as 2CM38K4Fx339x113x2, if present. session_identifier (str): the session identifier, if present. thread (str): the JVM thread name from which the log event originated. user_name (str): the name of the user associated with the request, if present in the log line. written_time (dfdatetime.DateTimeValues): entry written date and time. """ DATA_TYPE = 'atlassian:bitbucket:line'
[docs] def __init__(self): """Initializes event data.""" super().__init__(data_type=self.DATA_TYPE) self.body = None self.ip_address = None self.level = None self.logger_class = None self.request_action = None self.request_identifier = None self.session_identifier = None self.thread = None self.user_name = None self.written_time = None
[docs] class AtlassianBitbucketTextPlugin(interface.TextPlugin): """Text parser plugin for Atlassian Bitbucket log files.""" NAME = 'atlassian_bitbucket' DATA_FORMAT = 'Atlassian Bitbucket log file' ENCODING = 'utf-8' _TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).set_parse_action( lambda tokens: int(tokens[0], 10)) _THREE_DIGITS = pyparsing.Word(pyparsing.nums, exact=3).set_parse_action( lambda tokens: int(tokens[0], 10)) _FOUR_DIGITS = pyparsing.Word(pyparsing.nums, exact=4).set_parse_action( lambda tokens: int(tokens[0], 10)) # Bitbucket log levels. _BITBUCKET_LEVELS = ['DEBUG', 'ERROR', 'FATAL', 'INFO', 'TRACE', 'WARN'] # Date and time format: 2020-09-08 07:53:45,084 _DATE_TIME = ( _FOUR_DIGITS + pyparsing.Suppress('-') + _TWO_DIGITS + pyparsing.Suppress('-') + _TWO_DIGITS + _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS + pyparsing.Suppress(',') + _THREE_DIGITS).set_results_name( 'date_time') # Log level (DEBUG, ERROR, FATAL, INFO, TRACE, WARN). _LOG_LEVEL = pyparsing.oneOf(_BITBUCKET_LEVELS).set_results_name('level') # Thread name enclosed in brackets. Thread names do not contain ']'. _BITBUCKET_THREAD = ( pyparsing.Suppress('[') + pyparsing.SkipTo(']').set_results_name('thread') + pyparsing.Suppress(']')) # Logger class, which can be abbreviated or full Java class name, such as: # c.a.b.m.r.DefaultRepositoryManager # com.atlassian.bitbucket.internal.boot.log.BuildInfoLogger # o.h.i.ExceptionMapperStandardImpl # org.hibernate.SQL # # The class name must start with a letter and contain at least one dot. _BITBUCKET_LOGGER = pyparsing.Regex( r'[a-zA-Z][a-zA-Z0-9_$]*(?:\.[a-zA-Z][a-zA-Z0-9_$]*)+').set_results_name( 'logger_class') # Log message body: rest of line. _BITBUCKET_LOG_MESSAGE = pyparsing.SkipTo( pyparsing.LineEnd()).set_results_name('body') # The %request context block between [thread] and logger_class is optional. # It may contain: user, request_identifier, session_identifier, ip_address, # "action". # We capture everything between ']' and the logger class as original text and # parse it afterwards. _REQUEST_CONTEXT_TEXT = pyparsing.SkipTo( _BITBUCKET_LOGGER).set_results_name('request_context_text') # Guard that ensures the logger class is NOT preceded by '[', which # would indicate a Confluence-format log line where the logger is wrapped # in brackets: [thread] [logger.class] message. _NOT_BRACKET = pyparsing.NotAny(pyparsing.Literal('[')) # Complete log line structure: # <timestamp> <level> [<thread>] [optional request context] <logger> <body> # _NOT_BRACKET before _REQUEST_CONTEXT_RAW ensures the first non-whitespace # character after [thread] is not '[', rejecting Confluence-format lines. _BITBUCKET_LOG_LINE = ( _DATE_TIME + _LOG_LEVEL + _BITBUCKET_THREAD + _REQUEST_CONTEXT_TEXT + _BITBUCKET_LOGGER + _BITBUCKET_LOG_MESSAGE) _LINE_STRUCTURES = [('log_entry', _BITBUCKET_LOG_LINE)] # Sub-patterns for parsing the raw request context string. # Request identifier: alphanumeric token with 'x' separators. _REQUEST_IDENTIFIER = pyparsing.Regex(r'[0-9A-Za-z]{6,}x[0-9]+x[0-9]+x[0-9]+') # Session identifier: @/*/alphanumeric token, optionally comma-separated with # mesh. _SESSION_IDENTIFIER = pyparsing.Regex( r'[@*][A-Za-z0-9_x]+(?:,[A-Za-z0-9_x]+)?') # IPv4 or IPv6 address. _RE_IP_ADDRESS = pyparsing.Regex( r'(?:\d{1,3}\.){3}\d{1,3}|(?:[0-9a-fA-F]*:){2,7}[0-9a-fA-F]*') # Quoted action string. _RE_REQUEST_ACTION = pyparsing.QuotedString('"') # Username: word characters, dots, hyphens, slashes. _RE_USER_NAME = pyparsing.Regex(r'[A-Za-z][A-Za-z0-9._\-/]*') _REQUEST_CONTEXT_GRAMMAR = ( pyparsing.Optional(_RE_USER_NAME.set_results_name('request_user')) + pyparsing.Optional(_REQUEST_IDENTIFIER.set_results_name( 'request_identifier')) + pyparsing.Optional(_SESSION_IDENTIFIER.set_results_name( 'session_identifier')) + pyparsing.Optional(_RE_IP_ADDRESS.set_results_name('ip_address')) + pyparsing.Optional(_RE_REQUEST_ACTION.set_results_name('request_action'))) _LINE_STRUCTURES = [('log_entry', _BITBUCKET_LOG_LINE)] VERIFICATION_GRAMMAR = _BITBUCKET_LOG_LINE def _ParseRecord(self, parser_mediator, key, structure): """Parses a pyparsing structure. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. key (str): name of the parsed structure. structure (pyparsing.ParseResults): tokens from a parsed log line. Raises: ParseError: if the structure cannot be parsed. """ if key != 'log_entry': raise errors.ParseError(f'Unsupported structure: {key:s}') time_elements_structure = self._GetValueFromStructure( structure, 'date_time') request_context_text = self._GetValueFromStructure( structure, 'request_context_text', default_value='').strip() event_data = AtlassianBitbucketEventData() event_data.body = self._GetValueFromStructure( structure, 'body', default_value='').strip() or None event_data.level = self._GetValueFromStructure(structure, 'level') event_data.logger_class = self._GetValueFromStructure( structure, 'logger_class') event_data.thread = self._GetValueFromStructure(structure, 'thread') if request_context_text: request_context = self._REQUEST_CONTEXT_GRAMMAR.parse_string( request_context_text, parse_all=False) event_data.user_name = request_context.get('request_user') or None event_data.request_identifier = request_context.get( 'request_identifier') or None event_data.session_identifier = request_context.get( 'session_identifier') or None event_data.ip_address = request_context.get('ip_address') or None event_data.request_action = request_context.get('request_action') or None event_data.written_time = self._ParseTimeElements(time_elements_structure) parser_mediator.ProduceEventData(event_data) def _ParseTimeElements(self, time_elements_structure): """Parses date and time elements of a log line. Args: time_elements_structure (pyparsing.ParseResults): date and time elements of a log line. Returns: dfdatetime.TimeElements: date and time value. Raises: ParseError: if a valid date and time value cannot be derived from the time elements. """ try: date_time = dfdatetime_time_elements.TimeElementsInMilliseconds( time_elements_tuple=time_elements_structure) date_time.is_local_time = True return date_time except (IndexError, TypeError, ValueError) as exception: raise errors.ParseError( f'Unable to parse time elements with error: {exception!s}')
[docs] def CheckRequiredFormat(self, parser_mediator, text_reader): """Check if the log record has the minimal structure required by the plugin. Args: parser_mediator (ParserMediator): mediates interactions between parsers and other components, such as storage and dfVFS. text_reader (EncodedTextReader): text reader. Returns: bool: True if this is the correct plugin, False otherwise. """ # Require at least 2 non-empty matching lines to guard against false # positives. verified_lines = 0 for line in text_reader.lines.splitlines(): if not line: continue try: structure = self._VerifyString(line) except errors.ParseError: return False verified_lines += 1 if verified_lines >= 2: break if verified_lines < 2: return False time_elements_structure = self._GetValueFromStructure( structure, 'date_time') try: self._ParseTimeElements(time_elements_structure) except errors.ParseError: return False return True
text_parser.TextLogParser.RegisterPlugin(AtlassianBitbucketTextPlugin)