"""Text parser plugin for PostgreSQL application log files.
This is a multi-line log format that records internal database application
logs as well as authentication attempts.
Also see:
https://www.postgresql.org/docs/current/runtime-config-logging.html
"""
import pyparsing
from dfdatetime import time_elements as dfdatetime_time_elements
from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface
class PostgreSQLEventData(events.EventData):
    """PostgreSQL application log data.

    Attributes:
        log_line (str): log message.
        pid (str): process identifier (PID), stored as the concatenation of
            the tokens found between the square brackets of the log line,
            for example "1234" or "1234-1".
        recorded_time (dfdatetime.DateTimeValues): date and time the log entry
            was recorded.
        severity (str): severity.
        user (str): "user@database" string if present. Records the user
            account and database name that was authenticated or attempting
            to authenticate.
    """

    DATA_TYPE = "postgresql:application_log:entry"

    def __init__(self):
        """Initializes event data."""
        super().__init__(data_type=self.DATA_TYPE)
        self.log_line = None
        self.pid = None
        self.recorded_time = None
        self.severity = None
        self.user = None
class PostgreSQLTextPlugin(interface.TextPlugin):
    """Text parser plugin for PostgreSQL application log files."""

    NAME = "postgresql"
    DATA_FORMAT = "PostgreSQL application log file"

    ENCODING = "utf-8"

    # Numeric tokens are converted to int by their parse actions so that the
    # date and time elements can be passed to dfDateTime directly.
    _INTEGER = pyparsing.Word(pyparsing.nums).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _THREE_DIGITS = pyparsing.Word(pyparsing.nums, exact=3).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _FOUR_DIGITS = pyparsing.Word(pyparsing.nums, exact=4).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    # Date and time values are formatted as: YYYY-MM-DD hh:mm:ss.### UTC
    # For example: 2022-04-12 00:16:05.526 UTC
    # The milliseconds are optional and the time zone is matched separately
    # by _TIME_ZONE.
    _DATE_TIME = (
        pyparsing.LineStart()
        + _FOUR_DIGITS.set_results_name("year")
        + pyparsing.Suppress("-")
        + _TWO_DIGITS.set_results_name("month")
        + pyparsing.Suppress("-")
        + _TWO_DIGITS.set_results_name("day_of_month")
        + _TWO_DIGITS.set_results_name("hours")
        + pyparsing.Suppress(":")
        + _TWO_DIGITS.set_results_name("minutes")
        + pyparsing.Suppress(":")
        + _TWO_DIGITS.set_results_name("seconds")
        + pyparsing.Optional(
            pyparsing.Suppress(".")
            + _THREE_DIGITS.set_results_name("milliseconds")
        )
    ).set_results_name("date_time")

    _TIME_ZONE = pyparsing.Word(pyparsing.printables).set_results_name(
        "time_zone"
    )

    # Process identifier enclosed in square brackets: one or more integers,
    # optionally followed by "-" and further integers, for example [1234] or
    # [1234-1].
    _PID = (
        pyparsing.Suppress("[")
        + pyparsing.OneOrMore(_INTEGER)
        + pyparsing.Optional(pyparsing.Literal("-"))
        + pyparsing.ZeroOrMore(_INTEGER)
        + pyparsing.Suppress("]")
    ).set_results_name("pid")

    # Optional "user@database" token, both parts being alphanumeric.
    _USER_AND_DATABASE = (
        pyparsing.Word(pyparsing.alphanums)
        + pyparsing.Literal("@")
        + pyparsing.Word(pyparsing.alphanums)
    ).set_results_name("user_and_database")

    # Severity is a word of ASCII uppercase letters. srange() is used instead
    # of pyparsing.string.ascii_uppercase since "string" is not a documented
    # name of the pyparsing package.
    _SEVERITY = pyparsing.Word(pyparsing.srange("[A-Z]"))

    # A log entry can span multiple lines, it ends at the end of the input or
    # where the date, time and time zone of the next entry start.
    _LOG_LINE_END = pyparsing.StringEnd() | (_DATE_TIME + _TIME_ZONE)

    _END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())

    _LOG_LINE = (
        _DATE_TIME
        + _TIME_ZONE
        + _PID
        + pyparsing.Optional(_USER_AND_DATABASE)
        + _SEVERITY.set_results_name("severity")
        + pyparsing.Suppress(":")
        + pyparsing.SkipTo(_LOG_LINE_END).set_results_name("log_line")
        + pyparsing.ZeroOrMore(_END_OF_LINE)
    )

    _LINE_STRUCTURES = [("log_line", _LOG_LINE)]

    VERIFICATION_GRAMMAR = _LOG_LINE

    # TODO: move this into timeliner
    # Extracted from /usr/share/postgresql/13/timezonesets/Default
    # See https://www.postgresql.org/docs/current/datetime-config-files.html
    _PSQL_TIME_ZONE_MAPPING = {
        "ACDT": "Australia/Adelaide",
        "ACST": "Australia/Adelaide",
        "ADT": "America/Glace_Bay",
        "AEDT": "Australia/Brisbane",
        "AEST": "Australia/Brisbane",
        "AKDT": "America/Anchorage",
        "AKST": "America/Anchorage",
        "AST": "America/Anguilla",
        "AWST": "Australia/Perth",
        "BST": "Europe/London",
        "CDT": "America/Chicago",
        "CEST": "Africa/Ceuta",
        "CET": "Africa/Algiers",
        "CETDST": "Africa/Ceuta",
        "CST": "America/Chicago",
        "EAT": "Africa/Addis_Ababa",
        "EDT": "America/Detroit",
        "EEST": "Africa/Cairo",
        "EET": "Africa/Cairo",
        "EETDST": "Africa/Cairo",
        "EST": "America/Cancun",
        "GMT": "Africa/Abidjan",
        "HKT": "Asia/Hong_Kong",
        "HST": "Pacific/Honolulu",
        "IDT": "Asia/Jerusalem",
        "IST": "Asia/Jerusalem",
        "JST": "Asia/Tokyo",
        "KST": "Asia/Seoul",
        "MDT": "America/Boise",
        "MSK": "Europe/Moscow",
        "MST": "America/Boise",
        "NDT": "America/St_Johns",
        "NST": "America/St_Johns",
        "NZDT": "Antarctica/McMurdo",
        "NZST": "Antarctica/McMurdo",
        "PDT": "America/Dawson",
        "PKST": "Asia/Karachi",
        "PKT": "Asia/Karachi",
        "PST": "America/Dawson",
        "SAST": "Africa/Johannesburg",
        "UCT": "Etc/UCT",
        "WAT": "Africa/Bangui",
        "WET": "Africa/Casablanca",
        "WETDST": "Atlantic/Canary",
    }

    def _ParseRecord(self, parser_mediator, key, structure):
        """Parses a pyparsing structure.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            key (str): name of the parsed structure.
            structure (pyparsing.ParseResults): tokens from a parsed log line.

        Raises:
            ParseError: if the structure cannot be parsed.
        """
        time_elements_structure = self._GetValueFromStructure(
            structure, "date_time"
        )
        time_zone_string = self._GetValueFromStructure(structure, "time_zone")
        pids = self._GetValueFromStructure(structure, "pid", default_value=[])

        log_line = self._GetValueFromStructure(
            structure, "log_line", default_value=""
        )
        log_line = log_line.strip()

        # The user and database tokens are joined back into a single
        # "user@database" string.
        user_and_database = self._GetValueFromStructure(
            structure, "user_and_database", default_value=""
        )
        user_and_database = "".join(user_and_database)

        # TODO: move this into timeliner
        # Time zone strings without a mapping are passed on unchanged.
        time_zone_string = self._PSQL_TIME_ZONE_MAPPING.get(
            time_zone_string, time_zone_string
        )

        date_time = self._ParseTimeElements(time_elements_structure)
        # Any time zone other than UTC is marked as local time, with the
        # (mapped) time zone string as hint.
        if time_zone_string != "UTC":
            date_time.is_local_time = True
            date_time.time_zone_hint = time_zone_string

        event_data = PostgreSQLEventData()
        event_data.log_line = log_line or None
        # The PID tokens, such as 1234, "-" and 1, are concatenated into a
        # single string. Empty values are stored as None, consistent with
        # log_line and user.
        event_data.pid = "".join(str(pid) for pid in pids) or None
        event_data.recorded_time = date_time
        event_data.severity = self._GetValueFromStructure(structure, "severity")
        event_data.user = user_and_database or None

        parser_mediator.ProduceEventData(event_data)

    def _ParseTimeElements(self, time_elements_structure):
        """Parses date and time elements of a log line.

        Args:
            time_elements_structure (pyparsing.ParseResults): date and time
                elements of a log line, either 6 values (without
                milliseconds) or 7 values (with milliseconds).

        Returns:
            dfdatetime.TimeElements: date and time value.

        Raises:
            ParseError: if a valid date and time value cannot be derived from
                the time elements.
        """
        # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
        # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
        # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
        # overriding __getattr__ with a function that returns an empty string
        # when named token does not exist.
        try:
            if len(time_elements_structure) == 6:
                year, month, day_of_month, hours, minutes, seconds = (
                    time_elements_structure
                )
                date_time = dfdatetime_time_elements.TimeElements(
                    time_elements_tuple=(
                        year,
                        month,
                        day_of_month,
                        hours,
                        minutes,
                        seconds,
                    )
                )
            else:
                # Unpacking raises ValueError if the structure does not
                # contain exactly 7 values, which is handled below.
                (
                    year,
                    month,
                    day_of_month,
                    hours,
                    minutes,
                    seconds,
                    milliseconds,
                ) = time_elements_structure
                date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                    time_elements_tuple=(
                        year,
                        month,
                        day_of_month,
                        hours,
                        minutes,
                        seconds,
                        milliseconds,
                    )
                )

            return date_time

        except (IndexError, TypeError, ValueError) as exception:
            # Chain the original exception so the cause remains visible in
            # tracebacks.
            raise errors.ParseError(
                f"Unable to parse time elements with error: {exception!s}"
            ) from exception

    def CheckRequiredFormat(self, parser_mediator, text_reader):
        """Check if the log record has the minimal structure required by the parser.

        Args:
            parser_mediator (ParserMediator): mediates interactions between
                parsers and other components, such as storage and dfVFS.
            text_reader (EncodedTextReader): text reader.

        Returns:
            bool: True if this is the correct plugin, False otherwise.
        """
        try:
            structure = self._VerifyString(text_reader.lines)
        except errors.ParseError:
            return False

        time_elements_structure = self._GetValueFromStructure(
            structure, "date_time"
        )

        # The format is only accepted if the date and time values of the
        # verified structure can be converted into a date and time value.
        try:
            self._ParseTimeElements(time_elements_structure)
        except errors.ParseError:
            return False

        return True
# Register the PostgreSQL text plugin with the text log parser.
text_parser.TextLogParser.RegisterPlugin(PostgreSQLTextPlugin)