"""Text parser plugin for Microsoft IIS log files."""
import pyparsing
from dfdatetime import time_elements as dfdatetime_time_elements
from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface
class IISEventData(events.EventData):
    """Event data for a single line of a Microsoft IIS log file.

    Values are copied from the parsed log line as-is. Fields that the grammar
    in this file parses as integers are stored as int, and as the string "-"
    when the log field is blank.

    Attributes:
      cs_cookie (str): Content of a sent or received cookie.
      cs_host (str): HTTP host header name.
      cs_referrer (str): Site that referred to the requested site.
      cs_uri_query (str): URI query that was requested.
      cs_username (str): Username of the authenticated user that accessed
          the server, where anonymous users are indicated by a hyphen.
      dest_ip (str): IP address of the server that generated the logged
          activity.
      dest_port (int|str): Server port number.
      http_method (str): HTTP request method, such as GET or POST.
      http_status (int|str): HTTP status code that was returned by the server.
      last_written_time (dfdatetime.DateTimeValues): entry last written date
          and time.
      protocol_version (str): HTTP protocol version that was used.
      received_bytes (int|str): Number of bytes received and processed by the
          server.
      requested_uri_stem (str): File requested, such as index.php or
          Default.htm.
      s_computername (str): Name of the server that generated the logged
          activity.
      sc_substatus (int|str): HTTP substatus error code that was returned by
          the server.
      sc_win32_status (int|str): Windows status code of the server.
      sent_bytes (int|str): Number of bytes sent by the server.
      source_ip (str): IP address of the client that made the request.
      s_sitename (str): Service name and instance number that was running on
          the client.
      time_taken (int|str): Time taken, in milliseconds, to process the
          request.
      user_agent (str): User agent that was used.
    """

    DATA_TYPE = "iis:log:line"

    def __init__(self):
        """Initializes event data with every attribute unset (None)."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cs_cookie = None
        self.cs_host = None
        self.cs_referrer = None
        self.cs_uri_query = None
        self.cs_username = None
        self.dest_ip = None
        self.dest_port = None
        self.http_method = None
        self.http_status = None
        self.last_written_time = None
        self.protocol_version = None
        self.received_bytes = None
        self.requested_uri_stem = None
        self.s_computername = None
        self.sc_substatus = None
        self.sc_win32_status = None
        self.sent_bytes = None
        self.source_ip = None
        self.s_sitename = None
        self.time_taken = None
        self.user_agent = None
class WinIISTextPlugin(interface.TextPlugin):
    """Text parser plugin for Microsoft IIS log files.

    The plugin starts with the IIS 6.0 default line layout and replaces it
    with a layout derived from the "#Fields: " header line when the header
    contains one.
    """

    NAME = "winiis"
    DATA_FORMAT = "Microsoft IIS log file"

    # Log files are all extended ASCII encoded unless UTF-8 is explicitly
    # enabled.
    # TODO: fix
    ENCODING = "utf-8"

    # A hyphen is used for a field that has no value.
    _BLANK = pyparsing.Literal("-")

    _HTTP_METHOD = pyparsing.Word(pyparsing.alphanums + "-_") | _BLANK

    _INTEGER = (
        pyparsing.Word(pyparsing.nums).set_parse_action(
            lambda tokens: int(tokens[0], 10)
        )
        | _BLANK
    )

    _TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _FOUR_DIGITS = pyparsing.Word(pyparsing.nums, exact=4).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _IP_ADDRESS = (
        pyparsing.pyparsing_common.ipv4_address
        | pyparsing.pyparsing_common.ipv6_address
        | _BLANK
    )

    # NOTE: named without a leading underscore, unlike the other grammar
    # elements; the name is kept as-is so existing references keep working.
    PORT = (
        pyparsing.Word(pyparsing.nums, max=6).set_parse_action(
            lambda tokens: int(tokens[0], 10)
        )
        | _BLANK
    )

    # Username can consist of: "domain.username", "domain\username",
    # "domain\user$", "domain/user", "user@domain" or "-" for an anonymous
    # user.
    _USERNAME = pyparsing.Word(pyparsing.alphanums + "-.\\$@/") | _BLANK

    _URI_SAFE_CHARACTERS = "/.?&+;_=()-:,%"

    _URI = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS) | _BLANK

    _URI_STEM = (
        pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS + "$") | _BLANK
    )

    _UA = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS + "[]") | _BLANK

    _COOKIE = (
        pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS + '@{}"\\')
        | _BLANK
    )

    # Per https://blogs.iis.net/nazim/use-of-special-characters-like-in-an-iis-url
    # IIS does not require that a query comply with RFC1738 restrictions on
    # valid URI characters.
    _QUERY = (
        pyparsing.Word(
            pyparsing.alphanums + _URI_SAFE_CHARACTERS + "{}|\\^~[]`'\"<>@$"
        )
        | _BLANK
    )

    _DATE = (
        _FOUR_DIGITS
        + pyparsing.Suppress("-")
        + _TWO_DIGITS
        + pyparsing.Suppress("-")
        + _TWO_DIGITS
    )

    _TIME = (
        _TWO_DIGITS
        + pyparsing.Suppress(":")
        + _TWO_DIGITS
        + pyparsing.Suppress(":")
        + _TWO_DIGITS
    )

    # Header ("#...") lines: a "Date: " line, a "Fields: " line, or any other
    # comment line whose content is consumed as-is.
    _DATE_TIME_METADATA = (
        pyparsing.Suppress("Date: ")
        + _DATE.set_results_name("date")
        + _TIME.set_results_name("time")
    )

    _FIELDS_METADATA = pyparsing.Suppress(
        "Fields: "
    ) + pyparsing.restOfLine().set_results_name("fields")

    _METADATA = _DATE_TIME_METADATA | _FIELDS_METADATA | pyparsing.restOfLine()

    _END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())

    _COMMENT_LOG_LINE = pyparsing.Suppress("#") + _METADATA + _END_OF_LINE

    # IIS 6.x fields: date time s-sitename s-ip cs-method cs-uri-stem
    # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
    # sc-substatus sc-win32-status
    #
    # The results names must match the names read in _ParseLogLine, hence
    # "requested_uri_stem" and "http_status" for cs-uri-stem and sc-status.
    _IIS_6_0_LOG_LINE = (
        _DATE.set_results_name("date")
        + _TIME.set_results_name("time")
        + _URI.set_results_name("s_sitename")
        + _IP_ADDRESS.set_results_name("dest_ip")
        + _HTTP_METHOD.set_results_name("http_method")
        + _URI.set_results_name("requested_uri_stem")
        + _URI.set_results_name("cs_uri_query")
        + PORT.set_results_name("dest_port")
        + _USERNAME.set_results_name("cs_username")
        + _IP_ADDRESS.set_results_name("source_ip")
        + _UA.set_results_name("user_agent")
        + _INTEGER.set_results_name("http_status")
        + _INTEGER.set_results_name("sc_substatus")
        + _INTEGER.set_results_name("sc_win32_status")
        + _END_OF_LINE
    )

    # IIS 7.x fields: date time s-ip cs-method cs-uri-stem cs-uri-query
    # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
    # sc-win32-status time-taken
    #
    # Maps a field name, as it appears in the "#Fields: " header line, to the
    # grammar of that field. Results names use underscores, not hyphens,
    # because regex will not pick them up.
    _LOG_LINE_STRUCTURES = {
        # Common fields.
        "date": _DATE.set_results_name("date"),
        "time": _TIME.set_results_name("time"),
        "s-sitename": _URI.set_results_name("s_sitename"),
        "s-ip": _IP_ADDRESS.set_results_name("dest_ip"),
        "cs-method": _HTTP_METHOD.set_results_name("http_method"),
        "cs-uri-stem": _URI_STEM.set_results_name("requested_uri_stem"),
        "cs-uri-query": _QUERY.set_results_name("cs_uri_query"),
        "s-port": PORT.set_results_name("dest_port"),
        "cs-username": _USERNAME.set_results_name("cs_username"),
        "c-ip": _IP_ADDRESS.set_results_name("source_ip"),
        "cs(User-Agent)": _UA.set_results_name("user_agent"),
        "sc-status": _INTEGER.set_results_name("http_status"),
        "sc-substatus": _INTEGER.set_results_name("sc_substatus"),
        "sc-win32-status": _INTEGER.set_results_name("sc_win32_status"),
        # Less common fields.
        "s-computername": _URI.set_results_name("s_computername"),
        "sc-bytes": _INTEGER.set_results_name("sent_bytes"),
        "cs-bytes": _INTEGER.set_results_name("received_bytes"),
        "time-taken": _INTEGER.set_results_name("time_taken"),
        "cs-version": _URI.set_results_name("protocol_version"),
        "cs-host": _URI.set_results_name("cs_host"),
        "cs(Cookie)": _COOKIE.set_results_name("cs_cookie"),
        # Both spellings map onto the same event data attribute.
        "cs(Referrer)": _URI.set_results_name("cs_referrer"),
        "cs(Referer)": _URI.set_results_name("cs_referrer"),
    }

    _HEADER_GRAMMAR = pyparsing.OneOrMore(_COMMENT_LOG_LINE)

    # Define the available log line structures. Default to the IIS v. 6.0
    # common format.
    _LINE_STRUCTURES = [("log_line", _IIS_6_0_LOG_LINE)]

    # The dot between the version numbers is escaped so that it only matches
    # a literal "." and not an arbitrary character.
    _COMMENT_SOFTWARE_LINE = (
        pyparsing.Regex(
            r"#Software: Microsoft Internet Information Services [0-9]+\.[0-9]+"
        )
        + _END_OF_LINE
    )

    VERIFICATION_GRAMMAR = (
        pyparsing.ZeroOrMore(
            pyparsing.Regex("#(Date|Fields|Version): .*") + _END_OF_LINE
        )
        + _COMMENT_SOFTWARE_LINE
    )

    VERIFICATION_LITERALS = ["#Software: Microsoft Internet Information Services "]

    def __init__(self):
        """Initializes a parser."""
        super().__init__()
        # Date taken from the "#Date: " header line; used for log lines that
        # do not carry a date of their own.
        self._day_of_month = None
        self._month = None
        self._year = None

    def _ParseFieldsMetadata(self, parser_mediator, fields):
        """Parses the fields metadata and updates the log line definition to match.

        A field name without an entry in _LOG_LINE_STRUCTURES is parsed with
        the generic URI grammar and an extraction warning is produced.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          fields (str): field definitions.
        """
        log_line_structure = pyparsing.Empty()

        # str.split() without arguments drops empty members caused by repeated
        # separators.
        for member in fields.split():
            field_structure = self._LOG_LINE_STRUCTURES.get(member)
            if field_structure is None:
                field_structure = self._URI
                parser_mediator.ProduceExtractionWarning(
                    f"missing definition for field: {member:s} defaulting to URI"
                )

            log_line_structure += field_structure

        log_line_structure += self._END_OF_LINE

        self._SetLineStructures([("log_line", log_line_structure)])

    def _ParseHeader(self, parser_mediator, text_reader):
        """Parses a text-log file header.

        Stores the date of a "#Date: " line, rebuilds the log line structure
        from a "#Fields: " line and skips the text reader past the header.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          text_reader (EncodedTextReader): text reader.

        Raises:
          ParseError: when the header cannot be parsed.
        """
        structure, start, end = None, None, None
        try:
            structure_generator = self._HEADER_GRAMMAR.scan_string(
                text_reader.lines, max_matches=1
            )
            structure, start, end = next(structure_generator)

        except StopIteration:
            # No header was found; handled by the check below.
            pass

        except pyparsing.ParseException as exception:
            raise errors.ParseError(exception) from exception

        # The header must be at the very start of the text.
        if not structure or start != 0:
            raise errors.ParseError("No match found.")

        date_elements_tuple = self._GetValueFromStructure(structure, "date")
        if date_elements_tuple:
            self._year, self._month, self._day_of_month = date_elements_tuple

        fields = self._GetValueFromStructure(structure, "fields", default_value="")
        fields = fields.strip()
        if fields:
            self._ParseFieldsMetadata(parser_mediator, fields)

        text_reader.SkipAhead(end)

    def _ParseLogLine(self, parser_mediator, structure):
        """Parses a single log line and produces its event data.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          structure (pyparsing.ParseResults): tokens from a parsed log line.

        Raises:
          ParseError: if the date and time of the log line cannot be parsed.
        """
        event_data = IISEventData()
        event_data.cs_cookie = self._GetValueFromStructure(structure, "cs_cookie")
        event_data.cs_host = self._GetValueFromStructure(structure, "cs_host")
        event_data.cs_referrer = self._GetValueFromStructure(
            structure, "cs_referrer"
        )
        event_data.cs_uri_query = self._GetValueFromStructure(
            structure, "cs_uri_query"
        )
        event_data.cs_username = self._GetValueFromStructure(
            structure, "cs_username"
        )
        event_data.dest_ip = self._GetValueFromStructure(structure, "dest_ip")
        event_data.dest_port = self._GetValueFromStructure(structure, "dest_port")
        event_data.http_method = self._GetValueFromStructure(
            structure, "http_method"
        )
        event_data.http_status = self._GetValueFromStructure(
            structure, "http_status"
        )
        event_data.protocol_version = self._GetValueFromStructure(
            structure, "protocol_version"
        )
        event_data.last_written_time = self._ParseTimeElements(structure)
        event_data.received_bytes = self._GetValueFromStructure(
            structure, "received_bytes"
        )
        event_data.requested_uri_stem = self._GetValueFromStructure(
            structure, "requested_uri_stem"
        )
        event_data.s_computername = self._GetValueFromStructure(
            structure, "s_computername"
        )
        event_data.sc_substatus = self._GetValueFromStructure(
            structure, "sc_substatus"
        )
        event_data.sc_win32_status = self._GetValueFromStructure(
            structure, "sc_win32_status"
        )
        event_data.sent_bytes = self._GetValueFromStructure(structure, "sent_bytes")
        event_data.source_ip = self._GetValueFromStructure(structure, "source_ip")
        event_data.s_sitename = self._GetValueFromStructure(structure, "s_sitename")
        event_data.time_taken = self._GetValueFromStructure(structure, "time_taken")
        event_data.user_agent = self._GetValueFromStructure(structure, "user_agent")

        parser_mediator.ProduceEventData(event_data)

    def _ParseRecord(self, parser_mediator, key, structure):
        """Parses a pyparsing structure.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          key (str): name of the parsed structure. Not used, since this plugin
              only registers "log_line" structures.
          structure (pyparsing.ParseResults): tokens from a parsed log line.

        Raises:
          ParseError: if the structure cannot be parsed.
        """
        self._ParseLogLine(parser_mediator, structure)

    def _ParseTimeElements(self, structure):
        """Parses date and time elements of a log line.

        The date of the log line is used when present, otherwise the date
        stored from the file header is used.

        Args:
          structure (pyparsing.ParseResults): tokens from a parsed log line.

        Returns:
          dfdatetime.TimeElements: date and time value.

        Raises:
          ParseError: if a valid date and time value cannot be derived from
              the time elements.
        """
        try:
            hours, minutes, seconds = self._GetValueFromStructure(structure, "time")

            date_elements_structure = self._GetValueFromStructure(structure, "date")
            if date_elements_structure:
                year, month, day_of_month = date_elements_structure
            else:
                year = self._year
                month = self._month
                day_of_month = self._day_of_month

            time_elements_tuple = (year, month, day_of_month, hours, minutes, seconds)

            return dfdatetime_time_elements.TimeElements(
                time_elements_tuple=time_elements_tuple
            )

        except (IndexError, TypeError, ValueError) as exception:
            raise errors.ParseError(
                f"Unable to parse time elements with error: {exception!s}"
            ) from exception

    def _ResetState(self):
        """Resets stored values and restores the default line structures."""
        self._day_of_month = None
        self._month = None
        self._year = None

        self._SetLineStructures(self._LINE_STRUCTURES)

    def CheckRequiredFormat(self, parser_mediator, text_reader):
        """Check if the log record has the minimal structure required by the plugin.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          text_reader (EncodedTextReader): text reader.

        Returns:
          bool: True if this is the correct plugin, False otherwise.
        """
        try:
            self._VerifyString(text_reader.lines)
        except errors.ParseError:
            return False

        # Start from a clean state for the file that is about to be parsed.
        self._ResetState()

        return True
# Register the IIS plugin with the text log parser.
text_parser.TextLogParser.RegisterPlugin(WinIISTextPlugin)