# Source code for plaso.parsers.text_plugins.iis

"""Text parser plugin for Microsoft IIS log files."""

import pyparsing

from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface


class IISEventData(events.EventData):
    """Event data for a single Microsoft IIS log line.

    Attributes:
      cs_cookie (str): Content of a sent or received cookie.
      cs_host (str): HTTP host header name.
      cs_referrer (str): Site that referred to the requested site.
      cs_uri_query (str): URI query that was requested.
      cs_username (str): Username of the authenticated user that accessed the
          server, where anonymous users are indicated by a hyphen.
      dest_ip (str): IP address of the server that generated the logged
          activity.
      dest_port (str): Server port number.
      http_method (str): HTTP request method, such as GET or POST.
      http_status (str): HTTP status code that was returned by the server.
      last_written_time (dfdatetime.DateTimeValues): entry last written date
          and time.
      protocol_version (str): HTTP protocol version that was used.
      received_bytes (str): Number of bytes received and processed by the
          server.
      requested_uri_stem (str): File requested, such as index.php or
          Default.htm.
      s_computername (str): Name of the server that generated the logged
          activity.
      sc_substatus (str): HTTP substatus error code that was returned by the
          server.
      sc_win32_status (str): Windows status code of the server.
      sent_bytes (str): Number of bytes sent by the server.
      source_ip (str): IP address of the client that made the request.
      s_sitename (str): Service name and instance number that was running on
          the client.
      time_taken (str): Time taken, in milliseconds, to process the request.
      user_agent (str): User agent that was used.
    """

    # NOTE(review): the plugin grammar in this module converts numeric fields
    # (port, status codes, byte counts, time taken) with int() before they are
    # stored, so some attributes documented as str may hold int values at
    # runtime - confirm against _GetValueFromStructure.

    DATA_TYPE = "iis:log:line"

    def __init__(self):
        """Initializes event data with every attribute unset."""
        super().__init__(data_type=self.DATA_TYPE)
        self.cs_cookie = None
        self.cs_host = None
        self.cs_referrer = None
        self.cs_uri_query = None
        self.cs_username = None
        self.dest_ip = None
        self.dest_port = None
        self.http_method = None
        self.http_status = None
        self.last_written_time = None
        self.protocol_version = None
        self.received_bytes = None
        self.requested_uri_stem = None
        self.s_computername = None
        self.sc_substatus = None
        self.sc_win32_status = None
        self.sent_bytes = None
        self.source_ip = None
        self.s_sitename = None
        self.time_taken = None
        self.user_agent = None
class WinIISTextPlugin(interface.TextPlugin):
    """Text parser plugin for Microsoft IIS log files.

    The plugin starts out with a default log line grammar based on the IIS 6.0
    field order. When the file header contains a "#Fields: " comment line the
    default grammar is replaced by one built from the listed field names.
    """

    NAME = "winiis"
    DATA_FORMAT = "Microsoft IIS log file"

    # Log file are all extended ASCII encoded unless UTF-8 is explicitly enabled.
    # TODO: fix
    ENCODING = "utf-8"

    # A hyphen is used by IIS to indicate a field without a value.
    _BLANK = pyparsing.Literal("-")

    _HTTP_METHOD = pyparsing.Word(pyparsing.alphanums + "-_") | _BLANK

    _INTEGER = (
        pyparsing.Word(pyparsing.nums).set_parse_action(
            lambda tokens: int(tokens[0], 10)
        )
        | _BLANK
    )

    _TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _FOUR_DIGITS = pyparsing.Word(pyparsing.nums, exact=4).set_parse_action(
        lambda tokens: int(tokens[0], 10)
    )

    _IP_ADDRESS = (
        pyparsing.pyparsing_common.ipv4_address
        | pyparsing.pyparsing_common.ipv6_address
        | _BLANK
    )

    PORT = (
        pyparsing.Word(pyparsing.nums, max=6).set_parse_action(
            lambda tokens: int(tokens[0], 10)
        )
        | _BLANK
    )

    # Username can consist of: "domain.username", "domain\username",
    # "domain\user$", "domain/user", "user@domain" or "-" for an anonymous user.
    _USERNAME = pyparsing.Word(pyparsing.alphanums + "-.\\$@/") | _BLANK

    _URI_SAFE_CHARACTERS = "/.?&+;_=()-:,%"

    _URI = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS) | _BLANK

    _URI_STEM = (
        pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS + "$") | _BLANK
    )

    _UA = pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS + "[]") | _BLANK

    _COOKIE = (
        pyparsing.Word(pyparsing.alphanums + _URI_SAFE_CHARACTERS + '@{}"\\')
        | _BLANK
    )

    # Per https://blogs.iis.net/nazim/use-of-special-characters-like-in-an-iis-url
    # IIS does not require that a query comply with RFC1738 restrictions on valid
    # URI characters
    _QUERY = (
        pyparsing.Word(
            pyparsing.alphanums + _URI_SAFE_CHARACTERS + "{}|\\^~[]`'\"<>@$"
        )
        | _BLANK
    )

    _DATE = (
        _FOUR_DIGITS
        + pyparsing.Suppress("-")
        + _TWO_DIGITS
        + pyparsing.Suppress("-")
        + _TWO_DIGITS
    )

    _TIME = (
        _TWO_DIGITS
        + pyparsing.Suppress(":")
        + _TWO_DIGITS
        + pyparsing.Suppress(":")
        + _TWO_DIGITS
    )

    _DATE_TIME_METADATA = (
        pyparsing.Suppress("Date: ")
        + _DATE.set_results_name("date")
        + _TIME.set_results_name("time")
    )

    _FIELDS_METADATA = pyparsing.Suppress(
        "Fields: "
    ) + pyparsing.restOfLine().set_results_name("fields")

    # Any other comment line is consumed without extracting values from it.
    _METADATA = _DATE_TIME_METADATA | _FIELDS_METADATA | pyparsing.restOfLine()

    _END_OF_LINE = pyparsing.Suppress(pyparsing.LineEnd())

    _COMMENT_LOG_LINE = pyparsing.Suppress("#") + _METADATA + _END_OF_LINE

    # IIS 6.x fields: date time s-sitename s-ip cs-method cs-uri-stem
    # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
    # sc-substatus sc-win32-status
    #
    # The results names must match the names read by _ParseLogLine, otherwise
    # the parsed values never reach the event data.
    _IIS_6_0_LOG_LINE = (
        _DATE.set_results_name("date")
        + _TIME.set_results_name("time")
        + _URI.set_results_name("s_sitename")
        + _IP_ADDRESS.set_results_name("dest_ip")
        + _HTTP_METHOD.set_results_name("http_method")
        + _URI.set_results_name("requested_uri_stem")
        + _URI.set_results_name("cs_uri_query")
        + PORT.set_results_name("dest_port")
        + _USERNAME.set_results_name("cs_username")
        + _IP_ADDRESS.set_results_name("source_ip")
        + _UA.set_results_name("user_agent")
        + _INTEGER.set_results_name("http_status")
        + _INTEGER.set_results_name("sc_substatus")
        + _INTEGER.set_results_name("sc_win32_status")
        + _END_OF_LINE
    )

    # IIS 7.x fields: date time s-ip cs-method cs-uri-stem cs-uri-query
    # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
    # sc-win32-status time-taken

    # Maps a field name, as used in the "#Fields: " header line, to the grammar
    # of the corresponding value.
    _LOG_LINE_STRUCTURES = {
        # Common fields. Set results name with underscores, not hyphens because
        # regex will not pick them up.
        "date": _DATE.set_results_name("date"),
        "time": _TIME.set_results_name("time"),
        "s-sitename": _URI.set_results_name("s_sitename"),
        "s-ip": _IP_ADDRESS.set_results_name("dest_ip"),
        "cs-method": _HTTP_METHOD.set_results_name("http_method"),
        "cs-uri-stem": _URI_STEM.set_results_name("requested_uri_stem"),
        "cs-uri-query": _QUERY.set_results_name("cs_uri_query"),
        "s-port": PORT.set_results_name("dest_port"),
        "cs-username": _USERNAME.set_results_name("cs_username"),
        "c-ip": _IP_ADDRESS.set_results_name("source_ip"),
        "cs(User-Agent)": _UA.set_results_name("user_agent"),
        "sc-status": _INTEGER.set_results_name("http_status"),
        "sc-substatus": _INTEGER.set_results_name("sc_substatus"),
        "sc-win32-status": _INTEGER.set_results_name("sc_win32_status"),
        # Less common fields.
        "s-computername": _URI.set_results_name("s_computername"),
        "sc-bytes": _INTEGER.set_results_name("sent_bytes"),
        "cs-bytes": _INTEGER.set_results_name("received_bytes"),
        "time-taken": _INTEGER.set_results_name("time_taken"),
        "cs-version": _URI.set_results_name("protocol_version"),
        "cs-host": _URI.set_results_name("cs_host"),
        "cs(Cookie)": _COOKIE.set_results_name("cs_cookie"),
        # Both spellings of the referrer field name are accepted.
        "cs(Referrer)": _URI.set_results_name("cs_referrer"),
        "cs(Referer)": _URI.set_results_name("cs_referrer"),
    }

    # Define the available log line structures. Default to the IIS v. 6.0
    # common format.

    _HEADER_GRAMMAR = pyparsing.OneOrMore(_COMMENT_LOG_LINE)

    _LINE_STRUCTURES = [("log_line", _IIS_6_0_LOG_LINE)]

    # The dot between the major and minor version number is escaped so that it
    # only matches a literal dot.
    _COMMENT_SOFTWARE_LINE = (
        pyparsing.Regex(
            r"#Software: Microsoft Internet Information Services [0-9]+\.[0-9]+"
        )
        + _END_OF_LINE
    )

    VERIFICATION_GRAMMAR = (
        pyparsing.ZeroOrMore(
            pyparsing.Regex("#(Date|Fields|Version): .*") + _END_OF_LINE
        )
        + _COMMENT_SOFTWARE_LINE
    )

    VERIFICATION_LITERALS = ["#Software: Microsoft Internet Information Services "]

    def __init__(self):
        """Initializes a parser."""
        super().__init__()
        # Date taken from the "#Date: " header line, used as a fallback for log
        # lines that do not contain a date of their own.
        self._day_of_month = None
        self._month = None
        self._year = None

    def _ParseFieldsMetadata(self, parser_mediator, fields):
        """Parses the fields metadata and updates the log line definition to match.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          fields (str): field definitions.
        """
        log_line_structure = pyparsing.Empty()

        for member in fields.split(" "):
            # Consecutive spaces result in empty members.
            if not member:
                continue

            field_structure = self._LOG_LINE_STRUCTURES.get(member)
            # Compare against None explicitly instead of relying on the
            # truthiness of a pyparsing element.
            if field_structure is None:
                field_structure = self._URI
                parser_mediator.ProduceExtractionWarning(
                    f"missing definition for field: {member:s} defaulting to URI"
                )

            log_line_structure += field_structure

        log_line_structure += self._END_OF_LINE

        self._SetLineStructures([("log_line", log_line_structure)])

    def _ParseHeader(self, parser_mediator, text_reader):
        """Parses a text-log file header.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          text_reader (EncodedTextReader): text reader.

        Raises:
          ParseError: when the header cannot be parsed.
        """
        try:
            structure_generator = self._HEADER_GRAMMAR.scan_string(
                text_reader.lines, max_matches=1
            )
            structure, start, end = next(structure_generator)

        except StopIteration:
            # No header was found, hence start and end are not available.
            raise errors.ParseError("No match found.") from None

        except pyparsing.ParseException as exception:
            raise errors.ParseError(exception) from exception

        # The header must be at the very start of the text.
        if not structure or start != 0:
            raise errors.ParseError("No match found.")

        date_elements_tuple = self._GetValueFromStructure(structure, "date")
        if date_elements_tuple:
            self._year, self._month, self._day_of_month = date_elements_tuple

        fields = self._GetValueFromStructure(structure, "fields", default_value="")
        fields = fields.strip()
        if fields:
            self._ParseFieldsMetadata(parser_mediator, fields)

        text_reader.SkipAhead(end)

    def _ParseLogLine(self, parser_mediator, structure):
        """Parse a single log line.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          structure (pyparsing.ParseResults): tokens from a parsed log line.

        Raises:
          ParseError: if a valid date and time value cannot be derived from the
              log line.
        """
        event_data = IISEventData()
        event_data.cs_cookie = self._GetValueFromStructure(structure, "cs_cookie")
        event_data.cs_host = self._GetValueFromStructure(structure, "cs_host")
        event_data.cs_referrer = self._GetValueFromStructure(
            structure, "cs_referrer"
        )
        event_data.cs_uri_query = self._GetValueFromStructure(
            structure, "cs_uri_query"
        )
        event_data.cs_username = self._GetValueFromStructure(
            structure, "cs_username"
        )
        event_data.dest_ip = self._GetValueFromStructure(structure, "dest_ip")
        event_data.dest_port = self._GetValueFromStructure(structure, "dest_port")
        event_data.http_method = self._GetValueFromStructure(
            structure, "http_method"
        )
        event_data.http_status = self._GetValueFromStructure(
            structure, "http_status"
        )
        event_data.protocol_version = self._GetValueFromStructure(
            structure, "protocol_version"
        )
        event_data.last_written_time = self._ParseTimeElements(structure)
        event_data.received_bytes = self._GetValueFromStructure(
            structure, "received_bytes"
        )
        event_data.requested_uri_stem = self._GetValueFromStructure(
            structure, "requested_uri_stem"
        )
        event_data.s_computername = self._GetValueFromStructure(
            structure, "s_computername"
        )
        event_data.sc_substatus = self._GetValueFromStructure(
            structure, "sc_substatus"
        )
        event_data.sc_win32_status = self._GetValueFromStructure(
            structure, "sc_win32_status"
        )
        event_data.sent_bytes = self._GetValueFromStructure(structure, "sent_bytes")
        event_data.source_ip = self._GetValueFromStructure(structure, "source_ip")
        event_data.s_sitename = self._GetValueFromStructure(structure, "s_sitename")
        event_data.time_taken = self._GetValueFromStructure(structure, "time_taken")
        event_data.user_agent = self._GetValueFromStructure(structure, "user_agent")

        parser_mediator.ProduceEventData(event_data)

    def _ParseRecord(self, parser_mediator, key, structure):
        """Parses a pyparsing structure.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): tokens from a parsed log line.

        Raises:
          ParseError: if the structure cannot be parsed.
        """
        # Only a single line structure ("log_line") is defined, hence the key
        # is not used to select a handler.
        self._ParseLogLine(parser_mediator, structure)

    def _ParseTimeElements(self, structure):
        """Parses date and time elements of a log line.

        Args:
          structure (pyparsing.ParseResults): tokens from a parsed log line.

        Returns:
          dfdatetime.TimeElements: date and time value.

        Raises:
          ParseError: if a valid date and time value cannot be derived from
              the time elements.
        """
        try:
            hours, minutes, seconds = self._GetValueFromStructure(structure, "time")

            date_elements_structure = self._GetValueFromStructure(structure, "date")
            if date_elements_structure:
                year, month, day_of_month = date_elements_structure
            else:
                # Fall back to the date of the "#Date: " header line.
                year, month, day_of_month = (
                    self._year,
                    self._month,
                    self._day_of_month,
                )

            return dfdatetime_time_elements.TimeElements(
                time_elements_tuple=(
                    year,
                    month,
                    day_of_month,
                    hours,
                    minutes,
                    seconds,
                )
            )

        except (IndexError, TypeError, ValueError) as exception:
            raise errors.ParseError(
                f"Unable to parse time elements with error: {exception!s}"
            ) from exception

    def _ResetState(self):
        """Resets stored values."""
        self._day_of_month = None
        self._month = None
        self._year = None

        # Restore the default line structures, which a previously parsed
        # "#Fields: " header line could have replaced.
        self._SetLineStructures(self._LINE_STRUCTURES)

    def CheckRequiredFormat(self, parser_mediator, text_reader):
        """Check if the log record has the minimal structure required by the plugin.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfVFS.
          text_reader (EncodedTextReader): text reader.

        Returns:
          bool: True if this is the correct plugin, False otherwise.
        """
        try:
            self._VerifyString(text_reader.lines)
        except errors.ParseError:
            return False

        self._ResetState()

        return True
# Registers the IIS plugin with the text log parser when this module is
# imported.
text_parser.TextLogParser.RegisterPlugin(WinIISTextPlugin)