1
votes

I am using Grok patterns to parse firewall logs, and the patterns work in the Grok Debugger. The log format varies from entry to entry, so I've created a pattern to match each variation. My issue is that ELK is producing multiple duplicate values for the parsed fields. I am sure there is a way to shorten my grok patterns, but at this point I am not able to figure out how, so any assistance would be awesome. See below for examples:

Sample logs:

Nov 15 12:18:31 removed_ip 2017:11:15-12:18:31 sophie ulogd[23109]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth3" srcmac="removed_mac" dstmac="removed_mac" srcip="removed_ip" dstip="removed_ip" proto="6" length="40" tos="0x00" prec="0x00" ttl="247" srcport="58261" dstport="5315" tcpflags="SYN"
Nov 15 12:33:01 removed_ip 2017:11:15-12:33:01 sophie ulogd[23109]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60003" outitf="wlan1" srcmac="removed_mac" srcip="removed_ip" dstip="removed_ip" proto="6" length="40" tos="0x00" prec="0x00" ttl="64" srcport="443" dstport="49824" tcpflags="RST" 
Nov 15 12:20:29 removed_ip 2017:11:15-12:20:29 sophie httpproxy[6835]: id="0001" severity="info" sys="SecureWeb" sub="http" name="http access" action="pass" method="GET" srcip="removed_ip" dstip="removed_ip" user="" group="" ad_domain="" statuscode="200" cached="0" profile="REF_DefaultHTTPProfile (Default Web Filter Profile)" filteraction="REF_DefaultHTTPCFFAction (Default content filter action)" size="371" request="0xd3a9ac00" url="http://removed_ip/icingaweb2/monitoring/tactical?view=compact" referer="http://removed_ip/icingaweb2/dashboard" error="" authtime="0" dnstime="218" cattime="0" avscantime="2584" fullreqtime="15393756" device="0" auth="0" ua="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36" exceptions="" overridecategory="1" overridereputation="1" category="105" reputation="trusted" categoryname="Business" country="United States" sandbox="-" content-type="text/xml"
Nov 15 12:30:33 removed_ip 2017:11:15-12:30:33 sophie httpproxy[6835]: id="0001" severity="info" sys="SecureWeb" sub="http" name="http access" action="pass" method="CONNECT" srcip="removed_ip" dstip="removed_ip" user="" group="" ad_domain="" statuscode="200" cached="0" profile="REF_DefaultHTTPProfile (Default Web Filter Profile)" filteraction="REF_DefaultHTTPCFFAction (Default content filter action)" size="11571" request="0xd21c1800" url="https://www.google.com/" referer="" error="" authtime="0" dnstime="1" cattime="97" avscantime="0" fullreqtime="361956728" device="0" auth="0" ua="" exceptions="" category="145" reputation="neutral" categoryname="Search Engines" country="United States" application="google" app-id="182"

Sample Grok Pattern:

filter { 
if [type] == "utm"{
grok {
  match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" mark=\"%{WORD:mark}\" app=\"%{WORD:app}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "}
}
grok {
  match => { "message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "}
 }
grok {
  match => { "message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "}
 }
grok {
  match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" outitf=\"%{NOTSPACE:outitf}\" mark=\"%{DATA:mark}\" app=\"%{DATA:app}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "}
 }
grok {
  match => { "message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "}
 }
grok {
  match => { "message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "}
 }
grok {
  match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" tcpflags=\"%{DATA:tcpflags}\" \"(,)\" "}
}
grok {
 match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" tcpflags=\"%{DATA:tcpflags}\" "}
}
grok {
 match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" tcpflags=\"%{DATA:tcpflags}\" "}
}
grok {
 match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" mark=\"%{WORD:mark}\" app=\"%{WORD:app}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" tcpflags=\"%{DATA:tcpflags}\" "}
}
grok {
 match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" mark=\"%{WORD:mark}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" tcpflags=\"%{DATA:tcpflags}\" "}
}
}
if "httpproxy" in [message]{
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" category=\"%{INT:category}\" reputation=\"%{WORD:reputation}\" categoryname=\"%{DATA:categoryname}\" country=\"%{DATA:country}\" application=\"%{WORD:application}\" app-id=\"%{INT:app-id}\" sandbox=\"%{DATA:sandbox}\" content-type=\"%{DATA:content-type}\" "}
}
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" overridecategory=\"%{INT:overridecategory}\" overridereputation=\"%{INT:overridereputation}\" category=\"%{INT:category}\" reputation=\"%{WORD:reputation}\" categoryname=\"%{DATA:categoryname}\" country=\"%{DATA:country}\" sandbox=\"%{DATA:sandbox}\" content-type=\"%{DATA:content-type}\" "}
}
grok {
 match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" category=\"%{INT:category}\" reputation=\"%{WORD:reputation}\" categoryname=\"%{DATA:categoryname}\" sandbox=\"%{DATA:sandbox}\" content-type=\"%{DATA:content-type}\" "}
}
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" "}
}
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" overridecategory=\"%{INT:overridecategory}\" overridereputation=\"%{INT:overridereputation}\" country=\"%{DATA:country}\" "}
}
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" overridecategory=\"%{INT:overridecategory}\" overridereputation=\"%{INT:overridereputation}\" category=\"%{INT:category}\" reputation=\"%{WORD:reputation}\" categoryname=\"%{DATA:categoryname}\" country=\"%{DATA:country}\" "}
}
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" category=\"%{INT:category}\" reputation=\"%{WORD:reputation}\" categoryname=\"%{DATA:categoryname}\" country=\"%{DATA:country}\" "}
}
grok {
match => {"message" => "%{SYSLOGTIMESTAMP:timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}]\: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{DATA:action}\" method=\"%{WORD:method}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" user=\"%{DATA:user}\" group=\"%{DATA:group}\" ad_domain=\"%{DATA:ad_domain}\" statuscode=\"%{INT:statuscode}\" cached=\"%{INT:cached}\" profile=\"%{DATA:profile}\" filteraction=\"%{DATA:filteraction}\" size=\"%{INT:size}\" request=\"%{BASE16FLOAT:request}\" url=\"%{URI:url}\" referer=\"%{DATA:referer}\" error=\"%{DATA:error}\" authtime=\"%{INT:authtime}\" dnstime=\"%{INT:dnstime}\" cattime=\"%{INT:cattime}\" avscantime=\"%{INT:avscantime}\" fullreqtime=\"%{INT:fullreqtime}\" device=\"%{INT:device}\" auth=\"%{INT:auth}\" ua=\"%{DATA:ua}\" exceptions=\"%{DATA:exceptions}\" category=\"%{INT:category}\" reputation=\"%{WORD:reputation}\" categoryname=\"%{DATA:categoryname}\" country=\"%{DATA:country}\" application=\"%{WORD:application}\" app-id=\"%{INT:app-id}\" "}
}
}
}

As you can see, I spent some time trying to account for each variation of log entry the firewall produces. For the most part the data is the same, but after a certain point in the line it changes. When Logstash parses the data, it produces multiple values for a single field. Here is a screenshot of the duplicate fields from ELK:

My assumption is that my grok patterns are too similar. I originally tried break_on_match => false but that produced the same duplicates.

1
You can put multiple patterns in the same grok filter; with break_on_match => true (the default), it will stop after the first match. – baudsp

1 Answer

1
votes

As @Phonolog said, since you have multiple grok filters, your message will be matched once against each filter. If more than one filter successfully matches its pattern against your message, you'll end up with duplicate values in the fields.

Instead of trying to match each possible combination of key-value pair at the end of your message, you can put the part with the key-value pairs in one field and then use the kv filter on it.

It would look like this:

# Place these two filters inside the existing `filter { ... }` block, in place of
# the per-variation grok filters.
#
# Step 1: grok parses only the fixed header (both timestamps, host, log source,
# program and pid) and captures everything after "program[pid]: " as one string
# in `kvdata`, so a single pattern covers every log variation.
grok {
  # The two timestamps get distinct field names. Capturing both under the same
  # name makes grok store that field as an array holding both values.
  match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{HOSTNAME:hostname} (?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}\]: %{GREEDYDATA:kvdata}" }
}

# Step 2: kv splits `kvdata` into one event field per key="value" pair, whatever
# keys happen to be present on a given line.
kv {
  source => "kvdata"
  # Strip any double quotes left around the extracted values.
  trim_value => "\""
}