Goal: Parse an XML file with nested data into different elasticsearch documents.
I've chosen to use Logstash to help me here, but since the files will be on different servers, I decided to use Filebeat to ship them to Logstash. The setup seems sound.
However, I can't seem to get Filebeat to send all of the lines of a file as one message; instead, I'm getting them line by line:
{
"@timestamp" => 2017-10-15T20:30:11.825Z,
"offset" => 44,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.825Z,
"offset" => 108,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => "<foo:statistics xsi:schemaLocation=\"http://www.foo.no foo.xsd\" ",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.825Z,
"offset" => 141,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " xmlns:foo=\"http://www.foo.no\" ",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 198,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 231,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:version>1.0</foo:version>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 258,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:name>bar</foo:name>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 313,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:start>2017-01-01T00:06:34.880+02:00</foo:start>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 366,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:stop>2017-05-01T00:06:34.880+02:00</foo:stop>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 380,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:place>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 409,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:name>baz</foo:name>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 442,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:id>1B445T4UV-W</foo:id>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 457,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " </foo:place>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 471,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:visit>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 526,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:date>2017-04-17T04:06:34.880+02:00</foo:date>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 557,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:status>2</foo:status>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 572,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " </foo:visit>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 586,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:visit>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 641,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:date>2017-04-18T04:06:34.880+02:00</foo:date>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 672,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:status>3</foo:status>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 687,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " </foo:visit>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 701,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:visit>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 756,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:date>2017-04-19T04:06:34.880+02:00</foo:date>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 787,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " <foo:status>1</foo:status>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
[2017-10-15T20:30:42,853][WARN ][logstash.filters.split ] Only String and Array types are splittable. field:visits is of type = NilClass
{
"@timestamp" => 2017-10-15T20:30:11.826Z,
"offset" => 802,
"@version" => "1",
"beat" => {
"name" => "bb1ee9b80d2d",
"hostname" => "bb1ee9b80d2d",
"version" => "6.0.0-rc1"
},
"host" => "bb1ee9b80d2d",
"source" => "/mnt/log/test4.xml",
"message" => " </foo:visit>",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_split_type_failure"
]
}
Here is my Filebeat configuration:
# Filebeat log prospector: reads every *.xml file under /mnt/log, applies the
# multiline settings below, and ships the resulting events to logstash:5000.
filebeat.prospectors:
- type: log
paths:
- /mnt/log/*.xml
# NOTE(review): in a regular expression an unescaped '?' makes the preceding
# '<' optional, so this pattern matches any line containing "xml " rather than
# only a line that starts with the XML declaration. Anchoring on the
# declaration would need the '?' escaped, e.g. '^<\?xml'.
multiline.pattern: '<?xml .*'
# NOTE(review): per the Filebeat multiline documentation, negate: false with
# match: after appends lines that MATCH the pattern to the preceding line that
# does not match. Only the declaration line matches here, so nothing is
# grouped. The documented combination for "append every non-matching line to
# the previous matching line" is negate: true with match: after — confirm
# against the Filebeat version in use.
multiline.negate: false
multiline.match: after
# Events are sent to the Logstash beats input.
output.logstash:
hosts: ["logstash:5000"]
and my XML file:
<?xml version="1.0" encoding="iso-8859-1"?>
<foo:statistics xsi:schemaLocation="http://www.foo.no foo.xsd"
xmlns:foo="http://www.foo.no"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<foo:version>1.0</foo:version>
<foo:name>bar</foo:name>
<foo:start>2017-01-01T00:06:34.880+02:00</foo:start>
<foo:stop>2017-05-01T00:06:34.880+02:00</foo:stop>
<foo:place>
<foo:name>baz</foo:name>
<foo:id>1B445T4UV-W</foo:id>
</foo:place>
<foo:visit>
<foo:date>2017-04-17T04:06:34.880+02:00</foo:date>
<foo:status>2</foo:status>
</foo:visit>
<foo:visit>
<foo:date>2017-04-18T04:06:34.880+02:00</foo:date>
<foo:status>3</foo:status>
</foo:visit>
<foo:visit>
<foo:date>2017-04-19T04:06:34.880+02:00</foo:date>
<foo:status>1</foo:status>
</foo:visit>
</foo:statistics>
I want the whole file passed into Logstash as a single event. Here is the Logstash config that processes it:
# Logstash pipeline: receives events from Filebeat, extracts fields from the
# XML held in "message", emits one event per <foo:visit>, and writes the result
# to stdout and Elasticsearch.
input {
# Beats input listening on the port Filebeat's output.logstash points at.
beats {
port => 5000
}
}
filter {
# First pass: run XPath queries against the XML in "message".
# This only works when "message" holds a whole document; on a single-line
# fragment the queries yield nothing and "visits" is never set.
xml {
namespaces => {
"foo" => "http://www.foo.no"
"xsi" => "http://www.w3.org/2001/XMLSchema-instance"
}
source => "message"
store_xml => "false"
xpath => ["/foo:statistics/foo:start/text()", "start"]
xpath => ["/foo:statistics/foo:stop/text()", "stop"]
# NOTE(review): the next two queries use unprefixed "name" and "id" steps,
# while the sample XML has <foo:name> and <foo:id> inside <foo:place>.
# Unprefixed steps select elements in no namespace, so these presumably need
# "foo:name" and "foo:id" — verify.
xpath => ["/foo:statistics/foo:place/name/text()", "place_name"]
xpath => ["/foo:statistics/foo:place/id/text()", "place_id"]
xpath => ["/foo:statistics/foo:visit", "visits"]
}
# Fan out: one event per entry in "visits".
# When "visits" is missing, split logs "Only String and Array types are
# splittable" and tags the event with _split_type_failure.
split {
field => "visits"
remove_field => "message"
}
# Second pass: parse each individual visit fragment.
# NOTE(review): this filter uses the "foo:" prefix in its XPath expressions but
# declares no "namespaces" option, unlike the first xml filter — confirm the
# prefix resolves for the serialized fragment stored in "visits".
xml {
source => "visits"
store_xml => "false"
xpath => ["/foo:visit/foo:date/text()", "date"]
xpath => ["/foo:visit/foo:status/text()", "status"]
remove_field => "visits"
}
# Parse the extracted visit date as ISO8601; the date filter writes the result
# to @timestamp by default.
date {
match => ["date", "ISO8601"]
}
}
output {
# Pretty-print every event to stdout for debugging.
stdout { codec => rubydebug }
# Index events into a daily index derived from the event's @timestamp.
elasticsearch {
hosts => "elasticsearch:9200"
index => "maaling-%{+YYYY.MM.dd}"
}
}
Any help is massively appreciated.
EDIT: changed pattern to '