0
votes

I'm working with ELK 6.7.0 on docker with official images. This is my conf file:

input {
  file {
    path => "/usr/share/logstash/logs/*.xml"
    type => "xml"
    sincedb_path => "/dev/null"
    codec => multiline {
      pattern => "<root>"
      negate => "true"
      what => "previous"
    }
  }
}

filter {  
  xml {
    source => "message"
    store_xml => false
    xpath => [
        "/root/ChainId/text()", "ChainId",
        "/root/SubChainId/text()", "SubChainId",
        "/root/StoreId/text()", "StoreId",
        "/root/BikoretNo/text()", "BikoretNo",
        "/root/DllVerNo/text()", "DllVerNo"
    ]
  }
}

output {
  elasticsearch {
    hosts => "elasticsearch:9200"
    index => "xml_index"
  }

  stdout { 
    codec => rubydebug 
  }
}

My XML file is:

<?xml version="1.0" encoding="UTF-8"?>
<root>
    <ChainId>7290027600007</ChainId>
    <SubChainId>001</SubChainId>
    <StoreId>001</StoreId>
    <BikoretNo>9</BikoretNo>
    <DllVerNo>8.0.1.3</DllVerNo>
</root>

I'm trying to parse incoming XML files, but when a new file is created in the watched folder, Logstash parses it as follows:

logstash_1       | {
logstash_1       |           "path" => "/usr/share/logstash/logs/example10.xml",
logstash_1       |       "@version" => "1",
logstash_1       |        "message" => "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
logstash_1       |           "type" => "xml",
logstash_1       |     "@timestamp" => 2019-04-02T04:42:59.248Z,
logstash_1       |           "host" => "a4f1bf64a3d5"
logstash_1       | }

However, when I reload my conf file, Logstash surprisingly parses my XML successfully:

logstash_1       | {
logstash_1       |        "StoreId" => [
logstash_1       |         [0] "001"
logstash_1       |     ],
logstash_1       |        "message" => "<root>\n    <ChainId>7290027600007</ChainId>\n    <SubChainId>001</SubChainId>\n    <StoreId>001</StoreId>\n    <BikoretNo>9</BikoretNo>\n    <DllVerNo>8.0.1.3</DllVerNo>",
logstash_1       |       "DllVerNo" => [
logstash_1       |         [0] "8.0.1.3"
logstash_1       |     ],
logstash_1       |           "type" => "xml",
logstash_1       |     "SubChainId" => [
logstash_1       |         [0] "001"
logstash_1       |     ],
logstash_1       |      "BikoretNo" => [
logstash_1       |         [0] "9"
logstash_1       |     ],
logstash_1       |           "path" => "/usr/share/logstash/logs/example10.xml",
logstash_1       |       "@version" => "1",
logstash_1       |        "ChainId" => [
logstash_1       |         [0] "7290027600007"
logstash_1       |     ],
logstash_1       |           "tags" => [
logstash_1       |         [0] "multiline"
logstash_1       |     ],
logstash_1       |     "@timestamp" => 2019-04-02T04:43:18.439Z,
logstash_1       |           "host" => "a4f1bf64a3d5"
logstash_1       | }
logstash_1       | {
logstash_1       |        "StoreId" => [
logstash_1       |         [0] "001"
logstash_1       |     ],
logstash_1       |        "message" => "<root>\n    <ChainId>7290027600007</ChainId>\n    <SubChainId>001</SubChainId>\n    <StoreId>001</StoreId>\n    <BikoretNo>9</BikoretNo>\n    <DllVerNo>8.0.1.3</DllVerNo>",
logstash_1       |       "DllVerNo" => [
logstash_1       |         [0] "8.0.1.3"
logstash_1       |     ],
logstash_1       |           "type" => "xml",
logstash_1       |     "SubChainId" => [
logstash_1       |         [0] "001"
logstash_1       |     ],
logstash_1       |      "BikoretNo" => [
logstash_1       |         [0] "9"
logstash_1       |     ],
logstash_1       |           "path" => "/usr/share/logstash/logs/example11.xml",
logstash_1       |       "@version" => "1",
logstash_1       |        "ChainId" => [
logstash_1       |         [0] "7290027600007"
logstash_1       |     ],
logstash_1       |           "tags" => [
logstash_1       |         [0] "multiline"
logstash_1       |     ],
logstash_1       |     "@timestamp" => 2019-04-02T04:43:18.440Z,
logstash_1       |           "host" => "a4f1bf64a3d5"
logstash_1       | }

The message fields of the events contain different parts of the file, so it seems that Logstash is splitting the file before and after the pattern. Even so, it is not clear why it does so only on conf file reload.

1

1 Answer

0
votes

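Yes, you are right. It splits the events based on the multiline pattern. To ship the whole file as is, use a pattern that never matches, so that every line is appended to the previous one. Setting auto_flush_interval makes the codec emit the buffered event after that many seconds without new lines, instead of holding it until the next matching line arrives or the pipeline is shut down or reloaded. Something like this in the input definition: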

codec => multiline { pattern => "^Spalanzani" negate => true what => "previous" auto_flush_interval => 1 }