0
votes

I am trying to filter e-mail addresses out of my logs in Logstash before sending them to Elasticsearch (ES).

One field still contains e-mail addresses, and I can't get the mutate filter's gsub to replace them.

# Mask anything that looks like an e-mail address with a fixed placeholder.
# gsub takes a flat array of triples: field name, regex, replacement text.
mutate {
    gsub => [
        # Targets the top-level "log" field. In the sample event below, "log" is
        # an object and the address lives in [log][extra_fields][ctxt_request].
        # NOTE(review): gsub is documented to act only on strings (or arrays of
        # strings) - presumably why the nested value is left untouched; confirm.
        "log", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}", "--- FILTERED FROM LOGS ---",
        # Same pattern and placeholder applied to the top-level "message" field.
        "message", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}", "--- FILTERED FROM LOGS ---"
            ]
        }

JSON:

{
  "_index": "logs-2021.03.09.11",
  "_type": "doc",
  "_id": "sdfsdf",
  "_version": 1,
  "_score": null,
  "_source": {
    "source": "stderr",
    "@timestamp": "2021-03-09T11:39:38.413Z",
    "kubernetes": {
      "namespace": "sdfsdk",
      "labels": {
        "pod-template-hash": "sdfsdf",
        "app": {
          "softwear": {
            "co/name": "sdfsd",
            "co/domain": "sdfsdf"
          }
        },
    "log": {
      "extra_fields": {
        "ctxt_response": "{\"records_id\":[{\"ext_id\":\"sdfsdf\",\"fcc_id\":sdfsdfsd,\"external_id\":\"sdfsdf\"}],\"success\":true}",
        "requestDevice": "\"\"",
        "ctxt_request": "{\"hash\":\"56kdfhsdfjshdkf\",\"change\":\"sdsd\",\"campaigns_id\":114,\"method\":\"sha1\",\"login\":\"test\",\"records\":[{\"emails\":[\"user@example.com\"],\"external_id\":\"sdsdK\"}]}",
        "ctxt_response_code": "200"
      },

How can I reference the nested field [log][extra_fields][ctxt_request] and apply gsub to it?

1

1 Answer

0
votes

Try this:

# Mask e-mail addresses in the nested ctxt_request string and in "message".
# gsub takes a flat array of triples: field name, regex, replacement text.
mutate {
    gsub => [
        # Nested fields are addressed with bracketed field-reference syntax,
        # one [segment] per level, instead of the bare top-level name "log".
        "[log][extra_fields][ctxt_request]", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}", "--- FILTERED FROM LOGS ---",
        # Same pattern and placeholder applied to the top-level "message" field.
        "message", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}", "--- FILTERED FROM LOGS ---"
    ]
}

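A note on your regex: inside a character class the dot is already a literal character, so these two forms are equivalent:

  • [A-Za-z0-9._%+-]
  • [A-Za-z0-9\._%+-] (escaping the dot . is optional here, on either side of the @ - it only has to be escaped outside a character class, as you already do with \. before the top-level domain)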

You might also want to look at using the JSON Filter Plugin to parse the ctxt_request field - then you could just overwrite the contents of that one subfield without using gsub at all.

Here's an example of how that might work. Caveats are a) that it hasn't been tested, b) that doing the remove_field on the JSON output might not work (although removing the source field will work if the JSON was successfully parsed), and c) that you might have other ideas for how you structure the fields.

# Parse the JSON text held in ctxt_request into a structured sub-tree so that
# individual keys can be addressed directly instead of regex-matching a string.
json {
    # Field holding the raw JSON string.
    source => "[log][extra_fields][ctxt_request]"
    # Where the parsed structure is stored; kept under a separate "parsed" key.
    target => "[log][extra_fields][parsed][ctxt_request]"
    remove_field => [
        # drop the raw string (which still contains the address) once parsing
        # has succeeded
        "[log][extra_fields][ctxt_request]",
        # remove the parsed e-mail list completely
        # NOTE(review): in the sample event "records" is an array of objects, so
        # this reference may need an index (e.g. [records][0][emails]) to
        # resolve - untested, confirm before relying on it.
        "[log][extra_fields][parsed][ctxt_request][records][emails]"
    ]
}
# Alternative to removing the parsed e-mail list: overwrite it with the
# placeholder text used in the question.
mutate {
    # or replace it with the text from your question
    # NOTE(review): in the sample event "records" is an array of objects, so
    # this reference may need an index (e.g. [records][0][emails]) to hit the
    # intended value - untested, confirm before relying on it.
    replace => {
        "[log][extra_fields][parsed][ctxt_request][records][emails]" =>
        "--- FILTERED FROM LOGS ---"
    }
}