logstash sends data to the custom template of elasticsearch

1. First configure logstash.conf

# Input from filebeat
input {
  beats {
    port => "5044"
  }
}

# filter
filter {

        grok {
                match =>{
                        "message"=>"(?<data>({.*}))"
                }
        }

        grok {
                match =>{
                        "message"=>"%{TIMESTAMP_ISO8601:logTime}"
                }
        }

        grok {
                match =>{
                        "message"=>"%{LOGLEVEL:logLevel}"
                }
        }


        grok {
                match => {
                        "message"=>"(?<userId>(?<=\"userId\":)(\d+))"
                }
        }

        # Set the time of East Zone 8
        ruby {
                code => "event.set('logstashTime', event.get('@timestamp').time.localtime + 8*60*60);
                        event.set('@timestamp', event.get('logstashTime'))"
        }
		
		# Ignore field
        mutate {
                remove_field => "offset"
                remove_field => "@version"
                remove_field => "input_type"
                # remove_field => "beat"
                remove_field => "host"
                remove_field => "source"
                remove_field => "type"
                remove_field => "tags"
                remove_field => "prospector"
                remove_field => "input"
                remove_field => "log"
        }

}

# Append output to log
output {
  stdout { codec => rubydebug }
}

# Output to es
output {
  elasticsearch {
  	# host
    hosts => ["http://elasticsearch:9200"]
    # Enable logstash's automatic template management. The manage_template parameter defaults to true; if it is false, logstash will not call the Elasticsearch API to create the template.
    manage_template => true
    # Custom index split by day
    index => "my-log-%{+YYYY.MM.dd}"
    # Custom type
    document_type=> "_doc"
    # Location of the mapping template file
    template => "/usr/share/logstash/templates/my-log.json"
    #template_name => "my-log"
    # Whether to overwrite an existing template. If template_overwrite is true, the template with the higher order value will overwrite a lower-order template when both match the same index pattern (e.g. both match indices starting with my-log-).
    template_overwrite => true
  }
}

2. Configure the template file my-log.json

{
		# Match by name
        "template": "my-log-*",
        # sort
        "order": 1,
        # Index fragmentation and other configurations
        "settings": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "refresh_interval": "60s"
        },
        # mapping
        "mappings": {
                "_doc": {
                		# Strict mapping
                        "dynamic":"strict",
                        "properties": {

                                "message": {
                                		# Tokenizer 
                                        "analyzer": "ik_max_word",
                                        "index": true,
                                        "store": false,
                                        "type": "text"
                                },

                                "data": {
                                        "analyzer": "ik_max_word",
                                        "index": true,
                                        "store": false,
                                        "type": "text"
                                },

                                "userId": {
                                        "type": "long"
                                },

                                "logLevel": {
                                        "store": false,
                                        "type": "keyword"
                                },
                                "from": {
                                        "store": false,
                                        "type": "keyword"
                                },

                                ""      
                                "@timestamp": {
                                        "format": "strict_date_optional_time||yyyy-MM-dd HH:mm:ss.SSS||epoch_millis",
                                        "type": "date"
                                },

                                "logstashTime": {
                                        "format": "strict_date_optional_time||yyyy-MM-dd HH:mm:ss.SSS||epoch_millis",
                                        "type": "date"
                                },

                                "logTime": {
                                        "format": "strict_date_optional_time||yyyy-MM-dd HH:mm:ss.SSS||epoch_millis",
                                        "type": "date"
                                },

                                        "type": "date"
                                },

                                "beat": {
                                        "properties": {
                                                "hostname": {
                                                        "store": false,
                                                        "type": "keyword"
                                                },
                                                "name": {
                                                        "store": false,
                                                        "type": "keyword"
                                                },
                                                "version": {
                                                        "store": false,
                                                        "type": "keyword"
                                                }
                                        }
                                }

                        }
                }
        }
}

3 format of date

"format": "strict_date_optional_time||yyyy-MM-dd HH:mm:ss.SSS||epoch_millis",
Corresponding date format =>
2021-09-09T17:19:21.262Z||2021-09-09 17:19:20.000||timestamp

4 about the "dynamic" field

In general, mapping can be divided into dynamic mapping, explicit mapping and strict mapping, which are controlled by dynamic attributes.

  • Dynamic mapping (dynamic: true)
  • Static mapping (dynamic: false)
  • Strict mode (dynamic: strict)

preface

In general, mapping can be divided into dynamic mapping, explicit mapping and strict mapping, which are controlled by dynamic attributes.

Dynamic mapping (dynamic: true)

Now there is such an index:

PUT m1
{
  "mappings": {
    "doc":{
      "properties": {
        "name": {
          "type": "text"
        },
        "age": {
          "type": "long"
        }
      }
    }
  }
}

Use GET m1/_mapping to look at the mappings information:

{
  "m1" : {
    "mappings" : {
      "doc" : {
        "dynamic" : "true",
        "properties" : {
          "age" : {
            "type" : "long"
          },
          "name" : {
            "type" : "text"
          }
        }
      }
    }
  }
}

Add some data and add a sex field:

PUT m1/doc/1
{
  "name": "Xiao Hei",
  "age": 18,
  "sex": "Unknown"
}

Of course, there is no problem with the new field query:

GET m1/doc/_search
{
  "query": {
    "match": {
      "sex": "Unknown"
    }
  }
}

Return result:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 0.5753642,
    "hits" : [
      {
        "_index" : "m1",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 0.5753642,
        "_source" : {
          "name" : "Xiao Hei",
          "age" : 18,
          "sex" : "Unknown"
        }
      }
    ]
  }
}

Now, everything is normal, just like when elastic search is automatically created. That is because when Elasticsearch encounters a field that has not been encountered before in the document, it uses dynamic mapping to determine the data type of the field and automatically adds a new field to the type mapping. Let's take another look at mappings, and you will understand:

{
  "m1" : {
    "mappings" : {
      "doc" : {
        "dynamic" : "true",
        "properties" : {
          "age" : {
            "type" : "long"
          },
          "name" : {
            "type" : "text"
          },
          "sex" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          }
        }
      }
    }
  }
}

Through the above example, we can see that Elasticsearch has added a mapping for sex for us, so it all seems natural. All this is due to the dynamic attribute. We know that in a relational database, fields are never changed after they are created unless they are manually modified. However, Elasticsearch allows new fields to be added by default, that is, dynamic: true.
In fact, when creating an index, it is like this:

PUT m1
{
  "mappings": {
    "doc":{
      "dynamic":true,
      "properties": {
        "name": {
          "type": "text"
        },
        "age": {
          "type": "long"
        }
      }
    }
  }
}

In the above example, when dynamic is set to true, Elasticsearch will dynamically add mapping attributes for us. Since true is the default value, writing it explicitly changes nothing.
One thing to note here is that once mappings are created, they cannot be modified. Because Lucene cannot change the inverted index after it generates it.

Static mapping (dynamic: false)

Now we set the dynamic value to false:

PUT m2
{
  "mappings": {
    "doc":{
      "dynamic":false,
      "properties": {
        "name": {
          "type": "text"
        },
        "age": {
          "type": "long"
        }
      }
    }
  }
}

Now let's test the difference between false and true:

PUT m2/doc/1
{
  "name": "Xiao Hei",
  "age":18
}
PUT m2/doc/2
{
  "name": "Xiaobai",
  "age": 16,
  "sex": "Unknown"
}

Compared with the first data, the second data has an additional sex attribute. Let's query it based on sex:

GET m2/doc/_search
{
  "query": {
    "match": {
      "sex": "Unknown"
    }
  }
}

The results are as follows:

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 0,
    "max_score" : null,
    "hits" : [ ]
  }
}

The result is empty, that is, nothing is found. Why? Use GET m2/_mapping to view the mapping information of m2 at this time:

{
  "m2" : {
    "mappings" : {
      "doc" : {
        "dynamic" : "false",
        "properties" : {
          "age" : {
            "type" : "long"
          },
          "name" : {
            "type" : "text"
          }
        }
      }
    }
  }
}

You can see that elastic search does not establish a mapping relationship for the newly added sex. So we can't find it.
When elasticsearch detects a new field, it will ignore it because of the relationship of dynamic:false, but it will still store it.
In some cases, dynamic:false is still not enough, so more rigorous strategies are needed to further limit it.

Strict mode (dynamic: strict)

Let's create another mappings and change the state of dynamic to strict:

PUT m3
{
  "mappings": {
    "doc": {
      "dynamic": "strict", 
      "properties": {
        "name": {
          "type": "text"
        },
        "age": {
          "type": "long"
        }
      }
    }
  }
}

Now add two documents:

PUT m3/doc/1
{
  "name": "Xiao Hei",
  "age": 18
}
PUT m3/doc/2
{
  "name": "Xiaobai",
  "age": 18,
  "sex": "Unknown"
}

The first document has no problem adding and querying. However, when you add the second document, you will find an error:

{
  "error": {
    "root_cause": [
      {
        "type": "strict_dynamic_mapping_exception",
        "reason": "mapping set to strict, dynamic introduction of [sex] within [doc] is not allowed"
      }
    ],
    "type": "strict_dynamic_mapping_exception",
    "reason": "mapping set to strict, dynamic introduction of [sex] within [doc] is not allowed"
  },
  "status": 400
}

Error prompt, strict dynamic mapping exception! In other words, when dynamic:strict, elasticsearch will throw an exception if it encounters a new field.
The above rigorous style is called strict mode!

Summary:

  • Dynamic mapping (dynamic: true): dynamically add new fields (or default).
  • Static mapping (dynamic: false): ignore new fields. When a new field appears, no mapping relationship is added for it, so it cannot be searched on; however, it is still stored in _source and appears in query results.
  • Strict mode (dynamic: strict): throw an exception if a new field is encountered.

Generally, static mapping is used more. Like the img tag of HTML, src is a self-contained attribute. You can add id or class attributes when needed.
Of course, if you know your data very well and won't change for a long time in the future, strict is a good choice.

Tags: ElasticSearch LogStash

Posted on Sat, 20 Nov 2021 22:36:16 -0500 by phpnow