SpringBoot integrates ES advanced query

SpringBoot integrates ES advanced query

springboot version: 2.0.5.RELEASE

elasticsearch version: 7.9.1

1. Configuration

Import dependency:

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.9.1</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>7.9.1</version>
</dependency>

application.properties configuration file:

elasticsearch.schema=http
elasticsearch.address=192.168.80.130:9200,192.168.80.131:9200,192.168.80.132:9200
elasticsearch.connectTimeout=10000
elasticsearch.socketTimeout=60000
elasticsearch.connectionRequestTimeout=10000
elasticsearch.maxConnectNum=200
elasticsearch.maxConnectPerRoute=200
# No password to ignore
elasticsearch.userName=elastic
elasticsearch.password=123456

Connection configuration:

import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import javax.annotation.PreDestroy;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


@Configuration
public class ElasticSearchConfig {
    /**
     * Connection scheme (protocol), e.g. "http" or "https".
     */
    @Value("${elasticsearch.schema:http}")
    private String schema;

    /**
     * Cluster address. If there are multiple addresses, separate them with ","
     */
    @Value("${elasticsearch.address}")
    private String address;

    /**
     * User name for basic authentication (ignored by ES when security is disabled).
     */
    @Value("${elasticsearch.userName}")
    private String userName;

    /**
     * Password for basic authentication (ignored by ES when security is disabled).
     */
    @Value("${elasticsearch.password}")
    private String password;

    /**
     * Connection timeout (milliseconds).
     */
    @Value("${elasticsearch.connectTimeout:5000}")
    private int connectTimeout;

    /**
     * Socket read timeout (milliseconds).
     */
    @Value("${elasticsearch.socketTimeout:10000}")
    private int socketTimeout;

    /**
     * Timeout for obtaining a connection from the pool (milliseconds).
     */
    @Value("${elasticsearch.connectionRequestTimeout:5000}")
    private int connectionRequestTimeout;

    /**
     * Maximum total number of connections.
     */
    @Value("${elasticsearch.maxConnectNum:100}")
    private int maxConnectNum;

    /**
     * Maximum number of connections per route.
     */
    @Value("${elasticsearch.maxConnectPerRoute:100}")
    private int maxConnectPerRoute;

    // Kept so the same client instance can be closed on container shutdown.
    private RestHighLevelClient restHighLevelClient;

    /**
     * Builds the {@link RestHighLevelClient} bean from the configured cluster
     * addresses, credentials, timeouts and connection-pool limits.
     *
     * @return the shared high-level REST client
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Basic-auth credentials; ES simply ignores them when security is off.
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(userName, password));

        // Parse the comma-separated "host:port" list into HttpHost instances.
        List<HttpHost> hostLists = new ArrayList<>();
        for (String addr : address.split(",")) {
            String[] hostAndPort = addr.split(":"); // split once instead of twice
            hostLists.add(new HttpHost(hostAndPort[0], Integer.parseInt(hostAndPort[1]), schema));
        }
        // Building the connection object
        RestClientBuilder builder = RestClient.builder(hostLists.toArray(new HttpHost[0]));
        // Request-level timeout configuration
        builder.setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder
                .setConnectTimeout(connectTimeout)
                .setSocketTimeout(socketTimeout)
                .setConnectionRequestTimeout(connectionRequestTimeout));
        // HTTP client pool limits and credentials
        builder.setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder
                .setMaxConnTotal(maxConnectNum)
                .setMaxConnPerRoute(maxConnectPerRoute)
                .setDefaultCredentialsProvider(credentialsProvider));
        restHighLevelClient = new RestHighLevelClient(builder);
        return restHighLevelClient;
    }

    /**
     * Closes the underlying HTTP client when the Spring context shuts down.
     */
    @PreDestroy
    public void clientClose() {
        if (restHighLevelClient == null) {
            return; // bean was never created, nothing to close
        }
        try {
            restHighLevelClient.close();
        } catch (IOException e) {
            // We are shutting down anyway; just report the failure.
            e.printStackTrace();
        }
    }
}

2. API operation ES

2.1 query index list

Index names support fuzzy (wildcard) matching

@Test
public void tset() throws IOException {
    // Request every index whose name matches the "log*" pattern.
    GetIndexRequest request = new GetIndexRequest("log*");
    GetIndexResponse response = restHighLevelClient.indices().get(request, RequestOptions.DEFAULT);
    // The mapping keys are the matched index names; collect and print them.
    List<String> indexNames = new ArrayList<>(response.getMappings().keySet());
    for (String name : indexNames) {
        System.out.println(name);
    }
}

2.2 TermsQuery

The terms query in ES is an exact-match query. The explanation for the .keyword suffix added to the serviceName field here is as follows:

  1. es5.0 and later versions cancel the String type and split the original String type into text and keyword. The difference between them is that text will segment the field, while keyword will not.

  2. If mapping is not specified in advance for the index field, es will use Dynamic Mapping to dynamically map the field by inferring the value of the field in the document you pass in. For example, if the value of the field total in the incoming document is 12, total will be mapped to long type; If the value of the field addr is "192.168.0.1", then addr will be mapped to ip type. However, for ordinary strings that do not meet the ip and long formats, the situation is somewhat different: es will map them to text type, but in order to retain the ability to accurately query and aggregate these fields, it also maps them to keyword type as the fields attribute of the field_ Mapping. For example, the field "serviceName" I use here is used to store the service name string type, and the following Dynamic Mapping will be done for it:

    "serviceName" : {
        "type" : "text",
        "fields" : {
            "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
            }
        }
    }
    

    In subsequent queries, using serviceName is to query serviceName as text type, while using serviceName.keyword is to query serviceName as keyword type. The former will segment the query content and then match, while the latter will directly match the query results accurately.

  3. The term query in ES does exact matching rather than analyzed (word-segmented) querying, so a term query on a text-type field will usually find no results (unless the field value is left unchanged by the analyzer, i.e. not converted or segmented). In this case, serviceName.keyword must be used to exactly match the keyword-typed version of the serviceName field.

GET logdata-log-center-2021.05.06/_search
{
  "query": {
    "terms": {
      "serviceName.keyword": [
        "log-center-user-portal",
        "log-center-collect-manage"
      ]
    }
  }
}

Java API

@Test
public void test() throws IOException {
    // Assemble the query source.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // termQuery matches a single value (like "=" in SQL):
    // sourceBuilder.query(QueryBuilders.termQuery("serviceName.keyword", "log-center-user-portal-web"));
    // termsQuery matches any of several values at once (like "in" in SQL).
    sourceBuilder.query(QueryBuilders.termsQuery("serviceName.keyword", "log-center-user-portal-web", "log-center-collect-manage"));

    // The request targets one index and carries the query source.
    SearchRequest searchRequest = new SearchRequest("log-web-up-log-center-2021.10.30");
    searchRequest.source(sourceBuilder);
    // Send the HTTP request to ES.
    SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    if (!RestStatus.OK.equals(response.status())) {
        return;
    }
    long total = response.getHits().getTotalHits().value; // total number of matching documents
    // Without an explicit size the response carries at most 10 hits.
    for (SearchHit hit : response.getHits().getHits()) {
        String index = hit.getIndex(); // index name
        String id = hit.getId();       // document id
        // Parse the document body and print it.
        JSONObject document = JSON.parseObject(hit.getSourceAsString(), JSONObject.class);
        System.out.println(document);
    }
}

2.3 WildcardQuery

The wildcard query of es is a fuzzy matching query, similar to like in sql, and the "*" sign before and after the value value is similar to "%" in sql.

GET logdata-log-center-2021.05.06/_search
{
  "query": {
    "wildcard": {
      "serviceName.keyword": {
        "value": "*user-portal*"
      }
    }
  }
}

Java API

searchSourceBuilder.query(QueryBuilders.wildcardQuery("serviceName.keyword", "*" + "user-portal" + "*"));

2.4 RangeQuery

The range query of es is a range query, which is equivalent to between... and

GET log-web-up-log-center-2021.10.30/_search
{
  "query": {
    "range": {
      "timestamp": {
        "gte": "2021-10-30 15:00:00",
        "lte": "2021-10-30 16:00:00",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS"
      }
    }
  }
}

Java API

searchSourceBuilder.query(QueryBuilders.rangeQuery("timestamp")
                              .gte("2021-10-30 15:00:00") //Starting value
                              .lte("2021-10-30 16:00:00")   //End value
                              .format("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS"));//Multiple formatting standards can be specified and separated by |

2.5 MatchQuery

es's match query does full-text retrieval, which will segment keywords and match entries.

GET log-web-up-log-center-2021.10.30/_search
{
  "query": {
    "match": {
      "orgName": {
        "query": "limited company"
      }
    }
  }
}

query: search keywords. For English keywords, if there are multiple words, they should be separated by half width commas, while for Chinese keywords, they can be separated by commas or not

Java API

// Full-text retrieval; the keyword is analyzed (segmented) before matching.
// Fix: the original snippet was missing the closing parenthesis.
searchSourceBuilder.query(QueryBuilders.matchQuery("orgName", "limited company"));

2.6 MultiMatchQuery

The MatchQuery above has a short board. If the user enters a keyword, we don't know which field it is when searching. At this time, we don't use anything. However, the emergence of MultiMatchQuery solves this problem. It can set multiple fields for joint search through the fields attribute. The specific usage is as follows

GET log-web-up-log-center-2021.10.30/_search
{
  "query": {
    "multi_match": {
      "query": "user-portal",
      "fields": ["serviceName", "systemName"]
    }
  }
}

Java API

//Full text retrieval supports word segmentation matching and multi field retrieval
searchSourceBuilder.query(QueryBuilders.multiMatchQuery("user-portal", "serviceName", "systemName", "description"));

2.7 ExistsQuery

The exists query of es retrieves the data existing in a field, that is, non null data. The specified field can be a specific field or a json structure.

GET logdata-log-center-2021.05.06/_search
{
  "query": {
    "exists": {
      "field": "networkLogDetailInfo"
    }
  }
}

Java API

//Query data whose networkLogDetailInfo is not null
searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo"));

2.8 BoolQuery

es's bool query combines multiple queries to retrieve data. The main combination parameters are must, should, mustNot, etc.

  • must: the data must match all the query criteria contained in must, which is equivalent to "AND"
  • should: the data matches one or more of the query criteria contained in should, which is equivalent to "OR"
  • mustNot: the data must not match any of the query criteria contained in mustNot, which is equivalent to "NOT"
GET logdata-log-center-2021.05.06/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "networkLogDetailInfo"
          }
        },
        {
          "range": {
            "timestamp": {
              "gte": "2021-05-05 00:00:00",
              "lte": "2021-05-07 00:00:00",
              "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS"
            }
          }
        }
      ],
      "must_not": [
        {
          "exists": {
            "field": "serviceLogDetailInfo"
          }
        }
      ]
    }
  }
}

Java API

@Test
public void test() throws IOException {
    // Assemble the query source.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // bool query: combine several conditions (must = AND, mustNot = NOT).
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
    // Condition 1 (must): the networkLogDetailInfo field has to exist.
    boolQuery.must(QueryBuilders.existsQuery("networkLogDetailInfo"));
    // Condition 2 (must): timestamp within a range, like between...and in SQL.
    boolQuery.must(QueryBuilders.rangeQuery("timestamp")
            .from("2021-05-05 00:00:00") // lower bound
            .to("2021-05-07 00:00:00")   // upper bound
            .includeLower(true)          // lower bound is inclusive
            .includeUpper(false)         // upper bound is exclusive
            .format("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS")); // accepted date formats
    // Condition 3 (mustNot): the serviceLogDetailInfo field must be absent.
    boolQuery.mustNot(QueryBuilders.existsQuery("serviceLogDetailInfo"));

    sourceBuilder.query(boolQuery);

    // Build the request against the target index and attach the source.
    SearchRequest searchRequest = new SearchRequest("logdata-log-center-2021.05.06");
    searchRequest.source(sourceBuilder);
    // Execute the search over HTTP.
    SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    if (!RestStatus.OK.equals(response.status())) {
        return;
    }
    long total = response.getHits().getTotalHits().value; // total matching documents
    for (SearchHit hit : response.getHits().getHits()) {
        String index = hit.getIndex(); // index name
        String id = hit.getId();       // document id
        JSONObject document = JSON.parseObject(hit.getSourceAsString(), JSONObject.class);
        System.out.println(document);
    }
}

2.9 sorting

es uses sort to sort, and multiple fields can be sorted jointly.

GET logdata-log-center-2021.05.06/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "exists": {
            "field": "serviceLogDetailInfo"
          }
        }
      ]
    }
  },
  "sort": [
    {
      "serviceName.keyword": {
        "order": "asc"
      },
      "timestamp": {
        "order": "desc"
      }
    }
  ]
}

Sort according to the first field first. If the first field is the same, sort according to the second field.

Java API

//Ascending order
searchSourceBuilder.sort("serviceName.keyword", SortOrder.ASC);
//Descending order
searchSourceBuilder.sort("timestamp", SortOrder.DESC);

2.10 result field filtering

Sometimes only a few fields are needed to retrieve data. es also supports field filtering on the result set. Fields can be fuzzy matched with "*".

GET logdata-log-center-2021.05.06/_search
{
  "_source": {
    "includes": ["messageId", "system*", "service*", "timestamp"],
    "excludes": []
  }
}

Java API

//Filter fields. The first parameter is the required field and the second parameter is the unnecessary field
searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {});

2.11 paging

There are three paging methods for es: from+ size, scroll, search_after. The default paging method is from+ size.

2.11.1 from+ size

GET logdata-log-center-2021.05.06/_search
{
  "from": 0, 
  "size": 2, 
  "query": {
    "exists": {
      "field": "networkLogDetailInfo"
    }
  },
  "_source": {
    "includes": ["messageId", "system*", "service*", "timestamp"],
    "excludes": []
  }
}


Through the query results, we can find that after we set the paging parameters, hits.total returns the total number of data 7149. According to the paging rules, we set the size=2, so there are only two pieces of data in hits.hits.

Java API

@Test
public void test() throws IOException {
    // Assemble the query source.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // Only documents where networkLogDetailInfo exists.
    sourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo"));

    int page = 1; // 1-based page number
    int size = 2; // hits per page
    // from/size paging: skip (page - 1) * size documents, then take size.
    sourceBuilder.from((page - 1) * size);
    sourceBuilder.size(size);

    // Restrict returned fields: first array = includes, second = excludes.
    sourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {});
    // Build the request against the target index and attach the source.
    SearchRequest searchRequest = new SearchRequest("logdata-log-center-2021.05.06");
    searchRequest.source(sourceBuilder);
    // Execute the search over HTTP.
    SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    if (!RestStatus.OK.equals(response.status())) {
        return;
    }
    long total = response.getHits().getTotalHits().value; // total matches, unaffected by paging
    // hits contains at most `size` documents of the requested page.
    for (SearchHit hit : response.getHits().getHits()) {
        String index = hit.getIndex(); // index name
        String id = hit.getId();       // document id
        JSONObject document = JSON.parseObject(hit.getSourceAsString(), JSONObject.class);
        System.out.println(document);
    }
}

2.11.2 scroll

A method that satisfies deep paging. ES provides the scroll mode for paged reading. In principle, the first query generates a cursor (scroll_id); subsequent queries only need to fetch data by this cursor, and each call can only fetch the next page. When the hits field returned in the result set is empty, the traversal is over. scroll=1m is the validity period of the scroll_id, meaning 1 minute; after it expires the context is automatically cleaned up by ES. This value is renewed on every query.

GET logdata-log-center-2021.05.06/_search?scroll=1m
{
  "size": 2, 
  "query": {
    "exists": {
      "field": "networkLogDetailInfo"
    }
  },
  "_source": {
    "includes": ["messageId", "system*", "service*", "timestamp"],
    "excludes": []
  }
}


In subsequent queries, the query criteria do not need to be specified again; only the scroll_id needs to be carried. Results are paged according to the first query's criteria. The next query can be issued in either of two ways:

POST /_search/scroll
{
  "scroll": "1m",
  "scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFFp0bGhXbjBCQU55Q3EtSDcxaWF4AAAAAACF-OYWV0liWUNLUHVTN09DS1ZtUl9SSHhVdw=="
}
GET /_search/scroll?scroll=1m&scroll_id=FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFFp0bGhXbjBCQU55Q3EtSDcxaWF4AAAAAACF-OYWV0liWUNLUHVTN09DS1ZtUl9SSHhVdw==

Java API

/**
 * Scroll-paging demo: the first call (blank scrollId) runs a normal search
 * that opens a 1-minute scroll context; subsequent calls continue the scroll.
 *
 * Fix over the original: the refreshed scroll id was assigned to the local
 * parameter and then lost (Java is pass-by-value), so callers could never
 * request the next page. It is now returned.
 *
 * @param scrollId scroll id from the previous call, or null/blank for the first page
 * @return the scroll id to pass into the next call
 * @throws IOException if the ES request fails
 */
public String testScroll(String scrollId) throws IOException {
    // Query source: 2 hits per page, field-exists condition, field filtering.
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.size(2);
    searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo"));
    // First array = fields to include, second = fields to exclude.
    searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {});

    SearchRequest request = new SearchRequest("logdata-log-center-2021.05.06");
    request.source(searchSourceBuilder);

    // Keep the scroll context alive for 1 minute; renewed on every call.
    Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));
    request.scroll(scroll);

    SearchResponse response;
    if (!StringUtils.isBlank(scrollId)) {
        // Follow-up page: only the scroll id is needed, not the original query.
        SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
        scrollRequest.scroll(scroll);
        response = restHighLevelClient.scroll(scrollRequest, RequestOptions.DEFAULT);
    } else {
        // First page: an ordinary search that opens the scroll context.
        response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    }

    // ES may return a new scroll id; it must be used for the next page.
    String nextScrollId = response.getScrollId();
    System.out.println(nextScrollId);

    if (RestStatus.OK.equals(response.status())) {
        // An empty hits array means the scroll has been fully consumed.
        SearchHit[] hits = response.getHits().getHits();
        for (SearchHit hit : hits) {
            String index = hit.getIndex(); // index name
            String id = hit.getId();       // document id
            JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString(), JSONObject.class);
            System.out.println(jsonObject);
        }
    }
    return nextScrollId;
}

2.11.3 search_after

search_after is a feature provided by ES 5.0 and later. A search_after query must specify sort fields (multiple sort fields may be given). Subsequent queries are somewhat similar to scroll, but unlike scroll it provides a live cursor: the next page is located using the last document of the previous page. Note that a search_after query requires from to be set to 0 or -1 (or simply omitted).

First query:

POST logdata-log-center-2021.05.06/_search
{
  "size": 2, 
  "query": {
    "exists": {
      "field": "networkLogDetailInfo"
    }
  },
  "_source": {
    "includes": ["messageId", "system*", "service*", "timestamp"],
    "excludes": []
  },
  "sort": [
    {
      "timestamp": {
        "order": "desc"
      }
    }
  ]
}

Query result: you can see that each piece of data has a sort part, and the query on the next page needs the sort value of the last piece of the query result as the cursor to realize paging query

Second query:

POST logdata-log-center-2021.05.06/_search
{
  "search_after": [
    1620374316433
  ],
  "size": 2, 
  "query": {
    "exists": {
      "field": "networkLogDetailInfo"
    }
  },
  "_source": {
    "includes": ["messageId", "system*", "service*", "timestamp"],
    "excludes": []
  },
  "sort": [
    {
      "timestamp": {
        "order": "desc"
      }
    }
  ]
}

Java API

/**
 * search_after paging demo: the sort values of the last hit of a page act as
 * the cursor for the next page.
 *
 * Fix over the original: the cursor was stored back into the parameter and
 * lost (Java is pass-by-value), so callers could never fetch the next page.
 * It is now returned.
 *
 * @param values sort values from the previous page's last hit, or null for the first page
 * @return the cursor for the next page; the input values unchanged if the page was empty
 * @throws IOException if the ES request fails
 */
public Object[] testSearchAfter(Object[] values) throws IOException {
    // Query source builder.
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.size(2);
    searchSourceBuilder.from(0); // search_after requires from = 0 or -1 (may also be omitted)
    // Query criteria.
    searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo"));
    // First array = fields to include, second = fields to exclude.
    searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {});
    // A sort is mandatory for search_after; prefer a unique field.
    searchSourceBuilder.sort("timestamp", SortOrder.DESC);

    if (values != null) {
        searchSourceBuilder.searchAfter(values);
    }

    SearchRequest request = new SearchRequest("logdata-log-center-2021.05.06");
    request.source(searchSourceBuilder);
    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    if (RestStatus.OK.equals(response.status())) {
        SearchHit[] hits = response.getHits().getHits();
        for (SearchHit hit : hits) {
            JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString(), JSONObject.class);
            System.out.println(jsonObject);
        }
        if (hits.length > 0) {
            // The last hit's sort values become the cursor for the next page.
            values = hits[hits.length - 1].getSortValues();
            System.out.println(Arrays.toString(values));
        }
    }
    return values;
}

2.11.4 features of three paging modes

  1. from+size is more suitable for shallow paging mode. In the case of deep paging, the efficiency of this mode is very low. With the continuous increase of paging page number, the efficiency of query will decline sharply. For example, from = 5000, size=20, ES needs to match and sort on each partition to get 5000 * 20 valid data, and then take the last 20 in the result set. In addition to the efficiency problem, another unsolvable problem is that the maximum skip value supported by ES is max_result_window, the default is 10000. That is, when from+size > max_result_window, es will return an error.
  2. Scroll is a kind of paging retrieval in the form of scrolling that meets deep-paging scenarios. A cursor (scroll_id) is generated during the first query; the value returned within the validity period stays the same, and subsequent queries only need to retrieve data by this cursor. Scroll is a resource-intensive method: generating the scroll_id can be understood as establishing a temporary historical snapshot — the system consumes considerable resources to keep an image of the current result set and occupies file descriptors. Subsequent add/delete/update/query operations do not affect this snapshot's results. It is therefore not recommended for real-time queries; it is often used for non-real-time processing of large amounts of data, such as data migration or index changes.
  3. search_after is applicable to deep paging with sorting. Paging locates the next page from the last document of the previous page, so page-skipping requests are not possible. Meanwhile, if documents are added, deleted or modified during paging, those changes are reflected on the cursor in real time. When selecting the sort fields for search_after, try to use unique fields such as the document id or a timestamp. Compared with from+size shallow paging and scroll queries, search_after offers a great performance improvement.

2.12 Aggregation

The aggs of es performs aggregate query statistics on the data. The query method is as follows:

## Count the log collection quantity of each system in one month
POST log*/_search
{
  "size": 0,
  "query": {
		"range": {
			"timestamp": {
				"gte": "2021-10-24 00:00:00",
				"lte": "2021-11-24 00:00:00",
				"format": "yyyy-MM-dd HH:mm:ss"
			}
		}
	},
	"aggs": {
	  "allLog": {
	    "terms": {
	      "field": "systemName.keyword",
	      "size": 10
	    }
	  }
	}
}


Java API

@Test
public void test() throws IOException {
    // Bucket the logs of the chosen month by system name.
    TermsAggregationBuilder bySystem = AggregationBuilders.terms("allLog").field("systemName.keyword");

    // Restrict the statistics to one month of data.
    RangeQueryBuilder withinMonth = QueryBuilders.rangeQuery("timestamp")
            .gte("2021-10-24 00:00:00")
            .lte("2021-11-24 00:00:00")
            .format("yyyy-MM-dd HH:mm:ss");

    // Assemble the query source.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.aggregation(bySystem); // aggregation definition
    sourceBuilder.query(withinMonth);    // filter condition
    sourceBuilder.size(0);               // no hits, aggregation results only

    // Build the request over all "log*" indexes and execute it.
    SearchRequest request = new SearchRequest("log*");
    request.source(sourceBuilder);
    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

    Aggregations aggregations = response.getAggregations();
    if (aggregations != null) {
        Terms terms = aggregations.get("allLog");
        // Each bucket is one system with its document count.
        for (Terms.Bucket bucket : terms.getBuckets()) {
            System.out.print("System name:" + bucket.getKeyAsString());
            System.out.println("\t Total logs:" + bucket.getDocCount());
        }
    }
}

Multilayer nested aggregation

## Count the total number of logs in each system, and count the number of various types of logs by system
POST log*/_search
{
	"size": 0,
	"query": {
		"range": {
			"timestamp": {
				"gte": "2021-10-24 00:00:00",
				"lte": "2021-11-24 00:00:00",
				"format": "yyyy-MM-dd HH:mm:ss"
			}
		}
	},
	"aggs": {
		"allLog": {
			"terms": {
				"field": "systemName.keyword",
				"size": 10
			},
			"aggs": {
				"errorLogNum": {
					"filter": {
						"terms": {
							"level.keyword": [
								"ERROR",
								"FATAL"
							]
						}
					}
				},
				"dbLogNum": {
					"filter": {
						"exists": {
							"field": "dataLogDetailInfo"
						}
					}
				},
				"interfaceLogNum": {
					"filter": {
						"exists": {
							"field": "networkLogDetailInfo"
						}
					}
				},
				"serviceLogNum": {
					"filter": {
						"exists": {
							"field": "serviceLogDetailInfo"
						}
					}
				},
				"webLogNum": {
					"filter": {
						"exists": {
							"field": "browserModel"
						}
					}
				}
			}
		}
	}
}

Java API

@Test
public void test() throws IOException {
    // Per-system sub-aggregations: each "filter" counts one category of log.
    FilterAggregationBuilder errorLogs = AggregationBuilders.filter("errorLogNum", QueryBuilders.termsQuery("level.keyword", "ERROR", "FATAL"));
    FilterAggregationBuilder dbLogs = AggregationBuilders.filter("dbLogNum", QueryBuilders.existsQuery("dataLogDetailInfo"));
    FilterAggregationBuilder interfaceLogs = AggregationBuilders.filter("interfaceLogNum", QueryBuilders.existsQuery("networkLogDetailInfo"));
    FilterAggregationBuilder serviceLogs = AggregationBuilders.filter("serviceLogNum", QueryBuilders.existsQuery("serviceLogDetailInfo"));
    FilterAggregationBuilder webLogs = AggregationBuilders.filter("webLogNum", QueryBuilders.existsQuery("browserModel"));

    // Outermost aggregation: bucket by system name, then nest the counters above.
    TermsAggregationBuilder bySystem = AggregationBuilders.terms("allLog").field("systemName.keyword").size(10);
    bySystem.subAggregation(errorLogs)
            .subAggregation(dbLogs)
            .subAggregation(interfaceLogs)
            .subAggregation(serviceLogs)
            .subAggregation(webLogs);

    // Restrict the statistics to one month of data.
    RangeQueryBuilder withinMonth = QueryBuilders.rangeQuery("timestamp")
            .gte("2021-10-24 00:00:00")
            .lte("2021-11-24 00:00:00")
            .format("yyyy-MM-dd HH:mm:ss");

    // Assemble the query source.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.aggregation(bySystem); // aggregation definition
    sourceBuilder.query(withinMonth);    // filter condition
    sourceBuilder.size(0);               // no hits, aggregation results only

    // Build the request over all "log*" indexes and execute it.
    SearchRequest request = new SearchRequest("log*");
    request.source(sourceBuilder);
    SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

    Aggregations aggregations = response.getAggregations();
    if (aggregations != null) {
        Terms terms = aggregations.get("allLog");
        for (Terms.Bucket bucket : terms.getBuckets()) {
            // Pull each nested filter's count out of the per-system bucket.
            ParsedFilter dbFilter = bucket.getAggregations().get("dbLogNum");
            ParsedFilter serviceFilter = bucket.getAggregations().get("serviceLogNum");
            ParsedFilter webFilter = bucket.getAggregations().get("webLogNum");
            ParsedFilter interfaceFilter = bucket.getAggregations().get("interfaceLogNum");
            ParsedFilter errorFilter = bucket.getAggregations().get("errorLogNum");
            System.out.print("System name:" + bucket.getKeyAsString());
            System.out.print("\t General log:" + bucket.getDocCount());
            System.out.print("\t Database log:" + dbFilter.getDocCount());
            System.out.print("\t Service execution log:" + serviceFilter.getDocCount());
            System.out.print("\t Front end operation log:" + webFilter.getDocCount());
            System.out.print("\t Interface log:" + interfaceFilter.getDocCount());
            System.out.println("\t Error log:" + errorFilter.getDocCount());
        }
    }
}

Aggregate query also provides many query rules, such as date aggregation by time, count aggregation, avg aggregation, sum aggregation, min aggregation, max aggregation and so on.

Tags: Java kafka Spring Boot

Posted on Fri, 26 Nov 2021 11:07:31 -0500 by balkar