19.ElasticSearch系列之top_hits去重返回内容唯一值

之前我们了解过 cardinality,它只能统计去重后的数量;当我们需要按内容去重、并返回每个唯一值对应的文档内容时,就需要用到 top_hits。
例如有这样的需求:统计客户地址所在小区的信息。多个客户地址可能位于同一个小区,所以需要按小区对内容去重。

# 外层 size=0 表示不返回查询命中的文档,只返回聚合结果;top_hits 的 _source 中指定要返回的小区字段,size=1 表示每个分桶只取第一条文档
POST customer/_search
{
    
    
  "size": 0,
  "query": {
    
    
    "bool": {
    
    
      "must": [
        {
    
    
          "term": {
    
    
            "city": {
    
    
              "value": "上海"
            }
          }
        }
      ]
    }
  }, 
  "aggs": {
    
    
    "seaweed_id": {
    
    
      "terms": {
    
    
        "field": "seaweed_id",
        "size": 2000
      },
      "aggs": {
    
    
        "top_hits": {
    
    
          "top_hits": {
    
    
            "_source": {
    
    
              "includes": [
                "city",
                "region",
                "name",
                "location"
              ]
            }, 
            "size": 1
          }
        }
      }
    }
  }
}

返回结果示例:

{
    
    
  ...
  "aggregations" : {
    
    
    "seaweed_id" : {
    
    
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
    
    
          "key" : "51ff8167d5fd56fe5ac14",
          "doc_count" : 18,
          "top_hits" : {
    
    
            "hits" : {
    
    
              "total" : {
    
    
                "value" : 18,
                "relation" : "eq"
              },
              "max_score" : 0.001962709,
              "hits" : [
                {
    
    
                  "_index" : "customer",
                  "_type" : "_doc",
                  "_id" : "a5cf9e2cf55fb1f788d2fcdfe4d",
                  "_score" : 0.001962709,
                  "_source" : {
    
    
                    "city" : "上海",
                    "name" : "育秀东区",
                    "location" : {
    
    
                      "lon" : 121.469335,
                      "lat" : 30.907972
                    },
                    "region" : "奉贤"
                  }
                }
              ]
            }
          }
        }
...

JAVA代码部分示例:

 /**
  * Attaches one sub-aggregation to {@code aggregationBuilder} for every entry of {@code aggMap}.
  *
  * <p>The entry key is used both as the sub-aggregation name and as the field it operates on; the
  * entry value selects the aggregation kind: {@code count}, {@code avg}, {@code distinct}
  * (cardinality), {@code sum} or {@code top_hits}. Entries with any other value are skipped.
  * For {@code top_hits} the key is split on commas to form the list of source fields to include,
  * and only one hit is requested.
  *
  * @param aggMap             field name (or comma-separated field list) mapped to aggregation kind
  * @param aggregationBuilder parent aggregation that receives the sub-aggregations
  */
 private void buildAggParam(Map<String, String> aggMap, AggregationBuilder aggregationBuilder) {
        for (Map.Entry<String, String> entry : aggMap.entrySet()) {
            String field = entry.getKey();
            String kind = entry.getValue();
            if (kind == null) {
                // No kind configured for this field: nothing to attach.
                continue;
            }
            switch (kind) {
                case "count":
                    aggregationBuilder.subAggregation(AggregationBuilders.count(field).field(field));
                    break;
                case "avg":
                    aggregationBuilder.subAggregation(AggregationBuilders.avg(field).field(field));
                    break;
                case "distinct":
                    aggregationBuilder.subAggregation(AggregationBuilders.cardinality(field).field(field));
                    break;
                case "sum":
                    aggregationBuilder.subAggregation(AggregationBuilders.sum(field).field(field));
                    break;
                case "top_hits":
                    // The key doubles as the comma-separated list of _source fields to return.
                    String[] includes = field.split(",");
                    aggregationBuilder.subAggregation(
                            AggregationBuilders.topHits(field).fetchSource(includes, null).size(1));
                    break;
                default:
                    // Unknown kinds are ignored.
                    break;
            }
        }
    }
/**
 * Converts the given aggregations into display strings and stores them in {@code resultMap}
 * under {@code String.valueOf(key2)}.
 *
 * <p>Each aggregation is rendered according to its type:
 * <ul>
 *   <li>{@code avg} and {@code sum}: whole numbers are printed without a decimal point,
 *       anything else as a plain {@code double};</li>
 *   <li>{@code value_count} and {@code cardinality}: the full {@code long} value;</li>
 *   <li>{@code top_hits}: the source of the first hit as a JSON string;</li>
 *   <li>any other type, or a {@code top_hits} with no hits: the placeholder {@code "-"}.</li>
 * </ul>
 *
 * @param resultMap    output map; receives a new inner map of aggregation name to rendered value
 * @param key2         key for the inner map (presumably the bucket key; verify against caller)
 * @param aggregations aggregations to render
 */
private void parseAggResult(Map<String, Map<String, String>> resultMap, Object key2, Aggregations aggregations) {
        Map<String, String> subMap = new HashMap<>();
        resultMap.put(String.valueOf(key2), subMap);

        for (Map.Entry<String, Aggregation> entry : aggregations.getAsMap().entrySet()) {
            Aggregation aggregation = entry.getValue();
            String type = aggregation.getType();
            String subVal = "-";
            if ("avg".equals(type)) {
                subVal = formatNumber(((ParsedAvg) aggregation).getValue());
            } else if ("value_count".equals(type)) {
                // Keep the long as-is: narrowing to int would wrap for counts above 2^31-1.
                subVal = String.valueOf(((ParsedValueCount) aggregation).getValue());
            } else if ("cardinality".equals(type)) {
                subVal = String.valueOf(((ParsedCardinality) aggregation).getValue());
            } else if ("sum".equals(type)) {
                // Format like avg so fractional or very large sums are not truncated to an int.
                subVal = formatNumber(((ParsedSum) aggregation).getValue());
            } else if ("top_hits".equals(type)) {
                SearchHit[] hits = ((ParsedTopHits) aggregation).getHits().getHits();
                // Guard against an empty hit array instead of failing on index 0.
                if (hits.length > 0) {
                    subVal = hits[0].getSourceAsString();
                }
            }
            subMap.put(entry.getKey(), subVal);
        }
    }

    /**
     * Renders a metric value: whole numbers without a decimal point, everything else
     * (fractions, NaN, infinities, magnitudes too large to be exact) via {@link String#valueOf(double)}.
     */
    private static String formatNumber(double value) {
        // Below 2^53 every whole double converts to long exactly.
        boolean exactWhole = Math.abs(value) < 9.0e15 && value == Math.rint(value);
        return exactWhole ? String.valueOf((long) value) : String.valueOf(value);
    }

欢迎关注公众号算法小生

猜你喜欢

转载自blog.csdn.net/SJshenjian/article/details/129963322