之前我们了解过 cardinality 聚合,它只能统计去重后的数量;当需要按内容去重并返回去重后的文档内容时,就需要用到 top_hits 聚合
例如有这样一个需求:统计客户地址所在小区的信息。多个客户地址可能位于同一个小区,因此需要按小区对内容去重
# _source 中指定要返回的小区字段;top_hits 的 size=1 表示每个分桶(即每个小区)只取第一条文档
POST customer/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"city": {
"value": "上海"
}
}
}
]
}
},
"aggs": {
"seaweed_id": {
"terms": {
"field": "seaweed_id",
"size": 2000
},
"aggs": {
"top_hits": {
"top_hits": {
"_source": {
"includes": [
"city",
"region",
"name",
"location"
]
},
"size": 1
}
}
}
}
}
}
返回结果示例:
{
...
"aggregations" : {
"seaweed_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "51ff8167d5fd56fe5ac14",
"doc_count" : 18,
"top_hits" : {
"hits" : {
"total" : {
"value" : 18,
"relation" : "eq"
},
"max_score" : 0.001962709,
"hits" : [
{
"_index" : "customer",
"_type" : "_doc",
"_id" : "a5cf9e2cf55fb1f788d2fcdfe4d",
"_score" : 0.001962709,
"_source" : {
"city" : "上海",
"name" : "育秀东区",
"location" : {
"lon" : 121.469335,
"lat" : 30.907972
},
"region" : "奉贤"
}
}
]
}
}
}
...
Java 代码部分示例:
/**
 * Adds one sub-aggregation to {@code aggregationBuilder} for each entry of {@code aggMap}.
 *
 * <p>The map key is used both as the sub-aggregation name and as the field it operates on;
 * the map value selects the aggregation type. Recognised types are {@code count},
 * {@code avg}, {@code distinct} (cardinality), {@code sum} and {@code top_hits}. For
 * {@code top_hits} the key is split on commas to form the {@code _source} include list and
 * a single hit is requested. Entries with any other type are ignored.
 *
 * @param aggMap             field name (or comma-separated field list) mapped to aggregation type
 * @param aggregationBuilder parent aggregation that receives the sub-aggregations
 */
private void buildAggParam(Map<String, String> aggMap, AggregationBuilder aggregationBuilder) {
    for (Map.Entry<String, String> entry : aggMap.entrySet()) {
        String field = entry.getKey();
        String aggType = entry.getValue();
        if (aggType == null) {
            // A null type matches none of the known names; skip it instead of letting
            // the switch below throw.
            continue;
        }
        switch (aggType) {
            case "count":
                aggregationBuilder.subAggregation(AggregationBuilders.count(field).field(field));
                break;
            case "avg":
                aggregationBuilder.subAggregation(AggregationBuilders.avg(field).field(field));
                break;
            case "distinct":
                aggregationBuilder.subAggregation(AggregationBuilders.cardinality(field).field(field));
                break;
            case "sum":
                aggregationBuilder.subAggregation(AggregationBuilders.sum(field).field(field));
                break;
            case "top_hits":
                // Only the listed source fields are fetched, and only one document per bucket.
                aggregationBuilder.subAggregation(
                        AggregationBuilders.topHits(field).fetchSource(field.split(","), null).size(1));
                break;
            default:
                // Unknown aggregation type: nothing to add.
                break;
        }
    }
}
/**
 * Flattens the sub-aggregations of one bucket into a {@code name -> text} map and stores
 * that map in {@code resultMap} under {@code String.valueOf(key2)}.
 *
 * <p>Each sub-aggregation is rendered according to its type:
 * <ul>
 *   <li>{@code avg} and {@code sum}: the numeric value, without a decimal part when it is a
 *       whole number;</li>
 *   <li>{@code value_count} and {@code cardinality}: the count as a 64-bit integer;</li>
 *   <li>{@code top_hits}: the JSON source of the first hit;</li>
 *   <li>anything else, or a {@code top_hits} result with no hit or no source: {@code "-"}.</li>
 * </ul>
 *
 * @param resultMap    receives the per-bucket map; an existing entry for the same key is replaced
 * @param key2         bucket key, converted with {@link String#valueOf(Object)}
 * @param aggregations sub-aggregations of the bucket
 */
private void parseAggResult(Map<String, Map<String, String>> resultMap, Object key2, Aggregations aggregations) {
    Map<String, String> subMap = new HashMap<>();
    resultMap.put(String.valueOf(key2), subMap);
    for (Map.Entry<String, Aggregation> entry : aggregations.getAsMap().entrySet()) {
        Aggregation aggregation = entry.getValue();
        String type = aggregation.getType();
        String subVal = "-";
        if ("avg".equals(type)) {
            subVal = formatAggNumber(((ParsedAvg) aggregation).getValue());
        } else if ("value_count".equals(type)) {
            // Keep the full 64-bit count; narrowing to int wraps above Integer.MAX_VALUE.
            subVal = String.valueOf((long) ((ParsedValueCount) aggregation).getValue());
        } else if ("cardinality".equals(type)) {
            subVal = String.valueOf((long) ((ParsedCardinality) aggregation).getValue());
        } else if ("sum".equals(type)) {
            // A sum may be fractional or exceed the int range, so format it like avg
            // instead of truncating it with an int cast.
            subVal = formatAggNumber(((ParsedSum) aggregation).getValue());
        } else if ("top_hits".equals(type)) {
            SearchHit[] hits = ((ParsedTopHits) aggregation).getHits().getHits();
            if (hits.length > 0) {
                String source = hits[0].getSourceAsString();
                if (source != null) {
                    subVal = source;
                }
            }
        }
        subMap.put(entry.getKey(), subVal);
    }
}

/**
 * Renders a metric value as text: whole numbers are printed without a decimal part,
 * everything else (fractions, NaN, infinities, very large magnitudes) uses
 * {@link String#valueOf(double)}.
 */
private String formatAggNumber(double value) {
    // 2^53: up to this magnitude every whole double converts to long without loss.
    final double maxExactWhole = 9007199254740992.0;
    if (Math.abs(value) <= maxExactWhole && value == (long) value) {
        return String.valueOf((long) value);
    }
    return String.valueOf(value);
}
欢迎关注公众号算法小生