一、需求
A、B、C 代表 3 个用户:下面每一行的第一列是用户,第二列是该用户某一次的得分。要求:求出 A、B、C 各自的最好成绩(最高分),以及这三个最好成绩的平均值。
A 10
A 11
A 13
B 11
B 11
B 12
C 10
C 10
C 11
C 15
二、思路
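先用 terms 聚合按 user 分组,在每个分组内用 max 聚合求最高分,最后用 avg_bucket 管道聚合(pipeline aggregation)对各分组的最高分求平均值。一开始想用 bucket_script 解决,实验发现走不通:bucket_script 是父级(parent)管道聚合,只能在每个桶内部对该桶自己的指标做计算,不能把多个桶的结果汇总成一个值;不过在需要对每个桶的聚合结果做二次运算时,bucket_script 很有用。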
三、测试数据
PUT sport
{
  "mappings": {
    "grade": {
      "properties": {
        "user": { "type": "keyword" },
        "grade": { "type": "integer" }
      }
    }
  }
}
PUT sport/grade/1
{ "user": "A", "grade": 10 }
PUT sport/grade/2
{ "user": "A", "grade": 11 }
PUT sport/grade/3
{ "user": "A", "grade": 13 }
PUT sport/grade/4
{ "user": "B", "grade": 11 }
PUT sport/grade/5
{ "user": "B", "grade": 11 }
PUT sport/grade/6
{ "user": "B", "grade": 12 }
PUT sport/grade/7
{ "user": "C", "grade": 10 }
PUT sport/grade/8
{ "user": "C", "grade": 10 }
PUT sport/grade/9
{ "user": "C", "grade": 11 }
PUT sport/grade/10
{ "user": "C", "grade": 15 }
四、聚合
GET sport/_search
{
  "size": 0,
  "aggs": {
    "avg_score": {
      "terms": { "field": "user" },
      "aggs": {
        "max_score": {
          "max": { "field": "grade" }
        }
      }
    },
    "avg_max_score": {
      "avg_bucket": { "buckets_path": "avg_score>max_score" }
    }
  }
}
结果:
{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 10,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "avg_score": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "C",
          "doc_count": 4,
          "max_score": {
            "value": 15
          }
        },
        {
          "key": "A",
          "doc_count": 3,
          "max_score": {
            "value": 13
          }
        },
        {
          "key": "B",
          "doc_count": 3,
          "max_score": {
            "value": 12
          }
        }
      ]
    },
    "avg_max_score": {
      "value": 13.333333333333334
    }
  }
}