From 582baea89857a1abcf0cb721ea89d7f85057337b Mon Sep 17 00:00:00 2001 From: Keenan Nemetz Date: Tue, 26 Sep 2023 17:34:57 -0700 Subject: [PATCH 1/2] refactor normalize filter logic --- planner/filter/normalize.go | 269 +++++++++++++++++++------------ planner/filter/normalize_test.go | 160 +++++++++--------- 2 files changed, 247 insertions(+), 182 deletions(-) diff --git a/planner/filter/normalize.go b/planner/filter/normalize.go index 5f7d275418..05c7c508ef 100644 --- a/planner/filter/normalize.go +++ b/planner/filter/normalize.go @@ -16,134 +16,199 @@ import ( ) // normalize normalizes the provided filter conditions. +// // The following cases are subject of normalization: // - _and or _or with one element is removed flattened // - double _not is removed // - any number of consecutive _ands with any number of elements is flattened +// // As the result object is a map with unique keys (a.k.a. properties), // while performing flattening of compound operators if the same property // is present in the result map, both conditions will be moved into an _and func normalize(conditions map[connor.FilterKey]any) map[connor.FilterKey]any { - return normalizeConditions(conditions, false).(map[connor.FilterKey]any) + return normalizeCondition(nil, conditions).(map[connor.FilterKey]any) +} + +// normalizeCondition returns a normalized version of the given condition. +func normalizeCondition(parentKey connor.FilterKey, condition any) (result any) { + switch t := condition.(type) { + case map[connor.FilterKey]any: + result = normalizeConditions(parentKey, t) + + case []any: + conditions := make([]any, len(t)) + for i, c := range t { + conditions[i] = normalizeCondition(parentKey, c) + } + result = conditions + + default: + result = t + } + + return normalizeProperty(parentKey, result) } -func conditionsArrToMap(conditions []any) map[connor.FilterKey]any { +// normalizeConditions returns a normalized version of the given conditions. +func normalizeConditions(parentKey connor.FilterKey, conditions map[connor.FilterKey]any) map[connor.FilterKey]any { result := make(map[connor.FilterKey]any) - for _, clause := range conditions { - if clauseMap, ok := clause.(map[connor.FilterKey]any); ok { - for k, v := range clauseMap { - result[k] = v + for key, val := range conditions { + result[key] = normalizeCondition(key, val) + + // check if the condition is an operator that can be normalized + op, ok := key.(*mapper.Operator) + if !ok { + continue + } + // check if we have any conditions that can be merged + merge := normalizeOperator(parentKey, op, result[key]) + if len(merge) == 0 { + continue + } + delete(result, key) + + // merge properties directly into result + for _, c := range merge { + for key, val := range c.(map[connor.FilterKey]any) { + result[key] = val } } } return result } -func addNormalizedCondition(key connor.FilterKey, val any, m map[connor.FilterKey]any) { - if _, isProp := key.(*mapper.PropertyIndex); isProp { - var andOp *mapper.Operator - var andContent []any - for existingKey := range m { - if op, isOp := existingKey.(*mapper.Operator); isOp && op.Operation == request.FilterOpAnd { - andOp = op - andContent = m[existingKey].([]any) - break - } - } - for existingKey := range m { - if existingKey.Equal(key) { - existingVal := m[existingKey] - delete(m, existingKey) - if andOp == nil { - andOp = &mapper.Operator{Operation: request.FilterOpAnd} - } - m[andOp] = append( - andContent, - map[connor.FilterKey]any{existingKey: existingVal}, - map[connor.FilterKey]any{key: val}, - ) - return - } +// normalizeOperator returns a normalized array of conditions. +func normalizeOperator(parentKey connor.FilterKey, op *mapper.Operator, condition any) []any { + switch op.Operation { + case request.FilterOpNot: + return normalizeOperatorNot(condition) + + case request.FilterOpOr: + return normalizeOperatorOr(condition) + + case request.FilterOpAnd: + return normalizeOperatorAnd(parentKey, condition) + + default: + return nil + } +} + +// normalizeOperatorAnd returns an array of conditions with all _and operators merged. +// +// If the parent operator is _not or _or, the subconditions will not be merged. +func normalizeOperatorAnd(parentKey connor.FilterKey, condition any) []any { + result := condition.([]any) + // always merge if only 1 property + if len(result) == 1 { + return result + } + // always merge if parent is not an operator + parentOp, ok := parentKey.(*mapper.Operator) + if !ok { + return result + } + // don't merge if parent is a _not or _or operator + if parentOp.Operation == request.FilterOpNot || parentOp.Operation == request.FilterOpOr { + return nil + } + return result +} + +// normalizeOperatorOr returns an array of conditions with all single _or operators merged. +func normalizeOperatorOr(condition any) []any { + result := condition.([]any) + // don't merge if more than 1 property + if len(result) > 1 { + return nil + } + return result +} + +// normalizeOperatorNot returns an array of conditions with all double _not operators merged. +func normalizeOperatorNot(condition any) (result []any) { + subConditions := condition.(map[connor.FilterKey]any) + // don't merge if more than 1 property + if len(subConditions) > 1 { + return nil + } + // find double _not occurances + for subKey, subCondition := range subConditions { + op, ok := subKey.(*mapper.Operator) + if ok && op.Operation == request.FilterOpNot { + result = append(result, subCondition) } - for _, andElement := range andContent { - elementMap := andElement.(map[connor.FilterKey]any) - for andElementKey := range elementMap { - if andElementKey.Equal(key) { - m[andOp] = append(andContent, map[connor.FilterKey]any{key: val}) - return - } + } + return result +} + +// normalizeProperty flattens and groups property filters where possible. +// +// Filters targeting the same property will be grouped into a single _and. +func normalizeProperty(parentKey connor.FilterKey, condition any) any { + switch t := condition.(type) { + case map[connor.FilterKey]any: + results := make(map[connor.FilterKey]any) + for _, c := range normalizeProperties(parentKey, []any{t}) { + for key, val := range c.(map[connor.FilterKey]any) { + results[key] = val } } + return results + + case []any: + return normalizeProperties(parentKey, t) + + default: + return t } - m[key] = val } -func normalizeConditions(conditions any, skipRoot bool) any { - result := make(map[connor.FilterKey]any) - switch typedConditions := conditions.(type) { - case map[connor.FilterKey]any: - for rootKey, rootVal := range typedConditions { - rootOpKey, isRootOp := rootKey.(*mapper.Operator) - if isRootOp { - if rootOpKey.Operation == request.FilterOpAnd || rootOpKey.Operation == request.FilterOpOr { - rootValArr := rootVal.([]any) - if len(rootValArr) == 1 || rootOpKey.Operation == request.FilterOpAnd && !skipRoot { - flat := normalizeConditions(conditionsArrToMap(rootValArr), false) - flatMap := flat.(map[connor.FilterKey]any) - for k, v := range flatMap { - addNormalizedCondition(k, v, result) - } - } else { - resultArr := []any{} - for i := range rootValArr { - norm := normalizeConditions(rootValArr[i], !skipRoot) - normMap, ok := norm.(map[connor.FilterKey]any) - if ok { - for k, v := range normMap { - resultArr = append(resultArr, map[connor.FilterKey]any{k: v}) - } - } else { - resultArr = append(resultArr, norm) - } - } - addNormalizedCondition(rootKey, resultArr, result) - } - } else if rootOpKey.Operation == request.FilterOpNot { - notMap := rootVal.(map[connor.FilterKey]any) - if len(notMap) == 1 { - var k connor.FilterKey - for k = range notMap { - break - } - norm := normalizeConditions(notMap, true).(map[connor.FilterKey]any) - delete(notMap, k) - var v any - for k, v = range norm { - break - } - if opKey, ok := k.(*mapper.Operator); ok && opKey.Operation == request.FilterOpNot { - notNotMap := normalizeConditions(v, false).(map[connor.FilterKey]any) - for notNotKey, notNotVal := range notNotMap { - addNormalizedCondition(notNotKey, notNotVal, result) - } - } else { - notMap[k] = v - addNormalizedCondition(rootOpKey, notMap, result) - } - } else { - addNormalizedCondition(rootKey, rootVal, result) - } - } else { - addNormalizedCondition(rootKey, rootVal, result) - } +// normalizeProperty flattens and groups property filters where possible. +// +// Filters targeting the same property will be grouped into a single _and. +func normalizeProperties(parentKey connor.FilterKey, conditions []any) []any { + var merge []any + var result []any + + // can only merge _and groups if parent is not an _or operator + parentOp, isParentOp := parentKey.(*mapper.Operator) + canMergeAnd := !isParentOp || parentOp.Operation != request.FilterOpOr + + // accumulate properties that can be merged into a single _and + // if canMergeAnd is true all _and groups will be merged + props := make(map[int][]any) + for _, c := range conditions { + for key, val := range c.(map[connor.FilterKey]any) { + op, ok := key.(*mapper.Operator) + if canMergeAnd && ok && op.Operation == request.FilterOpAnd { + merge = append(merge, val.([]any)...) + } else if prop, ok := key.(*mapper.PropertyIndex); ok { + props[prop.Index] = append(props[prop.Index], map[connor.FilterKey]any{key: val}) } else { - addNormalizedCondition(rootKey, normalizeConditions(rootVal, false), result) + result = append(result, map[connor.FilterKey]any{key: val}) } } + } + + // merge filters with duplicate keys into a single _and + for _, val := range props { + if len(val) == 1 { + // only 1 property so no merge required + result = append(result, val...) + } else { + // multiple properties require merge with _and + merge = append(merge, val...) + } + } + + // nothing to merge + if len(merge) == 0 { return result - case []any: - return conditionsArrToMap(typedConditions) - default: - return conditions } + + // merge into a single _and operator + key := &mapper.Operator{Operation: request.FilterOpAnd} + result = append(result, map[connor.FilterKey]any{key: merge}) + return result } diff --git a/planner/filter/normalize_test.go b/planner/filter/normalize_test.go index 22e4f69ed0..7c66bfdbf0 100644 --- a/planner/filter/normalize_test.go +++ b/planner/filter/normalize_test.go @@ -208,86 +208,86 @@ func TestNormalizeConditions(t *testing.T) { ), ), }, - { - name: "flatten _and, grand children of _or", - input: r("_or", - r("_and", - r("_and", - m("name", m("_eq", "Islam")), - m("age", m("_eq", "30")), - ), - m("verified", m("_eq", false)), - ), - r("_and", - m("name", m("_eq", "John")), - m("verified", m("_eq", true)), - ), - ), - expected: r("_or", - r("_and", - m("name", m("_eq", "Islam")), - m("age", m("_eq", "30")), - m("verified", m("_eq", false)), - ), - r("_and", - m("name", m("_eq", "John")), - m("verified", m("_eq", true)), - ), - ), - }, - { - name: "squash same keys into _and", - input: map[string]any{ - "_and": []any{ - r("_and", - m("age", m("_gt", 30)), - m("published", m("rating", m("_lt", 4.8))), - ), - r("_and", m("age", m("_lt", 55))), - m("age", m("_ne", 33)), - }, - "name": m("_eq", "John"), - }, - expected: map[string]any{ - "name": m("_eq", "John"), - "published": m("rating", m("_lt", 4.8)), - "_and": []any{ - m("age", m("_gt", 30)), - m("age", m("_lt", 55)), - m("age", m("_ne", 33)), - }, - }, - }, - { - name: "squash same keys into _and (with more matching keys)", - input: map[string]any{ - "_and": []any{ - m("published", m("rating", m("_lt", 4.8))), - r("_and", m("name", m("_ne", "Islam"))), - r("_and", - m("age", m("_gt", 30)), - m("published", m("genre", m("_eq", "Thriller"))), - m("verified", m("_eq", true)), - ), - r("_and", - m("age", m("_lt", 55)), - m("published", m("rating", m("_gt", 4.4)))), - }, - "name": m("_eq", "John"), - }, - expected: map[string]any{ - "_and": []any{ - m("name", m("_eq", "John")), - m("name", m("_ne", "Islam")), - m("published", m("rating", m("_gt", 4.4))), - m("published", m("rating", m("_lt", 4.8))), - m("published", m("genre", m("_eq", "Thriller"))), - m("age", m("_gt", 30)), - m("age", m("_lt", 55)), - }, - "verified": m("_eq", true), - }, - }, + // { + // name: "flatten _and, grand children of _or", + // input: r("_or", + // r("_and", + // r("_and", + // m("name", m("_eq", "Islam")), + // m("age", m("_eq", "30")), + // ), + // m("verified", m("_eq", false)), + // ), + // r("_and", + // m("name", m("_eq", "John")), + // m("verified", m("_eq", true)), + // ), + // ), + // expected: r("_or", + // r("_and", + // m("name", m("_eq", "Islam")), + // m("age", m("_eq", "30")), + // m("verified", m("_eq", false)), + // ), + // r("_and", + // m("name", m("_eq", "John")), + // m("verified", m("_eq", true)), + // ), + // ), + // }, + // { + // name: "squash same keys into _and", + // input: map[string]any{ + // "_and": []any{ + // r("_and", + // m("age", m("_gt", 30)), + // m("published", m("rating", m("_lt", 4.8))), + // ), + // r("_and", m("age", m("_lt", 55))), + // m("age", m("_ne", 33)), + // }, + // "name": m("_eq", "John"), + // }, + // expected: map[string]any{ + // "name": m("_eq", "John"), + // "published": m("rating", m("_lt", 4.8)), + // "_and": []any{ + // m("age", m("_gt", 30)), + // m("age", m("_lt", 55)), + // m("age", m("_ne", 33)), + // }, + // }, + // }, + // { + // name: "squash same keys into _and (with more matching keys)", + // input: map[string]any{ + // "_and": []any{ + // m("published", m("rating", m("_lt", 4.8))), + // r("_and", m("name", m("_ne", "Islam"))), + // r("_and", + // m("age", m("_gt", 30)), + // m("published", m("genre", m("_eq", "Thriller"))), + // m("verified", m("_eq", true)), + // ), + // r("_and", + // m("age", m("_lt", 55)), + // m("published", m("rating", m("_gt", 4.4)))), + // }, + // "name": m("_eq", "John"), + // }, + // expected: map[string]any{ + // "_and": []any{ + // m("name", m("_eq", "John")), + // m("name", m("_ne", "Islam")), + // m("published", m("rating", m("_gt", 4.4))), + // m("published", m("rating", m("_lt", 4.8))), + // m("published", m("genre", m("_eq", "Thriller"))), + // m("age", m("_gt", 30)), + // m("age", m("_lt", 55)), + // }, + // "verified": m("_eq", true), + // }, + // }, } mapping := getDocMapping() From a8050e76fd2a8752aab077543f62c9f4403eb7c0 Mon Sep 17 00:00:00 2001 From: Keenan Nemetz Date: Wed, 27 Sep 2023 10:25:01 -0700 Subject: [PATCH 2/2] all tests passing --- planner/filter/normalize.go | 8 +- planner/filter/normalize_test.go | 160 +++++++++++++++---------------- 2 files changed, 87 insertions(+), 81 deletions(-) diff --git a/planner/filter/normalize.go b/planner/filter/normalize.go index 05c7c508ef..181b1f8485 100644 --- a/planner/filter/normalize.go +++ b/planner/filter/normalize.go @@ -73,6 +73,12 @@ func normalizeConditions(parentKey connor.FilterKey, conditions map[connor.Filte result[key] = val } } + + // if the merged filter was an _or operator + // there may be child filters that can be merged + if op.Operation == request.FilterOpOr { + result = normalizeConditions(parentKey, result) + } } return result } @@ -176,7 +182,7 @@ func normalizeProperties(parentKey connor.FilterKey, conditions []any) []any { canMergeAnd := !isParentOp || parentOp.Operation != request.FilterOpOr // accumulate properties that can be merged into a single _and - // if canMergeAnd is true all _and groups will be merged + // if canMergeAnd is true, all _and groups will be merged props := make(map[int][]any) for _, c := range conditions { for key, val := range c.(map[connor.FilterKey]any) { diff --git a/planner/filter/normalize_test.go b/planner/filter/normalize_test.go index 7c66bfdbf0..22e4f69ed0 100644 --- a/planner/filter/normalize_test.go +++ b/planner/filter/normalize_test.go @@ -208,86 +208,86 @@ func TestNormalizeConditions(t *testing.T) { ), ), }, - // { - // name: "flatten _and, grand children of _or", - // input: r("_or", - // r("_and", - // r("_and", - // m("name", m("_eq", "Islam")), - // m("age", m("_eq", "30")), - // ), - // m("verified", m("_eq", false)), - // ), - // r("_and", - // m("name", m("_eq", "John")), - // m("verified", m("_eq", true)), - // ), - // ), - // expected: r("_or", - // r("_and", - // m("name", m("_eq", "Islam")), - // m("age", m("_eq", "30")), - // m("verified", m("_eq", false)), - // ), - // r("_and", - // m("name", m("_eq", "John")), - // m("verified", m("_eq", true)), - // ), - // ), - // }, - // { - // name: "squash same keys into _and", - // input: map[string]any{ - // "_and": []any{ - // r("_and", - // m("age", m("_gt", 30)), - // m("published", m("rating", m("_lt", 4.8))), - // ), - // r("_and", m("age", m("_lt", 55))), - // m("age", m("_ne", 33)), - // }, - // "name": m("_eq", "John"), - // }, - // expected: map[string]any{ - // "name": m("_eq", "John"), - // "published": m("rating", m("_lt", 4.8)), - // "_and": []any{ - // m("age", m("_gt", 30)), - // m("age", m("_lt", 55)), - // m("age", m("_ne", 33)), - // }, - // }, - // }, - // { - // name: "squash same keys into _and (with more matching keys)", - // input: map[string]any{ - // "_and": []any{ - // m("published", m("rating", m("_lt", 4.8))), - // r("_and", m("name", m("_ne", "Islam"))), - // r("_and", - // m("age", m("_gt", 30)), - // m("published", m("genre", m("_eq", "Thriller"))), - // m("verified", m("_eq", true)), - // ), - // r("_and", - // m("age", m("_lt", 55)), - // m("published", m("rating", m("_gt", 4.4)))), - // }, - // "name": m("_eq", "John"), - // }, - // expected: map[string]any{ - // "_and": []any{ - // m("name", m("_eq", "John")), - // m("name", m("_ne", "Islam")), - // m("published", m("rating", m("_gt", 4.4))), - // m("published", m("rating", m("_lt", 4.8))), - // m("published", m("genre", m("_eq", "Thriller"))), - // m("age", m("_gt", 30)), - // m("age", m("_lt", 55)), - // }, - // "verified": m("_eq", true), - // }, - // }, + { + name: "flatten _and, grand children of _or", + input: r("_or", + r("_and", + r("_and", + m("name", m("_eq", "Islam")), + m("age", m("_eq", "30")), + ), + m("verified", m("_eq", false)), + ), + r("_and", + m("name", m("_eq", "John")), + m("verified", m("_eq", true)), + ), + ), + expected: r("_or", + r("_and", + m("name", m("_eq", "Islam")), + m("age", m("_eq", "30")), + m("verified", m("_eq", false)), + ), + r("_and", + m("name", m("_eq", "John")), + m("verified", m("_eq", true)), + ), + ), + }, + { + name: "squash same keys into _and", + input: map[string]any{ + "_and": []any{ + r("_and", + m("age", m("_gt", 30)), + m("published", m("rating", m("_lt", 4.8))), + ), + r("_and", m("age", m("_lt", 55))), + m("age", m("_ne", 33)), + }, + "name": m("_eq", "John"), + }, + expected: map[string]any{ + "name": m("_eq", "John"), + "published": m("rating", m("_lt", 4.8)), + "_and": []any{ + m("age", m("_gt", 30)), + m("age", m("_lt", 55)), + m("age", m("_ne", 33)), + }, + }, + }, + { + name: "squash same keys into _and (with more matching keys)", + input: map[string]any{ + "_and": []any{ + m("published", m("rating", m("_lt", 4.8))), + r("_and", m("name", m("_ne", "Islam"))), + r("_and", + m("age", m("_gt", 30)), + m("published", m("genre", m("_eq", "Thriller"))), + m("verified", m("_eq", true)), + ), + r("_and", + m("age", m("_lt", 55)), + m("published", m("rating", m("_gt", 4.4)))), + }, + "name": m("_eq", "John"), + }, + expected: map[string]any{ + "_and": []any{ + m("name", m("_eq", "John")), + m("name", m("_ne", "Islam")), + m("published", m("rating", m("_gt", 4.4))), + m("published", m("rating", m("_lt", 4.8))), + m("published", m("genre", m("_eq", "Thriller"))), + m("age", m("_gt", 30)), + m("age", m("_lt", 55)), + }, + "verified": m("_eq", true), + }, + }, } mapping := getDocMapping()