3
0

Prefix DAL pipeline join attributes with source identifier

This helps avoid name collisions in case the identifiers weren't
provided manually.
This commit is contained in:
Tomaž Jerman 2022-09-29 13:41:05 +02:00
parent 5fa45e0cf7
commit c587db760e
9 changed files with 232 additions and 199 deletions

View File

@ -132,10 +132,37 @@ func (def *Join) init(ctx context.Context, left, right Iterator) (exec *joinLeft
def.RightAttributes = collectAttributes(def.relRight)
}
// @todo this isn't quite ok -- the ident of left/right must become the src of the out.
// An edge-case but it should be covered.
// When the output attributes are not provided, we determine them based on the input
// attributes.
//
// The join step prefixes each attribute with the identifier of the source they came from.
// This avoids name collisions.
// @todo consider improving this to only prefix when there is a name collision
// and allow the nested attr. to be more permissive; for example, instead of
// requiring a.b.c only b.c or c would be enough.
if len(def.OutAttributes) == 0 {
def.OutAttributes = append(def.LeftAttributes, def.RightAttributes...)
ins := func(srcIdent string, aa []AttributeMapping) {
for _, a := range aa {
p := a.Properties()
def.OutAttributes = append(def.OutAttributes, SimpleAttr{
Ident: fmt.Sprintf("%s%s%s", srcIdent, attributeNestingSeparator, a.Identifier()),
Expr: a.Expression(),
Src: a.Identifier(),
Props: p,
})
}
}
def.OutAttributes = make([]AttributeMapping, 0, len(def.LeftAttributes)+len(def.RightAttributes))
ins(def.RelLeft, def.LeftAttributes)
ins(def.RelRight, def.RightAttributes)
}
// Assure and attempt to correct the provided sort to conform with the data set and the
// paging cursor (if any)
def.filter, err = assureSort(def.filter, exec.collectPrimaryAttributes())
if err != nil {
return
}
// Index attrs for validations

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"strings"
"github.com/cortezaproject/corteza-server/pkg/filter"
"github.com/tidwall/btree"
@ -286,15 +287,12 @@ func (xs *joinLeft) joinRight(ctx context.Context, left *Row) (err error) {
}
for _, b := range bb {
for _, r := range b.rows {
for _, right := range b.rows {
// Merge the two
err = mergeRows(xs.def.OutAttributes, r, left, r)
if err != nil {
return
}
xs.mergeRows(xs.def.OutAttributes, right, left, right)
// Assert if we want to keep
k, err := xs.keep(ctx, r)
k, err := xs.keep(ctx, right)
if err != nil {
return err
}
@ -302,7 +300,7 @@ func (xs *joinLeft) joinRight(ctx context.Context, left *Row) (err error) {
continue
}
xs.outSorted.Set(r)
xs.outSorted.Set(right)
}
}
@ -312,7 +310,7 @@ func (xs *joinLeft) joinRight(ctx context.Context, left *Row) (err error) {
// getRelatedBuffers returns all of the right rows corresponding to the given left row
func (xs *joinLeft) getRelatedBuffers(l *Row) (out []*relIndexBuffer, ok bool, err error) {
var aux *relIndexBuffer
for c := uint(0); c < l.counters[xs.def.On.Left]; c++ {
for c := uint(0); c < l.CountValues()[xs.def.On.Left]; c++ {
// @note internal Row struct never errors
v, _ := l.GetValue(xs.def.On.Left, c)
aux, ok = xs.relIndex.Get(v)
@ -366,3 +364,54 @@ func (xs *joinLeft) collectPrimaryAttributes() (out []string) {
return
}
// mergeRows merges the left and right rows based on the provided attrs
func (xs *joinLeft) mergeRows(attrs []AttributeMapping, out, left, right *Row) {
var (
identSide int
srcIdent string
)
for _, attr := range attrs {
srcIdent = attr.Identifier()
identSide = xs.identSide(srcIdent)
if identSide == -1 {
xs.mergeValuesFrom(srcIdent, out, left)
} else if identSide == 1 {
xs.mergeValuesFrom(srcIdent, out, right)
} else {
xs.mergeValuesFrom(srcIdent, out, left, right)
}
}
}
func (xs *joinLeft) mergeValuesFrom(ident string, out *Row, sources ...*Row) {
var (
aux any
)
for _, src := range sources {
for c := uint(0); c < src.CountValues()[ident]; c++ {
aux, _ = src.GetValue(ident, c)
out.SetValue(ident, c, aux)
}
}
}
// identSide returns -1 if the ident belongs to the left source, 1 if it belongs
// to the right side, and 0 if it's either.
//
// @todo consider adding an additional flag to identify what side it is on.
// For now, this should be fine.
func (xs *joinLeft) identSide(ident string) int {
pp := strings.Split(ident, attributeNestingSeparator)
if len(pp) > 1 {
if pp[0] == xs.def.RelLeft {
return -1
} else if pp[0] == xs.def.RelRight {
return 1
}
}
return 0
}

View File

@ -17,9 +17,9 @@ func TestStepJoinLocal(t *testing.T) {
crs1.Set("f_val", "f1 v1", false)
basicAttrs := []simpleAttribute{
{ident: "l_pk", t: TypeID{}},
{ident: "l_pk", t: TypeID{}, primary: true},
{ident: "l_val", t: TypeText{}},
{ident: "f_pk", t: TypeID{}},
{ident: "f_pk", t: TypeID{}, primary: true},
{ident: "f_fk", t: TypeRef{}},
{ident: "f_val", t: TypeText{}},
}
@ -667,6 +667,13 @@ func TestStepJoinLocal(t *testing.T) {
f: internalFilter{
cursor: crs1,
orderBy: filter.SortExprSet{
{Column: "l_pk", Descending: false},
{Column: "l_val", Descending: false},
{Column: "f_pk", Descending: false},
{Column: "f_fk", Descending: false},
{Column: "f_val", Descending: false},
},
},
},
{
@ -696,6 +703,13 @@ func TestStepJoinLocal(t *testing.T) {
f: internalFilter{
expression: "true",
cursor: crs1,
orderBy: filter.SortExprSet{
{Column: "l_pk", Descending: false},
{Column: "l_val", Descending: false},
{Column: "f_pk", Descending: false},
{Column: "f_fk", Descending: false},
{Column: "f_val", Descending: false},
},
},
},
{
@ -719,6 +733,13 @@ func TestStepJoinLocal(t *testing.T) {
f: internalFilter{
expression: "false",
cursor: crs1,
orderBy: filter.SortExprSet{
{Column: "l_pk", Descending: false},
{Column: "l_val", Descending: false},
{Column: "f_pk", Descending: false},
{Column: "f_fk", Descending: false},
{Column: "f_val", Descending: false},
},
},
},
}
@ -799,7 +820,7 @@ func TestStepJoinLocal(t *testing.T) {
OutAttributes: saToMapping(tc.outAttributes...),
LeftAttributes: saToMapping(tc.leftAttributes...),
RightAttributes: saToMapping(tc.rightAttributes...),
filter: tc.f,
Filter: tc.f,
plan: joinPlan{},
}
@ -1181,7 +1202,7 @@ func TestStepJoin_paging(t *testing.T) {
f: internalFilter{
limit: 2,
orderBy: filter.SortExprSet{{Column: "l_pk", Descending: false}},
orderBy: filter.SortExprSet{{Column: "l_pk", Descending: false}, {Column: "f_pk", Descending: false}},
},
outF1: []simpleRow{
@ -1219,7 +1240,7 @@ func TestStepJoin_paging(t *testing.T) {
f: internalFilter{
limit: 2,
orderBy: filter.SortExprSet{{Column: "l_pk", Descending: true}},
orderBy: filter.SortExprSet{{Column: "l_pk", Descending: true}, {Column: "f_pk", Descending: true}},
},
outF1: []simpleRow{
@ -1258,7 +1279,7 @@ func TestStepJoin_paging(t *testing.T) {
f: internalFilter{
limit: 2,
orderBy: filter.SortExprSet{{Column: "f_val", Descending: false}},
orderBy: filter.SortExprSet{{Column: "f_val", Descending: false}, {Column: "l_pk", Descending: false}, {Column: "f_pk", Descending: false}},
},
outF1: []simpleRow{
@ -1296,7 +1317,7 @@ func TestStepJoin_paging(t *testing.T) {
f: internalFilter{
limit: 2,
orderBy: filter.SortExprSet{{Column: "f_val", Descending: true}},
orderBy: filter.SortExprSet{{Column: "f_val", Descending: true}, {Column: "l_pk", Descending: true}, {Column: "f_pk", Descending: true}},
},
outF1: []simpleRow{

View File

@ -20,12 +20,17 @@ type (
limit uint
cursor *filter.PagingCursor
}
parsedFilter interface {
ExpressionParsed() *ql.ASTNode
}
)
func (f internalFilter) Constraints() map[string][]any { return f.constraints }
func (f internalFilter) StateConstraints() map[string]filter.State { return f.stateConstraints }
func (f internalFilter) MetaConstraints() map[string]any { return f.metaConstraints }
func (f internalFilter) Expression() string { return f.expression }
func (f internalFilter) ExpressionParsed() *ql.ASTNode { return f.expParsed }
func (f internalFilter) OrderBy() filter.SortExprSet { return f.orderBy }
func (f internalFilter) Limit() uint { return f.limit }
func (f internalFilter) Cursor() *filter.PagingCursor { return f.cursor }
@ -48,12 +53,25 @@ func toInternalFilter(f filter.Filter) (out internalFilter, err error) {
cursor: f.Cursor(),
}
pf, ok := f.(parsedFilter)
if ok {
out.expParsed = pf.ExpressionParsed()
// In case the filter was already provided, we need to make sure the idents
// are wrapped
out.expParsed.Traverse(func(a *ql.ASTNode) (bool, *ql.ASTNode, error) {
a.Symbol = wrapNestedGvalIdent(a.Symbol)
return true, a, nil
})
}
if out.expParsed != nil {
// We can't mix the two so just clear it out to avoid confusion
out.expression = ""
}
// Parse expression for later use
if out.expression != "" {
pp := newQlParser(nil)
pp := newQlParser()
out.expParsed, err = pp.Parse(out.expression)
out.expParsed, err = newQlParser().Parse(out.expression)
if err != nil {
return
}

View File

@ -218,7 +218,14 @@ var (
func newConverterGval(ii ...ql.IdentHandler) converterGval {
if globalGvalConverter.parser == nil {
globalGvalConverter = converterGval{
parser: newQlParser(ii...),
parser: newQlParser(append(
ii,
// This should always happen for gval expressions
func(i ql.Ident) (ql.Ident, error) {
i.Value = wrapNestedGvalIdent(i.Value)
return i, nil
},
)...),
}
}

View File

@ -12,38 +12,18 @@ func TestRowEvaluatorTest(t *testing.T) {
tc := []struct {
name string
expr string
in map[string]ValueGetter
in *Row
out bool
}{{
name: "single row ok",
expr: `row.test == 10`,
in: map[string]ValueGetter{
"row": (&Row{}).WithValue("test", 0, 10),
},
out: true,
expr: `test == 10`,
in: (&Row{}).WithValue("test", 0, 10),
out: true,
}, {
name: "single row nok",
expr: `row.test == 11`,
in: map[string]ValueGetter{
"row": (&Row{}).WithValue("test", 0, 10),
},
out: false,
}, {
name: "two rows ok",
expr: `local.key == foreign.ref`,
in: map[string]ValueGetter{
"local": (&Row{}).WithValue("key", 0, 10),
"foreign": (&Row{}).WithValue("ref", 0, 10),
},
out: true,
}, {
name: "two rows nok",
expr: `local.key == foreign.ref`,
in: map[string]ValueGetter{
"local": (&Row{}).WithValue("key", 0, 10),
"foreign": (&Row{}).WithValue("ref", 0, 11),
},
out: false,
expr: `test == 11`,
in: (&Row{}).WithValue("test", 0, 10),
out: false,
}}
ctx := context.Background()

View File

@ -35,6 +35,11 @@ type (
valueSet map[string][]any
)
const (
attributeNestingSeparator = "."
attributeNestingGvalSeparator = "___DLTR___"
)
func (sa SimpleAttr) Identifier() string { return sa.Ident }
func (sa SimpleAttr) Expression() (expression string) { return sa.Expr }
func (sa SimpleAttr) Source() (ident string) { return sa.Src }
@ -53,7 +58,7 @@ func (r *Row) WithValue(name string, pos uint, v any) *Row {
}
func (r Row) SelectGVal(ctx context.Context, k string) (interface{}, error) {
return r.GetValue(k, 0)
return r.GetValue(unwrapNestedGvalIdent(k), 0)
}
// Reset clears out the row so the same instance can be reused where possible
@ -310,8 +315,9 @@ func stateConstraintsToExpression(cc map[string]filter.State) string {
// like the cursor one does.
func prepareGenericRowTester(f internalFilter) (_ tester, err error) {
var (
parts = make([]string, 0, 5)
pcNode *ql.ASTNode
parts = make([]string, 0, 5)
pcNode *ql.ASTNode
exprNode *ql.ASTNode
)
{
@ -326,9 +332,12 @@ func prepareGenericRowTester(f internalFilter) (_ tester, err error) {
parts = append(parts, stateConstraintsToExpression(sc))
}
// Convert the expression
if expr := f.Expression(); len(expr) != 0 {
parts = append(parts, expr)
exprNode = f.ExpressionParsed()
if exprNode == nil {
expr := f.Expression()
if expr != "" {
parts = append(parts, f.Expression())
}
}
// Convert the paging cursor
@ -337,36 +346,49 @@ func prepareGenericRowTester(f internalFilter) (_ tester, err error) {
if err != nil {
return
}
pcNode.Traverse(func(a *ql.ASTNode) (bool, *ql.ASTNode, error) {
if a.Symbol != "" {
a.Symbol = strings.Replace(a.Symbol, ".", "___DLMTR___", -1)
}
return true, a, nil
})
}
}
expr := strings.Join(parts, " && ")
// Everything is empty, not doing anything
if len(expr) == 0 && pcNode == nil {
if len(expr) == 0 && exprNode == nil && pcNode == nil {
return nil, nil
}
// Parse the base expression and prepare the QL node
if pcNode != nil {
// Use just the paging cursor node
if len(expr) == 0 {
return newRunnerGvalParsed(pcNode)
}
args := make([]*ql.ASTNode, 0, 5)
// Use both the expression and the paging cursor node and-ed together
// Paging cursors
if pcNode != nil {
args = append(args, pcNode)
}
// Parsed filter expression
if exprNode != nil {
args = append(args, exprNode)
}
// Rest of the generated expression string
if len(expr) > 0 {
expr, err := newConverterGval().Parse(expr)
if err != nil {
return nil, err
}
return newRunnerGvalParsed(&ql.ASTNode{
Ref: "and",
Args: ql.ASTNodeSet{pcNode, expr},
})
args = append(args, expr)
}
// Default, parse the expr from source
return newRunnerGval(expr)
return newRunnerGvalParsed(
&ql.ASTNode{
Ref: "and",
Args: args,
},
)
}
// makeRowComparator returns a ValueGetter comparator for the given sort expr
@ -396,67 +418,6 @@ func evalCmpResult(cmp int, s *filter.SortExpr) (less, skip bool) {
return false, true
}
// mergeRows merges all of the provided rows into the destination row
//
// If AttributeMapping is provided, that is taken into account, else
// everything is merged together with the last value winning.
func mergeRows(mapping []AttributeMapping, dst *Row, rows ...*Row) (err error) {
if len(mapping) == 0 {
return mergeRowsFull(dst, rows...)
}
return mergeRowsMapped(mapping, dst, rows...)
}
// mergeRowsFull merges all of the provided rows into the destination row
// The last provided value takes priority.
func mergeRowsFull(dst *Row, rows ...*Row) (err error) {
for _, r := range rows {
for name, vv := range r.values {
for i, values := range vv {
if dst.values == nil {
dst.values = make(valueSet)
dst.counters = make(map[string]uint)
}
if i == 0 {
dst.values[name] = make([]any, len(vv))
dst.counters[name] = 0
}
err = dst.SetValue(name, uint(i), values)
if err != nil {
return
}
}
}
}
return
}
// mergeRowsMapped merges all of the provided rows into the destination row using the provided mapping
// The last provided value takes priority.
func mergeRowsMapped(mapping []AttributeMapping, out *Row, rows ...*Row) (err error) {
for _, mp := range mapping {
name := mp.Source()
for _, r := range rows {
if r.values[name] != nil {
if out.values == nil {
out.values = make(valueSet)
out.counters = make(map[string]uint)
}
out.values[mp.Identifier()] = r.values[name]
out.counters[mp.Identifier()] = r.counters[name]
break
}
}
}
return
}
func indexAttrs(aa ...AttributeMapping) (out map[string]bool) {
out = make(map[string]bool, len(aa))
indexAttrsInto(out, aa...)
@ -494,3 +455,40 @@ func keysFromExpr(nn ...*ql.ASTNode) (out []string, hasConstants bool) {
return
}
// Assure sort validates that the filter's definition includes all of the primary
// keys and that the paging cursor's sort is compatible
func assureSort(f internalFilter, primaries []string) (out internalFilter, err error) {
out = f
// make sure all primary keys are in there
for _, p := range primaries {
if out.orderBy.Get(p) == nil {
out.orderBy = append(out.orderBy, &filter.SortExpr{
Column: p,
Descending: out.orderBy.LastDescending(),
})
}
}
// No cursor, no problem
if f.cursor == nil {
return
}
// Make sure the cursor can handle this sort def
out.orderBy, err = out.cursor.Sort(out.orderBy)
if err != nil {
return
}
return
}
func wrapNestedGvalIdent(ident string) string {
return strings.ReplaceAll(ident, attributeNestingSeparator, attributeNestingGvalSeparator)
}
func unwrapNestedGvalIdent(ident string) string {
return strings.ReplaceAll(ident, attributeNestingGvalSeparator, attributeNestingSeparator)
}

View File

@ -330,73 +330,6 @@ func TestStateConstraintsToExpression(t *testing.T) {
}
}
func TestMergeRows(t *testing.T) {
tcc := []struct {
name string
a *Row
b *Row
mapping []AttributeMapping
out *Row
}{{
name: "full merge; no mapping",
a: (&Row{}).WithValue("attr1", 0, 10).WithValue("attr2", 0, "hi").WithValue("attr2", 1, "hello"),
b: (&Row{}).WithValue("attr3", 0, true).WithValue("attr4", 0, "ee").WithValue("attr4", 1, 25),
out: (&Row{}).WithValue("attr1", 0, 10).WithValue("attr2", 0, "hi").WithValue("attr2", 1, "hello").WithValue("attr3", 0, true).WithValue("attr4", 0, "ee").WithValue("attr4", 1, 25),
}, {
name: "full merge; no mapping; collision",
a: (&Row{}).WithValue("attr1", 0, 10).WithValue("attr2", 0, "hi").WithValue("attr2", 1, "hello"),
b: (&Row{}).WithValue("attr2", 0, true).WithValue("attr3", 0, "ee").WithValue("attr3", 1, 25),
out: (&Row{}).WithValue("attr1", 0, 10).WithValue("attr2", 0, true).WithValue("attr3", 0, "ee").WithValue("attr3", 1, 25),
},
{
name: "mapped merge",
a: (&Row{}).WithValue("attr1", 0, 10).WithValue("attr2", 0, "hi").WithValue("attr2", 1, "hello"),
b: (&Row{}).WithValue("attr3", 0, true).WithValue("attr4", 0, "ee").WithValue("attr4", 1, 25),
out: (&Row{}).WithValue("a", 0, 10).WithValue("b", 0, "hi").WithValue("b", 1, "hello").WithValue("c", 0, true).WithValue("d", 0, "ee").WithValue("d", 1, 25),
mapping: saToMapping([]simpleAttribute{{
ident: "a",
source: "attr1",
}, {
ident: "b",
source: "attr2",
}, {
ident: "c",
source: "attr3",
}, {
ident: "d",
source: "attr4",
}}...),
}, {
name: "mapped merge with conflicts",
a: (&Row{}).WithValue("attr1", 0, 10).WithValue("attr2", 0, "hi").WithValue("attr2", 1, "hello"),
b: (&Row{}).WithValue("attr3", 0, true).WithValue("attr4", 0, "ee").WithValue("attr4", 1, 25),
out: (&Row{}).WithValue("a", 0, 10).WithValue("b", 0, true).WithValue("c", 0, "ee").WithValue("c", 1, 25),
mapping: saToMapping([]simpleAttribute{{
ident: "a",
source: "attr1",
}, {
ident: "b",
source: "attr2",
}, {
ident: "b",
source: "attr3",
}, {
ident: "c",
source: "attr4",
}}...),
}}
for _, c := range tcc {
t.Run(c.name, func(t *testing.T) {
out := &Row{}
err := mergeRows(c.mapping, out, c.a, c.b)
require.NoError(t, err)
require.Equal(t, c.out, out)
})
}
}
func TestRowComparator(t *testing.T) {
// @todo add some more extreme cases
tcc := []struct {

View File

@ -142,7 +142,7 @@ func (f *filter) Constraints() map[string][]any { return f.constaints }
func (f *filter) StateConstraints() map[string]State { return f.stateConditions }
func (f *filter) MetaConstraints() map[string]any { return f.metaConditions }
func (f *filter) Expression() string { return f.expression }
func (f *filter) ExpressionParsed() string { return f.expression }
func (f *filter) ExpressionParsed() *ql.ASTNode { return f.expressionParsed }
func (f *filter) OrderBy() SortExprSet { return f.orderBy }
func (f *filter) Limit() uint { return f.limit }
func (f *filter) Cursor() *PagingCursor { return f.cursor }