Extends DeDup capabilities
Adds value modifiers for duplicate search and lets each rule choose how multi-value fields are matched. - The rule name is removed for now. - Supported value modifiers are ignore-case, case-sensitive, fuzzy-match and sounds-like. - Supported multi-value matching criteria are one-of and equal. - Adds an upgrade fix that migrates each module's config.recordDeDup to the latest DeDupRule struct.
This commit is contained in:
parent
5f4d02ac84
commit
e7fa07cfae
@ -571,6 +571,12 @@ func (svc record) Bulk(ctx context.Context, oo ...*types.RecordBulkOperation) (r
|
||||
// before we start storing any changes
|
||||
rves = &types.RecordValueErrorSet{}
|
||||
|
||||
// duplication errors
|
||||
ddes = &types.RecordValueErrorSet{}
|
||||
|
||||
// merge of record value errors and duplication errors
|
||||
ee = &types.RecordValueErrorSet{}
|
||||
|
||||
action func(props ...*recordActionProps) *recordAction
|
||||
r *types.Record
|
||||
|
||||
@ -599,11 +605,11 @@ func (svc record) Bulk(ctx context.Context, oo ...*types.RecordBulkOperation) (r
|
||||
switch p.Operation {
|
||||
case types.OperationTypeCreate:
|
||||
action = RecordActionCreate
|
||||
r, dd, err = svc.create(ctx, r)
|
||||
r, ddes, err = svc.create(ctx, r)
|
||||
|
||||
case types.OperationTypeUpdate:
|
||||
action = RecordActionUpdate
|
||||
r, dd, err = svc.update(ctx, r)
|
||||
r, ddes, err = svc.update(ctx, r)
|
||||
|
||||
case types.OperationTypeDelete:
|
||||
action = RecordActionDelete
|
||||
@ -613,8 +619,13 @@ func (svc record) Bulk(ctx context.Context, oo ...*types.RecordBulkOperation) (r
|
||||
aProp.setChanged(r)
|
||||
|
||||
// Attach meta ID to each value error for FE identification
|
||||
if !dd.HasStrictErrors() && r != nil {
|
||||
dd.SetMetaID(r.ID)
|
||||
if !ddes.HasStrictErrors() && r != nil {
|
||||
ddes.SetMetaID(r.ID)
|
||||
}
|
||||
if !ddes.IsValid() && dd == nil {
|
||||
dd = ddes
|
||||
} else {
|
||||
dd.Merge(ddes)
|
||||
}
|
||||
|
||||
if rve := types.IsRecordValueErrorSet(err); rve != nil {
|
||||
@ -644,9 +655,14 @@ func (svc record) Bulk(ctx context.Context, oo ...*types.RecordBulkOperation) (r
|
||||
}
|
||||
}
|
||||
|
||||
if !rves.IsValid() {
|
||||
// merge record value errors and strict duplication errors
|
||||
if dd.HasStrictErrors() {
|
||||
ee.Merge(rves, dd)
|
||||
}
|
||||
|
||||
if !ee.IsValid() {
|
||||
// Any errors gathered?
|
||||
return RecordErrValueInput().Wrap(rves)
|
||||
return RecordErrValueInput().Wrap(ee)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -701,10 +717,19 @@ func (svc record) create(ctx context.Context, new *types.Record) (rec *types.Rec
|
||||
new.SetModule(m)
|
||||
|
||||
{
|
||||
// handle deDup error/warnings
|
||||
dd, err = svc.DupDetection(ctx, m, new)
|
||||
|
||||
// handle input payload errors
|
||||
if rve = svc.procCreate(ctx, invokerID, m, new); !rve.IsValid() {
|
||||
return nil, dd, RecordErrValueInput().Wrap(rve)
|
||||
}
|
||||
|
||||
// record value errors from dup detection
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err = svc.eventbus.WaitFor(ctx, event.RecordBeforeCreate(new, nil, m, ns, rve, nil)); err != nil {
|
||||
return
|
||||
} else if !rve.IsValid() {
|
||||
@ -714,11 +739,6 @@ func (svc record) create(ctx context.Context, new *types.Record) (rec *types.Rec
|
||||
|
||||
new.Values = RecordValueDefaults(m, new.Values)
|
||||
|
||||
dd, err = svc.DupDetection(ctx, m, new)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Handle payload from automation scripts
|
||||
if rve = svc.procCreate(ctx, invokerID, m, new); !rve.IsValid() {
|
||||
return nil, dd, RecordErrValueInput().Wrap(rve)
|
||||
@ -996,17 +1016,20 @@ func (svc record) update(ctx context.Context, upd *types.Record) (rec *types.Rec
|
||||
upd.SetModule(m)
|
||||
old.SetModule(m)
|
||||
|
||||
dd, err = svc.DupDetection(ctx, m, upd)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
{
|
||||
// Handle input payload
|
||||
// handle deDup error/warnings
|
||||
dd, err = svc.DupDetection(ctx, m, upd)
|
||||
|
||||
// handle input payload errors
|
||||
if rve = svc.procUpdate(ctx, invokerID, m, upd, old); !rve.IsValid() {
|
||||
return nil, dd, RecordErrValueInput().Wrap(rve)
|
||||
}
|
||||
|
||||
// record value errors from dup detection
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Scripts can (besides simple error value) return complex record value error set
|
||||
// that is passed back to the UI or any other API consumer
|
||||
//
|
||||
@ -1833,7 +1856,6 @@ func (svc record) DupDetection(ctx context.Context, m *types.Module, rec *types.
|
||||
return
|
||||
}
|
||||
|
||||
// @todo: improve error string with details
|
||||
rProps.setValueErrors(out)
|
||||
|
||||
// Error out if duplicate record exist
|
||||
@ -2036,7 +2058,7 @@ fields:
|
||||
val.Value = pickRandomID(recRefs[refModID])
|
||||
|
||||
case "select":
|
||||
//val.Value = src.Select(f.Options)
|
||||
// val.Value = src.Select(f.Options)
|
||||
continue fields
|
||||
|
||||
case "url":
|
||||
|
||||
@ -93,12 +93,9 @@ type (
|
||||
}
|
||||
|
||||
ModuleConfigRecordDeDup struct {
|
||||
// enable or disable duplicate detection
|
||||
Enabled bool `json:"enabled"`
|
||||
|
||||
// strictly restrict record saving
|
||||
// otherwise show a warning with list of duplicated records
|
||||
Strict bool `json:"strict"`
|
||||
Strict bool `json:"-"`
|
||||
|
||||
// list of duplicate detection rules applied to module's fields
|
||||
Rules DeDupRuleSet `json:"rules,omitempty"`
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/cortezaproject/corteza/server/pkg/locale"
|
||||
"github.com/cortezaproject/corteza/server/pkg/str"
|
||||
"github.com/spf13/cast"
|
||||
"strings"
|
||||
)
|
||||
@ -18,23 +19,44 @@ type (
|
||||
}
|
||||
|
||||
DeDupRule struct {
|
||||
Name DeDupRuleName `json:"name"`
|
||||
Strict bool `json:"strict"`
|
||||
Attributes []string `json:"attributes"`
|
||||
Name DeDupRuleName `json:"name"`
|
||||
Strict bool `json:"strict"`
|
||||
ErrorMessage string `json:"errorMessage"`
|
||||
ConstraintSet DeDupRuleConstraintSet `json:"constraints"`
|
||||
}
|
||||
|
||||
DeDupRuleConstraint struct {
|
||||
Attribute string `json:"attribute"`
|
||||
Modifier DeDupValueModifier `json:"modifier"`
|
||||
MultiValue DeDupMultiValueConstraint `json:"multiValue"`
|
||||
}
|
||||
|
||||
DeDupRuleConstraintSet []*DeDupRuleConstraint
|
||||
|
||||
// DeDupRuleName represent the identifier for duplicate detection rule
|
||||
DeDupRuleName string
|
||||
|
||||
// DeDupValueModifier represent the algorithm used to check value string
|
||||
DeDupValueModifier string
|
||||
|
||||
// DeDupMultiValueConstraint for matching multi values accordingly
|
||||
DeDupMultiValueConstraint string
|
||||
|
||||
// DeDupIssueKind based on strict mode rule or duplication config
|
||||
DeDupIssueKind string
|
||||
)
|
||||
|
||||
const (
|
||||
caseSensitive DeDupRuleName = "case-sensitive"
|
||||
ignoreCase DeDupValueModifier = "ignore-case"
|
||||
caseSensitive DeDupValueModifier = "case-sensitive"
|
||||
fuzzyMatch DeDupValueModifier = "fuzzy-match"
|
||||
soundsLike DeDupValueModifier = "sounds-like"
|
||||
|
||||
dupWarning DeDupIssueKind = "duplication_warning"
|
||||
dupError DeDupIssueKind = "duplication_error"
|
||||
oneOf DeDupMultiValueConstraint = "one-of"
|
||||
equal DeDupMultiValueConstraint = "equal"
|
||||
|
||||
deDupWarning DeDupIssueKind = "duplication_warning"
|
||||
deDupError DeDupIssueKind = "duplication_error"
|
||||
)
|
||||
|
||||
func DeDup() *deDup {
|
||||
@ -47,7 +69,7 @@ func (d deDup) CheckDuplication(ctx context.Context, rules DeDupRuleSet, rec Rec
|
||||
out = &RecordValueErrorSet{}
|
||||
err = rules.Walk(func(rule *DeDupRule) error {
|
||||
if rule.HasAttributes() {
|
||||
values := rr.GetValuesByName(distinct(rule.Attributes)...)
|
||||
values := rr.GetValuesByName(distinct(rule.Attributes())...)
|
||||
|
||||
set := rule.validateValue(ctx, d.ls, rec, values)
|
||||
|
||||
@ -72,7 +94,14 @@ func (rule DeDupIssueKind) String() string {
|
||||
}
|
||||
|
||||
func (rule DeDupRule) HasAttributes() bool {
|
||||
return len(rule.Attributes) > 0
|
||||
return len(rule.ConstraintSet) > 0 && len(rule.Attributes()) > 0
|
||||
}
|
||||
|
||||
func (rule DeDupRule) Attributes() (out []string) {
|
||||
for _, c := range rule.ConstraintSet {
|
||||
out = append(out, c.Attribute)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (rule DeDupRule) IsStrict() bool {
|
||||
@ -80,9 +109,9 @@ func (rule DeDupRule) IsStrict() bool {
|
||||
}
|
||||
|
||||
func (rule DeDupRule) IssueKind() string {
|
||||
out := dupWarning
|
||||
out := deDupWarning
|
||||
if rule.Strict {
|
||||
out = dupError
|
||||
out = deDupError
|
||||
}
|
||||
|
||||
return out.String()
|
||||
@ -93,43 +122,56 @@ func (rule DeDupRule) IssueMessage() (out string) {
|
||||
}
|
||||
|
||||
func (rule DeDupRule) String() string {
|
||||
return fmt.Sprintf("%s duplicate detection on `%s` field", rule.Name, strings.Join(rule.Attributes, ", "))
|
||||
return fmt.Sprintf("%s duplicate detection on `%s` field", rule.Name, strings.Join(rule.Attributes(), ", "))
|
||||
}
|
||||
|
||||
// validateValue will check duplicate detection based on rules name
|
||||
func (rule DeDupRule) validateValue(ctx context.Context, ls localeService, rec Record, vv RecordValueSet) (out *RecordValueErrorSet) {
|
||||
switch rule.Name {
|
||||
case caseSensitive:
|
||||
return rule.checkCaseSensitiveDuplication(ctx, ls, rec, vv)
|
||||
default:
|
||||
return rule.checkCaseSensitiveDuplication(ctx, ls, rec, vv)
|
||||
}
|
||||
return rule.checkCaseSensitiveDuplication(ctx, ls, rec, vv)
|
||||
}
|
||||
|
||||
func (rule DeDupRule) checkCaseSensitiveDuplication(ctx context.Context, ls localeService, rec Record, vv RecordValueSet) (out *RecordValueErrorSet) {
|
||||
out = &RecordValueErrorSet{}
|
||||
recVal := rec.Values
|
||||
var (
|
||||
recVal = rec.Values
|
||||
)
|
||||
|
||||
for _, a := range rule.Attributes {
|
||||
rv := recVal.Get(a, 0)
|
||||
if rv == nil {
|
||||
for _, c := range rule.ConstraintSet {
|
||||
rvv := recVal.FilterByName(c.Attribute)
|
||||
if rvv.Len() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var (
|
||||
valErr = &RecordValueErrorSet{}
|
||||
)
|
||||
|
||||
_ = vv.Walk(func(v *RecordValue) error {
|
||||
if v.RecordID != rec.ID {
|
||||
if toLower(v.Value) == toLower(rv.Value) {
|
||||
out.Push(RecordValueError{
|
||||
Kind: rule.IssueKind(),
|
||||
Message: ls.T(ctx, "compose", rule.IssueMessage()),
|
||||
Meta: map[string]interface{}{
|
||||
"field": v.Name,
|
||||
"value": v.Value,
|
||||
"dupValueField": rv.Name,
|
||||
"recordID": cast.ToString(v.RecordID),
|
||||
"rule": rule.String(),
|
||||
},
|
||||
})
|
||||
_ = rvv.Walk(func(rv *RecordValue) error {
|
||||
if len(rv.Value) > 0 && matchValue(c.Modifier, rv.Value, v.Value) {
|
||||
valErr.Push(RecordValueError{
|
||||
Kind: rule.IssueKind(),
|
||||
Message: ls.T(ctx, "compose", rule.IssueMessage()),
|
||||
Meta: map[string]interface{}{
|
||||
"field": v.Name,
|
||||
"value": v.Value,
|
||||
"dupValueField": rv.Name,
|
||||
"recordID": cast.ToString(v.RecordID),
|
||||
"rule": rule.String(),
|
||||
},
|
||||
})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// 1. multiValue is empty, then all value needs to be a match then return error/warning
|
||||
// 2. multiValue is oneOf, then one or more value needs to be a match then return error/warning
|
||||
// 3. multiValue is equal, then all value needs to be a match then return error/warning
|
||||
if (!valErr.IsValid() && (!c.HasMultiValue() || c.IsAllEqual()) && valErr.Len() == rvv.Len()) || (c.IsOneOf() && valErr.Len() > 0) {
|
||||
if out == nil {
|
||||
out = &RecordValueErrorSet{}
|
||||
}
|
||||
out.Push(valErr.Set...)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@ -139,6 +181,23 @@ func (rule DeDupRule) checkCaseSensitiveDuplication(ctx context.Context, ls loca
|
||||
return
|
||||
}
|
||||
|
||||
func (c DeDupRuleConstraint) HasMultiValue() bool {
|
||||
switch c.MultiValue {
|
||||
case oneOf, equal:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (c DeDupRuleConstraint) IsAllEqual() bool {
|
||||
return c.MultiValue == equal
|
||||
}
|
||||
|
||||
func (c DeDupRuleConstraint) IsOneOf() bool {
|
||||
return c.MultiValue == oneOf
|
||||
}
|
||||
|
||||
func (v *RecordValueErrorSet) SetMetaID(id uint64) {
|
||||
if v.IsValid() {
|
||||
return
|
||||
@ -154,21 +213,7 @@ func (v *RecordValueErrorSet) SetMetaID(id uint64) {
|
||||
}
|
||||
|
||||
func (v *RecordValueErrorSet) HasStrictErrors() bool {
|
||||
return v.HasKind(dupError.String())
|
||||
}
|
||||
|
||||
// CaseSensitiveDuplicationRule prepares the case-sensitive duplicate detection rule
|
||||
func CaseSensitiveDuplicationRule(strict bool, identifiers ...string) DeDupRule {
|
||||
return makeDuplicationRule(caseSensitive, strict, identifiers...)
|
||||
}
|
||||
|
||||
// makeDuplicationRule prepares duplication detection rules
|
||||
func makeDuplicationRule(name DeDupRuleName, strict bool, attributes ...string) DeDupRule {
|
||||
return DeDupRule{
|
||||
Name: name,
|
||||
Strict: strict,
|
||||
Attributes: attributes,
|
||||
}
|
||||
return v.HasKind(deDupError.String())
|
||||
}
|
||||
|
||||
// distinct only list the different (distinct) values
|
||||
@ -183,6 +228,19 @@ func distinct(input []string) (out []string) {
|
||||
return
|
||||
}
|
||||
|
||||
func toLower(s string) string {
|
||||
return strings.ToLower(s)
|
||||
// matchValue will check if the input matches with target string as per the modifier
|
||||
func matchValue(modifier DeDupValueModifier, input string, target string) bool {
|
||||
switch modifier {
|
||||
case ignoreCase:
|
||||
return str.Match(input, target, str.CaseInSensitiveMatch)
|
||||
case caseSensitive:
|
||||
return str.Match(input, target, str.CaseSensitiveMatch)
|
||||
case fuzzyMatch:
|
||||
return str.Match(input, target, str.LevenshteinDistance)
|
||||
case soundsLike:
|
||||
return str.Match(input, target, str.Soundex)
|
||||
default:
|
||||
// ignoreCase as default, if not specified
|
||||
return str.Match(input, target, str.CaseInSensitiveMatch)
|
||||
}
|
||||
}
|
||||
|
||||
126
server/compose/types/record_detect_duplicates_test.go
Normal file
126
server/compose/types/record_detect_duplicates_test.go
Normal file
@ -0,0 +1,126 @@
|
||||
package types
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/cortezaproject/corteza/server/pkg/locale"
|
||||
"github.com/spf13/cast"
|
||||
"github.com/stretchr/testify/require"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDeDupRule_checkCaseSensitiveDuplication(t *testing.T) {
|
||||
var (
|
||||
req = require.New(t)
|
||||
ctx = context.Background()
|
||||
ls = locale.Global()
|
||||
|
||||
rule1 = DeDupRule{
|
||||
Name: "",
|
||||
Strict: true,
|
||||
ConstraintSet: []*DeDupRuleConstraint{
|
||||
{
|
||||
Attribute: "name",
|
||||
Modifier: ignoreCase,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tests = []struct {
|
||||
name string
|
||||
rule DeDupRule
|
||||
rec Record
|
||||
vv RecordValueSet
|
||||
wantOut *RecordValueErrorSet
|
||||
}{
|
||||
{
|
||||
name: "no duplication",
|
||||
rule: rule1,
|
||||
rec: Record{
|
||||
ID: 1,
|
||||
Values: RecordValueSet{
|
||||
&RecordValue{
|
||||
RecordID: 1,
|
||||
Name: "name",
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
},
|
||||
vv: RecordValueSet{
|
||||
&RecordValue{
|
||||
RecordID: 2,
|
||||
Name: "name",
|
||||
Value: "test",
|
||||
},
|
||||
},
|
||||
wantOut: &RecordValueErrorSet{
|
||||
Set: []RecordValueError{
|
||||
{
|
||||
Kind: deDupError.String(),
|
||||
Message: rule1.IssueMessage(),
|
||||
Meta: map[string]interface{}{
|
||||
"field": "name",
|
||||
"value": "test",
|
||||
"dupValueField": "name",
|
||||
"recordID": cast.ToString(2),
|
||||
"rule": rule1.String(),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotOut := tt.rule.checkCaseSensitiveDuplication(ctx, ls, tt.rec, tt.vv)
|
||||
req.Equal(tt.wantOut, gotOut, "checkCaseSensitiveDuplication() = %v, want %v", gotOut, tt.wantOut)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_matchValue(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
target string
|
||||
modifier DeDupValueModifier
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "ignoreCase match value",
|
||||
input: "test",
|
||||
target: "tEst",
|
||||
modifier: ignoreCase,
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "caseSensitive match value",
|
||||
input: "tEst",
|
||||
target: "tEst",
|
||||
modifier: caseSensitive,
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "fuzzyMatch match value",
|
||||
input: "kitten",
|
||||
target: "sitting",
|
||||
modifier: fuzzyMatch,
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "soundsLike match value",
|
||||
input: "Robert",
|
||||
target: "Rupert",
|
||||
modifier: soundsLike,
|
||||
want: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := matchValue(tt.modifier, tt.input, tt.target); got != tt.want {
|
||||
t.Errorf("matchValue() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -29,6 +29,13 @@ func (v *RecordValueErrorSet) IsValid() bool {
|
||||
return v == nil || len(v.Set) == 0
|
||||
}
|
||||
|
||||
func (v *RecordValueErrorSet) Len() int {
|
||||
if v == nil {
|
||||
return 0
|
||||
}
|
||||
return len(v.Set)
|
||||
}
|
||||
|
||||
func (v *RecordValueErrorSet) Error() string {
|
||||
var no = 0
|
||||
if v != nil {
|
||||
@ -62,6 +69,20 @@ func (v *RecordValueErrorSet) HasKind(kind string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (v *RecordValueErrorSet) Merge(errs ...*RecordValueErrorSet) {
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, e := range errs {
|
||||
if e == nil || e.IsValid() {
|
||||
continue
|
||||
}
|
||||
|
||||
v.Push(e.Set...)
|
||||
}
|
||||
}
|
||||
|
||||
// IsRecordValueErrorSet tests if given error is RecordValueErrorSet (or it wraps it) and it has errors
|
||||
// If not is not (or !IsValid), it return nil!
|
||||
func IsRecordValueErrorSet(err error) *RecordValueErrorSet {
|
||||
|
||||
52
server/pkg/str/levenshtein.go
Normal file
52
server/pkg/str/levenshtein.go
Normal file
@ -0,0 +1,52 @@
|
||||
package str
|
||||
|
||||
// ToLevenshteinDistance returns the Levenshtein (edit) distance between a and
// b: the minimum number of single-element insertions, deletions and
// substitutions needed to turn a into b.
//
// Strings are compared byte by byte (they are indexed, not decoded into
// runes), so replacing a one-byte character with a two-byte UTF-8 character
// counts as two edits.
//
// https://en.wikipedia.org/wiki/Levenshtein_distance
func ToLevenshteinDistance(a, b string) int {
	la, lb := len(a), len(b)

	// distance to or from an empty string is the length of the other one
	if la == 0 {
		return lb
	}
	if lb == 0 {
		return la
	}

	// Only the previous and the current row of the distance matrix are ever
	// read, so two rows are kept instead of the full (la+1) x (lb+1) matrix.
	var (
		prev = make([]int, lb+1)
		curr = make([]int, lb+1)
	)

	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= la; i++ {
		curr[0] = i

		for j := 1; j <= lb; j++ {
			if a[i-1] == b[j-1] {
				// same byte, no edit needed
				curr[j] = prev[j-1]
				continue
			}

			// cheapest of substitution, deletion and insertion
			best := prev[j-1]
			if prev[j] < best {
				best = prev[j]
			}
			if curr[j-1] < best {
				best = curr[j-1]
			}

			curr[j] = best + 1
		}

		prev, curr = curr, prev
	}

	// rows were swapped after the last iteration, result sits in prev
	return prev[lb]
}
|
||||
|
||||
// min returns the smallest of the three given integers.
func min(a, b, c int) int {
	smallest := c

	if b < smallest {
		smallest = b
	}

	if a < smallest {
		smallest = a
	}

	return smallest
}
|
||||
39
server/pkg/str/levenshtein_test.go
Normal file
39
server/pkg/str/levenshtein_test.go
Normal file
@ -0,0 +1,39 @@
|
||||
package str
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLevenshteinDistance(t *testing.T) {
|
||||
tests := []struct {
|
||||
a string
|
||||
b string
|
||||
want int
|
||||
}{
|
||||
{"", "hello", 5},
|
||||
{"hello", "", 5},
|
||||
{"hello", "hello", 0},
|
||||
{"ab", "aa", 1},
|
||||
{"ab", "ba", 2},
|
||||
{"ab", "aaa", 2},
|
||||
{"bbb", "a", 3},
|
||||
{"kitten", "sitting", 3},
|
||||
{"distance", "difference", 5},
|
||||
{"levenshtein", "frankenstein", 6},
|
||||
{"resume and cafe", "resumes and cafes", 2},
|
||||
{"a very long string that is meant to exceed", "another very long string that is meant to exceed", 6},
|
||||
// Testing acutes and umlauts
|
||||
{"resumé and café", "resumés and cafés", 2},
|
||||
{"resume and cafe", "resumé and café", 4},
|
||||
{"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", 8},
|
||||
// Only 2 characters are less in the 2nd string
|
||||
{"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", 6},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.a, func(t *testing.T) {
|
||||
if got := ToLevenshteinDistance(tt.a, tt.b); got != tt.want {
|
||||
t.Errorf("LevenshteinDistance() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
104
server/pkg/str/soundex.go
Normal file
104
server/pkg/str/soundex.go
Normal file
@ -0,0 +1,104 @@
|
||||
package str
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ToSoundex returns a four byte, Soundex-style code for s.
// https://en.wikipedia.org/wiki/Soundex
//
// How the code is built:
//
//  1. The first rune of s that is not one of the lower-case letters
//     a, e, i, o, u, y, h, w is kept verbatim as the leading character.
//  2. Every lower-case consonant of s is mapped to a digit:
//     b, f, p, v             → 1
//     c, g, j, k, q, s, x, z → 2
//     d, t                   → 3
//     l                      → 4
//     m, n                   → 5
//     r                      → 6
//  3. Adjacent pairs of the same digit are collapsed in a single pass.
//  4. The result is padded with "0" or cut to exactly four bytes.
//
// NOTE(review): this deviates from textbook Soundex and the deviations are
// pinned by the existing tests, so they are kept as-is:
//   - matching is case-sensitive; upper-case letters, digits and spaces are
//     never coded and do not reset the last digit, which is then emitted again
//   - the leading character is not upper-cased, and a lower-case leading
//     consonant is additionally coded as a digit ("robert" → "r616")
//   - h and w separate equal digits exactly like vowels do
func ToSoundex(s string) string {
	// skipped reports whether r is one of the letters that never get a digit
	skipped := func(r rune) bool {
		switch r {
		case 'a', 'e', 'i', 'o', 'u', 'y', 'h', 'w':
			return true
		}

		return false
	}

	var (
		// soundex code under construction
		code strings.Builder
		// digit assigned to the most recent coded consonant
		lastCode string
		// most recent rune that was not skipped
		lastRune rune
		// set when one or more skipped letters were just passed
		lastRuneIsVowel bool
	)

	// leading character: first rune that is not a skipped letter
	for _, r := range s {
		if skipped(r) {
			continue
		}

		code.WriteRune(r)
		break
	}

	// map consonants to digits
	for _, r := range s {
		if skipped(r) {
			lastRuneIsVowel = true
			continue
		}

		// a skipped letter in between lets the same digit be coded twice
		if lastRuneIsVowel {
			lastRuneIsVowel = false
			lastCode = ""
		}

		switch r {
		case 'b', 'f', 'p', 'v':
			lastCode = "1"
		case 'c', 'g', 'j', 'k', 'q', 's', 'x', 'z':
			lastCode = "2"
		case 'd', 't':
			lastCode = "3"
		case 'l':
			lastCode = "4"
		case 'm', 'n':
			lastCode = "5"
		case 'r':
			lastCode = "6"
		}

		// The digit is compared with the previous rune itself, not with the
		// previous digit, so this only filters a digit that follows its own
		// literal character in s. Repeated digits are collapsed below.
		if lastCode != "" && lastCode != string(lastRune) {
			code.WriteString(lastCode)
		}

		lastRune = r
	}

	// collapse adjacent equal digits, one pass per digit
	out := code.String()
	for _, d := range []string{"1", "2", "3", "4", "5", "6"} {
		out = strings.ReplaceAll(out, d+d, d)
	}

	// pad with zeros or cut down to exactly four bytes
	if len(out) < 4 {
		return out + strings.Repeat("0", 4-len(out))
	}

	return out[:4]
}
|
||||
64
server/pkg/str/soundex_test.go
Normal file
64
server/pkg/str/soundex_test.go
Normal file
@ -0,0 +1,64 @@
|
||||
package str
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_soundex(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
"Robert",
|
||||
"R163",
|
||||
},
|
||||
{
|
||||
"Rupert",
|
||||
"R163",
|
||||
},
|
||||
{
|
||||
"Rubin",
|
||||
"R150",
|
||||
},
|
||||
{
|
||||
"Ashcraft",
|
||||
"A261",
|
||||
},
|
||||
{
|
||||
"Ashcroft",
|
||||
"A261",
|
||||
},
|
||||
{
|
||||
"Tymczak",
|
||||
"T522",
|
||||
},
|
||||
{
|
||||
"Pfister",
|
||||
"P123",
|
||||
},
|
||||
{
|
||||
"AH KEY",
|
||||
"A000",
|
||||
},
|
||||
{
|
||||
"The quick brown fox",
|
||||
"T221",
|
||||
},
|
||||
{
|
||||
"h3110 w021d",
|
||||
"3000",
|
||||
},
|
||||
{
|
||||
"1337",
|
||||
"1000",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := ToSoundex(tt.name); got != tt.want {
|
||||
t.Errorf("soundex() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
31
server/pkg/str/str.go
Normal file
31
server/pkg/str/str.go
Normal file
@ -0,0 +1,31 @@
|
||||
package str
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DefaultLevenshteinDistance is the default levenshtein distance:
// the largest edit distance Match still accepts for LevenshteinDistance.
const DefaultLevenshteinDistance = 3

// Algorithms accepted by Match. Numbering starts at 1.
const (
	CaseInSensitiveMatch = iota + 1
	CaseSensitiveMatch
	LevenshteinDistance
	Soundex
)
|
||||
|
||||
// Match will match string as per given algorithm
|
||||
func Match(str1, str2 string, algorithm int) bool {
|
||||
switch algorithm {
|
||||
case LevenshteinDistance:
|
||||
return ToLevenshteinDistance(str1, str2) <= DefaultLevenshteinDistance
|
||||
case Soundex:
|
||||
return ToSoundex(str1) == ToSoundex(str2)
|
||||
case CaseSensitiveMatch:
|
||||
return strings.Compare(str1, str2) == 0
|
||||
case CaseInSensitiveMatch:
|
||||
return strings.EqualFold(str1, str2)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
@ -42,6 +42,7 @@ var (
|
||||
fix_2022_09_00_addRevisionOnComposeRecords,
|
||||
fix_2022_09_00_addMetaOnComposeRecords,
|
||||
fix_2022_09_00_addMissingNodeIdOnFederationMapping,
|
||||
fix_2023_03_00_migrateComposeModuleConfigForRecordDeDup,
|
||||
}
|
||||
)
|
||||
|
||||
@ -208,7 +209,7 @@ func fix_2022_09_00_migrateOldComposeRecordValues(ctx context.Context, s *Store)
|
||||
|
||||
err = func() (err error) {
|
||||
query = fmt.Sprintf(recordsPerModule, mod.NamespaceID, mod.ID, sliceLastRecordID, recordSliceSize)
|
||||
//println(query)
|
||||
// println(query)
|
||||
rows, err = s.DB.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return
|
||||
@ -237,7 +238,7 @@ func fix_2022_09_00_migrateOldComposeRecordValues(ctx context.Context, s *Store)
|
||||
}
|
||||
|
||||
query = fmt.Sprintf(recValuesPerModule, strings.Join(recordIDs, ","))
|
||||
//println(query)
|
||||
// println(query)
|
||||
rows, err = s.DB.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return
|
||||
@ -429,6 +430,131 @@ func fix_2022_09_00_addMissingNodeIdOnFederationMapping(ctx context.Context, s *
|
||||
)
|
||||
}
|
||||
|
||||
func fix_2023_03_00_migrateComposeModuleConfigForRecordDeDup(ctx context.Context, s *Store) (err error) {
|
||||
type (
|
||||
oldRule struct {
|
||||
Name string `json:"name"`
|
||||
Strict bool `json:"strict"`
|
||||
Attributes []string `json:"attributes"`
|
||||
}
|
||||
rules struct {
|
||||
Rules []oldRule `json:"rules"`
|
||||
}
|
||||
)
|
||||
|
||||
var (
|
||||
log = s.log(ctx)
|
||||
query string
|
||||
aux []byte
|
||||
rr rules
|
||||
rows *sql.Rows
|
||||
modules types.ModuleSet
|
||||
)
|
||||
|
||||
_, err = s.DataDefiner.TableLookup(ctx, model.Module.Ident)
|
||||
if err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
log.Debug("skipping module config recordDeDup migration: compose_module table not found")
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
const (
|
||||
moduleConfigRecordDeDup = `
|
||||
SELECT compose_module.config -> 'recordDeDup' AS recordDeDup
|
||||
FROM compose_module
|
||||
WHERE compose_module.id = %d`
|
||||
)
|
||||
|
||||
modules, _, err = s.SearchComposeModules(ctx, types.ModuleFilter{})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// 1. Check if module has recordDeDup rules
|
||||
// 2. If yes, migrate them to new format
|
||||
// 3. Save module
|
||||
for _, m := range modules {
|
||||
var (
|
||||
migratedRules types.DeDupRuleSet
|
||||
)
|
||||
|
||||
if err = s.Tx(ctx, func(ctx context.Context, s store.Storer) (err error) {
|
||||
log.Info("collecting module.config.recordDeDup for module", zap.Uint64("id", m.ID))
|
||||
|
||||
query = fmt.Sprintf(moduleConfigRecordDeDup, m.ID)
|
||||
rows, err = s.(*Store).DB.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// assign error to return value...
|
||||
err = rows.Close()
|
||||
}()
|
||||
|
||||
for rows.Next() {
|
||||
if err = rows.Err(); err != nil {
|
||||
log.Info("failed to scan rows to migrated module.config.recordDeDup for module",
|
||||
zap.Uint64("id", m.ID))
|
||||
return
|
||||
}
|
||||
|
||||
err = rows.Scan(&aux)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
err = json.Unmarshal(aux, &rr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
for _, r := range rr.Rules {
|
||||
if len(r.Attributes) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var rcc types.DeDupRuleConstraintSet
|
||||
for _, atr := range r.Attributes {
|
||||
if len(atr) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
rcc = append(rcc, &types.DeDupRuleConstraint{
|
||||
Attribute: atr,
|
||||
Modifier: "ignore-case",
|
||||
MultiValue: "equal",
|
||||
})
|
||||
}
|
||||
|
||||
migratedRules = append(migratedRules, &types.DeDupRule{
|
||||
Strict: r.Strict,
|
||||
ConstraintSet: rcc,
|
||||
})
|
||||
}
|
||||
|
||||
if len(migratedRules) > 0 {
|
||||
m.Config.RecordDeDup.Rules = migratedRules
|
||||
|
||||
log.Info("saving migrated module.config.recordDeDup for module", zap.Uint64("id", m.ID))
|
||||
if err = s.UpdateComposeModule(ctx, m); err != nil {
|
||||
log.Info("error saving migrated module.config.recordDeDup for module", zap.Uint64("id", m.ID))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}); err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func count(ctx context.Context, s *Store, table string, ee ...goqu.Expression) (count int) {
|
||||
db := s.DB.(goqu.SQLDatabase)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user