3
0

Add tests for pkg/str methods

This commit is contained in:
Tomaž Jerman 2024-01-30 09:00:16 +01:00
parent 7b31f9e72b
commit 6c499217ef
4 changed files with 88 additions and 15 deletions

View File

@ -37,3 +37,32 @@ func TestLevenshteinDistance(t *testing.T) {
})
}
}
// goos: darwin
// goarch: arm64
// pkg: github.com/cortezaproject/corteza/server/pkg/str
// BenchmarkLeven_100_100-12 39949 25767 ns/op 93184 B/op 102 allocs/op
// BenchmarkLeven_1000_1000-12 390 3081967 ns/op 8298552 B/op 1011 allocs/op
// BenchmarkLeven_10000_10000-12 4 299103531 ns/op 829957216 B/op 10131 allocs/op
// PASS

// benchmarkLeven times ToLevenshteinDistance on a pair of random strings
// that are w1l and w2l runes long, respectively.
//
// Both inputs are generated once, before the timer is reset, so building
// the random strings does not count towards the reported time or
// allocation figures.
func benchmarkLeven(b *testing.B, w1l, w2l int) {
	w1 := randStringRunes(w1l)
	w2 := randStringRunes(w2l)

	// Exclude input generation from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		ToLevenshteinDistance(w1, w2)
	}
}
// BenchmarkLeven_100_100 runs benchmarkLeven with two inputs of 100 runes each.
func BenchmarkLeven_100_100(b *testing.B) {
	const runes = 100
	benchmarkLeven(b, runes, runes)
}
// BenchmarkLeven_1000_1000 runs benchmarkLeven with two inputs of 1000 runes each.
func BenchmarkLeven_1000_1000(b *testing.B) {
	const runes = 1000
	benchmarkLeven(b, runes, runes)
}
// BenchmarkLeven_10000_10000 runs benchmarkLeven with two inputs of 10000 runes each.
func BenchmarkLeven_10000_10000(b *testing.B) {
	const runes = 10000
	benchmarkLeven(b, runes, runes)
}

View File

@ -0,0 +1,13 @@
package str
import "math/rand"
// letterRunes is the alphabet randStringRunes draws from. It mixes ASCII
// letters, digits and punctuation with the non-ASCII rune 'Ž'; note that
// '?' and '*' are listed twice, so they are picked twice as often.
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890?*_Ž!$%&'()=?*")

// randStringRunes returns a string made of n runes, each chosen from
// letterRunes using the package-level math/rand generator.
func randStringRunes(n int) string {
	alphabetSize := len(letterRunes)
	picked := make([]rune, n)
	for pos := 0; pos < n; pos++ {
		picked[pos] = letterRunes[rand.Intn(alphabetSize)]
	}
	return string(picked)
}

View File

@ -7,21 +7,21 @@ import (
// ToSoundex takes a word and returns the soundex code for it.
// https://en.wikipedia.org/wiki/Soundex
//
// 1. Retain the first letter of the name and drop all other occurrences of a, e, i, o, u, y, h, w.
// 2. Replace consonants with digits as follows (after the first letter):
// b, f, p, v → 1
// c, g, j, k, q, s, x, z → 2
// d, t → 3
// l → 4
// m, n → 5
// r → 6
// 3. If two or more letters with the same number are adjacent in the original name (before step 1),
// only retain the first letter; also two letters with the same number separated
// by 'h' or 'w' are coded as a single number, whereas such letters separated by a vowel are coded twice.
// This rule also applies to the first letter.
// 4. Iterate the previous step until you have one letter and three numbers.
// If you have too few letters in your word that you can't assign three numbers, append with zeros
// until there are three numbers. If you have more than 3 letters, just retain the first 3 numbers.
// 1. Retain the first letter of the name and drop all other occurrences of a, e, i, o, u, y, h, w.
// 2. Replace consonants with digits as follows (after the first letter):
// b, f, p, v → 1
// c, g, j, k, q, s, x, z → 2
// d, t → 3
// l → 4
// m, n → 5
// r → 6
// 3. If two or more letters with the same number are adjacent in the original name (before step 1),
// only retain the first letter; also two letters with the same number separated
// by 'h' or 'w' are coded as a single number, whereas such letters separated by a vowel are coded twice.
// This rule also applies to the first letter.
// 4. Iterate the previous step until you have one letter and three numbers.
// If you have too few letters in your word that you can't assign three numbers, append with zeros
// until there are three numbers. If you have more than 3 letters, just retain the first 3 numbers.
func ToSoundex(s string) string {
var (
// soundex code

View File

@ -62,3 +62,34 @@ func Test_soundex(t *testing.T) {
})
}
}
// goos: darwin
// goarch: arm64
// pkg: github.com/cortezaproject/corteza/server/pkg/str
// BenchmarkSoundex_100_100-12 475930 2835 ns/op 3568 B/op 80 allocs/op
// BenchmarkSoundex_1000_1000-12 30688 37494 ns/op 225458 B/op 657 allocs/op
// BenchmarkSoundex_10000_10000-12 568 2023842 ns/op 25114459 B/op 6892 allocs/op
// BenchmarkSoundex_100000_100000-12 7 160967804 ns/op 2544780057 B/op 69015 allocs/op

// benchmarkSoundex times ToSoundex on a single random string that is w1l
// runes long.
//
// The input is generated once, before the timer is reset, so building the
// random string does not count towards the reported time or allocation
// figures.
func benchmarkSoundex(b *testing.B, w1l int) {
	w1 := randStringRunes(w1l)

	// Exclude input generation from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		ToSoundex(w1)
	}
}
// BenchmarkSoundex_100_100 runs benchmarkSoundex with an input of 100 runes.
func BenchmarkSoundex_100_100(b *testing.B) {
	const runes = 100
	benchmarkSoundex(b, runes)
}
// BenchmarkSoundex_1000_1000 runs benchmarkSoundex with an input of 1000 runes.
func BenchmarkSoundex_1000_1000(b *testing.B) {
	const runes = 1000
	benchmarkSoundex(b, runes)
}
// BenchmarkSoundex_10000_10000 runs benchmarkSoundex with an input of 10000 runes.
func BenchmarkSoundex_10000_10000(b *testing.B) {
	const runes = 10000
	benchmarkSoundex(b, runes)
}
// BenchmarkSoundex_100000_100000 runs benchmarkSoundex with an input of 100000 runes.
func BenchmarkSoundex_100000_100000(b *testing.B) {
	const runes = 100000
	benchmarkSoundex(b, runes)
}