2024-09-24 06:17:11 -04:00
|
|
|
package utils
|
|
|
|
|
2024-11-21 12:30:26 -05:00
|
|
|
import (
|
2024-12-03 10:38:54 -05:00
|
|
|
"regexp"
|
2024-11-21 12:30:26 -05:00
|
|
|
"strings"
|
|
|
|
)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
2024-11-21 12:30:26 -05:00
|
|
|
// levenshteinDistance returns the Levenshtein edit distance between str1 and
// str2: the minimum number of single-character insertions, deletions and
// substitutions needed to turn one into the other.
//
// Both inputs are lower-cased first, so the comparison is case-insensitive,
// and they are compared rune by rune rather than byte by byte.
func levenshteinDistance(str1, str2 string) int {
	long := []rune(strings.ToLower(str1))
	short := []rune(strings.ToLower(str2))

	// The distance is symmetric, so put the shorter input on the column axis
	// to keep the two working rows as small as possible.
	if len(long) < len(short) {
		long, short = short, long
	}
	if len(short) == 0 {
		// Turning an empty string into the other one takes one insertion
		// per rune (this also covers the case where both are empty).
		return len(long)
	}

	// Each cell only depends on the current and the previous row, so two
	// rows are enough instead of the full (len+1) x (len+1) matrix.
	prev := make([]int, len(short)+1)
	curr := make([]int, len(short)+1)
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= len(long); i++ {
		curr[0] = i
		for j := 1; j <= len(short); j++ {
			cost := 1
			if long[i-1] == short[j-1] {
				cost = 0
			}
			curr[j] = min(
				prev[j]+1,      // deletion
				curr[j-1]+1,    // insertion
				prev[j-1]+cost, // substitution (or match)
			)
		}
		prev, curr = curr, prev
	}

	// After the final swap the last computed row lives in prev.
	return prev[len(short)]
}
|
|
|
|
|
2024-12-03 10:38:54 -05:00
|
|
|
// noneAlphaNumericRegex matches runs of characters that are neither letters
// nor numbers in any script. The Unicode classes \p{L} and \p{N} are used
// instead of a-z/0-9 so that non-ASCII text is not stripped down to nothing.
var noneAlphaNumericRegex = regexp.MustCompile(`[^\p{L}\p{N}]+`)

// removeSpacesAndNoneAlphaNumeric lower-cases str and removes every character
// that is not a letter or a number (whitespace, punctuation, symbols, ...).
func removeSpacesAndNoneAlphaNumeric(str string) string {
	return noneAlphaNumericRegex.ReplaceAllString(strings.ToLower(str), "")
}
|
|
|
|
|
2024-09-24 06:17:11 -04:00
|
|
|
func Similarity(str1, str2 string) float64 {
|
2024-12-03 10:38:54 -05:00
|
|
|
str1 = removeSpacesAndNoneAlphaNumeric(str1)
|
|
|
|
str2 = removeSpacesAndNoneAlphaNumeric(str2)
|
2024-11-21 12:30:26 -05:00
|
|
|
distance := levenshteinDistance(str1, str2)
|
2024-09-24 06:17:11 -04:00
|
|
|
maxLength := len(str1)
|
|
|
|
if len(str2) > maxLength {
|
|
|
|
maxLength = len(str2)
|
|
|
|
}
|
|
|
|
|
2024-12-03 10:38:54 -05:00
|
|
|
adjustedLength := float64(maxLength + (len(str1)+len(str2))/2)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
|
|
|
if maxLength == 0 {
|
|
|
|
return 1.0
|
|
|
|
}
|
|
|
|
|
2024-12-03 10:38:54 -05:00
|
|
|
similarity := 1.0 - float64(distance)/float64(adjustedLength)
|
2024-09-24 06:17:11 -04:00
|
|
|
return similarity
|
|
|
|
}
|