Skip to content

Commit 3853662

Browse files
authored
Merge pull request #172 from adamdecaf/improve-name-search
fix: improve name search by using cleaned name
2 parents 9bcce06 + af1b9a5 commit 3853662

6 files changed

+174
-64
lines changed

ACHDictionary.go

+11-7
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ func (f *ACHDictionary) readJSON(r io.Reader) error {
183183
PhoneNumber: fmt.Sprintf("%s%s%s", ps[i].CustomerAreaCode, ps[i].CustomerPhonePrefix, ps[i].CustomerPhoneSuffix),
184184
StatusCode: ps[i].InstitutionStatusCode,
185185
ViewCode: ps[i].DataViewCode,
186+
187+
// Our Custom Fields
188+
CleanName: Normalize(ps[i].CustomerName),
186189
}
187190
f.IndexACHRoutingNumber[ps[i].RoutingNumber] = p
188191
f.ACHParticipants = append(f.ACHParticipants, p)
@@ -250,6 +253,9 @@ func (f *ACHDictionary) parseACHParticipant(line string) error {
250253
// ViewCode (1): 1
251254
p.ViewCode = line[149:150]
252255

256+
// Our custom fields
257+
p.CleanName = Normalize(p.CustomerName)
258+
253259
f.ACHParticipants = append(f.ACHParticipants, p)
254260
f.IndexACHRoutingNumber[p.RoutingNumber] = p
255261
return nil
@@ -324,26 +330,24 @@ func (f *ACHDictionary) RoutingNumberSearch(s string, limit int) ([]*ACHParticip
324330
})
325331
}
326332
}
327-
return reduceResult(out, limit), nil
333+
return reduceACHResults(out, limit), nil
328334
}
329335

330336
// FinancialInstitutionSearch returns up to limit FEDACH participants whose cleaned ACHParticipant.CustomerName is similar to s
331337
func (f *ACHDictionary) FinancialInstitutionSearch(s string, limit int) []*ACHParticipant {
332338
s = strings.ToLower(s)
333339

334-
// Participants is a subset ACHDictionary.ACHParticipants that match the search based on JaroWinkler similarity
335-
// and Levenshtein similarity
336340
out := make([]*achParticipantResult, 0)
337341

338342
for _, achP := range f.ACHParticipants {
339343
// JaroWinkler is a more accurate version of the Jaro algorithm. It works by boosting the
340344
// score of exact matches at the beginning of the strings. By doing this, Winkler says that
341345
// typos are less common to happen at the beginning.
342-
jaroScore := strcmp.JaroWinkler(strings.ToLower(achP.CustomerName), s)
346+
jaroScore := strcmp.JaroWinkler(strings.ToLower(achP.CleanName), s)
343347

344348
// Levenshtein is the "edit distance" between two strings. This is the count of operations
345349
// (insert, delete, replace) needed for two strings to be equal.
346-
levenScore := strcmp.Levenshtein(strings.ToLower(achP.CustomerName), s)
350+
levenScore := strcmp.Levenshtein(strings.ToLower(achP.CleanName), s)
347351

348352
if jaroScore > ACHJaroWinklerSimilarity || levenScore > ACHLevenshteinSimilarity {
349353
out = append(out, &achParticipantResult{
@@ -353,7 +357,7 @@ func (f *ACHDictionary) FinancialInstitutionSearch(s string, limit int) []*ACHPa
353357
}
354358
}
355359

356-
return reduceResult(out, limit)
360+
return reduceACHResults(out, limit)
357361
}
358362

359363
// ACHParticipantStateFilter filters ACHParticipant by State.
@@ -441,7 +445,7 @@ func (f *ACHDictionary) PostalCodeFilter(s string) []*ACHParticipant {
441445
return nsl
442446
}
443447

444-
func reduceResult(in []*achParticipantResult, limit int) []*ACHParticipant {
448+
func reduceACHResults(in []*achParticipantResult, limit int) []*ACHParticipant {
445449
sort.SliceStable(in, func(i, j int) bool { return in[i].highestMatch > in[j].highestMatch })
446450

447451
out := make([]*ACHParticipant, 0)

ACHDictionary_test.go

+49
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"testing"
1313

1414
"github.com/moov-io/base"
15+
16+
"github.com/stretchr/testify/require"
1517
)
1618

1719
func loadTestACHFiles(t *testing.T) (*ACHDictionary, *ACHDictionary) {
@@ -322,6 +324,53 @@ func TestACHRoutingNumberNumeric(t *testing.T) {
322324
}
323325
}
324326

327+
func TestACHFinancialInstitutionSearch__Examples(t *testing.T) {
328+
_, plainDict := loadTestACHFiles(t)
329+
330+
cases := []struct {
331+
input string
332+
expected *ACHParticipant
333+
}{
334+
{
335+
input: "Chase",
336+
expected: &ACHParticipant{
337+
RoutingNumber: "021000021",
338+
CustomerName: "JPMORGAN CHASE",
339+
},
340+
},
341+
{
342+
input: "Wells",
343+
expected: &ACHParticipant{
344+
RoutingNumber: "101205940",
345+
CustomerName: "WELLS BANK",
346+
},
347+
},
348+
{
349+
input: "Fargo",
350+
expected: &ACHParticipant{
351+
RoutingNumber: "291378392",
352+
CustomerName: "FARGO VA FEDERAL CU",
353+
},
354+
},
355+
{
356+
input: "Wells Fargo",
357+
expected: &ACHParticipant{
358+
RoutingNumber: "011100106",
359+
CustomerName: "WELLS FARGO BANK",
360+
},
361+
},
362+
}
363+
364+
for i := range cases {
365+
// The plain dictionary has 18k records, so search is more realistic
366+
results := plainDict.FinancialInstitutionSearch(cases[i].input, 1)
367+
require.Len(t, results, 1)
368+
369+
require.Equal(t, cases[i].expected.RoutingNumber, results[0].RoutingNumber)
370+
require.Equal(t, cases[i].expected.CustomerName, results[0].CustomerName)
371+
}
372+
}
373+
325374
// TestACHFinancialInstitutionSearch tests search string `First Bank`
326375
func TestACHFinancialInstitutionSearch(t *testing.T) {
327376
jsonDict, plainDict := loadTestACHFiles(t)

WIREDictionary.go

+57-39
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"encoding/json"
1111
"io"
1212
"io/ioutil"
13+
"math"
1314
"sort"
1415
"strings"
1516
"unicode/utf8"
@@ -145,6 +146,9 @@ func (f *WIREDictionary) readJSON(r io.Reader) error {
145146
FundsSettlementOnlyStatus: ps[i].FundsSettlementOnlyStatus,
146147
BookEntrySecuritiesTransferStatus: ps[i].SecuritiesEligibility,
147148
Date: ps[i].ChangeDate,
149+
150+
// Our Custom Fields
151+
CleanName: Normalize(ps[i].CustomerName),
148152
}
149153
f.WIREParticipants = append(f.WIREParticipants, p)
150154
f.IndexWIRERoutingNumber[p.RoutingNumber] = p
@@ -200,6 +204,10 @@ func (f *WIREDictionary) parseWIREParticipant(line string) error {
200204
p.BookEntrySecuritiesTransferStatus = line[92:93]
201205
// Date YYYYMMDD (8): 122415
202206
p.Date = line[93:101]
207+
208+
// Our custom fields
209+
p.CleanName = Normalize(p.CustomerName)
210+
203211
f.WIREParticipants = append(f.WIREParticipants, p)
204212
f.IndexWIRERoutingNumber[p.RoutingNumber] = p
205213
return nil
@@ -232,7 +240,7 @@ func (f *WIREDictionary) FinancialInstitutionSearchSingle(s string) []*WIREParti
232240
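// RoutingNumberSearch returns up to limit FEDWIRE participants ranked by how closely WIREParticipant.RoutingNumber
// matches s (Jaro-Winkler score). A full 9-digit s only returns participants whose routing number equals s.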
233241
// The first 2 digits of the routing number are required.
234242
// Based on https://www.frbservices.org/EPaymentsDirectory/search.html
235-
func (f *WIREDictionary) RoutingNumberSearch(s string) ([]*WIREParticipant, error) {
243+
func (f *WIREDictionary) RoutingNumberSearch(s string, limit int) ([]*WIREParticipant, error) {
236244
s = strings.TrimSpace(s)
237245

238246
if utf8.RuneCountInString(s) < MinimumRoutingNumberDigits {
@@ -250,58 +258,52 @@ func (f *WIREDictionary) RoutingNumberSearch(s string) ([]*WIREParticipant, erro
250258
f.errors.Add(ErrRoutingNumberNumeric)
251259
return nil, f.errors
252260
}
261+
exactMatch := len(s) == 9
253262

254-
Participants := make([]*WIREParticipant, 0)
255-
263+
out := make([]*wireParticipantResult, 0)
256264
for _, wireP := range f.WIREParticipants {
257-
if strings.HasPrefix(wireP.RoutingNumber, s) {
258-
Participants = append(Participants, wireP)
265+
if exactMatch {
266+
if wireP.RoutingNumber == s {
267+
out = append(out, &wireParticipantResult{
268+
WIREParticipant: wireP,
269+
highestMatch: 1.0,
270+
})
271+
}
272+
} else {
273+
out = append(out, &wireParticipantResult{
274+
WIREParticipant: wireP,
275+
highestMatch: strcmp.JaroWinkler(wireP.RoutingNumber, s),
276+
})
259277
}
260278
}
261-
262-
return Participants, nil
279+
return reduceWIREResults(out, limit), nil
263280
}
264281

265282
// FinancialInstitutionSearch returns a FEDWIRE participant based on a WIREParticipant.CustomerName
266-
func (f *WIREDictionary) FinancialInstitutionSearch(s string) []*WIREParticipant {
283+
func (f *WIREDictionary) FinancialInstitutionSearch(s string, limit int) []*WIREParticipant {
267284
s = strings.ToLower(s)
268285

269-
// Participants is a subset WIREDictionary.WIREParticipants that match the search based on JaroWinkler similarity
270-
// and Levenshtein similarity
271-
Participants := make([]*WIREParticipant, 0)
286+
out := make([]*wireParticipantResult, 0)
272287

273-
// JaroWinkler is a more accurate version of the Jaro algorithm. It works by boosting the
274-
// score of exact matches at the beginning of the strings. By doing this, Winkler says that
275-
// typos are less common to happen at the beginning.
276288
for _, wireP := range f.WIREParticipants {
277-
if strcmp.JaroWinkler(strings.ToLower(wireP.CustomerName), s) > WIREJaroWinklerSimilarity {
278-
Participants = append(Participants, wireP)
289+
// JaroWinkler is a more accurate version of the Jaro algorithm. It works by boosting the
290+
// score of exact matches at the beginning of the strings. By doing this, Winkler says that
291+
// typos are less common to happen at the beginning.
292+
jaroScore := strcmp.JaroWinkler(strings.ToLower(wireP.CleanName), s)
293+
294+
// Levenshtein is the "edit distance" between two strings. This is the count of operations
295+
// (insert, delete, replace) needed for two strings to be equal.
296+
levenScore := strcmp.Levenshtein(strings.ToLower(wireP.CleanName), s)
297+
298+
if jaroScore > ACHJaroWinklerSimilarity || levenScore > ACHLevenshteinSimilarity {
299+
out = append(out, &wireParticipantResult{
300+
WIREParticipant: wireP,
301+
highestMatch: math.Max(jaroScore, levenScore),
302+
})
279303
}
280304
}
281305

282-
// Levenshtein is the "edit distance" between two strings. This is the count of operations
283-
// (insert, delete, replace) needed for two strings to be equal.
284-
for _, wireP := range f.WIREParticipants {
285-
if strcmp.Levenshtein(strings.ToLower(wireP.CustomerName), s) > WIRELevenshteinSimilarity {
286-
287-
// Only append if the not included in the Participant sub-set
288-
if len(Participants) != 0 {
289-
for _, p := range Participants {
290-
if p.CustomerName == wireP.CustomerName && p.RoutingNumber == wireP.RoutingNumber {
291-
break
292-
}
293-
}
294-
Participants = append(Participants, wireP)
295-
296-
} else {
297-
Participants = append(Participants, wireP)
298-
}
299-
}
300-
}
301-
// Sort the result
302-
sort.SliceStable(Participants, func(i, j int) bool { return Participants[i].CustomerName < Participants[j].CustomerName })
303-
304-
return Participants
306+
return reduceWIREResults(out, limit)
305307
}
306308

307309
// WIREParticipantRoutingNumberFilter filters WIREParticipant by Routing Number
@@ -365,3 +367,19 @@ func (f *WIREDictionary) CityFilter(s string) []*WIREParticipant {
365367
}
366368
return nsl
367369
}
370+
371+
// wireParticipantResult pairs a WIREParticipant with the score used to rank it in search results.
type wireParticipantResult struct {
372+
// WIREParticipant is the matched participant; reduceWIREResults returns this embedded pointer.
*WIREParticipant
373+
374+
// highestMatch is the ranking score; reduceWIREResults sorts results by it in descending order.
highestMatch float64
375+
}
376+
377+
func reduceWIREResults(in []*wireParticipantResult, limit int) []*WIREParticipant {
378+
sort.SliceStable(in, func(i, j int) bool { return in[i].highestMatch > in[j].highestMatch })
379+
380+
out := make([]*WIREParticipant, 0)
381+
for i := 0; i < limit && i < len(in); i++ {
382+
out = append(out, in[i].WIREParticipant)
383+
}
384+
return out
385+
}

0 commit comments

Comments
 (0)