Something like:
************************************************** *************
*!* DOUBLE Metaphone (c) 1998, 1999 BY Lawrence Philips
*!*
*!* Slightly modified BY Kevin Atkinson TO fix several bugs AND
*!* TO ALLOW it TO give BACK more than 4 characters.
*!*
*!* Atkinson's C++ version Translated to Visual Foxpro
*!* by Craig Boyd (Slighthaze) 10-23-2003
*!* From http://aspell.sourceforge.net/metaphone/dmetaph.cpp
*!* Also added SIGNIFICANTCHARS constant so developer
*!* can control number of characters returned
************************************************** *************
*********************************
*!* Matches are as follows
*********************************
*!* STRONG MATCH
*!* Primary = Primary
*!*
*!* NORMAL MATCH
*!* Primary = Secondary
*!* Secondary = Primary
*!*
*!* WEAK MATCH
*!* Alternate = Alternate
*********************************
*********************************
*!* Example of use
*********************************
LOCAL lnSoundex1, lnSoundex2
LOCAL lcPrimary1, lcSecondary1
LOCAL lcPrimary2, lcSecondary2
LOCAL lcFirstWord, lcSecondWord
lnSoundex1 = SOUNDEX("CEASE")
lnSoundex2 = SOUNDEX("SEAS")
=DOUBLEMETAPHONE("CEASE", @lcPrimary1, @lcSecondary1)
=DOUBLEMETAPHONE("SEAS", @lcPrimary2, @lcSecondary2)
MESSAGEBOX(["CEASE" compared to "SEAS"] + CHR(13)+ CHR(13) ;
+"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
+ TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" +
IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
+"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
+ "Primary #1: " + lcPrimary1 + CHR(9) + "Primary #2: " + lcPrimary2 + CHR(9)
+ CHR(13)+ CHR(9) ;
+ "Secondary: " + lcSecondary1 + CHR(9) + "Secondary: " + lcSecondary2 +
CHR(9) + CHR(13)+ CHR(9) ;
+ "Match Type: " + GetMatchType(lcPrimary1, lcSecondary1, lcPrimary2,
lcSecondary2) ;
, 0, "EXAMPLE 1 OF 2")
lnSoundex1 = SOUNDEX("MICHAEL")
lnSoundex2 = SOUNDEX("MICIAL")
=DOUBLEMETAPHONE("MICHAEL", @lcPrimary1, @lcSecondary1)
=DOUBLEMETAPHONE("MICIAL", @lcPrimary2, @lcSecondary2)
MESSAGEBOX(["MICHAEL" compared to "MICIAL"] + CHR(13)+ CHR(13) ;
+"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
+ TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" +
IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
+"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
+ "Primary #1: " + lcPrimary1 + CHR(9) + "Primary #2: " + lcPrimary2 + CHR(9)
+ CHR(13)+ CHR(9) ;
+ "Secondary: " + lcSecondary1 + CHR(9) + "Secondary: " + lcSecondary2 +
CHR(9) + CHR(13)+ CHR(9) ;
+ "Match Type: " + GetMatchType(lcPrimary1, lcSecondary1, lcPrimary2,
lcSecondary2) ;
, 0, "EXAMPLE 2 OF 2")
DO WHILE .T.
IF MESSAGEBOX("Would you like to try a comparison of your own?",36,"GIVE IT A
TRY") = 6
lcFirstWord = ALLTRIM(INPUTBOX("Enter a name or word:", "FIRST WORD TO
COMPARE"))
IF EMPTY(lcFirstWord)
MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
LOOP
ENDIF
lcSecondWord = ALLTRIM(INPUTBOX("Enter another name or word:", "SECOND WORD
TO COMPARE"))
IF EMPTY(lcSecondWord)
MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
LOOP
ENDIF
lnSoundex1 = SOUNDEX(lcFirstWord)
lnSoundex2 = SOUNDEX(lcSecondWord)
=DOUBLEMETAPHONE(lcFirstWord, @lcPrimary1, @lcSecondary1)
=DOUBLEMETAPHONE(lcSecondWord, @lcPrimary2, @lcSecondary2)
MESSAGEBOX(["]+lcFirstWord+[" compared to "]+lcSecondWord+["] + CHR(13)+
CHR(13) ;
+"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
+ TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" +
IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
+"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
+ "Primary #1: " + lcPrimary1 + CHR(9) + "Primary #2: " + lcPrimary2 +
CHR(9) + CHR(13)+ CHR(9) ;
+ "Secondary: " + lcSecondary1 + CHR(9) + "Secondary: " + lcSecondary2 +
CHR(9) + CHR(13)+ CHR(9) ;
+ "Match Type: " + GetMatchType(lcPrimary1, lcSecondary1, lcPrimary2,
lcSecondary2) ;
, 0, "COMPARISON RESULTS")
ELSE
EXIT
ENDIF
ENDDO
FUNCTION GetMatchType(tcPrime1, tcSecond1, tcPrime2, tcSecond2)
DO CASE
CASE tcPrime1 = tcPrime2 AND !EMPTY(tcPrime1) AND !EMPTY(tcPrime2)
RETURN "STRONG MATCH"
CASE tcPrime1 = tcSecond2 AND !EMPTY(tcPrime1) AND !EMPTY(tcSecond2)
RETURN "NORMAL MATCH"
CASE tcSecond1 = tcPrime2 AND !EMPTY(tcSecond1) AND !EMPTY(tcPrime2)
RETURN "NORMAL MATCH"
CASE tcSecond1 = tcSecond2 AND !EMPTY(tcSecond1) AND !EMPTY(tcSecond2)
RETURN "WEAK MATCH"
OTHERWISE
RETURN "NO MATCH"
ENDCASE
ENDFUNC
********END OF EXAMPLE*********
*******************************
FUNCTION DOUBLEMETAPHONE
*******************************
PARAMETERS tcWord, tcMetaph, tcMetaph2
#DEFINE SIGNIFICANTCHARS 4 &&Can be changed to allow more or less characters
returned
PRIVATE plAlternate, pnLength, pcPrimary, pcSecondary
LOCAL lcLetter, lnCurrent, lnLast
STORE "" TO lcLetter, pcPrimary, pcSecondary
plAlternate = .F.
lnCurrent = 1
pnLength = LEN(tcWord)
IF pnLength < 1
RETURN ""
ENDIF
lnLast = pnLength
plAlternate = .F.
tcWord = UPPER(ALLTRIM(tcWord))
*!* pad the original string so that we can index beyond the edge of the world
tcWord = tcWord + " "
*!* skip these when at start of word
IF INLIST(SUBSTR(tcWord,1,2), "GN", "KN", "PN", "WR", "PS")
lnCurrent = lnCurrent + 1
ENDIF
*!* Initial 'X' is pronounced 'Z' e.g. 'Xavier'
IF SUBSTR(tcWord,1,1) = 'X'
MetaphAdd("S") && 'Z' maps to 'S'
lnCurrent = lnCurrent + 1
ENDIF
DO WHILE .T. OR LEN(pcPrimary) < 4 OR LEN(pcSecondary) < 4
IF lnCurrent > pnLength
EXIT
ENDIF
lcLetter = SUBSTR(tcWord,lnCurrent,1)
DO CASE
CASE INLIST(lcLetter, 'A', 'E', 'I', 'O', 'U', 'Y')
IF lnCurrent = 1
*!* all init vowels now map to 'A'
MetaphAdd("A")
ENDIF
lnCurrent = lnCurrent + 1
LOOP
CASE lcLetter = 'B'
*!* "-mb", e.g", "dumb", already skipped over...
MetaphAdd("P")
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'B'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
CASE lcLetter = 'Ç'
MetaphAdd("S")
lnCurrent = lnCurrent + 1
LOOP
CASE lcLetter = 'C'
*!* various germanic
IF (lnCurrent > 1) ;
AND !IsVowel(lnCurrent - 2) ;
AND SUBSTR(tcWord, lnCurrent - 1, 3) = "ACH" ;
AND ((SUBSTR(tcWord,lnCurrent + 2,1) != 'I') AND
((SUBSTR(tcWord,lnCurrent + 2,1) != 'E') ;
OR INLIST(SUBSTR(tcWord,lnCurrent - 2, 6), "BACHER", "MACHER") ))
MetaphAdd("K")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* special case 'caesar'
IF lnCurrent = 1 AND SUBSTR(tcWord,lnCurrent, 6) = "CAESAR"
MetaphAdd("S")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* italian 'chianti'
IF SUBSTR(tcWord, lnCurrent, 4) = "CHIA"
MetaphAdd("K")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
IF SUBSTR(tcWord, lnCurrent, 2) = "CH"
*!* find 'michael'
IF (lnCurrent > 1) AND SUBSTR(tcWord, lnCurrent, 4) = "CHAE"
MetaphAddCond("K", "X")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* greek roots e.g. 'chemistry', 'chorus'
IF (lnCurrent = 1) ;
AND (INLIST(SUBSTR(tcWord, lnCurrent + 1, 5), "HARAC", "HARIS") ;
OR INLIST(SUBSTR(tcWord, lnCurrent + 1, 3), "HOR", "HYM", "HIA",
"HEM")) ;
AND !SUBSTR(tcWord, 1, 5) = "CHORE"
MetaphAdd("K")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* germanic, greek, or otherwise 'ch' for 'kh' sound
*!* e.g., 'wachtler', 'wechsler', but not 'tichner'
*!* e.g., 'wachtler', 'wechsler', but not 'tichner'
IF (INLIST(SUBSTR(tcWord, 1, 4), "VAN ", "VON ") OR SUBSTR(tcWord, 1, 3)
= "SCH") ;
OR INLIST(SUBSTR(tcWord, lnCurrent - 2, 6), "ORCHES", "ARCHIT",
"ORCHID") ;
OR INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "T", "S") ;
OR ((INLIST(SUBSTR(tcWord, lnCurrent - 1, 1), "A", "O", "U", "E") OR
(lnCurrent = 1)) ;
AND INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "L", "R", "N", "M",
"B", "H", "F", "V", "W", " "))
MetaphAdd("K")
ELSE
IF lnCurrent > 1
IF SUBSTR(tcWord, 1, 2) = "MC"
*!* e.g., "McHugh"
MetaphAdd("K")
ELSE
MetaphAddCond("X", "K")
ENDIF
ELSE
MetaphAdd("X")
ENDIF
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* e.g, 'czerny'
IF SUBSTR(tcWord, lnCurrent, 2) = "CZ" AND SUBSTR(tcWord, lnCurrent - 2,
4) != "WICZ"
MetaphAddCond("S", "X")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* e.g., 'focaccia'
IF SUBSTR(tcWord, lnCurrent + 1, 3) = "CIA"
MetaphAdd("X")
lnCurrent = lnCurrent + 3
LOOP
ENDIF
*!* double 'C', but not if e.g. 'McClellan'
IF SUBSTR(tcWord, lnCurrent, 2) = "CC" AND !(lnCurrent = 1 AND
SUBSTR(tcWord,1,1) = 'M')
*!* 'bellocchio' but not 'bacchus'
IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "I", "E", "H") AND
SUBSTR(tcWord, lnCurrent + 2, 2) != "HU"
*!* 'accident', 'accede' 'succeed'
IF((lnCurrent = 1) AND (SUBSTR(tcWord,lnCurrent - 1,1) = 'A')) ;
OR INLIST(SUBSTR(tcWord, lnCurrent - 1, 5), "UCCEE", "UCCES")
MetaphAdd("KS")
*!* 'bacci', 'bertucci', other italian
ELSE
MetaphAdd("X")
ENDIF
lnCurrent = lnCurrent + 3
LOOP
ELSE&& Pierce's rule
MetaphAdd("K")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent, 2), "CK", "CG", "CQ")
MetaphAdd("K")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent, 2), "CI", "CE", "CY")
*!* italian vs. english
IF INLIST(SUBSTR(tcWord, lnCurrent, 3), "CIO", "CIE", "CIA")
MetaphAddCond("S", "X")
ELSE
MetaphAdd("S")
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* else
MetaphAdd("K")
*!* name sent in 'mac caffrey', 'mac gregor
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), " C", " Q", " G")
lnCurrent = lnCurrent + 3
ELSE
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "C", "K", "Q") ;
AND !INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), "CE", "CI")
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
ENDIF
CASE lcLetter = 'D'
IF SUBSTR(tcWord, lnCurrent, 2) = "DG"
IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "I", "E", "Y")
*!* e.g. 'edge'
MetaphAdd("J")
lnCurrent = lnCurrent + 3
ELSE
*!* e.g. 'edgar'
MetaphAdd("TK")
lnCurrent = lnCurrent + 2
ENDIF
LOOP
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent, 2), "DT", "DD")
MetaphAdd("T")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* else
MetaphAdd("T")
lnCurrent = lnCurrent + 1
LOOP
CASE lcLetter = 'F'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'F'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("F")
LOOP
CASE lcLetter = 'G'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'H'
IF (lnCurrent > 1) AND !IsVowel(lnCurrent - 1)
MetaphAdd("K")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
IF lnCurrent < 3
*!* 'ghislane', ghiradelli
IF lnCurrent = 1
IF SUBSTR(tcWord,lnCurrent + 2,1) = 'I'
MetaphAdd("J")
ELSE
MetaphAdd("K")
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
ENDIF
*!* Parker's rule (with some further refinements) - e.g., 'hugh'
*!* e.g., 'bough'
*!* e.g., 'broughton'
IF((lnCurrent > 1) AND INLIST(SUBSTR(tcWord, lnCurrent - 2, 1), "B",
"H", "D")) ;
OR ((lnCurrent > 2) AND INLIST(SUBSTR(tcWord, lnCurrent - 3, 1),
"B", "H", "D")) ;
OR ((lnCurrent > 3) AND INLIST(SUBSTR(tcWord, lnCurrent - 4, 1),
"B", "H"))
lnCurrent = lnCurrent + 2
LOOP
ELSE
*!* e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
IF (lnCurrent > 2) ;
AND (SUBSTR(tcWord,lnCurrent - 1,1) = 'U') ;
AND INLIST(SUBSTR(tcWord, lnCurrent - 3, 1), "C", "G", "L", "R",
"T")
MetaphAdd("F")
ELSE
IF (lnCurrent > 1) AND SUBSTR(tcWord,lnCurrent - 1,1) != 'I'
MetaphAdd("K")
ENDIF
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
ENDIF
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'N'
IF (lnCurrent = 1) AND IsVowel(1) AND !SlavoGermanic()
MetaphAddCond("KN", "N")
ELSE
*!* not e.g. 'cagney'
IF SUBSTR(tcWord, lnCurrent + 2, 2) != "EY" ;
AND (SUBSTR(tcWord,lnCurrent + 1,1) != 'Y') AND !SlavoGermanic()
MetaphAddCond("N", "KN")
ELSE
MetaphAdd("KN")
ENDIF
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* 'tagliaro'
IF SUBSTR(tcWord, lnCurrent + 1, 2) = "LI" AND !SlavoGermanic()
MetaphAddCond("KL", "L")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* -ges-,-gep-,-gel-, -gie- at beginning
IF (lnCurrent = 1) ;
AND ((SUBSTR(tcWord,lnCurrent + 1,1) = 'Y') ;
OR INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), "ES", "EP", "EB", "EL",
"EY", "IB", "IL", "IN", "IE", "EI", "ER"))
MetaphAddCond("K", "J")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* -ger-, -gy-
IF (SUBSTR(tcWord, lnCurrent + 1, 2) = "ER" OR SUBSTR(tcWord,lnCurrent +
1,1) = 'Y') ;
AND !INLIST(SUBSTR(tcWord, 1, 6), "DANGER", "RANGER", "MANGER") ;
AND !INLIST(SUBSTR(tcWord, lnCurrent - 1, 1), "E", "I") ;
AND !INLIST(SUBSTR(tcWord, lnCurrent - 1, 3), "RGY", "OGY")
MetaphAddCond("K", "J")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* italian e.g, 'biaggi'
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "E", "I", "Y") OR
INLIST(SUBSTR(tcWord, lnCurrent - 1, 4), "AGGI", "OGGI")
*!* obvious germanic
IF (INLIST(SUBSTR(tcWord, 1, 4), "VAN ", "VON ") OR SUBSTR(tcWord, 1, 3)
= "SCH") ;
OR SUBSTR(tcWord, lnCurrent + 1, 2) = "ET"
MetaphAdd("K")
ELSE
*!* always soft if french ending
IF SUBSTR(tcWord, lnCurrent + 1, 4) = "IER "
MetaphAdd("J")
ELSE
MetaphAddCond("J", "K")
ENDIF
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'G'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("K")
LOOP
CASE lcLetter = 'H'
*!* only keep if first & before vowel or btw. 2 vowels
IF((lnCurrent = 1) OR IsVowel(lnCurrent - 1)) ;
AND IsVowel(lnCurrent + 1)
MetaphAdd("H")
lnCurrent = lnCurrent + 2
ELSE && also takes care of 'HH'
lnCurrent = lnCurrent + 1
ENDIF
LOOP
CASE lcLetter = 'J'
*!* obvious spanish, 'jose', 'san jacinto'
IF SUBSTR(tcWord, lnCurrent, 4) = "JOSE" OR SUBSTR(tcWord, 1, 4) = "SAN "
IF ((lnCurrent = 1) AND (SUBSTR(tcWord, lnCurrent + 4,1) = ' ')) OR
SUBSTR(tcWord, 1, 4) = "SAN "
MetaphAdd("H")
ELSE
MetaphAddCond("J", "H")
ENDIF
lnCurrent = lnCurrent + 1
LOOP
ENDIF
IF (lnCurrent = 1) AND SUBSTR(tcWord, lnCurrent, 4) != "JOSE"
MetaphAddCond("J", "A") && Yankelovich/Jankelowicz
ELSE
*!* spanish pron. of e.g. 'bajador'
IF IsVowel(lnCurrent - 1) ;
AND !SlavoGermanic() ;
AND ((SUBSTR(tcWord,lnCurrent + 1,1) = 'A') OR
(SUBSTR(tcWord,lnCurrent + 1,1) = 'O'))
MetaphAddCond("J", "H")
ELSE
IF lnCurrent = lnLast
MetaphAddCond("J", " ")
ELSE
IF !INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "L", "T", "K", "S",
"N", "M", "B", "Z") ;
AND !INLIST(SUBSTR(tcWord, lnCurrent - 1, 1), "S", "K", "L")
MetaphAdd("J")
ENDIF
ENDIF
ENDIF
ENDIF
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'J' && it could happen!
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
CASE lcLetter = 'K'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'K'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("K")
LOOP
CASE lcLetter = 'L'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'L'
*!* spanish e.g. 'cabrillo', 'gallegos'
IF ((lnCurrent = (pnLength - 3)) ;
AND INLIST(SUBSTR(tcWord, lnCurrent - 1, 4), "ILLO", "ILLA",
"ALLE")) ;
OR ((INLIST(SUBSTR(tcWord, lnLast - 1, 2), "AS", "OS") OR
INLIST(SUBSTR(tcWord, lnLast, 1), "A", "O")) ;
AND SUBSTR(tcWord, lnCurrent - 1, 4) = "ALLE")
MetaphAddCond("L", " ")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("L")
LOOP
CASE lcLetter = 'M'
*!* 'dumb','thumb'
IF (SUBSTR(tcWord, lnCurrent - 1, 3) = "UMB" ;
AND (((lnCurrent + 1) = lnLast) OR SUBSTR(tcWord, lnCurrent + 2, 2) =
"ER")) ;
OR (SUBSTR(tcWord,lnCurrent + 1,1) = 'M')
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("M")
LOOP
CASE lcLetter = 'N'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'N'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("N")
LOOP
CASE lcLetter = 'Ñ'
lnCurrent = lnCurrent + 1
MetaphAdd("N")
LOOP
CASE lcLetter = 'P'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'H'
MetaphAdd("F")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* also account for "campbell", "raspberry"
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "P", "B")
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
MetaphAdd("P")
ENDIF
LOOP
CASE lcLetter = 'Q'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'Q'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("K")
LOOP
CASE lcLetter = 'R'
*!* french e.g. 'rogier', but exclude 'hochmeier'
IF (lnCurrent = lnLast) ;
AND !SlavoGermanic() ;
AND SUBSTR(tcWord, lnCurrent - 2, 2) = "IE" ;
AND !INLIST(SUBSTR(tcWord, lnCurrent - 4, 2), "ME", "MA")
MetaphAddCond("", "R")
ELSE
MetaphAdd("R")
ENDIF
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'R'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
CASE lcLetter = 'S'
*!* special cases 'island', 'isle', 'carlisle', 'carlysle'
IF INLIST(SUBSTR(tcWord, lnCurrent - 1, 3), "ISL", "YSL")
lnCurrent = lnCurrent + 1
LOOP
ENDIF
*!* special case 'sugar-'
IF (lnCurrent = 1) AND SUBSTR(tcWord, lnCurrent, 5) = "SUGAR"
MetaphAddCond("X", "S")
lnCurrent = lnCurrent + 1
LOOP
ENDIF
IF SUBSTR(tcWord, lnCurrent, 2) = "SH"
*!* germanic
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 4), "HEIM", "HOEK", "HOLM",
"HOLZ")
MetaphAdd("S")
ELSE
MetaphAdd("X")
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
*!* italian & armenian
IF INLIST(SUBSTR(tcWord, lnCurrent, 3), "SIO", "SIA") OR SUBSTR(tcWord,
lnCurrent, 4) = "SIAN"
IF !SlavoGermanic()
MetaphAddCond("S", "X")
ELSE
MetaphAdd("S")
ENDIF
lnCurrent = lnCurrent + 3
LOOP
ENDIF
*!* german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match
'schneider'
*!* also, -sz- in slavic language altho in hungarian it is pronounced 's'
IF ((lnCurrent = 1) ;
AND INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "M", "N", "L", "W")) ;
OR SUBSTR(tcWord, lnCurrent + 1, 1) = "Z"
MetaphAddCond("S", "X")
IF SUBSTR(tcWord, lnCurrent + 1, 1) = "Z"
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
ENDIF
IF SUBSTR(tcWord, lnCurrent, 2) = "SC"
*!* Schlesinger's rule
IF SUBSTR(tcWord,lnCurrent + 2,1) = 'H'
*!* dutch origin, e.g. 'school', 'schooner'
IF INLIST(SUBSTR(tcWord, lnCurrent + 3, 2), "OO", "ER", "EN", "UY",
"ED", "EM")
*!* 'schermerhorn', 'schenker'
IF INLIST(SUBSTR(tcWord, lnCurrent + 3, 2), "ER", "EN")
MetaphAddCond("X", "SK")
ELSE
MetaphAdd("SK")
ENDIF
lnCurrent = lnCurrent + 3
LOOP
ELSE
IF (lnCurrent = 1) AND !IsVowel(3) AND (SUBSTR(tcWord,3,1) != 'W')
MetaphAddCond("X", "S")
ELSE
MetaphAdd("X")
ENDIF
lnCurrent = lnCurrent + 3
LOOP
ENDIF
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "I", "E", "Y")
MetaphAdd("S")
lnCurrent = lnCurrent + 3
LOOP
ENDIF
*!* else
MetaphAdd("SK")
lnCurrent = lnCurrent + 3
LOOP
ENDIF
*!* french e.g. 'resnais', 'artois'
IF (lnCurrent = lnLast) AND INLIST(SUBSTR(tcWord, lnCurrent - 2, 2), "AI",
"OI")
MetaphAddCond("", "S")
ELSE
MetaphAdd("S")
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "S", "Z")
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
CASE lcLetter = 'T'
IF SUBSTR(tcWord, lnCurrent, 4) = "TION"
MetaphAdd("X")
lnCurrent = lnCurrent + 3
LOOP
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent, 3), "TIA", "TCH")
MetaphAdd("X")
lnCurrent = lnCurrent + 3
LOOP
ENDIF
IF SUBSTR(tcWord, lnCurrent, 2) = "TH" ;
OR SUBSTR(tcWord, lnCurrent, 3) = "TTH"
*!* special case 'thomas', 'thames' or germanic
IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 2), "OM", "AM") ;
OR INLIST(SUBSTR(tcWord, 1, 4), "VAN ", "VON ") ;
OR SUBSTR(tcWord, 1, 3) = "SCH"
MetaphAdd("T")
ELSE
MetaphAddCond("0", "T")
ENDIF
lnCurrent = lnCurrent + 2
LOOP
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "T", "D")
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("T")
LOOP
CASE lcLetter = 'V'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'V'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
MetaphAdd("F")
LOOP
CASE lcLetter = 'W'
*!* can also be in middle of word
IF SUBSTR(tcWord, lnCurrent, 2) = "WR"
MetaphAdd("R")
lnCurrent = lnCurrent + 2
LOOP
ENDIF
IF (lnCurrent = 1) ;
AND (IsVowel(lnCurrent + 1) OR SUBSTR(tcWord, lnCurrent, 2) = "WH")
*!* Wasserman should match Vasserman
IF IsVowel(lnCurrent + 1)
MetaphAddCond("A", "F")
ELSE
*!* need Uomo to match Womo
MetaphAdd("A")
ENDIF
ENDIF
*!* Arnow should match Arnoff
IF ((lnCurrent = lnLast) AND IsVowel(lnCurrent - 1)) ;
OR INLIST(SUBSTR(tcWord, lnCurrent - 1, 5), "EWSKI", "EWSKY", "OWSKI",
"OWSKY") ;
OR SUBSTR(tcWord, 1, 3) = "SCH"
MetaphAddCond("", "F")
lnCurrent = lnCurrent + 1
LOOP
ENDIF
*!* polish e.g. 'filipowicz'
IF INLIST(SUBSTR(tcWord, lnCurrent, 4), "WICZ", "WITZ")
MetaphAddCond("TS", "FX")
lnCurrent = lnCurrent + 4
LOOP
ENDIF
*!* else skip it
lnCurrent = lnCurrent + 1
LOOP
CASE lcLetter = 'X'
*!* french e.g. breaux
IF !(lnCurrent = lnLast ;
AND (INLIST(SUBSTR(tcWord, lnCurrent - 3, 3), "IAU", "EAU") ;
OR INLIST(SUBSTR(tcWord, lnCurrent - 2, 2), "AU", "OU")))
MetaphAdd("KS")
ENDIF
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "C", "X")
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
CASE lcLetter = 'Z'
*!* chinese pinyin e.g. 'zhao'
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'H'
MetaphAdd("J")
lnCurrent = lnCurrent + 2
LOOP
ELSE
IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), "ZO", "ZI", "ZA") ;
OR (SlavoGermanic() AND ((lnCurrent > 1) AND SUBSTR(tcWord,lnCurrent
- 1,1) != 'T'))
MetaphAddCond("S", "TS")
ELSE
MetaphAdd("S")
ENDIF
ENDIF
IF SUBSTR(tcWord,lnCurrent + 1,1) = 'Z'
lnCurrent = lnCurrent + 2
ELSE
lnCurrent = lnCurrent + 1
ENDIF
LOOP
ENDCASE
lnCurrent = lnCurrent + 1
ENDDO
tcMetaph = LEFT(pcPrimary, SIGNIFICANTCHARS)
IF plAlternate
tcMetaph2 = LEFT(pcSecondary, SIGNIFICANTCHARS)
ELSE
tcMetaph2 = ""
ENDIF
ENDFUNC
*******************************
FUNCTION SlavoGermanic()
*******************************
IF ATC('W', tcWord) > 0 OR ATC('K', tcWord) > 0 ;
OR ATC("CZ", tcWord) > 0 OR ATC("WITZ", tcWord) > 0
RETURN .T.
ELSE
RETURN .F.
ENDIF
ENDFUNC
*******************************
PROCEDURE MetaphAdd(lcMain)
*******************************
IF LEN(lcMain) > 0
pcPrimary = pcPrimary + lcMain
pcSecondary = pcSecondary + lcMain
ENDIF
ENDPROC
*******************************
PROCEDURE MetaphAddCond(lcMain, lcAlt)
*******************************
IF LEN(lcMain) > 0
pcPrimary = pcPrimary + lcMain
ENDIF
IF LEN(lcAlt) > 0
plAlternate = .T.
IF SUBSTR(lcAlt, 1, 1) != ' '
pcSecondary = pcSecondary + lcAlt
ENDIF
ELSE
IF LEN(lcMain) > 0 AND SUBSTR(lcMain, 1, 1) != ' '
pcSecondary = pcSecondary + lcMain
ENDIF
ENDIF
ENDPROC
*******************************
FUNCTION IsVowel(lnAt)
*******************************
LOCAL lcIt
IF !BETWEEN(lnAt, 1, pnLength)
RETURN .F.
ELSE
lcIt = SUBSTR(tcWord, lnAt, 1)
RETURN (INLIST(lcIt,'A','E','I','O','U','Y'))
ENDIF
ENDFUNC
-- Craig SBoyd
--------------------------------------------------------------------------------
Category CODE Samples
On Mon, 04 Dec 2006 17:13:43 -0600, tim_witort (AT) hotmail (DOT) com (Tim Witort) wrote:
Quote:
Does anyone know of an existing class or code that can find
"similar" text? What I have in mind is this:
1) Given a text string
2) Examine an array or cursor of strings and weight each
one as to it's similarity to the search string.
3) A weight of 100 would indicate a perfect match
4) The logic would understand that the search string could
contain multiple words and would take that into account
when comparing to the target strings.
It would seem someone has written something like this since it
is similar to a lot of web search engines and how they order
the hits by weight - this would be for text strings rather than
web sites.
Any ideas? I'd rather not write this myself if it's already
been done.
-- TRW
_______________________________________
t i m
a t
w i t o r t d o t c o m
_______________________________________ |
--- AntiSpam/harvest ---
Remove X's to send email to me.