sql server - How can I modify my string comparison algorithm to reduce the 'significance' of digits? -
I have a SQL CLR table-valued function that accepts two string parameters for the purpose of comparing company names and returning a match score.
This is the C# function I'm using to determine the likelihood of two strings matching:
This works great because of the simplicity of the code, but comparing "HN Felt 09" with "HN Felt 01" gives a high percentage, which is not right. I want to reduce the outcome by 50% if the only difference between the strings is a digit or digits. How can I achieve that with the function below?
/// <summary>
/// Scores how closely two names match, as the percentage of words they share.
/// </summary>
/// <param name="string1">First name to compare.</param>
/// <param name="string2">Second name to compare.</param>
/// <returns>
/// The number of words of <paramref name="string1"/> that also occur in
/// <paramref name="string2"/>, divided by the larger of the two word counts,
/// times 100. Returns 0 when either argument is null.
/// </returns>
public static decimal comparetext(string string1, string string2)
{
    // A missing name cannot match anything; without this guard the first
    // Replace call would throw a NullReferenceException.
    if (string1 == null || string2 == null)
    {
        return 0;
    }

    string[] firstWords = SplitIntoCleanWords(string1);
    string[] secondWords = SplitIntoCleanWords(string2);

    // Only exact word matches count, so "09" and "01" are simply two different words.
    int sameWordCount = firstWords.Count(word => secondWords.Contains(word));

    // Dividing by the longer name's word count penalises extra, unmatched words.
    // Split always yields at least one element, so the divisor is never zero.
    int largerWordCount = Math.Max(firstWords.Length, secondWords.Length);

    return (decimal)sameWordCount / largerWordCount * 100;
}

/// <summary>
/// Applies the cleaning steps shared by both inputs and splits the result on spaces.
/// </summary>
private static string[] SplitIntoCleanWords(string text)
{
    // more string cleaning
    text = text.Replace(",", " ").Replace(".", " ").Replace("/", " ").Trim();

    // Encode with the "cyrillic" code page, then decode those bytes as ASCII.
    // NOTE(review): presumably intended to strip accents/diacritics - confirm.
    text = Encoding.ASCII.GetString(Encoding.GetEncoding("cyrillic").GetBytes(text));

    // Collapses each run of spaces into a single space.
    text = text.Replace(" ", " |").Replace("| ", "").Replace("|", "");

    // Project helper defined outside this snippet; its name is kept exactly as it appears here.
    text = wordfunctions.removeduplicatewords(text);

    return text.Split(' ');
}

// This part compares the words (simple, but it works):
// Count the words of the first string that also appear in the second string.
int samewordcount = 0;
foreach (string string1word in string1separatewords)
{
    if (string2separatewords.Contains(string1word))
    {
        samewordcount++;
    }
}
// I'm not able to work out how to check for mismatches in digits.
Comments
Post a Comment