概述
SQL 代码(Oracle PL/SQL)
CREATE OR REPLACE FUNCTION ld
-- Levenshtein (edit) distance between two strings: the minimum number of
-- single-character insertions, deletions and substitutions needed to turn
-- p_source_string into p_target_string.
--
-- Parameters:
--   p_source_string  string to transform (NULL is treated as length 0)
--   p_target_string  string to reach     (NULL is treated as length 0)
-- Returns:
--   the edit distance as a NUMBER; when either string is NULL/empty the
--   result is the length of the other string.
(p_source_string IN VARCHAR2,
 p_target_string IN VARCHAR2)
RETURN NUMBER
DETERMINISTIC
AS
    TYPE t_cost_row IS TABLE OF NUMBER INDEX BY BINARY_INTEGER;

    l_src_len    NUMBER := NVL(LENGTH(p_source_string), 0);
    l_tgt_len    NUMBER := NVL(LENGTH(p_target_string), 0);
    l_prev_row   t_cost_row;  -- costs for the previous source character
    l_curr_row   t_cost_row;  -- costs for the source character being processed
    l_subst_cost NUMBER := 0;
BEGIN
    -- Trivial cases: the distance to/from an empty string is the other length.
    IF l_src_len = 0 THEN
        RETURN l_tgt_len;
    END IF;

    IF l_tgt_len = 0 THEN
        RETURN l_src_len;
    END IF;

    -- Row for "no source characters consumed": reaching the first tgt_pos
    -- target characters costs tgt_pos insertions.
    FOR tgt_pos IN 0 .. l_tgt_len LOOP
        l_prev_row(tgt_pos) := tgt_pos;
    END LOOP;

    FOR src_pos IN 1 .. l_src_len LOOP
        -- Reaching an empty target prefix costs src_pos deletions.
        l_curr_row(0) := src_pos;

        FOR tgt_pos IN 1 .. l_tgt_len LOOP
            l_subst_cost :=
                CASE
                    WHEN SUBSTR(p_source_string, src_pos, 1) =
                         SUBSTR(p_target_string, tgt_pos, 1)
                    THEN 0
                    ELSE 1
                END;

            -- Cheapest of: substitute/match, step from the previous row,
            -- step from the previous cell of the current row.
            l_curr_row(tgt_pos) := LEAST(l_prev_row(tgt_pos - 1) + l_subst_cost,
                                         l_prev_row(tgt_pos) + 1,
                                         l_curr_row(tgt_pos - 1) + 1);
        END LOOP;

        -- The finished row becomes the reference row for the next character.
        l_prev_row := l_curr_row;
    END LOOP;

    RETURN l_curr_row(l_tgt_len);
END ld;
Java 代码
/**
 * Computes a word-overlap similarity between two sentences.
 *
 * General formula: Kq*q / (Kq*q + Kr*r + Ks*s), with Kq > 0, Kr >= 0, Ks >= 0, where
 *   q = number of distinct words present in both sentences,
 *   s = number of distinct words present in the first sentence but not in the second,
 *   r = number of distinct words present in the second sentence but not in the first.
 * (t, the number of words present in neither sentence, does not enter the formula.)
 * This implementation uses Kq = 2 and Kr = Ks = 1, i.e. 2q / (2q + r + s).
 *
 * @param ora  first sentence; split into words via SplitWord.splitWord
 * @param dest second sentence; split the same way
 * @return the similarity value; 0.0 when either sentence cannot be split
 *         (null Sentence) or when both word sets are empty; 1 when either
 *         word set is null (legacy behavior, see note in the body)
 */
public static double getSimilar(String ora,String dest){
    double ff=0.0;
    Sentence oraSen = SplitWord.splitWord(ora);
    Sentence desSen = SplitWord.splitWord(dest);
    if(oraSen!=null&&desSen!=null){
        HashSet<String> oraS=oraSen.toWord();
        HashSet<String> desS=desSen.toWord();
        if(oraS!=null && desS!=null){
            // q: words shared by both sets.
            int q=0;
            for(String word : oraS){
                if(desS.contains(word)) ++q;
            }
            // s: words only in the first set; r: words only in the second set.
            // (The previous code computed r as desS.size()-s, which is not the
            // count of second-only words and skewed the result.)
            int s=oraS.size()-q;
            int r=desS.size()-q;
            int denominator=2*q+1*r+1*s;
            if(denominator==0){
                // Both word sets are empty: avoid 0.0/0 (NaN) and report no similarity.
                return ff;
            }
            return 2.00*q/denominator;
        }
        // NOTE(review): returning 1 (maximum similarity) when a word set is null
        // looks questionable; kept unchanged for backward compatibility — confirm
        // with callers.
        return 1;
    }
    return ff;
}
最后
以上就是瘦瘦电源为你收集整理的中文句子相似度判断源码的全部内容,希望文章能够帮你解决中文句子相似度判断源码所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复