参考博客:编辑距离(levenshtein distance)C/C++实现
使用wstring优化处理中文:
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107class levenshtein { public: static int compare( const std::string& s1, const std::string& s2 ) { // create two work vectors of integer distances const int m = s1.size(); const int n = s2.size(); int* v1 = new int[n+1]; int* v2 = new int[n+1]; // initialize v1 (the previous row of distances) // this row is A[0][i]: edit distance for an empty s1 // the distance is just the number of characters to delete from s2 for( int i = 0; i <= n; ++i ) { v1[i] = i; } // calculate v2 (current row distances) from the previous row v1 for( int i = 0; i < m; ++i ) { // first element of v2 is A[i+1][0] // edit distance is delete (i+1) chars from s to match empty s2 v2[0] = i+1; // use formula to fill in the rest of the row for( int j = 0; j < n; ++j ) { // calculating costs for A[i+1][j+1] int deletionCost = v1[j+1] + 1; int insertionCost = v2[j] + 1; int substitutionCost = v1[j]; if( s1[i] != s2[j] ) { ++ substitutionCost; } v2[j+1] = min3( deletionCost, insertionCost, substitutionCost ); } // copy v2 (current row) to v1 (previous row) for next iteration swap( v1, v2 ); } // after the last swap, the results of v2 are now in v1 int retval = v1[n]; delete []v1; delete []v2; return retval; } static int wcompare( const std::wstring& s1, const std::wstring& s2 ) { // create two work vectors of integer distances const int m = s1.size(); const int n = s2.size(); int* v1 = new int[n+1]; int* v2 = new int[n+1]; // initialize v1 (the previous row of distances) // this row is A[0][i]: edit distance for an empty s1 // the distance is just the number of characters to delete from s2 for( int i = 0; i <= n; ++i ) { v1[i] = i; } // calculate v2 (current row distances) from the previous row v1 for( int i = 0; i < m; ++i ) { // first element of v2 is A[i+1][0] // edit distance is delete (i+1) chars from s to match empty s2 v2[0] = i+1; // use formula to fill in the rest of the row for( int j = 0; j < n; ++j ) { // calculating costs for A[i+1][j+1] int deletionCost = v1[j+1] + 1; int insertionCost = v2[j] + 1; int substitutionCost = v1[j]; if( s1[i] != s2[j] ) { ++ substitutionCost; } v2[j+1] = min3( deletionCost, insertionCost, substitutionCost ); } // copy v2 (current row) to v1 (previous row) for next iteration swap( v1, v2 ); } // after the last swap, the results of v2 are now in v1 int retval = v1[n]; delete []v1; delete []v2; return retval; } private: static int min3( int a, int b, int c ) { if ( a < b ) { return a < c ? a : c ; } else { return b < c ? b : c ; } } static void swap( int*& a, int*& b ) { int* tmp = a; a = b; b = tmp; } };
最后
以上就是鳗鱼玫瑰最近收集整理的关于C++ 中文字符串编辑距离计算的全部内容,更多相关C++内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复