概述
Windows kernel中不存在Unicode/UTF-8转换函数, 因此通过分析UTF-8编码表自己实现转换函数
UTF-8编码字符理论上可以最多到6个字节长,然而16位BMP(Basic Multilingual Plane)字符最多只用到3字节长。下面看一下UTF-8编码表:
U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
/*
 * Named hexadecimal constants for the UTF-8 converters: lead-byte bit
 * patterns and the code-point boundaries between encoding lengths, as laid
 * out in the UTF-8 encoding table.
 */
enum Hex
{
    // Code-point boundaries: the first value that needs one more byte.
    // Hex80 is also the smallest suffix (10xxxxxx) byte value.
    Hex80       = 0x80,
    Hex800      = 0x800,
    Hex10000    = 0x10000,
    Hex200000   = 0x200000,
    Hex4000000  = 0x4000000,
    // NOTE(review): 0x80000000 does not fit in a signed 32-bit int; ISO C
    // before C23 requires enumerator values to be representable as int.
    // Confirm the target compiler accepts this as intended.
    Hex80000000 = 0x80000000,

    // Lead-byte bit patterns for 2-, 3-, 4-, 5- and 6-byte sequences.
    HexC0 = 0xC0,
    HexE0 = 0xE0,
    HexF0 = 0xF0,
    HexF8 = 0xF8,
    HexFC = 0xFC,

    // Remaining byte values above the 6-byte lead pattern.
    HexFE = 0xFE,
    HexFF = 0xFF
};
/*
* Convert a UTF-8 byte sequence into 16-bit Unicode characters (BMP only).
*
* des: The address of the output buffer of Unicode characters.
* src: The address of the UTF-8 bytes to convert.
* size_d: The capacity of des, counted in Unicode characters (words).
* size_s: The number of UTF-8 bytes to convert from src.
*
* Conversion stops when either the source is exhausted, the destination is
* full, or a multi-byte sequence is truncated by the end of src. No
* terminator is written and the number of characters produced is not
* reported. Only 1-, 2- and 3-byte sequences are decoded; suffix bytes are
* not validated.
*/
VOID UTF82Unicode(PWCHAR des, PBYTE src, int size_d, int size_s)
{
    int s = 0, d = 0;

    // Exactly one branch runs per iteration, so the bounds checked in the
    // loop condition are still valid when src[s] is read and des[d] written.
    while (s < size_s && d < size_d)
    {
        int lead = src[s];

        if (lead < Hex80)
        {
            // 1 Byte UTF8
            des[d] = src[s];
            d++;
            s++;
        }
        else if (lead < HexC0)
        {
            // Suffix byte without a lead byte: skip it
            s++;
        }
        else if (lead < HexE0)
        {
            // 2 Bytes UTF8
            if (s + 1 >= size_s)
                break;
            des[d] = (lead & 0x1F) << 6;
            des[d] |= src[s + 1] & 0x3F;
            d++;
            s += 2;
        }
        else if (lead < HexF0)
        {
            // 3 Bytes UTF8
            if (s + 2 >= size_s)
                break;
            des[d] = (lead & 0x0F) << 12;
            des[d] |= (src[s + 1] & 0x3F) << 6;
            des[d] |= src[s + 2] & 0x3F;
            d++;
            s += 3;
        }
        else
        {
            // Lead byte of a 4-byte or longer sequence (or an invalid byte):
            // not representable in a single 16-bit character. Skip it so the
            // loop always makes progress; its suffix bytes are dropped by
            // the suffix-byte branch on the following iterations.
            s++;
        }
    }
}
/*
* Convert 16-bit Unicode characters into a UTF-8 byte sequence.
*
* des: The address of the output buffer of UTF-8 bytes.
* src: The address of the Unicode characters to convert.
* size_d: The capacity of des, counted in bytes.
* size_s: The number of Unicode characters (words) to convert from src.
*
* Conversion stops when either the source is exhausted or the next
* character's encoding would not fit completely in des. No terminator is
* written and the number of bytes produced is not reported. Every source
* word is encoded on its own as 1, 2 or 3 bytes; surrogate words are not
* combined into 4-byte sequences.
*/
VOID Unicode2UTF8(PBYTE des, PWCHAR src, int size_d, int size_s)
{
    int s = 0, d = 0;

    // The source word is read once per iteration, right after the loop
    // condition has confirmed s < size_s, and exactly one branch handles it.
    while (s < size_s && d < size_d)
    {
        int ch = src[s];

        if (ch < Hex80)
        {
            // 1 Byte UTF-8
            des[d] = (BYTE)ch;
            d++;
        }
        else if (ch < Hex800)
        {
            // 2 Bytes UTF-8
            if (d + 1 >= size_d)
                break;
            des[d] = (BYTE)(0xC0 | (ch >> 6));
            des[d + 1] = (BYTE)(0x80 | (ch & 0x003F));
            d += 2;
        }
        else
        {
            // 3 Bytes UTF-8
            // NOTE(review): assumes a source word never exceeds 0xFFFF
            // (16-bit WCHAR) - confirm for the target platform.
            if (d + 2 >= size_d)
                break;
            des[d] = (BYTE)(0xE0 | (ch >> 12));
            des[d + 1] = (BYTE)(0x80 | ((ch >> 6) & 0x003F));
            des[d + 2] = (BYTE)(0x80 | (ch & 0x003F));
            d += 3;
        }
        s++;
    }
}
最后
以上就是动人狗为你收集整理的Windows kernel中实现Unicode和UTF-8之间的转换的全部内容,希望文章能够帮你解决Windows kernel中实现Unicode和UTF-8之间的转换所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复