我是靠谱客的博主 动人狗,最近开发中收集的这篇文章主要介绍Windows kernel中实现Unicode和UTF-8之间的转换,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

Windows kernel中不存在Unicode/UTF-8转换函数, 因此通过分析UTF-8编码表自己实现转换函数

UTF-8编码字符理论上可以最多到6个字节长,然而16位BMP(Basic Multilingual Plane)字符最多只用到3字节长。下面看一下UTF-8编码表:

U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 

enum Hex{ HexC0 = 0xC0, HexE0 = 0xE0, HexF0 = 0xF0, HexF8 = 0xF8, HexFC = 0xFC, HexFE = 0xFE, HexFF = 0xFF, Hex80 = 0x80, Hex800 = 0x800, Hex10000 = 0x10000, Hex200000 = 0x200000, Hex4000000 = 0x4000000, Hex80000000 = 0x80000000};

/* 
 * des: The address of a sequence of Unicode characters.
 * src: The address of a sequence of UTF8 characters.
 * size_d: The number of Unicode characters in word.
 * size_s: The number of UTF8 characters to convert in byte.
 */
VOID UTF82Unicode(PWCHAR des, PBYTE src, int size_d, int size_s)
{
	int s = 0, d = 0;
	
	while(s < size_s && d < size_d)
	{
		// 1 Byte UTF8
		if(src[s] < Hex80)
		{
			des[d++] = src[s++];
		}
		// Suffix byte
		if(src[s] >= Hex80 && src[s] < HexC0)
		{
			s++;
			continue;
		}
		//  2 Bytes UTF8
		if(src[s] >= HexC0 && src[s] < HexE0)
		{
			if(s + 1 >= size_s)
				break;
			des[d] = (src[s] & 0x1F) << 6;
			des[d] |= src[s+1] & 0x3F;
			d++;
			s++;
		}
		// 3 Bytes UTF8
		if(src[s] >= HexE0 && src[s] < HexF0)
		{
			if(s + 2 >= size_s)
				break;
			des[d] = (src[s] & 0x0F) << 12;
			des[d] |= (src[s+1] & 0x3F) << 6;
			des[d] |= src[s+2] & 0x3F;
			d++;
			s++;
		}
	}
}

/* 
 * des: The address of a sequence of UTF8 characters.
 * src: The address of a sequence of Unicode characters.
 * size_d: The number of UTF8 characters in byte.
 * size_s: The number of Unicode characters to convert in word.
 */
VOID Unicode2UTF8(PBYTE des, PWCHAR src, int size_d, int size_s)
{
	int s = 0, d = 0;
	while(s < size_s && d < size_d)
	{
		if(src[s] < Hex80)
		{
			// 1 Byte UTF-8
			des[d] = (BYTE)src[s];
			d++; 
			s++;
		}
		if(src[s] >= Hex80 && src[s] < Hex800)
		{
			// 2 Bytes UTF-8
			if(d + 1 >= size_d)
				break;
			des[d] = (BYTE)(0xC0 | (src[s] >> 6));
			des[d+1] = (BYTE)(0x80 | (src[s] & 0x003F));
			d += 2;
			s++;
		}
		if(src[s] >= Hex800 && src[s] < Hex10000)
		{
			// 3 Bytes UTF-8
			if(d + 2 >= size_d )
				break;
			des[d] = (BYTE)(0xE0 | (src[s] >> 12));
			des[d+1] = (BYTE)(0x80 | ((src[s] >> 6) & 0x003F));
			des[d+2] = (BYTE)(0x80 | (src[s] & 0x003F));
			d += 3;
			s++;
		}
	}
}


最后

以上就是动人狗为你收集整理的Windows kernel中实现Unicode和UTF-8之间的转换的全部内容,希望文章能够帮你解决Windows kernel中实现Unicode和UTF-8之间的转换所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(53)

评论列表共有 0 条评论

立即
投稿
返回
顶部