vc 读写txt文件支持Ascii/Unicode/Unicode big endian/UTF-8

69 阅读 0 评论 46 点赞

我是靠谱客的博主炙热方盒。这篇文章是我在最近的开发中收集的，主要介绍如何在 VC 中读写 ASCII/Unicode/Unicode big endian/UTF-8 编码的 txt 文件。我觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

#include <string>
using std::string;
using std::wstring;

// Converts a NUL-terminated multi-byte string to a wide-character string.
//
// pszMultiByte : NUL-terminated input bytes; may be NULL.
// uCodePage    : code page the input is encoded in (CP_ACP by default;
//                callers in this file also pass CP_UTF8).
//
// Returns the converted text without its terminator. Returns an empty string
// when the input is NULL, when either ::MultiByteToWideChar call fails, or
// when an exception (e.g. allocation failure) is raised during conversion;
// no exception escapes this function.
std::wstring GlobalMultiByteToWideChar(const char* pszMultiByte,UINT uCodePage = CP_ACP )
{
	if (NULL == pszMultiByte)
	{
		return std::wstring();
	}

	try
	{
		// First pass: query the required size. Because the input length is
		// given as -1, the reported size includes the terminating NUL.
		const int iWideChars = ::MultiByteToWideChar( uCodePage, 0, pszMultiByte, -1, NULL, 0 );
		if (iWideChars <= 0)
		{
			return std::wstring();
		}

		// Convert straight into the string's own storage; no manual new/delete.
		std::wstring wstrWideChar(static_cast<std::wstring::size_type>(iWideChars), L'\0');
		const int iWritten = ::MultiByteToWideChar( uCodePage, 0, pszMultiByte, -1, &wstrWideChar[0], iWideChars );
		if (iWritten <= 0)
		{
			// The conversion itself failed; do not hand back a half-filled buffer.
			return std::wstring();
		}

		// Drop the terminating NUL that the API wrote as the last element.
		wstrWideChar.resize(static_cast<std::wstring::size_type>(iWritten - 1));
		return wstrWideChar;
	}
	catch (...)
	{
		// Deliberate best-effort contract: any failure yields an empty string.
		return std::wstring();
	}
}

// Converts a NUL-terminated wide-character string to a multi-byte string.
//
// pwszMultiByte : NUL-terminated wide input; may be NULL.
// uCodePage     : target code page (CP_ACP by default; callers in this file
//                 also pass CP_UTF8).
//
// Returns the converted bytes without their terminator. Returns an empty
// string when the input is NULL, when either ::WideCharToMultiByte call
// fails, or when an exception (e.g. allocation failure) is raised during
// conversion; no exception escapes this function.
std::string GlobalWideCharToMultiByte(const wchar_t* pwszMultiByte,UINT uCodePage  = CP_ACP )
{
	if (NULL == pwszMultiByte)
	{
		return std::string();
	}

	try
	{
		// First pass: query the required size. Because the input length is
		// given as -1, the reported size includes the terminating NUL.
		// The last two arguments are pointers, so pass NULL rather than FALSE.
		const int iMultiBytes = ::WideCharToMultiByte( uCodePage, 0, pwszMultiByte, -1, NULL, 0, NULL, NULL );
		if (iMultiBytes <= 0)
		{
			return std::string();
		}

		// Convert straight into the string's own storage; no manual new/delete.
		std::string strMultiChar(static_cast<std::string::size_type>(iMultiBytes), '\0');
		const int iWritten = ::WideCharToMultiByte( uCodePage, 0, pwszMultiByte, -1, &strMultiChar[0], iMultiBytes, NULL, NULL );
		if (iWritten <= 0)
		{
			// The conversion itself failed; do not hand back a half-filled buffer.
			return std::string();
		}

		// Drop the terminating NUL that the API wrote as the last element.
		strMultiChar.resize(static_cast<std::string::size_type>(iWritten - 1));
		return strMultiChar;
	}
	catch (...)
	{
		// Deliberate best-effort contract: any failure yields an empty string.
		return std::string();
	}
}

//

// Text-file encodings distinguished by the byte-order mark at the start of
// the file. The numeric values are spelled out so they stay stable if
// enumerators are ever reordered.
enum BomFormat
{
	BOM_ASCII = 0,              // no recognised BOM
	BOM_UNICODE = 1,            // BOM bytes FF FE
	BOM_UNICODE_BIG_ENDIAN = 2, // BOM bytes FE FF
	BOM_UTF_8 = 3               // BOM bytes EF BB BF
};

bool SaveTxtData(std::wstring& strText,std::wstring& strPathName,BomFormat bomFormat = BOM_UNICODE)
{
	CFile File;
	bool res = false ;

	if (File.Open(strPathName.c_str(), CFile::modeCreate | CFile::modeWrite | CFile::typeBinary))
	{
		unsigned char *lpData = NULL;
		unsigned long ulLen = 0 ;
		unsigned long ulDataLen = strText.length() * 2 ;

		switch (bomFormat)
		{
		case BOM_ASCII:
			{
				std::string strData = GlobalWideCharToMultiByte(strText.c_str());
				ulLen = strData.length();
				lpData = new unsigned char[ulLen];
				memcpy(lpData,strData.c_str(),ulLen);
			}
			break;
		case BOM_UNICODE:
			{
				ulLen = ulDataLen + 2 ;
				lpData = new unsigned char[ulLen];
				lpData[0] = 0xff;
				lpData[1] = 0xfe;
				memcpy(&lpData[2],strText.c_str(),ulDataLen);
			}
			break;
		case BOM_UNICODE_BIG_ENDIAN:
			{
				ulLen = ulDataLen + 2 ;
				lpData = new unsigned char[ulLen];
				lpData[0] = 0xfe;
				lpData[1] = 0xff;

				wchar_t* pd = new wchar_t[ulDataLen];
				memcpy(pd,strText.c_str(),ulDataLen);
				for (unsigned long i = 0;i < ulDataLen ;i ++)
				{
					pd[i] = htons(pd[i]);
				}
				memcpy(&lpData[2],pd,ulDataLen);
				delete [] pd;
			}
			break;
		case BOM_UTF_8:
			{
				std::string strData = GlobalWideCharToMultiByte(strText.c_str(),CP_UTF8);

				ulLen = strData.length() + 3;
				lpData = new unsigned char[ulLen];

				lpData[0] = 0xef;
				lpData[1] = 0xbb;
				lpData[2] = 0xbf;

				memcpy(&lpData[3],strData.c_str(),strData.length());
			}
			break;
		}

		if (lpData)
		{
			File.Write(lpData,ulLen);
			res = true ;
		}
		File.Close();
	}
	return res;
}

// Classifies a buffer by the byte-order mark at its start.
//
// lpData : buffer to inspect; only read when ulLen is at least 2.
// ulLen  : number of valid bytes in lpData.
//
// Returns BOM_UNICODE for FF FE, BOM_UNICODE_BIG_ENDIAN for FE FF,
// BOM_UTF_8 for EF BB BF (needs at least 3 bytes), otherwise BOM_ASCII.
BomFormat GetTxtBomFormat(unsigned char* lpData,unsigned long ulLen)
{
	// Every recognised mark is at least two bytes long.
	if (ulLen < 2)
	{
		return BOM_ASCII;
	}

	const unsigned char first = lpData[0];
	const unsigned char second = lpData[1];

	if (first == 0xff && second == 0xfe)
	{
		return BOM_UNICODE;
	}

	if (first == 0xfe && second == 0xff)
	{
		return BOM_UNICODE_BIG_ENDIAN;
	}

	const bool hasUtf8Mark =
		ulLen >= 3 && first == 0xef && second == 0xbb && lpData[2] == 0xbf;

	return hasUtf8Mark ? BOM_UTF_8 : BOM_ASCII;
}

bool OpenTxtData(std::wstring& strText,std::wstring& strPathName)
{
	CFile File;
	bool res = false ;

	if (File.Open(strPathName.c_str(), CFile::modeRead | CFile::typeBinary))
	{
		int iLength = (int)File.GetLength();
		unsigned char* szBuffer= new unsigned char[iLength+1];
		memset(szBuffer, 0, iLength+1);
		File.Read(szBuffer,iLength);
		szBuffer[iLength] = 0 ;

		if (iLength > 0)
		{
			switch (GetTxtBomFormat(szBuffer,iLength))
			{
			case BOM_ASCII:
				strText = GlobalMultiByteToWideChar((const char*)&szBuffer[0],CP_ACP);
				break;
			case BOM_UNICODE:
				{
					ASSERT(!(iLength % 2));
					int len = iLength/2;
					wchar_t* buf= new wchar_t[len];
					memset(buf,0,iLength);
					memcpy(buf,&szBuffer[2],iLength-2);
					buf[len-1] = 0;

					strText = buf;
					delete [] buf;
				}
				break;
			case BOM_UNICODE_BIG_ENDIAN:
				{
					ASSERT(!(iLength % 2));

					int len = iLength/2;
					wchar_t* buf= new wchar_t[len];
					memset(buf,0,iLength);

					memcpy(buf,&szBuffer[2],iLength-2);
					for (int i = 0;i < len;i ++){
						buf[i] = htons(buf[i]);
					}
					buf[len-1] = 0;

					strText = buf;
					delete [] buf;
				}
				break;
			case BOM_UTF_8:
				strText = GlobalMultiByteToWideChar((const char*)&szBuffer[3],CP_UTF8);
				break;
			}
			
		}
		res = true ;
		delete [] szBuffer;
		File.Close();
	}
	return res;
}