[ Programming ]/Algorithm
UTF-8을 Unicode로, Unicode를 UTF-8로 변환하기.
Mister_Q
2011. 1. 5. 19:00
// Converts a NUL-terminated UTF-8 byte string to wide characters.
//
// The decoded characters are APPENDED to src_; existing content is kept.
//
// Parameters:
//   src_      - destination wide string (appended to).
//   szMessage - NUL-terminated UTF-8 input; may be NULL.
//
// Returns false when szMessage is NULL, true otherwise.
//
// Decoding rules:
//   * 1- to 4-byte sequences are recognised by their lead byte.
//   * Code points above U+FFFF are stored as a UTF-16 surrogate pair when
//     wchar_t is 16 bits wide, and as a single element otherwise.
//   * A malformed lead byte, a stray continuation byte, a sequence cut short
//     by the end of the string, or a value above U+10FFFF yields one
//     U+FFFD (REPLACEMENT CHARACTER) and decoding resumes at the next byte.
//     No byte past the terminating NUL is ever read.
//   * A sequence that decodes to U+0000 (only possible through an overlong
//     form such as C0 80) is skipped, as it would otherwise embed a NUL.
bool UTF8ToUnicode( std::wstring& src_, const char *szMessage )
{
    if( NULL == szMessage ) return false;

    const unsigned long kReplacement = 0xFFFD;
    const size_t size = strlen( szMessage );
    size_t p = 0;

    while( p < size )
    {
        // Work on unsigned bytes so the bit tests do not depend on whether
        // plain char is signed on this platform.
        const unsigned char lead = static_cast<unsigned char>( szMessage[p] );
        unsigned long cp = 0;   // code point being assembled
        size_t trail = 0;       // number of continuation bytes expected
        bool valid = true;

        if( lead < 0x80 )                { cp = lead; }
        else if( (lead & 0xE0) == 0xC0 ) { cp = lead & 0x1F; trail = 1; }
        else if( (lead & 0xF0) == 0xE0 ) { cp = lead & 0x0F; trail = 2; }
        else if( (lead & 0xF8) == 0xF0 ) { cp = lead & 0x07; trail = 3; }
        else                             { valid = false; } // 10xxxxxx or 11111xxx

        // Only (size - p - 1) bytes follow the lead byte; refuse to read
        // past the end of the string.
        if( valid && trail >= size - p ) valid = false;

        for( size_t k = 1; valid && k <= trail; ++k )
        {
            const unsigned char next = static_cast<unsigned char>( szMessage[p + k] );
            if( (next & 0xC0) != 0x80 )
            {
                valid = false;
            }
            else
            {
                cp = (cp << 6) | (next & 0x3F);
            }
        }

        if( valid && cp > 0x10FFFF ) valid = false;

        if( !valid )
        {
            src_.push_back( static_cast<wchar_t>( kReplacement ) );
            ++p; // resynchronise on the very next byte
            continue;
        }

        p += trail + 1;

        if( 0 == cp ) continue; // never embed a NUL in the output

        if( cp > 0xFFFF && sizeof( wchar_t ) == 2 )
        {
            // 16-bit wchar_t cannot hold the value: emit a surrogate pair.
            cp -= 0x10000;
            src_.push_back( static_cast<wchar_t>( 0xD800 + (cp >> 10) ) );
            src_.push_back( static_cast<wchar_t>( 0xDC00 + (cp & 0x3FF) ) );
        }
        else
        {
            src_.push_back( static_cast<wchar_t>( cp ) );
        }
    }
    return true;
}
// Converts a NUL-terminated wide-character string to UTF-8.
//
// The encoded bytes are APPENDED to src_; existing content is kept.
//
// Parameters:
//   src_      - destination byte string (appended to).
//   szMessage - NUL-terminated wide string; may be NULL.
//
// Returns false when szMessage is NULL, true otherwise.
//
// Encoding rules:
//   * A high surrogate immediately followed by a low surrogate is combined
//     into one supplementary code point and written as a 4-byte sequence.
//   * Code points above U+FFFF (possible directly when wchar_t is 32 bits
//     wide) are written as a 4-byte sequence.
//   * An unpaired surrogate, or a value outside U+0000..U+10FFFF, cannot be
//     represented in UTF-8 and is written as U+FFFD (EF BF BD).
bool UnicodeToUTF8( std::string& src_, const wchar_t *szMessage )
{
    if( NULL == szMessage ) return false;

    const size_t size = wcslen( szMessage );
    for( size_t i = 0; i < size; ++i )
    {
        unsigned long cp = static_cast<unsigned long>( szMessage[i] );

        // Join a UTF-16 surrogate pair into a single code point.
        if( cp >= 0xD800 && cp <= 0xDBFF && i + 1 < size )
        {
            const unsigned long low = static_cast<unsigned long>( szMessage[i + 1] );
            if( low >= 0xDC00 && low <= 0xDFFF )
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i; // the low surrogate has been consumed
            }
        }

        // Whatever is still a surrogate here is unpaired; out-of-range
        // values (including negative wchar_t values) are equally invalid.
        if( (cp >= 0xD800 && cp <= 0xDFFF) || cp > 0x10FFFF )
        {
            cp = 0xFFFD;
        }

        if( cp <= 0x7F ) // 1 byte
        {
            src_.push_back( static_cast<char>( cp ) );
        }
        else if( cp <= 0x7FF ) // 2 bytes
        {
            src_.push_back( static_cast<char>( 0xC0 | (cp >> 6) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else if( cp <= 0xFFFF ) // 3 bytes
        {
            src_.push_back( static_cast<char>( 0xE0 | (cp >> 12) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else // 4 bytes
        {
            src_.push_back( static_cast<char>( 0xF0 | (cp >> 18) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 12) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
    }
    return true;
}
[To be continued... private]
d = ((szMessage[p] & 0x0f) << 12) | ((szMessage[p +1] & 0x3F) << 6) | (szMessage[p +2] & 0x3F);
p += 3;
}
else if( (szMessage[p] & 0xC0) == 0xC0 ) // 2 byte
{
d = ((szMessage[p] & 0x1F) << 6) | (szMessage[p +1] & 0x3F);
p += 2;
}
else // 1 byte
{
d = szMessage[p++] & 0x7F;
}
if( 0 != d )
{
src_.push_back( d );
}
}
return true;
}
// Converts a NUL-terminated wide-character string to UTF-8.
//
// The encoded bytes are APPENDED to src_; existing content is kept.
//
// Parameters:
//   src_      - destination byte string (appended to).
//   szMessage - NUL-terminated wide string; may be NULL.
//
// Returns false when szMessage is NULL, true otherwise.
//
// Encoding rules:
//   * A high surrogate immediately followed by a low surrogate is combined
//     into one supplementary code point and written as a 4-byte sequence.
//   * Code points above U+FFFF (possible directly when wchar_t is 32 bits
//     wide) are written as a 4-byte sequence.
//   * An unpaired surrogate, or a value outside U+0000..U+10FFFF, cannot be
//     represented in UTF-8 and is written as U+FFFD (EF BF BD).
bool UnicodeToUTF8( std::string& src_, const wchar_t *szMessage )
{
    if( NULL == szMessage ) return false;

    const size_t size = wcslen( szMessage );
    for( size_t i = 0; i < size; ++i )
    {
        unsigned long cp = static_cast<unsigned long>( szMessage[i] );

        // Join a UTF-16 surrogate pair into a single code point.
        if( cp >= 0xD800 && cp <= 0xDBFF && i + 1 < size )
        {
            const unsigned long low = static_cast<unsigned long>( szMessage[i + 1] );
            if( low >= 0xDC00 && low <= 0xDFFF )
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i; // the low surrogate has been consumed
            }
        }

        // Whatever is still a surrogate here is unpaired; out-of-range
        // values (including negative wchar_t values) are equally invalid.
        if( (cp >= 0xD800 && cp <= 0xDFFF) || cp > 0x10FFFF )
        {
            cp = 0xFFFD;
        }

        if( cp <= 0x7F ) // 1 byte
        {
            src_.push_back( static_cast<char>( cp ) );
        }
        else if( cp <= 0x7FF ) // 2 bytes
        {
            src_.push_back( static_cast<char>( 0xC0 | (cp >> 6) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else if( cp <= 0xFFFF ) // 3 bytes
        {
            src_.push_back( static_cast<char>( 0xE0 | (cp >> 12) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else // 4 bytes
        {
            src_.push_back( static_cast<char>( 0xF0 | (cp >> 18) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 12) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
    }
    return true;
}
[To be continued... private]