// Convert a NUL-terminated UTF-8 byte string to a wide string.
//
// src_      : output; decoded characters are APPENDED (the string is not cleared).
// szMessage : NUL-terminated UTF-8 input; may be NULL.
// returns   : false only when szMessage is NULL, true otherwise.
//
// Handles 1- to 4-byte sequences. Code points above U+FFFF are stored as one
// wchar_t when wchar_t is wider than 16 bits, otherwise as a UTF-16 surrogate
// pair. Malformed input (stray continuation byte, invalid lead byte, truncated
// or broken sequence, value above U+10FFFF) yields U+FFFD for the offending
// byte/sequence instead of reading past the end of the buffer. A decoded value
// of 0 (only reachable through an overlong encoding) is skipped. Overlong
// forms and encoded surrogates are otherwise not rejected.
bool UTF8ToUnicode( std::wstring& src_, const char *szMessage )
{
    if( NULL == szMessage ) return false;

    // Work on unsigned bytes so that masks and comparisons are not affected
    // by the signedness of plain char.
    const unsigned char *bytes = reinterpret_cast<const unsigned char *>( szMessage );
    const size_t size = strlen( szMessage );
    const wchar_t kReplacement = 0xFFFD;

    size_t p = 0;
    while( p < size )
    {
        const unsigned char lead = bytes[p];
        unsigned long cp = 0;   // code point being assembled
        size_t extra = 0;       // number of continuation bytes expected

        if( lead < 0x80 )                   // 0xxxxxxx : 1 byte
        {
            cp = lead;
        }
        else if( (lead & 0xE0) == 0xC0 )    // 110xxxxx : 2 bytes
        {
            cp = lead & 0x1F;
            extra = 1;
        }
        else if( (lead & 0xF0) == 0xE0 )    // 1110xxxx : 3 bytes
        {
            cp = lead & 0x0F;
            extra = 2;
        }
        else if( (lead & 0xF8) == 0xF0 )    // 11110xxx : 4 bytes
        {
            cp = lead & 0x07;
            extra = 3;
        }
        else                                // stray continuation or invalid lead
        {
            src_.push_back( kReplacement );
            ++p;
            continue;
        }

        // The whole sequence must lie inside the string and every trailing
        // byte must have the 10xxxxxx form.
        bool ok = ( size - p > extra );
        for( size_t i = 1; ok && i <= extra; ++i )
        {
            const unsigned char c = bytes[p + i];
            if( (c & 0xC0) != 0x80 )
            {
                ok = false;
            }
            else
            {
                cp = (cp << 6) | (c & 0x3F);
            }
        }
        if( !ok )
        {
            // Replace only the lead byte; the following bytes are re-examined.
            src_.push_back( kReplacement );
            ++p;
            continue;
        }
        p += extra + 1;

        if( 0 == cp )
        {
            continue;   // never embed a NUL in the output
        }

        if( cp > 0x10FFFF )
        {
            src_.push_back( kReplacement );
        }
        else if( cp <= 0xFFFF || sizeof( wchar_t ) > 2 )
        {
            src_.push_back( static_cast<wchar_t>( cp ) );
        }
        else
        {
            // 16-bit wchar_t: split into a UTF-16 surrogate pair.
            const unsigned long v = cp - 0x10000;
            src_.push_back( static_cast<wchar_t>( 0xD800 + (v >> 10) ) );
            src_.push_back( static_cast<wchar_t>( 0xDC00 + (v & 0x3FF) ) );
        }
    }
    return true;
}
// Convert a NUL-terminated wide string to UTF-8.
//
// src_      : output; encoded bytes are APPENDED (the string is not cleared).
// szMessage : NUL-terminated wide-character input; may be NULL.
// returns   : false only when szMessage is NULL, true otherwise.
//
// Emits 1- to 4-byte sequences. A high surrogate immediately followed by a
// low surrogate is combined into a single code point and written as one
// 4-byte sequence. An unpaired surrogate is written as a 3-byte sequence,
// the same way any other 16-bit value is. Values outside
// 0..0x10FFFF are written as U+FFFD.
bool UnicodeToUTF8( std::string& src_, const wchar_t *szMessage )
{
    if( NULL == szMessage ) return false;

    const size_t size = wcslen( szMessage );
    for( size_t i = 0; i < size; ++i )
    {
        // A negative wchar_t (where wchar_t is signed) becomes a huge value
        // here and is caught by the range check below.
        unsigned long cp = static_cast<unsigned long>( szMessage[i] );

        // Combine a UTF-16 surrogate pair into one code point.
        if( cp >= 0xD800 && cp <= 0xDBFF && i + 1 < size )
        {
            const unsigned long low = static_cast<unsigned long>( szMessage[i + 1] );
            if( low >= 0xDC00 && low <= 0xDFFF )
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i;    // the low surrogate has been consumed
            }
        }

        if( cp > 0x10FFFF )
        {
            cp = 0xFFFD;
        }

        if( cp <= 0x7F )            // 1 byte
        {
            src_.push_back( static_cast<char>( cp ) );
        }
        else if( cp <= 0x7FF )      // 2 bytes
        {
            src_.push_back( static_cast<char>( 0xC0 | (cp >> 6) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else if( cp <= 0xFFFF )     // 3 bytes
        {
            src_.push_back( static_cast<char>( 0xE0 | (cp >> 12) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else                        // 4 bytes
        {
            src_.push_back( static_cast<char>( 0xF0 | (cp >> 18) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 12) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
    }
    return true;
}
[To be continued... private]
d = ((szMessage[p] & 0x0f) << 12) | ((szMessage[p +1] & 0x3F) << 6) | (szMessage[p +2] & 0x3F);
p += 3;
}
else if( (szMessage[p] & 0xC0) == 0xC0 ) // 2 byte
{
d = ((szMessage[p] & 0x1F) << 6) | (szMessage[p +1] & 0x3F);
p += 2;
}
else // 1 byte
{
d = szMessage[p++] & 0x7F;
}
if( 0 != d )
{
src_.push_back( d );
}
}
return true;
}
// Convert a NUL-terminated wide string to UTF-8.
//
// src_      : output; encoded bytes are APPENDED (the string is not cleared).
// szMessage : NUL-terminated wide-character input; may be NULL.
// returns   : false only when szMessage is NULL, true otherwise.
//
// Emits 1- to 4-byte sequences. A high surrogate immediately followed by a
// low surrogate is combined into a single code point and written as one
// 4-byte sequence. An unpaired surrogate is written as a 3-byte sequence,
// the same way any other 16-bit value is. Values outside
// 0..0x10FFFF are written as U+FFFD.
bool UnicodeToUTF8( std::string& src_, const wchar_t *szMessage )
{
    if( NULL == szMessage ) return false;

    const size_t size = wcslen( szMessage );
    for( size_t i = 0; i < size; ++i )
    {
        // A negative wchar_t (where wchar_t is signed) becomes a huge value
        // here and is caught by the range check below.
        unsigned long cp = static_cast<unsigned long>( szMessage[i] );

        // Combine a UTF-16 surrogate pair into one code point.
        if( cp >= 0xD800 && cp <= 0xDBFF && i + 1 < size )
        {
            const unsigned long low = static_cast<unsigned long>( szMessage[i + 1] );
            if( low >= 0xDC00 && low <= 0xDFFF )
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i;    // the low surrogate has been consumed
            }
        }

        if( cp > 0x10FFFF )
        {
            cp = 0xFFFD;
        }

        if( cp <= 0x7F )            // 1 byte
        {
            src_.push_back( static_cast<char>( cp ) );
        }
        else if( cp <= 0x7FF )      // 2 bytes
        {
            src_.push_back( static_cast<char>( 0xC0 | (cp >> 6) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else if( cp <= 0xFFFF )     // 3 bytes
        {
            src_.push_back( static_cast<char>( 0xE0 | (cp >> 12) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
        else                        // 4 bytes
        {
            src_.push_back( static_cast<char>( 0xF0 | (cp >> 18) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 12) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | ((cp >> 6) & 0x3F) ) );
            src_.push_back( static_cast<char>( 0x80 | (cp & 0x3F) ) );
        }
    }
    return true;
}
[To be continued... private]
'[ Programming ] > Algorithm' 카테고리의 다른 글
욕설 필터 및 문자 검색 (0) | 2011.10.10 |
---|---|
케릭터 간 거리 측정 (0) | 2011.07.14 |
비트(bit) 연산 (0) | 2011.01.03 |
거리 계산 (0) | 2011.01.03 |
UTF-8 1~3 Byte 문자 구분 (2) | 2010.07.15 |