Archive

Posts Tagged ‘Windows-1252’

ATL CString Extension for UTF-8, UTF-7, ASCII, OEM, Latin1 Character Sets

April 29th, 2009 Christian Etter No comments

Sometimes we come across text that has been encoded in a particular locale-specific code page or Unicode encoding. In most cases the ATL CString classes do not provide a conversion for this; that’s where these two extension classes come in handy:

CStringWExt – Convert 8-bit Character Sets to UTF-16

// CStringW extension that fills the string by converting null-terminated
// 8-bit text in a given code page to UTF-16 via MultiByteToWideChar.
//
// Every *2Wide method returns TRUE on success and FALSE on failure (NULL input
// or a conversion error). On failure the current contents are left unchanged.
class CStringWExt : public CStringW
{
public:
    BOOL Latin12Wide  ( PCSTR s ) { return CP2Wide( 28591    , s ); } // Latin1 encoding or ISO/IEC 8859-1, similar to Windows-1252
    BOOL OEM2Wide     ( PCSTR s ) { return CP2Wide( CP_OEMCP , s ); } // Use for console related text
    BOOL ASCII2Wide   ( PCSTR s ) { return CP2Wide( 20127    , s ); } // 7-bit US-ASCII
    BOOL UTF72Wide    ( PCSTR s ) { return CP2Wide( CP_UTF7  , s ); }
    BOOL UTF82Wide    ( PCSTR s ) { return CP2Wide( CP_UTF8  , s ); }
    BOOL ANSI2Wide    ( PCSTR s ) { return CP2Wide( CP_ACP   , s ); }
    BOOL UserCP2Wide  ( PCSTR s ) { return CP2Wide( GetUserCodePage()  , s ); }
    BOOL SystemCP2Wide( PCSTR s ) { return CP2Wide( GetSystemCodePage(), s ); } // System code page is the locale set for non Unicode programs

    // Default ANSI code page of the user default locale (0 if the query fails).
    UINT GetUserCodePage() const { return GetCodePage( LOCALE_USER_DEFAULT ); }

    // Default ANSI code page of the system default locale (0 if the query fails).
    UINT GetSystemCodePage() const { return GetCodePage( LOCALE_SYSTEM_DEFAULT ); }

    // Returns the default ANSI code page of `locale`, or 0 if GetLocaleInfo fails.
    // Note that 0 is also the value of CP_ACP, so passing a failed result on to
    // CP2Wide falls back to the current ANSI code page.
    UINT GetCodePage( LCID locale ) const
    {
        DWORD langCP = 0;
        // With LOCALE_RETURN_NUMBER the value is written as a DWORD, and the
        // buffer size must be given in TCHARs, not bytes.
        if ( GetLocaleInfo( locale,
                            LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
                            reinterpret_cast<LPTSTR>( &langCP ),
                            sizeof(langCP) / sizeof(TCHAR) ) )
            return langCP;
        return 0;
    }

    // Converts the null-terminated string `s`, encoded in code page `cp`, to
    // UTF-16 and stores the result in this string.
    // Returns FALSE if `s` is NULL or the conversion fails; this string is then
    // left unchanged.
    BOOL CP2Wide( UINT cp, PCSTR s )
    {
        if ( s == NULL )
            return FALSE;

        // First pass: required size in WCHARs, including the terminating null
        // (because the source length is given as -1).
        const int cchNeeded = MultiByteToWideChar( cp, 0, s, -1, NULL, 0 );
        if ( cchNeeded == 0 )
            return FALSE;

        // Convert into a temporary so that a failure leaves *this untouched and
        // so that `s` may safely point into this string's own buffer.
        CStringW converted;
        PWSTR buffer = converted.GetBuffer( cchNeeded );
        const int cchWritten = MultiByteToWideChar( cp, 0, s, -1, buffer, cchNeeded );
        // Every GetBuffer must be paired with ReleaseBuffer, even on failure.
        // cchWritten counts the terminating null, which is not part of the length.
        converted.ReleaseBuffer( cchWritten > 0 ? cchWritten - 1 : 0 );
        if ( cchWritten == 0 )
            return FALSE;

        static_cast<CStringW&>( *this ) = converted;
        return TRUE;
    }
};

CStringAExt – Convert UTF-16 to 8-bit Character Set

This conversion is potentially lossy when the target is the OEM, ASCII or ANSI code page, depending on the text that has to be converted. To check whether any loss has occurred, convert the result back with an instance of CStringWExt above and compare it with the original text.

// CStringA extension that fills the string by converting null-terminated
// UTF-16 text to an 8-bit code page via WideCharToMultiByte.
//
// Every Wide2* method returns TRUE on success and FALSE on failure (NULL input
// or a conversion error). On failure the current contents are left unchanged.
// The conversion is requested without a "used default char" indicator, so a
// TRUE result does not tell whether characters were lost.
class CStringAExt : public CStringA
{
public:
    BOOL Wide2Latin1  ( PCWSTR s ) { return Wide2CP( 28591    , s ); } // Latin1 encoding or ISO/IEC 8859-1, similar to Windows-1252
    BOOL Wide2OEM     ( PCWSTR s ) { return Wide2CP( CP_OEMCP , s ); } // Use for console related text
    BOOL Wide2ASCII   ( PCWSTR s ) { return Wide2CP( 20127    , s ); } // 7-bit US-ASCII
    BOOL Wide2UTF7    ( PCWSTR s ) { return Wide2CP( CP_UTF7  , s ); }
    BOOL Wide2UTF8    ( PCWSTR s ) { return Wide2CP( CP_UTF8  , s ); }
    BOOL Wide2ANSI    ( PCWSTR s ) { return Wide2CP( CP_ACP   , s ); }
    BOOL Wide2UserCP  ( PCWSTR s ) { return Wide2CP( GetUserCodePage()  , s ); }
    BOOL Wide2SystemCP( PCWSTR s ) { return Wide2CP( GetSystemCodePage(), s ); } // System code page is the locale set for non Unicode programs

    // Default ANSI code page of the user default locale (0 if the query fails).
    UINT GetUserCodePage() const { return GetCodePage( LOCALE_USER_DEFAULT ); }

    // Default ANSI code page of the system default locale (0 if the query fails).
    UINT GetSystemCodePage() const { return GetCodePage( LOCALE_SYSTEM_DEFAULT ); }

    // Returns the default ANSI code page of `locale`, or 0 if GetLocaleInfo fails.
    // Note that 0 is also the value of CP_ACP, so passing a failed result on to
    // Wide2CP falls back to the current ANSI code page.
    UINT GetCodePage( LCID locale ) const
    {
        DWORD langCP = 0;
        // With LOCALE_RETURN_NUMBER the value is written as a DWORD, and the
        // buffer size must be given in TCHARs, not bytes.
        if ( GetLocaleInfo( locale,
                            LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
                            reinterpret_cast<LPTSTR>( &langCP ),
                            sizeof(langCP) / sizeof(TCHAR) ) )
            return langCP;
        return 0;
    }

    // Converts the null-terminated UTF-16 string `s` to code page `cp` and
    // stores the result in this string.
    // Returns FALSE if `s` is NULL or the conversion fails; this string is then
    // left unchanged.
    BOOL Wide2CP( UINT cp, PCWSTR s )
    {
        if ( s == NULL )
            return FALSE;

        // First pass: required size in bytes, including the terminating null
        // (because the source length is given as -1).
        const int cbNeeded = WideCharToMultiByte( cp, 0, s, -1, NULL, 0, NULL, NULL );
        if ( cbNeeded == 0 )
            return FALSE;

        // Convert into a temporary so that a failure leaves *this untouched.
        CStringA converted;
        PSTR buffer = converted.GetBuffer( cbNeeded );
        // The default-char arguments stay NULL: CP_UTF7 and CP_UTF8 require it.
        const int cbWritten = WideCharToMultiByte( cp, 0, s, -1, buffer, cbNeeded, NULL, NULL );
        // Every GetBuffer must be paired with ReleaseBuffer, even on failure.
        // cbWritten counts the terminating null, which is not part of the length.
        converted.ReleaseBuffer( cbWritten > 0 ? cbWritten - 1 : 0 );
        if ( cbWritten == 0 )
            return FALSE;

        static_cast<CStringA&>( *this ) = converted;
        return TRUE;
    }
};