Make BOM-detection code in wxConvAuto public.

Export GetBOM() and DetectBOM() functions.

Also rename BOMType enum elements to use "wx" prefix now that they're public.

Closes #13599.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@69571 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2011-10-27 22:48:54 +00:00
parent 76ff3d06f5
commit 038809c2f6
3 changed files with 151 additions and 52 deletions

View File

@@ -6,6 +6,74 @@
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
/**
Constants representing various BOM types.

BOM is an abbreviation for "Byte Order Mark", a special Unicode character
which may be inserted at the beginning of a text stream to indicate its
encoding.

@since 2.9.3
*/
enum wxBOM
{
/**
Unknown BOM.

This is returned if the presence of a BOM couldn't be determined, which
normally happens because not enough bytes of input have been analysed.
*/
wxBOM_Unknown = -1,
/**
No BOM.

The stream doesn't contain a BOM character at all.
*/
wxBOM_None,
/**
UTF-32 big endian BOM.

The stream is encoded in the big endian variant of UTF-32.
*/
wxBOM_UTF32BE,
/**
UTF-32 little endian BOM.

The stream is encoded in the little endian variant of UTF-32.
*/
wxBOM_UTF32LE,
/**
UTF-16 big endian BOM.

The stream is encoded in the big endian variant of UTF-16.
*/
wxBOM_UTF16BE,
/**
UTF-16 little endian BOM.

The stream is encoded in the little endian variant of UTF-16.
*/
wxBOM_UTF16LE,
/**
UTF-8 BOM.

The stream is encoded in UTF-8.

Notice that, contrary to popular belief, it's perfectly possible and,
in fact, common under Microsoft Windows systems, to have a BOM in a
UTF-8 stream: while it's not used to indicate the endianness of the UTF-8
stream (as it's byte-oriented), the BOM can still be useful just as an
unambiguous indicator of UTF-8 being used.
*/
wxBOM_UTF8
};
/**
@class wxConvAuto
@@ -66,6 +134,19 @@ public:
*/
wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT);
/**
Return the detected BOM type.

The BOM type is detected only after sufficiently many initial bytes have
passed through this conversion object, so this function always returns
wxBOM_Unknown immediately after the object creation but may return a
different value later.

@return The BOM type detected so far, as one of the wxBOM constants.

@since 2.9.3
*/
wxBOM GetBOM() const;
/**
Disable the use of the fallback encoding: if the input doesn't have a
BOM and is not valid UTF-8, the conversion will fail.
@@ -92,5 +173,16 @@ public:
@c wxFONTENCODING_DEFAULT can't be used here.
*/
static void SetFallbackEncoding(wxFontEncoding enc);
};
/**
Return the BOM type of this buffer.

This is a helper function which is normally only used internally by
wxConvAuto but is provided for the convenience of code that wants to
detect the encoding of a stream by checking it for BOM presence on its
own.

@param src
    The buffer to check for a BOM.
@param srcLen
    The length of the @a src buffer.
@return
    One of the wxBOM constants; wxBOM_Unknown is the value used when BOM
    presence couldn't be determined, normally because not enough bytes
    of input have been analysed.

@since 2.9.3
*/
static wxBOM DetectBOM(const char *src, size_t srcLen);
};