Files
wxWidgets/src/common/strvararg.cpp
Vadim Zeitlin c30fe114ee Fix handling of "%z" in format strings for MinGW in all cases
MinGW only supports "%z" directly when using ANSI stdio, but this is not
always the case (even if it often is, as just including any standard C++
header enables ANSI stdio as a side effect). Translate "%z" to "%I" for it as
well as for MSVC to ensure that it always works.

Closes #17492.
2016-04-12 17:11:49 +02:00

775 lines
24 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// Name: src/common/strvararg.cpp
// Purpose: macros for implementing type-safe vararg passing of strings
// Author: Vaclav Slavik
// Created: 2007-02-19
// Copyright: (c) 2007 REA Elektronik GmbH
// Licence: wxWindows licence
///////////////////////////////////////////////////////////////////////////////
// ============================================================================
// declarations
// ============================================================================
// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------
// for compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#include "wx/strvararg.h"
#include "wx/string.h"
#include "wx/crt.h"
#include "wx/private/wxprintf.h"
// ============================================================================
// implementation
// ============================================================================
// ----------------------------------------------------------------------------
// wxArgNormalizer<>
// ----------------------------------------------------------------------------
const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
{
return m_value.wx_str();
}
const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
{
return m_value.AsInternal();
}
#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
const wxString& s,
const wxFormatString *fmt, unsigned index)
: wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
{
}
wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
const wxCStrData& s,
const wxFormatString *fmt, unsigned index)
: wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
{
}
#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
// ----------------------------------------------------------------------------
// wxArgNormalizedString
// ----------------------------------------------------------------------------
wxString wxArgNormalizedString::GetString() const
{
if ( !IsValid() )
return wxEmptyString;
#if wxUSE_UTF8_LOCALE_ONLY
return wxString(reinterpret_cast<const char*>(m_ptr));
#else
#if wxUSE_UNICODE_UTF8
if ( wxLocaleIsUtf8 )
return wxString(reinterpret_cast<const char*>(m_ptr));
else
#endif
return wxString(reinterpret_cast<const wxChar*>(m_ptr));
#endif // !wxUSE_UTF8_LOCALE_ONLY
}
wxArgNormalizedString::operator wxString() const
{
return GetString();
}
// ----------------------------------------------------------------------------
// wxFormatConverter: class doing the "%s" and "%c" normalization
// ----------------------------------------------------------------------------
/*
There are four problems with wxPrintf() etc. format strings:
1) The printf vararg macros convert all forms of strings into
wxStringCharType* representation. This may make the format string
incorrect: for example, if %ls was used together with a wchar_t*
variadic argument, this would no longer work, because the templates
would change wchar_t* argument to wxStringCharType* and %ls would now
be incorrect in e.g. UTF-8 build. We need make sure only one specifier
form is used.
2) To complicate matters further, the meaning of %s and %c is different
under Windows and on Unix. The Windows/MS convention is as follows:
In ANSI mode:
format specifier results in
-----------------------------------
%s, %hs, %hS char*
%ls, %S, %lS wchar_t*
In Unicode mode:
format specifier results in
-----------------------------------
%hs, %S, %hS char*
%s, %ls, %lS wchar_t*
(While on POSIX systems we have %C identical to %lc and %c always means
char (in any mode) while %lc always means wchar_t.)
In other words, we should _only_ use %s on Windows and %ls on Unix for
wxUSE_UNICODE_WCHAR build.
3) To make things even worse, we need two forms in UTF-8 build: one for
passing strings to ANSI functions under UTF-8 locales (this one should
use %s) and one for widechar functions used under non-UTF-8 locales
(this one should use %ls).
And, of course, the same should be done for %c as well.
wxScanf() family of functions is simpler, because we don't normalize their
variadic arguments and we only have to handle 2) above and only for widechar
versions.
*/
template<typename T>
class wxFormatConverterBase
{
public:
typedef T CharType;
wxFormatConverterBase()
{
m_fmtOrig = NULL;
m_fmtLast = NULL;
m_nCopied = 0;
}
wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
{
// this is reset to NULL if we modify the format string
m_fmtOrig = format;
while ( *format )
{
if ( CopyFmtChar(*format++) == wxT('%') )
{
#if wxUSE_PRINTF_POS_PARAMS
if ( *format >= '0' && *format <= '9' )
{
SkipDigits(&format);
if ( *format == '$' )
{
// It was a positional argument specification.
CopyFmtChar(*format++);
}
//else: it was a width specification, nothing else to do.
}
#endif // wxUSE_PRINTF_POS_PARAMS
// skip any flags
while ( IsFlagChar(*format) )
CopyFmtChar(*format++);
// and possible width
if ( *format == wxT('*') )
CopyFmtChar(*format++);
else
SkipDigits(&format);
// precision?
if ( *format == wxT('.') )
{
CopyFmtChar(*format++);
if ( *format == wxT('*') )
CopyFmtChar(*format++);
else
SkipDigits(&format);
}
// next we can have a size modifier
SizeModifier size;
switch ( *format )
{
// MSVC doesn't support C99 'z' size modifier, but it uses
// 'I' with exactly the same meaning.
//
// MinGW does support 'z' but only in ANSI stdio mode, and
// we can't be sure that this is what is actually going to
// be used, application code could explicitly define
// __USE_MINGW_ANSI_STDIO=0 (e.g. because it needs legacy
// behaviour for its own printf() calls), so we map it to
// 'I' for it too.
#if defined(__VISUALC__) || defined(__MINGW32__)
case 'z':
ChangeFmtChar('I');
format++;
size = Size_Default;
break;
#endif // __VISUALC__ || __MINGW32__
case 'h':
size = Size_Short;
format++;
break;
case 'l':
// "ll" has a different meaning!
if ( format[1] != 'l' )
{
size = Size_Long;
format++;
break;
}
wxFALLTHROUGH;
default:
size = Size_Default;
}
CharType outConv = *format;
SizeModifier outSize = size;
// and finally we should have the type
switch ( *format )
{
case wxT('S'):
case wxT('s'):
// all strings were converted into the same form by
// wxArgNormalizer<T>, this form depends on the context
// in which the value is used (scanf/printf/wprintf):
HandleString(*format, size, outConv, outSize);
break;
case wxT('C'):
case wxT('c'):
HandleChar(*format, size, outConv, outSize);
break;
default:
// nothing special to do
break;
}
if ( outConv == *format && outSize == size ) // no change
{
if ( size != Size_Default )
CopyFmtChar(*(format - 1));
CopyFmtChar(*format);
}
else // something changed
{
switch ( outSize )
{
case Size_Long:
InsertFmtChar(wxT('l'));
break;
case Size_Short:
InsertFmtChar(wxT('h'));
break;
case Size_Default:
// nothing to do
break;
}
InsertFmtChar(outConv);
}
format++;
}
}
// notice that we only translated the string if m_fmtOrig == NULL (as
// set by CopyAllBefore()), otherwise we should simply use the original
// format
if ( m_fmtOrig )
{
return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
}
else
{
// shrink converted format string to actual size (instead of
// over-sized allocation from CopyAllBefore()) and NUL-terminate
// it:
m_fmt.shrink(m_fmtLast - m_fmt.data());
return m_fmt;
}
}
virtual ~wxFormatConverterBase() {}
protected:
enum SizeModifier
{
Size_Default,
Size_Short,
Size_Long
};
// called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
// respectively), 'size' is the preceding size modifier; the new values of
// conversion and size specifiers must be written to outConv and outSize
virtual void HandleString(CharType conv, SizeModifier size,
CharType& outConv, SizeModifier& outSize) = 0;
// ditto for %C or %c
virtual void HandleChar(CharType conv, SizeModifier size,
CharType& outConv, SizeModifier& outSize) = 0;
private:
// copy another character to the translated format: this function does the
// copy if we are translating but doesn't do anything at all if we don't,
// so we don't create the translated format string at all unless we really
// need to (i.e. InsertFmtChar() is called)
CharType CopyFmtChar(CharType ch)
{
if ( !m_fmtOrig )
{
// we're translating, do copy
*(m_fmtLast++) = ch;
}
else
{
// simply increase the count which should be copied by
// CopyAllBefore() later if needed
m_nCopied++;
}
return ch;
}
// insert an extra character
void InsertFmtChar(CharType ch)
{
if ( m_fmtOrig )
{
// so far we haven't translated anything yet
CopyAllBefore();
}
*(m_fmtLast++) = ch;
}
// change a character
void ChangeFmtChar(CharType ch)
{
if ( m_fmtOrig )
{
// so far we haven't translated anything yet
CopyAllBefore();
}
*m_fmtLast++ = ch;
}
void CopyAllBefore()
{
wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
// the modified format string is guaranteed to be no longer than
// 3/2 of the original (worst case: the entire format string consists
// of "%s" repeated and is expanded to "%ls" on Unix), so we can
// allocate the buffer now and not worry about running out of space if
// we over-allocate a bit:
size_t fmtLen = wxStrlen(m_fmtOrig);
// worst case is of even length, so there's no rounding error in *3/2:
m_fmt.extend(fmtLen * 3 / 2);
if ( m_nCopied > 0 )
wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
m_fmtLast = m_fmt.data() + m_nCopied;
// we won't need it any longer and resetting it also indicates that we
// modified the format
m_fmtOrig = NULL;
}
static bool IsFlagChar(CharType ch)
{
return ch == wxT('-') || ch == wxT('+') ||
ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
}
void SkipDigits(const CharType **ptpc)
{
while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
CopyFmtChar(*(*ptpc)++);
}
// the translated format
wxCharTypeBuffer<CharType> m_fmt;
CharType *m_fmtLast;
// the original format
const CharType *m_fmtOrig;
// the number of characters already copied (i.e. already parsed, but left
// unmodified)
size_t m_nCopied;
};
// Distinguish between the traditional Windows (and MSVC) behaviour and Cygwin
// (which is always Unix-like) and MinGW. The last one is the most interesting
// case as it can behave either as MSVC (__USE_MINGW_ANSI_STDIO=0) or as POSIX
// (__USE_MINGW_ANSI_STDIO=1, which is explicitly set by including any standard
// C++ header such as e.g. <string>). Luckily, "%ls" and "%lc" work in both
// cases, at least for recent MinGW versions, so just use it always.
#if defined(__WINDOWS__) && \
!defined(__CYGWIN__) && \
!defined(__MINGW32__)
// on Windows, we should use %s and %c regardless of the build:
class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
virtual void HandleString(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize)
{
outConv = 's';
outSize = Size_Default;
}
virtual void HandleChar(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize)
{
outConv = 'c';
outSize = Size_Default;
}
};
#else // !__WINDOWS__
// on Unix, it's %s for ANSI functions and %ls for widechar:
#if !wxUSE_UTF8_LOCALE_ONLY
class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
virtual void HandleString(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize) wxOVERRIDE
{
outConv = 's';
outSize = Size_Long;
}
virtual void HandleChar(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize) wxOVERRIDE
{
outConv = 'c';
outSize = Size_Long;
}
};
#endif // !wxUSE_UTF8_LOCALE_ONLY
#endif // __WINDOWS__/!__WINDOWS__
#if wxUSE_UNICODE_UTF8
class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
{
virtual void HandleString(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize) wxOVERRIDE
{
outConv = 's';
outSize = Size_Default;
}
virtual void HandleChar(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize) wxOVERRIDE
{
// chars are represented using wchar_t in both builds, so this is
// the same as above
outConv = 'c';
outSize = Size_Long;
}
};
#endif // wxUSE_UNICODE_UTF8
#if !wxUSE_UNICODE // FIXME-UTF8: remove
class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
{
virtual void HandleString(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize)
{
outConv = 's';
outSize = Size_Default;
}
virtual void HandleChar(CharType WXUNUSED(conv),
SizeModifier WXUNUSED(size),
CharType& outConv, SizeModifier& outSize)
{
outConv = 'c';
outSize = Size_Default;
}
};
#endif // ANSI
#ifndef __WINDOWS__
/*
wxScanf() format translation is different, we need to translate %s to %ls
and %c to %lc on Unix (but not Windows and for widechar functions only!).
So to use native functions in order to get our semantics we must do the
following translations in Unicode mode:
wxWidgets specifier POSIX specifier
----------------------------------------
%hc, %C, %hC %c
%c %lc
*/
class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
virtual void HandleString(CharType conv, SizeModifier size,
CharType& outConv, SizeModifier& outSize) wxOVERRIDE
{
outConv = 's';
outSize = GetOutSize(conv == 'S', size);
}
virtual void HandleChar(CharType conv, SizeModifier size,
CharType& outConv, SizeModifier& outSize) wxOVERRIDE
{
outConv = 'c';
outSize = GetOutSize(conv == 'C', size);
}
SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
{
// %S and %hS -> %s and %lS -> %ls
if ( convIsUpper )
{
if ( size == Size_Long )
return Size_Long;
else
return Size_Default;
}
else // %s or %c
{
if ( size == Size_Default )
return Size_Long;
else
return size;
}
}
};
const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
{
return wxScanfFormatConverterWchar().Convert(format);
}
#endif // !__WINDOWS__
// ----------------------------------------------------------------------------
// wxFormatString
// ----------------------------------------------------------------------------
#if !wxUSE_UNICODE_WCHAR
const char* wxFormatString::InputAsChar()
{
if ( m_char )
return m_char.data();
// in ANSI build, wx_str() returns char*, in UTF-8 build, this function
// is only called under UTF-8 locales, so we should return UTF-8 string,
// which is, again, what wx_str() returns:
if ( m_str )
return m_str->wx_str();
// ditto wxCStrData:
if ( m_cstr )
return m_cstr->AsInternal();
// the last case is that wide string was passed in: in that case, we need
// to convert it:
wxASSERT( m_wchar );
m_char = wxConvLibc.cWC2MB(m_wchar.data());
return m_char.data();
}
const char* wxFormatString::AsChar()
{
if ( !m_convertedChar )
#if !wxUSE_UNICODE // FIXME-UTF8: remove this
m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
#else
m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
#endif
return m_convertedChar.data();
}
#endif // !wxUSE_UNICODE_WCHAR
#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
const wchar_t* wxFormatString::InputAsWChar()
{
if ( m_wchar )
return m_wchar.data();
#if wxUSE_UNICODE_WCHAR
if ( m_str )
return m_str->wc_str();
if ( m_cstr )
return m_cstr->AsInternal();
#else // wxUSE_UNICODE_UTF8
if ( m_str )
{
m_wchar = m_str->wc_str();
return m_wchar.data();
}
if ( m_cstr )
{
m_wchar = m_cstr->AsWCharBuf();
return m_wchar.data();
}
#endif // wxUSE_UNICODE_WCHAR/UTF8
// the last case is that narrow string was passed in: in that case, we need
// to convert it:
wxASSERT( m_char );
m_wchar = wxConvLibc.cMB2WC(m_char.data());
return m_wchar.data();
}
const wchar_t* wxFormatString::AsWChar()
{
if ( !m_convertedWChar )
m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
return m_convertedWChar.data();
}
#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
wxString wxFormatString::InputAsString() const
{
if ( m_str )
return *m_str;
if ( m_cstr )
return m_cstr->AsString();
if ( m_wchar )
return wxString(m_wchar);
if ( m_char )
return wxString(m_char);
wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
return wxString();
}
// ----------------------------------------------------------------------------
// wxFormatString::GetArgumentType()
// ----------------------------------------------------------------------------
namespace
{
template<typename CharType>
wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
unsigned n)
{
wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
"empty format string not allowed here" );
wxPrintfConvSpecParser<CharType> parser(format);
if ( n > parser.nargs )
{
// The n-th argument doesn't appear in the format string and is unused.
// This can happen e.g. if a translation of the format string is used
// and the translation language tends to avoid numbers in singular forms.
// The translator would then typically replace "%d" with "One" (e.g. in
// Hebrew). Passing too many vararg arguments does not harm, so its
// better to be more permissive here and allow legitimate uses in favour
// of catching harmless errors.
return wxFormatString::Arg_Unused;
}
wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
"requested argument not found - invalid format string?" );
switch ( parser.pspec[n-1]->m_type )
{
case wxPAT_CHAR:
case wxPAT_WCHAR:
return wxFormatString::Arg_Char;
case wxPAT_PCHAR:
case wxPAT_PWCHAR:
return wxFormatString::Arg_String;
case wxPAT_INT:
return wxFormatString::Arg_Int;
case wxPAT_LONGINT:
return wxFormatString::Arg_LongInt;
#ifdef wxLongLong_t
case wxPAT_LONGLONGINT:
return wxFormatString::Arg_LongLongInt;
#endif
case wxPAT_SIZET:
return wxFormatString::Arg_Size_t;
case wxPAT_DOUBLE:
return wxFormatString::Arg_Double;
case wxPAT_LONGDOUBLE:
return wxFormatString::Arg_LongDouble;
case wxPAT_POINTER:
return wxFormatString::Arg_Pointer;
case wxPAT_NINT:
return wxFormatString::Arg_IntPtr;
case wxPAT_NSHORTINT:
return wxFormatString::Arg_ShortIntPtr;
case wxPAT_NLONGINT:
return wxFormatString::Arg_LongIntPtr;
case wxPAT_STAR:
// "*" requires argument of type int
return wxFormatString::Arg_Int;
case wxPAT_INVALID:
// (handled after the switch statement)
break;
}
// silence warning
wxFAIL_MSG( "unexpected argument type" );
return wxFormatString::Arg_Unknown;
}
} // anonymous namespace
wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
{
if ( m_char )
return DoGetArgumentType(m_char.data(), n);
else if ( m_wchar )
return DoGetArgumentType(m_wchar.data(), n);
else if ( m_str )
return DoGetArgumentType(m_str->wx_str(), n);
else if ( m_cstr )
return DoGetArgumentType(m_cstr->AsInternal(), n);
wxFAIL_MSG( "unreachable code" );
return Arg_Unknown;
}