Handle "&amp;" in exactly the same way as "&" in wxMarkupParser, i.e. do not map the former to "&&" to prevent it from being interpreted as a mnemonic, as this is incompatible with using markup for anything but control labels, e.g. for wxDataViewCtrl item text, in which mnemonics are not recognized. And even when using markup for control labels, it was a questionable decision, as it's really not clear at all why the XML entity and the raw character itself should be handled differently. Also split wxMarkupText into two classes: wxMarkupText, which handles mnemonics in the markup (which is typically a label), and a very similar, but not derived, wxItemMarkupText, which handles mnemonic-less markup for list etc. items, uses DrawItemText() and supports ellipsizing. Illustrate the use of ampersands in the dataview sample.
479 lines
16 KiB
C++
479 lines
16 KiB
C++
///////////////////////////////////////////////////////////////////////////////
|
|
// Name: src/common/markupparser.cpp
|
|
// Purpose: Implementation of wxMarkupParser.
|
|
// Author: Vadim Zeitlin
|
|
// Created: 2011-02-16
|
|
// Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
|
|
// Licence: wxWindows licence
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// ============================================================================
|
|
// declarations
|
|
// ============================================================================
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// headers
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// for compilers that support precompilation, includes "wx.h".
|
|
#include "wx/wxprec.h"
|
|
|
|
#ifdef __BORLANDC__
|
|
#pragma hdrstop
|
|
#endif
|
|
|
|
#if wxUSE_MARKUP
|
|
|
|
#ifndef WX_PRECOMP
|
|
#include "wx/log.h"
|
|
#endif
|
|
|
|
#include "wx/private/markupparser.h"
|
|
|
|
#include "wx/stack.h"
|
|
|
|
namespace
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// constants
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Description of one of the predefined XML 1.0 entities.
struct XMLEntity
{
    // Entity name, without the leading '&' and the trailing ';'.
    const char *name;

    // Length of the name, must be kept equal to strlen(name).
    int len;

    // The single character that the entity stands for.
    char value;
};

// Table of all the predefined XML 1.0 entities, used both for decoding them
// when parsing the markup and for encoding special characters when quoting.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// helper functions
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Collect the characters in the range [it, end) preceding the first
// occurrence of the given character.
//
// On success, the collected characters are returned and the iterator is left
// pointing at the character ch itself (it is not skipped).
//
// If ch doesn't occur in the range at all, an empty string is returned and
// the iterator is equal to end. Notice that an empty string is also returned
// if ch is the very first character of the range, so the caller needs to
// compare the iterator with end to distinguish between these two cases.
wxString
ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
{
    wxString collected;

    while ( it != end )
    {
        if ( *it == ch )
        {
            // Found the terminator, don't consume it.
            return collected;
        }

        collected += *it;
        ++it;
    }

    // We ran out of input without ever seeing ch: discard whatever we have
    // accumulated and return an empty string to indicate it.
    return wxString();
}
|
|
|
|
} // anonymous namespace
|
|
|
|
// ============================================================================
|
|
// wxMarkupParser implementation
|
|
// ============================================================================
|
|
|
|
// Parse the attributes of a tag and store the recognized ones in
// tagAndAttrs.attrs.
//
// The attrs parameter is the part of the tag text following its name, i.e. a
// sequence of name="value" (or name='value') pairs separated by spaces.
// Quoted values may themselves contain spaces. Attribute names and values are
// compared case-insensitively, unknown attributes are silently ignored.
//
// Only the "span" tag may have attributes, specifying them for any other tag
// is an error.
//
// Returns an empty string on success or a description of the problem if the
// attributes couldn't be parsed.
wxString
wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
{
    if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
    {
        return wxString::Format("tag \"%s\" can't have attributes",
                                tagAndAttrs.name);
    }

    // TODO: Parse more attributes described at
    //       http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
    //       and at least ignore them gracefully instead of giving errors (but
    //       quite a few of them could be supported as well, notable font_desc).

    wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;

    while ( !attrs.empty() )
    {
        wxString rest;
        const wxString attr = attrs.BeforeFirst(' ', &rest);
        attrs = rest;

        // Leading, trailing or consecutive spaces result in empty pieces:
        // they are harmless, so just skip them instead of complaining about
        // bad quoting of a non-existent attribute.
        if ( attr.empty() )
            continue;

        // The "original" versions are used for error messages only.
        wxString valueOrig;
        const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);

        // As we split the attributes at spaces, a quoted value containing
        // spaces itself (e.g. font_family="Times New Roman") was cut short
        // above. Detect this, i.e. a value starting with a quote but not
        // terminated by the same one, and append the rest of the value, up to
        // and including the closing quote, to it.
        if ( !valueOrig.empty() )
        {
            const wxUniChar quote = valueOrig[0];
            if ( (quote == '"' || quote == '\'') &&
                    (valueOrig.length() < 2 || valueOrig.Last() != quote) )
            {
                const size_t posClose = attrs.find(quote);
                if ( posClose != wxString::npos )
                {
                    // Restore the space at which we had split the value.
                    valueOrig << ' ' << attrs.substr(0, posClose + 1);
                    attrs.erase(0, posClose + 1);
                }
                //else: no closing quote at all, error is given below
            }
        }

        const wxString name = nameOrig.Lower();
        wxString value = valueOrig.Lower();

        // All attributes values must be quoted.
        if ( value.length() < 2 ||
                (value[0] != value.Last()) ||
                    (value[0] != '"' && value[0] != '\'') )
        {
            return wxString::Format("bad quoting for value of \"%s\"",
                                    nameOrig);
        }

        // Strip the quotes.
        value.assign(value, 1, value.length() - 2);

        if ( name == "foreground" || name == "fgcolor" || name == "color" )
        {
            spanAttrs.m_fgCol = value;
        }
        else if ( name == "background" || name == "bgcolor" )
        {
            spanAttrs.m_bgCol = value;
        }
        else if ( name == "font_family" || name == "face" )
        {
            spanAttrs.m_fontFace = value;
        }
        else if ( name == "font_weight" || name == "weight" )
        {
            unsigned long weight;

            if ( value == "ultralight" || value == "light" || value == "normal" )
                spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
            else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
                spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
            else if ( value.ToULong(&weight) )
                // Numeric weights of 600 and above are considered bold.
                spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
                                                   : wxMarkupSpanAttributes::No;
            else
                return wxString::Format("invalid font weight \"%s\"", valueOrig);
        }
        else if ( name == "font_style" || name == "style" )
        {
            if ( value == "normal" )
                spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
            else if ( value == "oblique" || value == "italic" )
                spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
            else
                return wxString::Format("invalid font style \"%s\"", valueOrig);
        }
        else if ( name == "size" )
        {
            unsigned long size;
            if ( value.ToULong(&size) )
            {
                spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
                spanAttrs.m_fontSize = size;
            }
            else if ( value == "smaller" || value == "larger" )
            {
                spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
                spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
            }
            else // Must be a CSS-like size specification
            {
                // The symbolic size is encoded as a number whose magnitude is
                // 1 for the plain size, 2 for "x-" and 3 for "xx-" prefixed
                // one and whose sign is negative for "small" and positive for
                // "large", with 0 being used for "medium".
                wxString sizeName;
                int cssSize = 1;
                if ( value.StartsWith("xx-", &sizeName) )
                    cssSize = 3;
                else if ( value.StartsWith("x-", &sizeName) )
                    cssSize = 2;
                else if ( value == "medium" )
                    cssSize = 0;
                else
                    sizeName = value;

                if ( cssSize != 0 )
                {
                    if ( sizeName == "small" )
                        cssSize = -cssSize;
                    else if ( sizeName != "large" )
                        return wxString::Format("invalid font size \"%s\"",
                                                valueOrig);
                }

                spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
                spanAttrs.m_fontSize = cssSize;
            }
        }
        //else: unknown attributes are currently just ignored
    }

    return wxString();
}
|
|
|
|
bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
|
|
{
|
|
if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
|
|
{
|
|
if ( start )
|
|
m_output.OnSpanStart(tagAndAttrs.attrs);
|
|
else
|
|
m_output.OnSpanEnd(tagAndAttrs.attrs);
|
|
|
|
return true;
|
|
}
|
|
else // non-span tag
|
|
{
|
|
static const struct TagHandler
|
|
{
|
|
const char *name;
|
|
void (wxMarkupParserOutput::*startFunc)();
|
|
void (wxMarkupParserOutput::*endFunc)();
|
|
} tagHandlers[] =
|
|
{
|
|
{ "b", &wxMarkupParserOutput::OnBoldStart,
|
|
&wxMarkupParserOutput::OnBoldEnd },
|
|
{ "i", &wxMarkupParserOutput::OnItalicStart,
|
|
&wxMarkupParserOutput::OnItalicEnd },
|
|
{ "u", &wxMarkupParserOutput::OnUnderlinedStart,
|
|
&wxMarkupParserOutput::OnUnderlinedEnd },
|
|
{ "s", &wxMarkupParserOutput::OnStrikethroughStart,
|
|
&wxMarkupParserOutput::OnStrikethroughEnd },
|
|
{ "big", &wxMarkupParserOutput::OnBigStart,
|
|
&wxMarkupParserOutput::OnBigEnd },
|
|
{ "small", &wxMarkupParserOutput::OnSmallStart,
|
|
&wxMarkupParserOutput::OnSmallEnd },
|
|
{ "tt", &wxMarkupParserOutput::OnTeletypeStart,
|
|
&wxMarkupParserOutput::OnTeletypeEnd },
|
|
};
|
|
|
|
for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
|
|
{
|
|
const TagHandler& h = tagHandlers[n];
|
|
|
|
if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
|
|
{
|
|
if ( start )
|
|
(m_output.*(h.startFunc))();
|
|
else
|
|
(m_output.*(h.endFunc))();
|
|
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unknown tag name.
|
|
return false;
|
|
}
|
|
|
|
bool wxMarkupParser::Parse(const wxString& text)
|
|
{
|
|
// The stack containing the names and corresponding attributes (which are
|
|
// actually only used for <span> tags) of all of the currently opened tag
|
|
// or none if we're not inside any tag.
|
|
wxStack<TagAndAttrs> tags;
|
|
|
|
// Current run of text.
|
|
wxString current;
|
|
|
|
const wxString::const_iterator end = text.end();
|
|
for ( wxString::const_iterator it = text.begin(); it != end; ++it )
|
|
{
|
|
switch ( (*it).GetValue() )
|
|
{
|
|
case '<':
|
|
{
|
|
// Flush the text preceding the tag, if any.
|
|
if ( !current.empty() )
|
|
{
|
|
m_output.OnText(current);
|
|
current.clear();
|
|
}
|
|
|
|
// This variable is used only in the debugging messages
|
|
// and doesn't need to be defined if they're not compiled
|
|
// at all (it actually would result in unused variable
|
|
// messages in this case).
|
|
#if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS)
|
|
// Remember the tag starting position for the error
|
|
// messages.
|
|
const size_t pos = it - text.begin();
|
|
#endif
|
|
bool start = true;
|
|
if ( ++it != end && *it == '/' )
|
|
{
|
|
start = false;
|
|
++it;
|
|
}
|
|
|
|
const wxString tag = ExtractUntil('>', it, end);
|
|
if ( tag.empty() )
|
|
{
|
|
wxLogDebug("%s at %lu.",
|
|
it == end ? "Unclosed tag starting"
|
|
: "Empty tag",
|
|
pos);
|
|
return false;
|
|
}
|
|
|
|
if ( start )
|
|
{
|
|
wxString attrs;
|
|
const wxString name = tag.BeforeFirst(' ', &attrs);
|
|
|
|
TagAndAttrs tagAndAttrs(name);
|
|
const wxString err = ParseAttrs(attrs, tagAndAttrs);
|
|
if ( !err.empty() )
|
|
{
|
|
wxLogDebug("Bad attributes for \"%s\" "
|
|
"at %lu: %s.",
|
|
name, pos, err);
|
|
return false;
|
|
}
|
|
|
|
tags.push(tagAndAttrs);
|
|
}
|
|
else // end tag
|
|
{
|
|
if ( tags.empty() || tags.top().name != tag )
|
|
{
|
|
wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
|
|
tag, pos);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if ( !OutputTag(tags.top(), start) )
|
|
{
|
|
wxLogDebug("Unknown tag at %lu.", pos);
|
|
return false;
|
|
}
|
|
|
|
if ( !start )
|
|
tags.pop();
|
|
}
|
|
break;
|
|
|
|
case '>':
|
|
wxLogDebug("'>' should be escaped as \">\"; at %lu.",
|
|
it - text.begin());
|
|
break;
|
|
|
|
case '&':
|
|
// Processing is somewhat complicated: we need to recognize at
|
|
// least the "<" entity to allow escaping left square
|
|
// brackets in the markup and, in fact, we recognize all of the
|
|
// standard XML entities for consistency with Pango markup
|
|
// parsing.
|
|
//
|
|
// However we also allow '&' to appear unescaped, i.e. directly
|
|
// and not as "&" when it is used to introduce the mnemonic
|
|
// for the label. In this case we simply leave it alone.
|
|
//
|
|
// Notice that this logic makes it impossible to have a label
|
|
// with "lt;" inside it and using "l" as mnemonic but hopefully
|
|
// this shouldn't be a problem in practice.
|
|
{
|
|
const size_t pos = it - text.begin() + 1;
|
|
|
|
unsigned n;
|
|
for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
|
|
{
|
|
const XMLEntity& xmlEnt = xmlEntities[n];
|
|
if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
|
|
&& text[pos + xmlEnt.len] == ';' )
|
|
{
|
|
current += xmlEnt.value;
|
|
|
|
it += xmlEnt.len + 1; // +1 for '&' itself
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n < WXSIZEOF(xmlEntities) )
|
|
break;
|
|
wxFALLTHROUGH;//else: fall through, '&' is not special
|
|
}
|
|
|
|
default:
|
|
current += *it;
|
|
}
|
|
}
|
|
|
|
if ( !tags.empty() )
|
|
{
|
|
wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
|
|
return false;
|
|
}
|
|
|
|
if ( !current.empty() )
|
|
m_output.OnText(current);
|
|
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
// Return the text with all the characters corresponding to the predefined XML
// entities (see xmlEntities table) replaced by these entities, all the other
// characters are copied unchanged.
wxString wxMarkupParser::Quote(const wxString& text)
{
    wxString result;

    // The result is at least as long as the input.
    result.reserve(text.length());

    const wxString::const_iterator last = text.end();
    for ( wxString::const_iterator ch = text.begin(); ch != last; ++ch )
    {
        // Look for the entity corresponding to this character, if any.
        unsigned idx = 0;
        while ( idx < WXSIZEOF(xmlEntities) &&
                    !(*ch == xmlEntities[idx].value) )
        {
            ++idx;
        }

        if ( idx < WXSIZEOF(xmlEntities) )
            result << '&' << xmlEntities[idx].name << ';';
        else // not a special character
            result += *ch;
    }

    return result;
}
|
|
|
|
/* static */
|
|
// Return the text with all the markup tags removed from it and the XML
// entities replaced by the corresponding characters.
//
// An empty string is returned if the markup couldn't be parsed.
wxString wxMarkupParser::Strip(const wxString& text)
{
    // Trivial output implementation which ignores all the formatting
    // notifications and just accumulates all the text it is given.
    class PlainTextCollector : public wxMarkupParserOutput
    {
    public:
        PlainTextCollector() { }

        // The only callback actually doing anything.
        virtual void OnText(const wxString& string) wxOVERRIDE
        {
            m_plain += string;
        }

        // All the tags are simply ignored.
        virtual void OnBoldStart() wxOVERRIDE { }
        virtual void OnBoldEnd() wxOVERRIDE { }

        virtual void OnItalicStart() wxOVERRIDE { }
        virtual void OnItalicEnd() wxOVERRIDE { }

        virtual void OnUnderlinedStart() wxOVERRIDE { }
        virtual void OnUnderlinedEnd() wxOVERRIDE { }

        virtual void OnStrikethroughStart() wxOVERRIDE { }
        virtual void OnStrikethroughEnd() wxOVERRIDE { }

        virtual void OnBigStart() wxOVERRIDE { }
        virtual void OnBigEnd() wxOVERRIDE { }

        virtual void OnSmallStart() wxOVERRIDE { }
        virtual void OnSmallEnd() wxOVERRIDE { }

        virtual void OnTeletypeStart() wxOVERRIDE { }
        virtual void OnTeletypeEnd() wxOVERRIDE { }

        virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) wxOVERRIDE { }
        virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) wxOVERRIDE { }

        // Accessor for the text collected so far.
        const wxString& GetText() const { return m_plain; }

    private:
        wxString m_plain;
    };

    PlainTextCollector collector;
    wxMarkupParser parser(collector);

    return parser.Parse(text) ? collector.GetText() : wxString();
}
|
|
|
|
#endif // wxUSE_MARKUP
|