This code is not used anywhere yet; this commit only adds the parser for the markup and the related classes, as well as the corresponding unit test. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@67048 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
428 lines
14 KiB
C++
428 lines
14 KiB
C++
///////////////////////////////////////////////////////////////////////////////
|
|
// Name: src/common/markupparser.cpp
|
|
// Purpose: Implementation of wxMarkupParser.
|
|
// Author: Vadim Zeitlin
|
|
// Created: 2011-02-16
|
|
// RCS-ID: $Id: $
|
|
// Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
|
|
// Licence: wxWindows licence
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// ============================================================================
|
|
// declarations
|
|
// ============================================================================
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// headers
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// for compilers that support precompilation, includes "wx.h".
|
|
#include "wx/wxprec.h"
|
|
|
|
#ifdef __BORLANDC__
|
|
#pragma hdrstop
|
|
#endif
|
|
|
|
#ifndef WX_PRECOMP
|
|
#endif // WX_PRECOMP
|
|
|
|
#include "wx/private/markupparser.h"
|
|
|
|
#include "wx/stack.h"
|
|
|
|
namespace
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// constants
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Description of a single predefined XML 1.0 entity.
struct XMLEntity
{
    // Entity name without the leading '&' and the trailing ';'.
    const char *name;

    // Length of the name, must always be equal to strlen(name).
    int len;

    // The character this entity stands for.
    char value;
};

// Table of all the predefined XML 1.0 entities, it is used both for decoding
// them when parsing the markup and for encoding special characters when
// quoting text.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// helper functions
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Collect the characters starting at "it" up to, but not including, the first
// occurrence of "ch".
//
// On return "it" points at the found character or is equal to "end" if the
// character was not found at all.
//
// Returns the collected text if "ch" was found. An empty string is returned
// both when "ch" is the very first character and when it is not found, the
// caller can distinguish between these cases by comparing "it" with "end".
wxString
ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
{
    wxString collected;

    while ( it != end )
    {
        if ( *it == ch )
            return collected;

        collected += *it;
        ++it;
    }

    // We reached the end without seeing the terminating character: discard
    // whatever was collected so far to indicate the failure.
    return wxString();
}
|
|
|
|
} // anonymous namespace
|
|
|
|
// ============================================================================
|
|
// wxMarkupParser implementation
|
|
// ============================================================================
|
|
|
|
// Parse the attributes of a tag and store the recognized ones.
//
// Parameters:
//  attrs        The part of the tag following its name: a possibly empty
//               list of name="value" (or name='value') pairs separated by
//               spaces.
//  tagAndAttrs  Its "name" field is used to check whether the tag is allowed
//               to have attributes at all (only "span" is) and its "attrs"
//               field is updated with the values of the recognized
//               attributes.
//
// Returns an empty string on success or the description of the error.
//
// Both the attribute names and their values are converted to lower case
// before being used. Attributes with unknown names are silently ignored
// provided their values are correctly quoted.
wxString
wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
{
    if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
    {
        return wxString::Format("tag \"%s\" can't have attributes",
                                tagAndAttrs.name);
    }

    // TODO: Parse more attributes described at
    //       http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
    //       and at least ignore them gracefully instead of giving errors (but
    //       quite a few of them could be supported as well, notably
    //       font_desc).

    wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;

    while ( !attrs.empty() )
    {
        // Any number of spaces may separate the attributes (or precede the
        // first one or follow the last one), just skip them instead of
        // treating them as empty, and hence invalid, attributes.
        if ( attrs[0] == ' ' )
        {
            attrs.erase(0, 1);
            continue;
        }

        // An attribute normally extends until the next space, but a space
        // inside a quoted value (e.g. font_family="Courier New") must not
        // terminate it: if the value starts with a quote which is closed
        // later, only look for the separating space after the closing quote.
        size_t posEnd = attrs.find(' ');
        const size_t posEq = attrs.find('=');
        if ( posEq != wxString::npos &&
                (posEnd == wxString::npos || posEq < posEnd) &&
                    posEq + 1 < attrs.length() )
        {
            const wxUniChar quote = attrs[posEq + 1];
            if ( quote == '"' || quote == '\'' )
            {
                const size_t posClose = attrs.find(quote, posEq + 2);
                if ( posClose != wxString::npos )
                    posEnd = attrs.find(' ', posClose + 1);
            }
        }

        const wxString attr = attrs.substr(0, posEnd);
        if ( posEnd == wxString::npos )
            attrs.clear();
        else
            attrs.erase(0, posEnd + 1);

        // The "original" versions are used for error messages only.
        wxString valueOrig;
        const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);

        const wxString name = nameOrig.Lower();
        wxString value = valueOrig.Lower();

        // All attributes values must be quoted, using the same (single or
        // double) quote character at both ends.
        if ( value.length() < 2 ||
                (value[0] != value.Last()) ||
                    (value[0] != '"' && value[0] != '\'') )
        {
            return wxString::Format("bad quoting for value of \"%s\"",
                                    nameOrig);
        }

        // Strip the quotes.
        value.assign(value, 1, value.length() - 2);

        if ( name == "foreground" || name == "fgcolor" || name == "color" )
        {
            spanAttrs.m_fgCol = value;
        }
        else if ( name == "background" || name == "bgcolor" )
        {
            spanAttrs.m_bgCol = value;
        }
        else if ( name == "font_family" || name == "face" )
        {
            spanAttrs.m_fontFace = value;
        }
        else if ( name == "font_weight" || name == "weight" )
        {
            unsigned long weight;

            if ( value == "ultralight" || value == "light" || value == "normal" )
                spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
            else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
                spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
            else if ( value.ToULong(&weight) )
                // Numeric weights are mapped to bold starting from 600.
                spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
                                                   : wxMarkupSpanAttributes::No;
            else
                return wxString::Format("invalid font weight \"%s\"", valueOrig);
        }
        else if ( name == "font_style" || name == "style" )
        {
            if ( value == "normal" )
                spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
            else if ( value == "oblique" || value == "italic" )
                spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
            else
                return wxString::Format("invalid font style \"%s\"", valueOrig);
        }
        else if ( name == "size" )
        {
            unsigned long size;
            if ( value.ToULong(&size) )
            {
                spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
                spanAttrs.m_fontSize = size;
            }
            else if ( value == "smaller" || value == "larger" )
            {
                spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
                spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
            }
            else // Must be a CSS-like size specification
            {
                // The symbolic sizes are mapped to the range -3..3:
                // "xx-small" is -3, "medium" is 0 and "xx-large" is +3.
                int cssSize = 1;
                wxString rest;
                if ( value.StartsWith("xx-", &rest) )
                    cssSize = 3;
                else if ( value.StartsWith("x-", &rest) )
                    cssSize = 2;
                else if ( value == "medium" )
                    cssSize = 0;
                else
                    rest = value;

                if ( cssSize != 0 )
                {
                    // What remains after the optional prefix must be either
                    // "small" or "large".
                    if ( rest == "small" )
                        cssSize = -cssSize;
                    else if ( rest != "large" )
                        return wxString::Format("invalid font size \"%s\"",
                                                valueOrig);
                }

                spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
                spanAttrs.m_fontSize = cssSize;
            }
        }
        //else: unknown attribute, ignore it (see the TODO above)
    }

    return wxString();
}
|
|
|
|
bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
|
|
{
|
|
if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
|
|
{
|
|
if ( start )
|
|
m_output.OnSpanStart(tagAndAttrs.attrs);
|
|
else
|
|
m_output.OnSpanEnd(tagAndAttrs.attrs);
|
|
|
|
return true;
|
|
}
|
|
else // non-span tag
|
|
{
|
|
static const struct TagHandler
|
|
{
|
|
const char *name;
|
|
void (wxMarkupParserOutput::*startFunc)();
|
|
void (wxMarkupParserOutput::*endFunc)();
|
|
} tagHandlers[] =
|
|
{
|
|
{ "b", &wxMarkupParserOutput::OnBoldStart,
|
|
&wxMarkupParserOutput::OnBoldEnd },
|
|
{ "i", &wxMarkupParserOutput::OnItalicStart,
|
|
&wxMarkupParserOutput::OnItalicEnd },
|
|
{ "u", &wxMarkupParserOutput::OnUnderlinedStart,
|
|
&wxMarkupParserOutput::OnUnderlinedEnd },
|
|
{ "s", &wxMarkupParserOutput::OnStrikethroughStart,
|
|
&wxMarkupParserOutput::OnStrikethroughEnd },
|
|
{ "big", &wxMarkupParserOutput::OnBigStart,
|
|
&wxMarkupParserOutput::OnBigEnd },
|
|
{ "small", &wxMarkupParserOutput::OnSmallStart,
|
|
&wxMarkupParserOutput::OnSmallEnd },
|
|
{ "tt", &wxMarkupParserOutput::OnTeletypeStart,
|
|
&wxMarkupParserOutput::OnTeletypeEnd },
|
|
};
|
|
|
|
for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
|
|
{
|
|
const TagHandler& h = tagHandlers[n];
|
|
|
|
if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
|
|
{
|
|
if ( start )
|
|
(m_output.*(h.startFunc))();
|
|
else
|
|
(m_output.*(h.endFunc))();
|
|
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unknown tag name.
|
|
return false;
|
|
}
|
|
|
|
bool wxMarkupParser::Parse(const wxString& text)
|
|
{
|
|
// The stack containing the names and corresponding attributes (which are
|
|
// actually only used for <span> tags) of all of the currently opened tag
|
|
// or none if we're not inside any tag.
|
|
wxStack<TagAndAttrs> tags;
|
|
|
|
// Current run of text.
|
|
wxString current;
|
|
|
|
const wxString::const_iterator end = text.end();
|
|
for ( wxString::const_iterator it = text.begin(); it != end; ++it )
|
|
{
|
|
switch ( (*it).GetValue() )
|
|
{
|
|
case '<':
|
|
{
|
|
// Flush the text preceding the tag, if any.
|
|
if ( !current.empty() )
|
|
{
|
|
m_output.OnText(current);
|
|
current.clear();
|
|
}
|
|
|
|
// Remember the tag starting position for the error
|
|
// messages.
|
|
const size_t pos = it - text.begin();
|
|
|
|
bool start = true;
|
|
if ( ++it != end && *it == '/' )
|
|
{
|
|
start = false;
|
|
++it;
|
|
}
|
|
|
|
const wxString tag = ExtractUntil('>', it, end);
|
|
if ( tag.empty() )
|
|
{
|
|
wxLogDebug("%s at %lu.",
|
|
it == end ? "Unclosed tag starting"
|
|
: "Empty tag",
|
|
pos);
|
|
return false;
|
|
}
|
|
|
|
if ( start )
|
|
{
|
|
wxString attrs;
|
|
const wxString name = tag.BeforeFirst(' ', &attrs);
|
|
|
|
TagAndAttrs tagAndAttrs(name);
|
|
const wxString err = ParseAttrs(attrs, tagAndAttrs);
|
|
if ( !err.empty() )
|
|
{
|
|
wxLogDebug("Bad attributes for \"%s\" "
|
|
"at %lu: %s.",
|
|
name, pos, err);
|
|
return false;
|
|
}
|
|
|
|
tags.push(tagAndAttrs);
|
|
}
|
|
else // end tag
|
|
{
|
|
if ( tags.empty() || tags.top().name != tag )
|
|
{
|
|
wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
|
|
tag, pos);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if ( !OutputTag(tags.top(), start) )
|
|
{
|
|
wxLogDebug("Unknown tag at %lu.", pos);
|
|
return false;
|
|
}
|
|
|
|
if ( !start )
|
|
tags.pop();
|
|
}
|
|
break;
|
|
|
|
case '>':
|
|
wxLogDebug("'>' should be escaped as \">\"; at %lu.",
|
|
it - text.begin());
|
|
break;
|
|
|
|
case '&':
|
|
// Processing is somewhat complicated: we need to recognize at
|
|
// least the "<" entity to allow escaping left square
|
|
// brackets in the markup and, in fact, we recognize all of the
|
|
// standard XML entities for consistency with Pango markup
|
|
// parsing.
|
|
//
|
|
// However we also allow '&' to appear unescaped, i.e. directly
|
|
// and not as "&" when it is used to introduce the mnemonic
|
|
// for the label. In this case we simply leave it alone.
|
|
//
|
|
// Notice that this logic makes it impossible to have a label
|
|
// with "lt;" inside it and using "l" as mnemonic but hopefully
|
|
// this shouldn't be a problem in practice.
|
|
{
|
|
const size_t pos = it - text.begin() + 1;
|
|
|
|
unsigned n;
|
|
for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
|
|
{
|
|
const XMLEntity& xmlEnt = xmlEntities[n];
|
|
if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
|
|
&& text[pos + xmlEnt.len] == ';' )
|
|
{
|
|
// Escape the ampersands if needed to protect them
|
|
// from being interpreted as mnemonics indicators.
|
|
if ( xmlEnt.value == '&' )
|
|
current += "&&";
|
|
else
|
|
current += xmlEnt.value;
|
|
|
|
it += xmlEnt.len + 1; // +1 for '&' itself
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n < WXSIZEOF(xmlEntities) )
|
|
break;
|
|
//else: fall through, '&' is not special
|
|
}
|
|
|
|
default:
|
|
current += *it;
|
|
}
|
|
}
|
|
|
|
if ( !tags.empty() )
|
|
{
|
|
wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
|
|
return false;
|
|
}
|
|
|
|
if ( !current.empty() )
|
|
m_output.OnText(current);
|
|
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
wxString wxMarkupParser::Quote(const wxString& text)
|
|
{
|
|
wxString quoted;
|
|
quoted.reserve(text.length());
|
|
|
|
for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it )
|
|
{
|
|
unsigned n;
|
|
for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
|
|
{
|
|
const XMLEntity& xmlEnt = xmlEntities[n];
|
|
if ( *it == xmlEnt.value )
|
|
{
|
|
quoted << '&' << xmlEnt.name << ';';
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n == WXSIZEOF(xmlEntities) )
|
|
quoted += *it;
|
|
}
|
|
|
|
return quoted;
|
|
}
|