/////////////////////////////////////////////////////////////////////////////// // Name: src/common/markupparser.cpp // Purpose: Implementation of wxMarkupParser. // Author: Vadim Zeitlin // Created: 2011-02-16 // RCS-ID: $Id: $ // Copyright: (c) 2011 Vadim Zeitlin // Licence: wxWindows licence /////////////////////////////////////////////////////////////////////////////// // ============================================================================ // declarations // ============================================================================ // ---------------------------------------------------------------------------- // headers // ---------------------------------------------------------------------------- // for compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" #ifdef __BORLANDC__ #pragma hdrstop #endif #ifndef WX_PRECOMP #endif // WX_PRECOMP #include "wx/private/markupparser.h" #include "wx/stack.h" namespace { // ---------------------------------------------------------------------------- // constants // ---------------------------------------------------------------------------- // Array containing the predefined XML 1.0 entities. const struct XMLEntity { const char *name; int len; // == strlen(name) char value; } xmlEntities[] = { { "lt", 2, '<' }, { "gt", 2, '>' }, { "amp", 3, '&' }, { "apos", 4, '\''}, { "quot", 4, '"' }, }; // ---------------------------------------------------------------------------- // helper functions // ---------------------------------------------------------------------------- wxString ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end) { wxString str; for ( ; it != end; ++it ) { if ( *it == ch ) return str; str += *it; } // Return empty string to indicate that we didn't find ch at all. return wxString(); } } // anonymous namespace // ============================================================================ // wxMarkupParser implementation // ============================================================================ wxString wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs) { if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() ) { return wxString::Format("tag \"%s\" can't have attributes", tagAndAttrs.name); } // TODO: Parse more attributes described at // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html // and at least ignore them gracefully instead of giving errors (but // quite a few of them could be supported as well, notable font_desc). wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs; while ( !attrs.empty() ) { wxString rest; const wxString attr = attrs.BeforeFirst(' ', &rest); attrs = rest; // The "original" versions are used for error messages only. wxString valueOrig; const wxString nameOrig = attr.BeforeFirst('=', &valueOrig); const wxString name = nameOrig.Lower(); wxString value = valueOrig.Lower(); // All attributes values must be quoted. if ( value.length() < 2 || (value[0] != value.Last()) || (value[0] != '"' && value[0] != '\'') ) { return wxString::Format("bad quoting for value of \"%s\"", nameOrig); } value.assign(value, 1, value.length() - 2); if ( name == "foreground" || name == "fgcolor" || name == "color" ) { spanAttrs.m_fgCol = value; } else if ( name == "background" || name == "bgcolor" ) { spanAttrs.m_bgCol = value; } else if ( name == "font_family" || name == "face" ) { spanAttrs.m_fontFace = value; } else if ( name == "font_weight" || name == "weight" ) { unsigned long weight; if ( value == "ultralight" || value == "light" || value == "normal" ) spanAttrs.m_isBold = wxMarkupSpanAttributes::No; else if ( value == "bold" || value == "ultrabold" || value == "heavy" ) spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes; else if ( value.ToULong(&weight) ) spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes : wxMarkupSpanAttributes::No; else return wxString::Format("invalid font weight \"%s\"", valueOrig); } else if ( name == "font_style" || name == "style" ) { if ( value == "normal" ) spanAttrs.m_isItalic = wxMarkupSpanAttributes::No; else if ( value == "oblique" || value == "italic" ) spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes; else return wxString::Format("invalid font style \"%s\"", valueOrig); } else if ( name == "size" ) { unsigned long size; if ( value.ToULong(&size) ) { spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts; spanAttrs.m_fontSize = size; } else if ( value == "smaller" || value == "larger" ) { spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative; spanAttrs.m_fontSize = value == "smaller" ? -1 : +1; } else // Must be a CSS-like size specification { int cssSize = 1; wxString rest; if ( value.StartsWith("xx-", &rest) ) cssSize = 3; else if ( value.StartsWith("x-", &rest) ) cssSize = 2; else if ( value == "medium" ) cssSize = 0; else rest = value; if ( cssSize != 0 ) { if ( rest == "small" ) cssSize = -cssSize; else if ( rest != "large" ) return wxString::Format("invalid font size \"%s\"", valueOrig); } spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic; spanAttrs.m_fontSize = cssSize; } } } return wxString(); } bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start) { if ( tagAndAttrs.name.CmpNoCase("span") == 0 ) { if ( start ) m_output.OnSpanStart(tagAndAttrs.attrs); else m_output.OnSpanEnd(tagAndAttrs.attrs); return true; } else // non-span tag { static const struct TagHandler { const char *name; void (wxMarkupParserOutput::*startFunc)(); void (wxMarkupParserOutput::*endFunc)(); } tagHandlers[] = { { "b", &wxMarkupParserOutput::OnBoldStart, &wxMarkupParserOutput::OnBoldEnd }, { "i", &wxMarkupParserOutput::OnItalicStart, &wxMarkupParserOutput::OnItalicEnd }, { "u", &wxMarkupParserOutput::OnUnderlinedStart, &wxMarkupParserOutput::OnUnderlinedEnd }, { "s", &wxMarkupParserOutput::OnStrikethroughStart, &wxMarkupParserOutput::OnStrikethroughEnd }, { "big", &wxMarkupParserOutput::OnBigStart, &wxMarkupParserOutput::OnBigEnd }, { "small", &wxMarkupParserOutput::OnSmallStart, &wxMarkupParserOutput::OnSmallEnd }, { "tt", &wxMarkupParserOutput::OnTeletypeStart, &wxMarkupParserOutput::OnTeletypeEnd }, }; for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ ) { const TagHandler& h = tagHandlers[n]; if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 ) { if ( start ) (m_output.*(h.startFunc))(); else (m_output.*(h.endFunc))(); return true; } } } // Unknown tag name. return false; } bool wxMarkupParser::Parse(const wxString& text) { // The stack containing the names and corresponding attributes (which are // actually only used for tags) of all of the currently opened tag // or none if we're not inside any tag. wxStack tags; // Current run of text. wxString current; const wxString::const_iterator end = text.end(); for ( wxString::const_iterator it = text.begin(); it != end; ++it ) { switch ( (*it).GetValue() ) { case '<': { // Flush the text preceding the tag, if any. if ( !current.empty() ) { m_output.OnText(current); current.clear(); } // Remember the tag starting position for the error // messages. const size_t pos = it - text.begin(); bool start = true; if ( ++it != end && *it == '/' ) { start = false; ++it; } const wxString tag = ExtractUntil('>', it, end); if ( tag.empty() ) { wxLogDebug("%s at %lu.", it == end ? "Unclosed tag starting" : "Empty tag", pos); return false; } if ( start ) { wxString attrs; const wxString name = tag.BeforeFirst(' ', &attrs); TagAndAttrs tagAndAttrs(name); const wxString err = ParseAttrs(attrs, tagAndAttrs); if ( !err.empty() ) { wxLogDebug("Bad attributes for \"%s\" " "at %lu: %s.", name, pos, err); return false; } tags.push(tagAndAttrs); } else // end tag { if ( tags.empty() || tags.top().name != tag ) { wxLogDebug("Unmatched closing tag \"%s\" at %lu.", tag, pos); return false; } } if ( !OutputTag(tags.top(), start) ) { wxLogDebug("Unknown tag at %lu.", pos); return false; } if ( !start ) tags.pop(); } break; case '>': wxLogDebug("'>' should be escaped as \">\"; at %lu.", it - text.begin()); break; case '&': // Processing is somewhat complicated: we need to recognize at // least the "<" entity to allow escaping left square // brackets in the markup and, in fact, we recognize all of the // standard XML entities for consistency with Pango markup // parsing. // // However we also allow '&' to appear unescaped, i.e. directly // and not as "&" when it is used to introduce the mnemonic // for the label. In this case we simply leave it alone. // // Notice that this logic makes it impossible to have a label // with "lt;" inside it and using "l" as mnemonic but hopefully // this shouldn't be a problem in practice. { const size_t pos = it - text.begin() + 1; unsigned n; for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) { const XMLEntity& xmlEnt = xmlEntities[n]; if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0 && text[pos + xmlEnt.len] == ';' ) { // Escape the ampersands if needed to protect them // from being interpreted as mnemonics indicators. if ( xmlEnt.value == '&' ) current += "&&"; else current += xmlEnt.value; it += xmlEnt.len + 1; // +1 for '&' itself break; } } if ( n < WXSIZEOF(xmlEntities) ) break; //else: fall through, '&' is not special } default: current += *it; } } if ( !tags.empty() ) { wxLogDebug("Missing closing tag for \"%s\"", tags.top().name); return false; } if ( !current.empty() ) m_output.OnText(current); return true; } /* static */ wxString wxMarkupParser::Quote(const wxString& text) { wxString quoted; quoted.reserve(text.length()); for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it ) { unsigned n; for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) { const XMLEntity& xmlEnt = xmlEntities[n]; if ( *it == xmlEnt.value ) { quoted << '&' << xmlEnt.name << ';'; break; } } if ( n == WXSIZEOF(xmlEntities) ) quoted += *it; } return quoted; }