It's not in use yet, but add Kevin's docparser code so it doesn't get lost...
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@41113 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
394
wxPython/docs/bin/docparser/wxhtmlparse.py
Normal file
394
wxPython/docs/bin/docparser/wxhtmlparse.py
Normal file
@@ -0,0 +1,394 @@
|
||||
import sys, os, string, glob
|
||||
import re
|
||||
from docparser.wxclasses import *
|
||||
import wx
|
||||
|
||||
|
||||
outputdir = "output"
|
||||
|
||||
#
|
||||
# Class REs
|
||||
#
|
||||
|
||||
class_desc_re = """<H2>.*?</H2>(.*?)<B><FONT COLOR="#FF0000">"""
|
||||
win_styles_re = """<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
|
||||
win_styles_extra_re = """<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
|
||||
win_style_re = """<TR><TD VALIGN=TOP WIDTH=.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
|
||||
derived_re = """<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
|
||||
derived_class_re = """<A HREF=".*?">(.*?)</A>"""
|
||||
|
||||
#
|
||||
# Method REs
|
||||
#
|
||||
|
||||
# groups - header, description
|
||||
method_re = "<H3>(.*?)</H3>\s*?<P>(.*?)<HR>"
|
||||
lastmethod_re = "<H3>(.*?)</H3>\s*?<P>(.*?)\s*?<P>\s*?</FONT>"
|
||||
headings_re = "<B><FONT COLOR=\"#FF0000\">(.*?)</FONT></B><P>(.*?)"
|
||||
# groups = param name, param value
|
||||
param_re = "<I>(.*?)</I><UL><UL>(.*?)</UL></UL>"
|
||||
# groups - return type, method name, arguments
|
||||
proto_re = "<B>(.*?)</B>.*?<B>(.*?)</B>\s*?\((.*?)\)"
|
||||
# groups - arg type, arg name
|
||||
args_re = "<B>(.*?)</B>.*?<I>(.*?)</I>"
|
||||
code_re = "<PRE>(.*?)</PRE>"
|
||||
link_re = "<A href=\"(.*?)\"><B>(.*?)</B></A><BR>"
|
||||
|
||||
#
|
||||
# wxPython/wxPerl note REs
|
||||
#
|
||||
|
||||
wx_re = "wx[A-Z]\S+"
|
||||
wxperl_overload_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s*?<UL><UL>(.*?)</UL></UL>"
|
||||
wxperl_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B>(.*?)<P>"
|
||||
|
||||
wxpython_constructors_re = """<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s*?<UL><UL>(.*?)</UL></UL>"""
|
||||
wxpython_overload_re = """<TR><TD VALIGN=TOP.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
|
||||
|
||||
wxpython_overloads_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s*?implements the following methods:<P>\s*?<UL><UL>(.*?)</UL></UL>"
|
||||
wxpython_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B>(.*?)<P>"
|
||||
|
||||
|
||||
# convert wxWhatever to wx.Whatever
|
||||
def namespacify_wxClasses(contents):
|
||||
wx_regex = re.compile(wx_re, re.MULTILINE | re.DOTALL)
|
||||
|
||||
result = wx_regex.sub(wxReplaceFunc, contents)
|
||||
return result
|
||||
|
||||
def wxReplaceFunc(match):
|
||||
text = match.group()
|
||||
if text.find("wxWidgets") == -1 and text.find("wxPython") == -1 and text.find("wxPerl") == -1:
|
||||
text = text.replace("wx", "wx.")
|
||||
return text
|
||||
|
||||
|
||||
|
||||
# Methods to de-C++itize data.
|
||||
def pythonize_text(contents):
|
||||
"""
|
||||
Remove C++isms that definitely shouldn't be in any text.
|
||||
"""
|
||||
contents = contents.replace("false", "False")
|
||||
contents = contents.replace("true", "True")
|
||||
contents = contents.replace("non-NULL", "not None")
|
||||
contents = contents.replace("NULL", "None")
|
||||
contents = contents.replace("const ", "")
|
||||
contents = contents.replace("::", ".")
|
||||
contents = contents.replace("\r\n", "\n")
|
||||
contents = contents.replace("\r", "\n")
|
||||
contents = contents.replace("''", "\"")
|
||||
return namespacify_wxClasses(contents)
|
||||
|
||||
def pythonize_args(contents):
|
||||
"""
|
||||
Remove C++isms from arguments (some of these terms may be used in other
|
||||
contexts in actual documentation, so we don't remove them there).
|
||||
"""
|
||||
contents = contents.replace("static", "")
|
||||
contents = contents.replace("virtual void", "")
|
||||
contents = contents.replace("virtual", "")
|
||||
contents = contents.replace("void*", "int")
|
||||
contents = contents.replace("void", "")
|
||||
|
||||
contents = contents.replace("off_t", "long")
|
||||
contents = contents.replace("size_t", "long")
|
||||
contents = contents.replace("*", "")
|
||||
contents = contents.replace("&", "")
|
||||
contents = contents.replace("&", "")
|
||||
contents = contents.replace("char", "string")
|
||||
contents = contents.replace("wxChar", "string")
|
||||
contents = contents.replace("wxCoord", "int")
|
||||
contents = contents.replace("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string")
|
||||
|
||||
return pythonize_text(contents)
|
||||
|
||||
def formatMethodProtos(protos):
|
||||
"""
|
||||
Remove C++isms in the method prototypes.
|
||||
"""
|
||||
for proto in protos:
|
||||
proto[0] = pythonize_args(proto[0])
|
||||
proto[0] = proto[0].strip()
|
||||
|
||||
proto[1] = namespacify_wxClasses(proto[1])
|
||||
for arg in proto[2]:
|
||||
arg[0] = pythonize_args(arg[0])
|
||||
arg[0].strip()
|
||||
|
||||
# for arg names, we should be more careful about what we replace
|
||||
arg[1] = pythonize_text(arg[1])
|
||||
arg[1] = arg[1].replace("*", "")
|
||||
arg[1] = arg[1].replace("&", "")
|
||||
|
||||
return protos
|
||||
|
||||
|
||||
|
||||
# functions for getting data from methods
|
||||
def getMethodWxPythonOverrides(text, isConstructor=False):
|
||||
overloads_re = wxpython_overloads_re
|
||||
if isConstructor:
|
||||
overloads_re = wxpython_constructors_re
|
||||
overload_regex = re.compile(overloads_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = overload_regex.search(text, 0)
|
||||
note = ""
|
||||
start = -1
|
||||
end = -1
|
||||
overrides = []
|
||||
if match:
|
||||
def getWxPythonOverridesFromMatch(match):
|
||||
return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))]
|
||||
|
||||
start = match.start()
|
||||
end = match.end()
|
||||
overrides, returntext = findAllMatches(wxpython_overload_re, match.group(1), getWxPythonOverridesFromMatch)
|
||||
|
||||
returntext = text
|
||||
|
||||
if start != -1 and end != -1:
|
||||
#print "note is: " + text[start:end]
|
||||
returntext = text.replace(text[start:end], "")
|
||||
|
||||
return overrides, returntext
|
||||
|
||||
def getMethodWxPythonNote(text):
|
||||
python_regex = re.compile(wxpython_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = python_regex.search(text)
|
||||
start = -1
|
||||
end = -1
|
||||
note = ""
|
||||
if match:
|
||||
start = match.start()
|
||||
end = match.end()
|
||||
note = match.group(1)
|
||||
|
||||
returntext = text
|
||||
|
||||
if start != -1 and end != -1:
|
||||
#print "note is: " + text[start:end]
|
||||
returntext = text.replace(text[start:end], "")
|
||||
|
||||
return note, returntext
|
||||
|
||||
def findAllMatches(re_string, text, handler, start=0):
|
||||
"""
|
||||
findAllMatches finds matches for a given regex, then runs the handler function
|
||||
on each match, and returns a list of objects, along with a version of the
|
||||
text with the area matches were found stripped.
|
||||
Note the stripping of text is not generally usable yet, it assumes matches
|
||||
are in continuous blocks, which is true of the wx docs.
|
||||
"""
|
||||
regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = regex.search(text, start)
|
||||
results = []
|
||||
|
||||
startpoint = -1
|
||||
endpoint = -1
|
||||
|
||||
if match:
|
||||
startpoint = match.start()
|
||||
|
||||
while match:
|
||||
start = match.end()
|
||||
results.append(handler(match))
|
||||
endpoint = match.end()
|
||||
match = regex.search(text, start)
|
||||
|
||||
returntext = text
|
||||
if startpoint != -1 and endpoint != -1:
|
||||
returntext = text.replace(text[startpoint:endpoint], "")
|
||||
|
||||
return results, returntext
|
||||
|
||||
def getMethodParams(text):
|
||||
paramstart = text.find("<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>")
|
||||
params, returntext = findAllMatches(param_re, text, getMethodParamsFromMatch, paramstart)
|
||||
|
||||
return params, returntext
|
||||
|
||||
def getMethodParamsFromMatch(match):
|
||||
return [match.group(1).strip(), pythonize_text(match.group(2)).strip()]
|
||||
|
||||
def getPrototypeFromMatch(match):
|
||||
return [match.group(1), match.group(2), getProtoArgs(match.group(3))]
|
||||
|
||||
def getProtoArgsFromMatch(match):
|
||||
return [match.group(1), match.group(2)]
|
||||
|
||||
|
||||
|
||||
# These methods parse the docs, finding matches and then using the FromMatch
|
||||
# functions to parse the data. After that, the results are "Pythonized"
|
||||
# by removing C++isms.
|
||||
def getMethodProtos(text):
|
||||
protos, returntext = findAllMatches(proto_re, text, getPrototypeFromMatch)
|
||||
return formatMethodProtos(protos), returntext
|
||||
|
||||
def getProtoArgs(text):
|
||||
args, returntext = findAllMatches(args_re, text, getProtoArgsFromMatch)
|
||||
return args
|
||||
|
||||
def getMethodDesc(text):
|
||||
heading_text = "<B><FONT COLOR=\"#FF0000\">"
|
||||
return_text = text
|
||||
end = text.find(heading_text)
|
||||
if end != -1:
|
||||
return_text = text[0:end]
|
||||
|
||||
return pythonize_text(return_text)
|
||||
|
||||
|
||||
def removeWxPerlNotes(text):
|
||||
perl_overload_regex = re.compile(wxperl_overload_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
result = perl_overload_regex.sub("", text)
|
||||
|
||||
perl_regex = re.compile(wxperl_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
result = perl_regex.sub("", result)
|
||||
|
||||
return result
|
||||
|
||||
def removeCPPCode(text):
|
||||
code_regex = re.compile(code_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
|
||||
result = code_regex.sub("", text)
|
||||
return result
|
||||
|
||||
|
||||
def getMethod(match, parent):
|
||||
name = match.group(1)
|
||||
if name.find("::") != -1:
|
||||
name = name.split("::")[1]
|
||||
name = namespacify_wxClasses(name).strip()
|
||||
start = match.end()
|
||||
protos, remainder = getMethodProtos(match.group(2))
|
||||
|
||||
isConstructor = False
|
||||
#print "name: %s, parent name: %s" % (name, parent.name)
|
||||
if name == parent.name.replace("wx", "wx."):
|
||||
isConstructor = True
|
||||
overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)
|
||||
|
||||
note, remainder = getMethodWxPythonNote(remainder)
|
||||
params, remainder = getMethodParams(remainder)
|
||||
desc = getMethodDesc(remainder)
|
||||
method = wxMethod(name, parent, protos, params, desc)
|
||||
method.pythonNote = note
|
||||
method.pythonOverrides = overrides
|
||||
if len(method.pythonOverrides) > 0:
|
||||
print "has overrides!\n\n\n\n"
|
||||
return method
|
||||
|
||||
def getClassDerivedFrom(text):
|
||||
|
||||
def getDerivedClassesFromMatch(match):
|
||||
return namespacify_wxClasses(match.group(1))
|
||||
|
||||
derived_classes = []
|
||||
derived_regex = re.compile(derived_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = derived_regex.search(text)
|
||||
if match:
|
||||
derived_classes, returntext = findAllMatches(derived_class_re, match.group(1), getDerivedClassesFromMatch)
|
||||
|
||||
return derived_classes
|
||||
|
||||
def getClassDescription(text):
|
||||
|
||||
def getClassDescriptionFromMatch(match):
|
||||
return match.group(1)
|
||||
|
||||
desc, returntext = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch)
|
||||
|
||||
return pythonize_text(desc[0])
|
||||
|
||||
def getClassStyles(text, extraStyles=False):
|
||||
styles_re = win_styles_re
|
||||
if extraStyles:
|
||||
styles_re = win_styles_extra_re
|
||||
styles_regex = re.compile(styles_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = styles_regex.search(text)
|
||||
|
||||
styles = []
|
||||
if match:
|
||||
def getClassStyleFromMatch(match):
|
||||
return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))]
|
||||
|
||||
styles, remainder = findAllMatches(win_style_re, match.group(1), getClassStyleFromMatch)
|
||||
|
||||
return styles
|
||||
|
||||
# Main functions - these drive the process.
|
||||
def getClassMethods(doc, parent):
|
||||
contents = open(doc, "rb").read()
|
||||
|
||||
# get rid of some particularly tricky parts before parsing
|
||||
contents = contents.replace("<B>const</B>", "")
|
||||
contents = removeWxPerlNotes(contents)
|
||||
contents = removeCPPCode(contents)
|
||||
|
||||
method_regex = re.compile(method_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = method_regex.search(contents)
|
||||
start = 0
|
||||
methods = {}
|
||||
while match:
|
||||
start = match.end()
|
||||
newmethod = getMethod(match, parent)
|
||||
basename = parent.name.replace("wx", "")
|
||||
isConstructor = (basename == newmethod.name.replace("wx.", ""))
|
||||
if isConstructor or eval("newmethod.name in dir(wx.%s)" % basename):
|
||||
print "Adding %s.%s" % (parent.name, newmethod.name)
|
||||
methods[newmethod.name] = newmethod
|
||||
match = method_regex.search(contents, start)
|
||||
|
||||
lastmethod_regex = re.compile(lastmethod_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
match = lastmethod_regex.search(contents, start)
|
||||
if match:
|
||||
newmethod = getMethod(match, parent)
|
||||
basename = parent.name.replace("wx", "")
|
||||
isConstructor = (basename == newmethod.name.replace("wx.", ""))
|
||||
if isConstructor or eval("newmethod.name in dir(wx.%s)" % basename):
|
||||
print "Adding %s.%s" % (parent.name, newmethod.name)
|
||||
methods[newmethod.name] = newmethod
|
||||
|
||||
for name in methods:
|
||||
if name[0:3] == "Get":
|
||||
propname = name[3:]
|
||||
basename = parent.name.replace("wx", "")
|
||||
if not propname in eval("dir(wx.%s)" % basename):
|
||||
parent.props.append(propname)
|
||||
else:
|
||||
parent.propConflicts.append(parent.name + "." + propname)
|
||||
# get rid of the destructor and operator methods
|
||||
ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
|
||||
"operator <<", "operator >>", "operator =",
|
||||
"operator !=", "operator*", "operator++" ]
|
||||
for method in ignore_methods:
|
||||
if method in methods:
|
||||
methods.pop(method)
|
||||
|
||||
return methods
|
||||
|
||||
def getClasses(doc):
|
||||
global docspath
|
||||
contents = open(doc, "rb").read()
|
||||
link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
|
||||
start = contents.find("<H2>Alphabetical class reference</H2>")
|
||||
result = link_regex.search(contents, start)
|
||||
classes = {}
|
||||
while result:
|
||||
start = result.end()
|
||||
name = result.group(2).strip()
|
||||
classpage = result.group(1).split("#")[0]
|
||||
basename = name.replace("wx", "")
|
||||
if basename in dir(wx):
|
||||
classfile = os.path.join(os.path.dirname(doc), classpage)
|
||||
classtext = open(classfile, "rb").read()
|
||||
derivedClasses = getClassDerivedFrom(classtext)
|
||||
description = getClassDescription(classtext)
|
||||
styles = getClassStyles(classtext)
|
||||
extra_styles = getClassStyles(classtext, extraStyles=True)
|
||||
classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
|
||||
classes[name].methods = getClassMethods(classfile, classes[name])
|
||||
result = link_regex.search(contents, start)
|
||||
|
||||
return classes
|
Reference in New Issue
Block a user