It's not in use yet, but add Kevin's docparser code so it doesn't get lost...

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@41113 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Robin Dunn
2006-09-09 19:42:51 +00:00
parent e78e8fdbfd
commit ce1245e1fa
5 changed files with 718 additions and 0 deletions

View File

View File

@@ -0,0 +1,32 @@
import re
# Maps an upper-case HTML tag name to the reST markup that replaces both the
# opening and the closing form of that tag.
conversion_table = {
    "B": "**",
    "I": "*",
    "TT": "``",
    "P": "\n",
    "BR": "\n",
}

html_classlink_re = "<A HREF=\".*?\">(.*?)</A>"

# Compiled once at import time instead of on every htmlToReST() call.
_html_classlink_regex = re.compile(
    html_classlink_re, re.DOTALL | re.MULTILINE | re.IGNORECASE)


def htmlToReST(html):
    """
    Convert a fragment of the wx HTML docs into reStructuredText.

    Newlines are dropped, literal asterisks are escaped, a leading <P> is
    removed, <A HREF="...">text</A> links become `text`, the tags listed in
    conversion_table (upper- or lower-case) are replaced by their reST
    markup, and double quotes are backslash-escaped.  The result is returned
    with surrounding whitespace stripped.
    """
    # \n is useless in the HTML docs, we'll use P tags to break paragraphs.
    restText = html.replace("\n", "")
    # Escape literal asterisks now, before the tag conversion below adds
    # asterisks that are meant as markup.
    restText = restText.replace("*", "\\*")
    if restText.startswith("<P>"):
        restText = restText[3:]

    # Raw string: "\g" is not a valid escape in an ordinary string literal.
    restText = _html_classlink_regex.sub(r"`\g<1>`", restText)

    for htmltag, markup in conversion_table.items():
        for tagname in (htmltag, htmltag.lower()):
            restText = restText.replace("<%s>" % tagname, markup)
            restText = restText.replace("</%s>" % tagname, markup)

    # we need to escape any remaining double-quotes
    restText = restText.replace('"', '\\"')
    return restText.strip()

View File

@@ -0,0 +1,173 @@
import wx
import restconvert
html_heading = "<H3><font color=\"red\">%s</font></H3>"


def stylesAsHtml(styles, extraStyles=False):
    """
    Render window styles as an HTML heading followed by a two-column table.

    styles is a sequence of items whose first two elements are written into
    the two cells of a row; extraStyles selects the "Extra window styles"
    heading instead of "Window styles".
    """
    if extraStyles:
        title = "Extra window styles"
    else:
        title = "Window styles"

    pieces = [html_heading % title, """<table width="95%">"""]
    for entry in styles:
        # NOTE: rows are emitted without a closing </tr>.
        pieces.append("<tr><td>%s</td><td>%s</td>" % (entry[0], entry[1]))
    pieces.append("</table>")

    return "".join(pieces)
class wxClass:
    """
    A class parsed from the wx HTML docs.

    Attributes set by the constructor: name, description, derivedFrom,
    styles, extrastyles, plus methods (dict), props and propConflicts
    (lists), which start empty.
    """

    def __init__(self, name, description="", derivedFrom=None, styles=None, extrastyles=None):
        # None stands in for "no value given": a list literal as a default
        # would be shared by every instance created without that argument.
        if derivedFrom is None:
            derivedFrom = []
        if styles is None:
            styles = []
        if extrastyles is None:
            extrastyles = []

        self.name = name
        self.description = description
        self.derivedFrom = derivedFrom
        self.styles = styles
        self.extrastyles = extrastyles
        self.methods = {}
        self.propConflicts = []
        self.props = []

    def asHtml(self):
        """Return an HTML page body: name, description, base classes, styles."""
        html = "<H1>%s</H1>" % self.name
        html += self.description
        if len(self.derivedFrom) > 0:
            html += html_heading % "Derived from"
            for der in self.derivedFrom:
                # Link target uses the un-namespaced, lower-cased class name.
                derurl = der.replace("wx.", "wx").lower()
                html += "<a href=\"wx_%s.html\">%s</a></br>" % (derurl, der)

        if len(self.styles) > 0:
            html += stylesAsHtml(self.styles)

        if len(self.extrastyles) > 0:
            html += stylesAsHtml(self.extrastyles, extraStyles=True)

        return html

    def asReST(self):
        """Return a DocStr(...) stub containing only the class name."""
        restText = "DocStr(%s,\n" % (self.name)

        restText += ");"
        return restText

    def createProps(self):
        """
        Return Python source lines that define a property for each entry of
        self.props, e.g. "wx.Foo.Bar = property(wx.Foo.GetBar, wx.Foo.SetBar)".

        Entries that also appear in self.propConflicts are removed from
        self.props first.  The setter is included only when the wx class
        has a matching Set<prop> attribute.  Returns "" when there is
        nothing to emit.
        """
        propsText = ""
        propList = self.props
        for conflict in self.propConflicts:
            if conflict in propList:
                propList.remove(conflict)

        basename = self.name.replace("wx", "")
        for prop in propList:
            if prop == "":
                continue

            propname = prop
            # A Python identifier cannot start with a digit.
            if propname[0] == "3":
                propname = "Three" + propname[1:]

            getter = "wx.%s.Get%s" % (basename, prop)
            setter = "wx.%s.Set%s" % (basename, prop)
            propsText += "wx.%s.%s = property(%s" % (basename, propname, getter)
            # getattr() instead of eval(): the names come from parsed docs
            # and must never be executed as code.
            hasSetter = ("Set" + prop) in dir(getattr(wx, basename))
            if hasSetter:
                propsText += ", %s" % setter
            propsText += ")\n"

        if propsText != "":
            propsText += "\n\n"

        return propsText
class wxMethod:
    """
    A method parsed from the wx HTML docs.

    prototypes is a sequence of [return type, name, args] entries, where
    args is a sequence of [type, name] pairs.  params is indexed as
    [name, description] pairs by asReST() and asHtml().  parent is the
    owning class object; only its name attribute is read here.
    """

    def __init__(self, name, parent, prototypes=None, params=None, description="", remarks=""):
        # None stands in for "no value given": a literal default would be
        # shared by every instance created without that argument.
        if prototypes is None:
            prototypes = []
        if params is None:
            params = {}

        self.name = name
        self.parent = parent
        self.prototypes = prototypes
        self.params = params
        self.description = description
        self.remarks = remarks
        self.pythonNote = ""
        self.pythonOverrides = []

    def asReST(self):
        """Return the parameters and description as reStructuredText."""
        restText = ""

        # The below code converts prototypes into ReST, but currently isn't
        # needed. Left here in case we change approach later.
        #for proto in self.prototypes:
        #    restText += proto[1] + "("
        #    counter = 1
        #    for arg in proto[2]:
        #        restText += "%s %s" % (arg[0].replace("wx.", ""), arg[1])
        #        if counter < len(proto[2]):
        #            restText += ", "
        #        counter += 1
        #    if proto[0] != "":
        #        restText += "-> " + proto[0]
        #    restText += "\n"
        #restText += "\n"

        if len(self.params) > 0:
            for param in self.params:
                restText += "\n:param %s: %s" % (param[0], restconvert.htmlToReST(param[1]))
            restText += "\n\n"

        restText += restconvert.htmlToReST(self.description.strip())
        return restText

    def asHtml(self):
        """
        Return an HTML section for this method: anchor, heading, the
        wxPython overrides if there are any (otherwise the prototypes),
        the parameter table, the description and the remarks.
        """
        anchorname = self.getAnchorName()
        retval = "<A name=\"%s\"></A>" % (anchorname)
        retval += "<H3>%s</H3>" % self.name
        if len(self.pythonOverrides) > 0:
            for myfunc in self.pythonOverrides:
                retval += "<p><b>%s</b></br>%s</p>" % (myfunc[0], myfunc[1])
        else:
            for proto in self.prototypes:
                retval += "<P><B>"
                if proto[0] != "":
                    retval += proto[0] + " "
                retval += proto[1] + "("
                arglist = ["%s <i>%s</i>" % (arg[0], arg[1]) for arg in proto[2]]
                retval += ", ".join(arglist)
                retval += ")</B></P>"

        if len(self.params) > 0:
            # A single "%": this literal is not passed through % formatting,
            # so "%%" would appear verbatim in the output.
            retval += "<table width=\"90%\" cellspacing=\"10\">"
            for param in self.params:
                retval += "<tr><td align=\"right\"><i>%s</i></td><td bgcolor=\"#E3E3E3\">%s</td></tr>" % (param[0], param[1])
            retval += "</table>"

        retval += "<p>%s</p>" % self.description

        if self.remarks != "":
            retval += "<font color=\"red\">Remarks</font><h4>%s</h4></font>" % self.remarks

        return retval

    def getAnchorName(self):
        """
        Return the HTML anchor name: lower-cased parent name plus method
        name, or just the lower-cased name when both are equal.
        """
        anchorname = self.parent.name.lower() + self.name.lower()
        if self.parent.name == self.name:
            anchorname = self.name.lower()

        return anchorname

    def asString(self):
        """Return a plain-text dump of this method, for debugging."""
        retval = "method: " + self.name
        retval += "\n\nprototypes: "
        for proto in self.prototypes:
            retval += "\t%s" % repr(proto)
        retval += "\n\nparams: "
        # params is a sequence of [name, description] pairs everywhere else
        # in this class; a dict (the historical default) is accepted too.
        if isinstance(self.params, dict):
            pairs = self.params.items()
        else:
            pairs = self.params
        for param in pairs:
            retval += "%s: %s" % (param[0], param[1])

        retval += "\n\ndescription: \n" + self.description
        retval += "remarks: \n" + self.remarks

        return retval

View File

@@ -0,0 +1,394 @@
import sys, os, string, glob
import re
from docparser.wxclasses import *
import wx
outputdir = "output"
#
# Class REs
#
class_desc_re = """<H2>.*?</H2>(.*?)<B><FONT COLOR="#FF0000">"""
win_styles_re = """<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
win_styles_extra_re = """<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
win_style_re = """<TR><TD VALIGN=TOP WIDTH=.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
derived_re = """<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
derived_class_re = """<A HREF=".*?">(.*?)</A>"""
#
# Method REs
#
# groups - header, description
method_re = "<H3>(.*?)</H3>\s*?<P>(.*?)<HR>"
lastmethod_re = "<H3>(.*?)</H3>\s*?<P>(.*?)\s*?<P>\s*?</FONT>"
headings_re = "<B><FONT COLOR=\"#FF0000\">(.*?)</FONT></B><P>(.*?)"
# groups = param name, param value
param_re = "<I>(.*?)</I><UL><UL>(.*?)</UL></UL>"
# groups - return type, method name, arguments
proto_re = "<B>(.*?)</B>.*?<B>(.*?)</B>\s*?\((.*?)\)"
# groups - arg type, arg name
args_re = "<B>(.*?)</B>.*?<I>(.*?)</I>"
code_re = "<PRE>(.*?)</PRE>"
link_re = "<A href=\"(.*?)\"><B>(.*?)</B></A><BR>"
#
# wxPython/wxPerl note REs
#
wx_re = "wx[A-Z]\S+"
wxperl_overload_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s*?<UL><UL>(.*?)</UL></UL>"
wxperl_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B>(.*?)<P>"
wxpython_constructors_re = """<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s*?<UL><UL>(.*?)</UL></UL>"""
wxpython_overload_re = """<TR><TD VALIGN=TOP.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
wxpython_overloads_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s*?implements the following methods:<P>\s*?<UL><UL>(.*?)</UL></UL>"
wxpython_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B>(.*?)<P>"
# convert wxWhatever to wx.Whatever
def namespacify_wxClasses(contents):
    """Return contents with every wx_re match rewritten by wxReplaceFunc."""
    pattern = re.compile(wx_re, re.MULTILINE | re.DOTALL)
    return pattern.sub(wxReplaceFunc, contents)
def wxReplaceFunc(match):
    """
    Substitution callback: return the matched text with every "wx" turned
    into "wx.", unless the text mentions wxWidgets, wxPython or wxPerl, in
    which case it is returned unchanged.
    """
    token = match.group()
    for product in ("wxWidgets", "wxPython", "wxPerl"):
        if product in token:
            return token
    return token.replace("wx", "wx.")
# Methods to de-C++itize data.
def pythonize_text(contents):
    """
    Remove C++isms that definitely shouldn't be in any text.
    """
    # Applied in order: "non-NULL" must be handled before "NULL", and
    # "\r\n" before the lone "\r".
    substitutions = (
        ("false", "False"),
        ("true", "True"),
        ("non-NULL", "not None"),
        ("NULL", "None"),
        ("const ", ""),
        ("::", "."),
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("''", "\""),
    )
    for cpp_form, python_form in substitutions:
        contents = contents.replace(cpp_form, python_form)

    return namespacify_wxClasses(contents)
def pythonize_args(contents):
    """
    Remove C++isms from arguments (some of these terms may be used in other
    contexts in actual documentation, so we don't remove them there).
    """
    # Applied in order: the longer forms ("virtual void", "void*", "&amp;")
    # must be replaced before the shorter strings they contain.
    substitutions = (
        ("static", ""),
        ("virtual void", ""),
        ("virtual", ""),
        ("void*", "int"),
        ("void", ""),
        ("off_t", "long"),
        ("size_t", "long"),
        ("*", ""),
        ("&amp;", ""),
        ("&", ""),
        ("char", "string"),
        ("wxChar", "string"),
        ("wxCoord", "int"),
        ("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string"),
    )
    for cpp_form, python_form in substitutions:
        contents = contents.replace(cpp_form, python_form)

    return pythonize_text(contents)
def formatMethodProtos(protos):
    """
    Remove C++isms in the method prototypes.

    protos is a list of [return type, method name, args] lists, where args
    is a list of [arg type, arg name] lists.  The entries are modified in
    place and the same list is returned.
    """
    for proto in protos:
        proto[0] = pythonize_args(proto[0]).strip()
        proto[1] = namespacify_wxClasses(proto[1])
        for arg in proto[2]:
            # Keep the stripped value; strip() returns a new string, so
            # calling it without assigning the result has no effect.
            arg[0] = pythonize_args(arg[0]).strip()

            # for arg names, we should be more careful about what we replace
            arg[1] = pythonize_text(arg[1])
            arg[1] = arg[1].replace("*", "")
            arg[1] = arg[1].replace("&", "")

    return protos
# functions for getting data from methods
def getMethodWxPythonOverrides(text, isConstructor=False):
    """
    Extract the wxPython-specific overloads listed in a "wxPython note".

    Searches text with wxpython_constructors_re when isConstructor is true,
    otherwise with wxpython_overloads_re.  Returns (overrides, remaining
    text): overrides is a list of [name, description] pairs, and the
    remaining text is text with the matched note removed.  Without a match
    the result is ([], text).
    """
    if isConstructor:
        note_re = wxpython_constructors_re
    else:
        note_re = wxpython_overloads_re

    note_regex = re.compile(note_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    found = note_regex.search(text, 0)
    if not found:
        return [], text

    def overrideFromRow(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    overrides, unused = findAllMatches(wxpython_overload_re, found.group(1), overrideFromRow)
    # str.replace removes every occurrence of the matched note text.
    return overrides, text.replace(found.group(0), "")
def getMethodWxPythonNote(text):
    """
    Extract the first wxpython_re match from text.

    Returns (note, remaining text), where note is group 1 of the match and
    the remaining text is text with the whole matched span removed.
    Without a match the result is ("", text).
    """
    found = re.compile(wxpython_re, re.MULTILINE | re.DOTALL | re.IGNORECASE).search(text)
    if not found:
        return "", text

    # str.replace removes every occurrence of the matched note text.
    return found.group(1), text.replace(found.group(0), "")
def findAllMatches(re_string, text, handler, start=0):
    """
    findAllMatches finds matches for a given regex, then runs the handler function
    on each match, and returns a list of objects, along with a version of the
    text with the area matches were found stripped.
    Note the stripping of text is not generally usable yet, it assumes matches
    are in continuous blocks, which is true of the wx docs.

    re_string is compiled with MULTILINE | DOTALL | IGNORECASE and the
    search begins at index start.  The span removed from the returned text
    runs from the start of the first match to the end of the last one.
    """
    regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    results = []

    startpoint = -1
    endpoint = -1

    # finditer() always advances past a zero-width match, whereas calling
    # search() again from match.end() would find the same empty match
    # forever.
    for match in regex.finditer(text, start):
        if startpoint == -1:
            startpoint = match.start()
        results.append(handler(match))
        endpoint = match.end()

    returntext = text
    if startpoint != -1 and endpoint != -1:
        returntext = text.replace(text[startpoint:endpoint], "")

    return results, returntext
def getMethodParams(text):
    """
    Collect the param_re matches in text, searching from the position of
    the "Parameters" heading (text.find() yields -1 when it is absent).

    Returns (params, remaining text) as produced by findAllMatches.
    """
    heading = "<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>"
    return findAllMatches(param_re, text, getMethodParamsFromMatch, text.find(heading))
def getMethodParamsFromMatch(match):
    """Return [group 1 stripped, group 2 pythonized and stripped]."""
    param_name = match.group(1).strip()
    param_desc = pythonize_text(match.group(2)).strip()
    return [param_name, param_desc]
def getPrototypeFromMatch(match):
    """Return [group 1, group 2, args parsed from group 3 by getProtoArgs]."""
    first, second, raw_args = match.group(1, 2, 3)
    return [first, second, getProtoArgs(raw_args)]
def getProtoArgsFromMatch(match):
    """Return groups 1 and 2 of the match as a two-element list."""
    return list(match.group(1, 2))
# These methods parse the docs, finding matches and then using the FromMatch
# functions to parse the data. After that, the results are "Pythonized"
# by removing C++isms.
def getMethodProtos(text):
    """
    Return (prototypes, remaining text): the proto_re matches found in
    text, passed through formatMethodProtos, plus the text left over by
    findAllMatches.
    """
    raw_protos, leftover = findAllMatches(proto_re, text, getPrototypeFromMatch)
    return formatMethodProtos(raw_protos), leftover
def getProtoArgs(text):
    """Return one list per args_re match in text, built by getProtoArgsFromMatch."""
    return findAllMatches(args_re, text, getProtoArgsFromMatch)[0]
def getMethodDesc(text):
    """
    Return the pythonized part of text that precedes the first red section
    heading, or all of text when no such heading is present.
    """
    heading_marker = "<B><FONT COLOR=\"#FF0000\">"
    # split(..., 1)[0] is everything before the first marker, or the whole
    # string when the marker does not occur.
    leading_part = text.split(heading_marker, 1)[0]
    return pythonize_text(leading_part)
def removeWxPerlNotes(text):
    """
    Return text with all wxperl_overload_re matches removed first and all
    wxperl_re matches removed afterwards.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    for note_re in (wxperl_overload_re, wxperl_re):
        text = re.compile(note_re, flags).sub("", text)
    return text
def removeCPPCode(text):
    """Return text with every code_re match (a <PRE>...</PRE> block) removed."""
    return re.compile(code_re, re.MULTILINE | re.DOTALL | re.IGNORECASE).sub("", text)
def getMethod(match, parent):
    """
    Build a wxMethod from a method_re / lastmethod_re match.

    Group 1 is the method header and group 2 the body.  The body is parsed
    for prototypes, wxPython overrides, the wxPython note, parameters and
    the description, in that order, each step working on the text left
    over by the previous one.  parent is the owning class object.
    """
    header = match.group(1)
    if "::" in header:
        # Keep the part after the class qualifier.
        header = header.split("::")[1]
    name = namespacify_wxClasses(header).strip()

    protos, remainder = getMethodProtos(match.group(2))

    # A method named like its (namespaced) class is the constructor.
    isConstructor = (name == parent.name.replace("wx", "wx."))
    overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)
    note, remainder = getMethodWxPythonNote(remainder)
    params, remainder = getMethodParams(remainder)

    method = wxMethod(name, parent, protos, params, getMethodDesc(remainder))
    method.pythonNote = note
    method.pythonOverrides = overrides
    if len(overrides) > 0:
        print("has overrides!\n\n\n\n")

    return method
def getClassDerivedFrom(text):
    """
    Return the namespaced class names linked in the "Derived from" section
    of text, or an empty list when that section is not found.
    """
    section = re.compile(derived_re, re.MULTILINE | re.DOTALL | re.IGNORECASE).search(text)
    if not section:
        return []

    def nameFromLink(link):
        return namespacify_wxClasses(link.group(1))

    return findAllMatches(derived_class_re, section.group(1), nameFromLink)[0]
def getClassDescription(text):
    """
    Return the pythonized class description: group 1 of the first
    class_desc_re match in text.

    Returns "" when text contains no match, so that one page without the
    expected layout does not abort the whole run with an IndexError.
    """
    def getClassDescriptionFromMatch(match):
        return match.group(1)

    desc, returntext = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch)
    if not desc:
        return ""

    return pythonize_text(desc[0])
def getClassStyles(text, extraStyles=False):
    """
    Return the [name, description] pairs listed in the "Window styles"
    section of text, or in the "Extra window styles" section when
    extraStyles is true.  Returns an empty list when the section is absent.
    """
    if extraStyles:
        section_re = win_styles_extra_re
    else:
        section_re = win_styles_re

    section = re.compile(section_re, re.MULTILINE | re.DOTALL | re.IGNORECASE).search(text)
    if not section:
        return []

    def styleFromRow(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    return findAllMatches(win_style_re, section.group(1), styleFromRow)[0]
# Main functions - these drive the process.
def getClassMethods(doc, parent):
    """
    Parse the class page doc and return a dict mapping method name to
    wxMethod.

    A method is kept when it is the constructor or when its name is an
    attribute of the matching wx class.  For each kept "GetXxx" method,
    "Xxx" is appended to parent.props when the wx class has no attribute
    of that name, otherwise "<parent.name>.Xxx" is appended to
    parent.propConflicts.  Destructor and operator entries are dropped
    from the result.
    """
    docfile = open(doc, "rb")
    try:
        contents = docfile.read()
    finally:
        docfile.close()

    # get rid of some particularly tricky parts before parsing
    contents = contents.replace("<B>const</B>", "")
    contents = removeWxPerlNotes(contents)
    contents = removeCPPCode(contents)

    basename = parent.name.replace("wx", "")
    methods = {}

    def addIfWrapped(match):
        # Keep only methods wxPython actually exposes.  getattr() replaces
        # eval() so that names scraped from the docs are never executed.
        newmethod = getMethod(match, parent)
        isConstructor = (basename == newmethod.name.replace("wx.", ""))
        if isConstructor or newmethod.name in dir(getattr(wx, basename)):
            print("Adding %s.%s" % (parent.name, newmethod.name))
            methods[newmethod.name] = newmethod

    method_regex = re.compile(method_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    match = method_regex.search(contents)
    start = 0
    while match:
        start = match.end()
        addIfWrapped(match)
        match = method_regex.search(contents, start)

    # method_re needs a trailing <HR>; lastmethod_re is tried once more from
    # where the previous search stopped.
    lastmethod_regex = re.compile(lastmethod_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    match = lastmethod_regex.search(contents, start)
    if match:
        addIfWrapped(match)

    for name in methods:
        if name[0:3] == "Get":
            propname = name[3:]
            if propname not in dir(getattr(wx, basename)):
                parent.props.append(propname)
            else:
                parent.propConflicts.append(parent.name + "." + propname)

    # get rid of the destructor and operator methods
    ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
                      "operator &lt;&lt;", "operator &gt;&gt;", "operator =",
                      "operator !=", "operator*", "operator++"]
    for method in ignore_methods:
        if method in methods:
            methods.pop(method)

    return methods
def getClasses(doc):
    """
    Parse the class reference page doc and return a dict mapping class
    name to wxClass.

    Links are read starting at the "Alphabetical class reference" heading.
    A linked class is processed only when its name without the "wx" prefix
    is an attribute of the wx module; its page is looked up in the same
    directory as doc.
    """
    def readFile(path):
        # Close the handle even if read() fails.
        handle = open(path, "rb")
        try:
            return handle.read()
        finally:
            handle.close()

    contents = readFile(doc)
    link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    start = contents.find("<H2>Alphabetical class reference</H2>")
    result = link_regex.search(contents, start)
    classes = {}
    while result:
        start = result.end()
        name = result.group(2).strip()
        # Drop the "#anchor" part of the link target.
        classpage = result.group(1).split("#")[0]
        basename = name.replace("wx", "")
        if basename in dir(wx):
            classfile = os.path.join(os.path.dirname(doc), classpage)
            classtext = readFile(classfile)
            derivedClasses = getClassDerivedFrom(classtext)
            description = getClassDescription(classtext)
            styles = getClassStyles(classtext)
            extra_styles = getClassStyles(classtext, extraStyles=True)
            classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
            classes[name].methods = getClassMethods(classfile, classes[name])
        result = link_regex.search(contents, start)

    return classes

View File

@@ -0,0 +1,119 @@
import sys, os, string, glob
import re
from docparser.wxclasses import *
from docparser.wxhtmlparse import *
import wx
# HTML macros
html_heading = "<H3><font color=\"red\">%s</font></H3>"


def classToHTML(name, thisclass):
    """
    Write the HTML page for thisclass to "wx_<name lower-cased>.html" in
    the global outdir.

    Methods of the base classes listed in thisclass.derivedFrom (looked up
    in the global classes dict) are merged into thisclass.methods, except
    each base class's constructor.  A method defined by thisclass itself
    is never replaced by an inherited one, and the base class's own method
    dict is left untouched.
    """
    global outdir, classes
    classname = namespacify_wxClasses(name)

    methods = thisclass.methods
    for parentclass in thisclass.derivedFrom:
        # A separate variable: reusing classname here would make the
        # constructor lookup in sortMethods() use the parent's name.
        parentname = parentclass.replace("wx.", "wx")
        if parentname not in classes:
            continue
        for inherited_name, inherited in classes[parentname].methods.items():
            # The parent's constructor is stored under its namespaced name.
            if inherited_name == parentclass:
                continue
            if inherited_name not in methods:
                methods[inherited_name] = inherited

    methodnames = sortMethods(classname, list(methods.keys()))

    page = open(os.path.join(outdir, "wx_" + name.lower() + ".html"), "w")
    try:
        page.write(thisclass.asHtml())
        page.write("<HR>" + html_heading % "Methods")

        for method in methodnames:
            page.write("<A href=\"#%s\">%s</A></BR>" % (methods[method].getAnchorName(), method))

        page.write("<HR>")

        for method in methodnames:
            page.write(methods[method].asHtml())
            page.write("<HR>")
    finally:
        page.close()
def sortMethods(classname, methodnames):
    """
    Return a new sorted list of methodnames, with classname (the
    constructor) moved to the front when it is present.

    methodnames may be any iterable and is not modified.
    """
    names = sorted(methodnames)

    # bump the constructor to the top of the list.
    if classname in names:
        names.remove(classname)
        names.insert(0, classname)

    return names
def makeDocString(name, docstring, longdocs=""):
    """
    Return a 'DocStr(<name>, "<docstring>", "<longdocs>");' line followed
    by a blank line, with every "wx." in name turned back into "wx".
    """
    template = "DocStr(%s, \"%s\", \"%s\");\n\n"
    return template % (name.replace("wx.", "wx"), docstring, longdocs)
def classToReST(name, thisclass):
    """
    Write the DocStr entries for thisclass and its methods to
    "_<name>_docstrings.i" in the global restdir.

    The first entry is the class itself with its description; one entry
    per method follows, constructor first, using the method's asReST()
    text.
    """
    global restdir
    classname = namespacify_wxClasses(name)
    methodnames = sortMethods(classname, list(thisclass.methods.keys()))

    page = open(os.path.join(restdir, "_" + name + "_docstrings.i"), "w")
    try:
        page.write(makeDocString(thisclass.name, thisclass.description))
        for method in methodnames:
            docstr = makeDocString(name + "::" + method.replace("wx.", "wx"),
                                   thisclass.methods[method].asReST())
            page.write(docstr)
    finally:
        # Close the file even when a method fails to render.
        page.close()
# The docs directory is the only command-line argument.  Stop here when it
# is missing or is not a directory, rather than creating output
# directories under a bogus path.
if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
    print("Please specify the directory where docs are located.")
    sys.exit(1)

docspath = sys.argv[1]

outdir = os.path.join(docspath, outputdir)
if not os.path.exists(outdir):
    os.makedirs(outdir)

restdir = os.path.join(docspath, "docstrings")
if not os.path.exists(restdir):
    os.makedirs(restdir)

classes_page = os.path.join(docspath, "wx_classref.html")
print("docspath: %s" % (classes_page))
if os.path.exists(classes_page):
    propConflicts = []

    # first, add namespace conventions to classes page.
    output = open(os.path.join(outdir, os.path.basename(classes_page)), "w")
    try:
        output.write("<HTML><HEAD></HEAD><BODY>")

        propsfile = open(os.path.join(outdir, "props.py"), "w")
        try:
            propsfile.write("import wx\n\n")

            # now, change the classes.
            print("parsing wx HTML docs...")
            classes = getClasses(classes_page)
            names = sorted(classes.keys())

            for name in names:
                basename = name.replace("wx", "")
                urlname = "wx_%s.html" % name.lower()
                output.write("<b><a href=\"%s\">%s</a></b><br>" % (urlname, basename))
                print("creating HTML docs for " + name)
                classToHTML(name, classes[name])
                print("creating rest docs for " + name)
                classToReST(name, classes[name])
                propsfile.write(classes[name].createProps())
                # Collect the per-class conflicts so the summary printed
                # below actually reports them.
                propConflicts.extend(classes[name].propConflicts)
        finally:
            propsfile.close()

        output.write("</BODY></HTML>")
    finally:
        output.close()

    print("prop conflicts: " + repr(propConflicts))

#for doc in glob.glob(os.path.join(docspath, "wx_*.html")):
#    print "doc is: %s" % (doc)
#    pythonize_doc(doc)