New Features: In Tab-View mode, Ctrl-number takes the user to the
    numbered tab view.  Modified files now show an asterisk ('*') in
    the view title.  The debugger framework can now support PHP
    debugging.  Not important for Python development, but it means the
    debugger framework is more generalized.
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38852 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
#----------------------------------------------------------------------------
# Name:         parser.py
# Purpose:      parsing utilities
#
# Author:       Jeff Norton
#
# Created:      8/9/05
# CVS-ID:       $Id$
# Copyright:    (c) 2004-2005 ActiveGrid, Inc.
# License:      wxWindows License
#----------------------------------------------------------------------------

import re
from activegrid.util.lang import *
ifDefPy()
import string
import array
endIfDef()

XPATH_ROOT_VAR = '__rootObj__'
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]

class Tokenizer(object):

    TOKEN_IDENT = 1
    TOKEN_STRING = 2
    TOKEN_OP = 3
    TOKEN_WS = 4
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
        """
Turn a string into individual tokens.  Three types of tokens are recognized:
    TOKEN_IDENT:   identifiers (those that start with the identStart pattern)
    TOKEN_STRING:  quoted strings
    TOKEN_OP:      everything else
Tokens are separated by whitespace or the tokenSep pattern.  When
ignoreWhitespace is False, runs of whitespace are also returned as
TOKEN_WS tokens.

Constructor parameters:
    text:  The string to tokenize
    identStart:  A regular expression describing characters which start an
                 identifier.  The default expression accepts letters, "_", and "/".
    tokenSep:    A regular expression describing the characters which end a token
                 (in addition to whitespace).  The default expression accepts
                 anything except alphanumerics, "_", "/", and ":".

Usage:
    Invoke getNextToken (or next) to get the next token.  The instance variables
    token and tokenVal will be populated with the current token type (TOKEN_IDENT,
    TOKEN_STRING, TOKEN_WS, or TOKEN_OP) and value respectively.  nextToken and
    nextTokenVal are also available for lookahead.  The next method is similar to
    getNextToken but also returns the token value.  A value of None signals end
    of stream.
        """
        self.ignoreWhitespace = ignoreWhitespace
        ifDefPy()
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0
        self.textLen = len(self.text)
        self.token = None
        self.tokenVal = None
        self.nextToken = None
        self.nextTokenVal = None
        if (identStart == None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep == None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken() # Prime the pump

    def isEscaped(text, index):
        # Returns True if the character at text[index] is preceded by a
        # single (itself unescaped) backslash.
        if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
            return True
        return False
    isEscaped = staticmethod(isEscaped)
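
    # A quick behavior sketch (illustrative; not part of the original file):
    #
    #   Tokenizer.isEscaped("it\\'s", 3)    # -> True:  the quote follows one backslash
    #   Tokenizer.isEscaped("it\\\\'s", 4)  # -> False: that backslash is itself escaped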

    def findClosingQuote(text, index, char):
        # Returns the index just past the closing quote character, skipping
        # escaped quotes, or -1 if no closing quote is found.
        index = index + 1
        while True:
            endIndex = text.find(char, index)
            if (endIndex < 1):
                return -1
            if (Tokenizer.isEscaped(text, endIndex)):
                index = endIndex+1
            else:
                break
        return endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)
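
    # Sketch (illustrative; not from the original file): the returned index
    # is one past the closing quote, so text[index:result] spans the literal.
    #
    #   Tokenizer.findClosingQuote("'ab'c", 0, "'")    # -> 4: "'ab'" is text[0:4]
    #   Tokenizer.findClosingQuote("'a\\'b'", 0, "'")  # -> 6: the escaped quote is skipped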

    def _findClosing(self, char):
        # Finds the index just past the quote closing the string that starts
        # at the current position, raising if the string is unterminated.
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
        return index

    def next(self):
        # Iterator-style accessor: advances the tokenizer and returns the new
        # token value, raising StopIteration at end of stream.
        self.getNextToken()
        if (self.token == None):
            raise StopIteration()
        return self.tokenVal

    def getNextToken(self):
        # Promote the lookahead token to the current token, then scan ahead
        # for the token after that.
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    # Quoted string: scan to the matching close quote.
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                elif (self.identStart.search(c)):
                    # Identifier: runs until the next separator character.
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                else:
                    # Operator: normally one character, but comparisons may
                    # absorb a following '=' and '%' may absorb a printf-style
                    # format character.
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        c = self.text[endIndex]
                        if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
            elif not self.ignoreWhitespace:
                # Collect a run of whitespace into a single TOKEN_WS token.
                self.nextToken = self.TOKEN_WS
                self.nextTokenVal = ""
                while c in string.whitespace:
                    self.nextTokenVal += c
                    self.textIndex += 1
                    if self.textIndex == len(self.text):
                        break
                    c = self.text[self.textIndex]
                return
            self.textIndex += 1
        # End of stream.
        self.nextToken = None
        self.nextTokenVal = None
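
# A minimal usage sketch for Tokenizer (illustrative; not part of the
# original file).  The constructor primes one token of lookahead, so the
# usual idiom is to loop while nextToken is set:
#
#   t = Tokenizer("count(/order/items) > 3")
#   while t.nextToken != None:
#       t.getNextToken()
#       print t.token, repr(t.tokenVal)
#
# This reports "count" and "/order/items" as TOKEN_IDENT; "(", ")", ">",
# and "3" come back as TOKEN_OP (digits do not match the default identStart).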

def isXPathNonVar(var):
    """Returns true iff var is a string ("foo" or 'foo') or a number."""
    if (var.startswith("'") and var.endswith("'")) or \
            (var.startswith('"') and var.endswith('"')):
        return True

    # list from xpathToCode, below
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \
            "like", "not"]:
        return True

    try:
        int(var)
        return True
    except TypeError:
        pass
    except ValueError:
        pass

    return False
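
# A few illustrative values (not from the original file):
#
#   isXPathNonVar("'foo'")      # -> True:  a quoted string literal
#   isXPathNonVar("42")         # -> True:  parses as an integer
#   isXPathNonVar("count")      # -> True:  an xpathToCode keyword, not a variable
#   isXPathNonVar("/order/id")  # -> False: a path that must be evaluated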

def xpathToCode(xpaths, convertBracket=True):
    # Translates one or more XPath-like predicate strings into a single
    # Python expression string; multiple predicates are joined with "and".
    if ((xpaths == None) or (len(xpaths) < 1)):
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken = None
        while t.nextToken != None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                if (t.tokenVal == "="):
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for "like":
                    # typically like queries look like this: "foo like 'blah%'",
                    # so translate that into "foo.startswith('blah')".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken() # throw away the "like" token
                        last = len(expr) - 1
                        expr[last] = "%s.startswith(%s')"\
                            % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)
                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif (-1 != t.tokenVal.find(':')):
                    # A namespace-qualified identifier is a service call.
                    serviceDef, args = _parseServiceFunction(t)

                    # XXX handle serviceDef, args being None

                    for i in range(len(args)):
                        args[i] = xpathToCode(args[i], False)
                    jargs = "[%s]" % (",".join(args))

                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0] == 'dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                                (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                                (serviceDef, jargs))
                else:
                    # A plain path: a leading '/' roots it at XPATH_ROOT_VAR
                    # unless it continues the result of a bracketed expression.
                    if (lastToken == ')' or lastToken == ']'):
                        wasFunc = True
                    else:
                        wasFunc = False
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        expr.append(XPATH_ROOT_VAR)
                    expr.append(t.tokenVal.replace('/', '.'))
                lastToken = t.tokenVal
            else:
                expr.append(t.tokenVal)

        if (len(expr) == 2 and expr[0] == " "):
            expr = "".join(expr)
            result.append(expr)
        elif (len(expr) > 1):
            expr = "".join(expr)
            result.append("(%s)" % expr)
        elif (len(expr) > 0):
            result.append(expr[0])

    return " and ".join(result)
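
# A hand-traced sketch of the translation (illustrative; not part of the
# original file).  A rooted path becomes an attribute chain on the
# XPATH_ROOT_VAR object, "=" becomes "==", and multiple predicates are
# joined with "and":
#
#   xpathToCode("/foo/bar = 5")
#   # -> "(__rootObj__.foo.bar == 5)"
#   xpathToCode(["/a = 1", "/b = 2"])
#   # -> "(__rootObj__.a == 1) and (__rootObj__.b == 2)"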

def _parseArgs(t):
    # Collects the comma-separated arguments of a call.  The tokenizer's
    # current token must be the opening '('; nested parentheses are tracked
    # so commas inside nested calls do not split arguments.
    args = []
    argcon = ""

    if t.tokenVal != '(':
        return []
    if t.nextTokenVal == ')':
        t.getNextToken()
        return []

    depth = 1

    while (depth != 0):
        if not t.nextToken:
            raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
        t.getNextToken()

        if t.tokenVal == '(':
            depth += 1
        if t.tokenVal == ')':
            depth -= 1

        if depth == 0 or (depth == 1 and t.tokenVal == ','):
            args.append(argcon)
            argcon = ""
        else:
            argcon += t.tokenVal
    return args
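
# Sketch of the splitting behavior (illustrative; assumes the tokenizer is
# positioned on the opening parenthesis, as _parseServiceFunction arranges):
# for the argument list "(a,g(b,c),d)", only the top-level commas split, so
# the result is ["a", "g(b,c)", "d"].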

def _parseServiceFunction(t):
    """Parses what appears to be a service function call into serviceDef and args lists.

    Returns the raw token and None if no argument list follows the identifier.
    """
    if t.nextTokenVal != '(':
        return t.tokenVal, None

    serviceDef = t.tokenVal.split(':')
    t.getNextToken()
    args = _parseArgs(t)

    return serviceDef, args
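
# Illustrative outcome (not from the original file): with the tokenizer
# positioned on an identifier such as "myns:lookup" followed by "(1,2)",
# this returns serviceDef == ["myns", "lookup"] and args == ["1", "2"];
# xpathToCode then converts each argument recursively.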

def _parseConstantFunction(t, outputValue):
    # Collapses a nullary constant function such as true(), false(), or
    # null() to outputValue.  Anything other than an exact "()" suffix is
    # passed through for normal processing.
    firstVal = t.tokenVal
    if t.nextTokenVal != '(':
        return firstVal
    t.getNextToken()
    if t.nextTokenVal != ')':
        return "%s%s" % (firstVal, '(')
    t.getNextToken()
    return outputValue
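
# Illustrative behavior (not from the original file): when the current token
# is "true" and "()" follows, the call collapses to outputValue, so "true()"
# becomes "True".  A bare "true" is returned as-is, and "true(" followed by
# arguments is passed back for normal token-by-token processing.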

def parseDSPredicate(ctx, str, vars, valueList=None):
    # Scans a predicate string for variable references of the form
    # bpws:getVariableData('name') or ${name}, evaluates each against vars,
    # and replaces it with a '%s' placeholder (collecting the value in
    # valueList), or inlines the value directly if the reference was quoted.
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict

    if valueList == None:
        valueList = []
    indexVar = 0
    oldIndexVar = 0
    sourceStr = str
    inlinedPredicate = []
    qualifications = []
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break
            dollarCurlForm = True
        if indexVar > 0 and sourceStr[indexVar-1] in ('"', "'"):
            quoted = True
        if not dollarCurlForm:
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            inlinedPredicate.append("%s" % value)
        else:
            inlinedPredicate.append('%s')
            valueList.append(value)
        indexVar = closeParen+1
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList
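
# A worked sketch (illustrative; not part of the original file).  With a
# dict of variables, an unquoted reference is replaced by a '%s' placeholder
# and its value is collected:
#
#   parseDSPredicate(ctx, "customerid = bpws:getVariableData('custId')",
#                    {'custId': 42})
#   # -> (["customerid = %s"], [42])
#
# ctx is only consulted when vars is not dict-like, so any context object
# works for this call.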