#---------------------------------------------------------------------------- # Name: parser.py # Purpose: parsing utilities # # Author: Jeff Norton # # Created: 8/9/05 # CVS-ID: $Id$ # Copyright: (c) 2004-2005 ActiveGrid, Inc. # License: wxWindows License #---------------------------------------------------------------------------- import re from activegrid.util.lang import * ifDefPy() import string import array endIfDef() XPATH_ROOT_VAR = '__rootObj__' GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"] class Tokenizer(object): TOKEN_IDENT = 1 TOKEN_STRING = 2 TOKEN_OP = 3 TOKEN_WS = 4 ## TOKEN_PLACEHOLDER = 5 def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True): """ Turn a string into individual tokens. Three types of tokens are recognized: TOKEN_IDENT: identifiers (those that start with the identStart pattern) TOKEN_STRING: quoted string TOKEN_OP: everything else Tokens are separated by white space or the tokenSep pattern. Constructor parameters: text: The string to tokenize identStart: A regular expression describing characters which start an identifier The default expression accepts letters, "_", and "/". tokenSep: A regular expression describing the characters which end a token (in addition to whitespace). The default expression accepts anything except alpha-numerics, "_", "/", and ":". Usage: Invoke getNextToken (or next) to get the next token. The instance variables token, and tokenVal will be populated with the current token type (TOKEN_IDENT, TOKEN_STRING, or TOEKN_OP) and value respectively. nextToken and nextTokenVal will also be available for lookahead. The next method is similar to getNextToken but also returns the token value. A value of None signals end of stream. """ self.ignoreWhitespace=ignoreWhitespace ifDefPy() if (isinstance(text, array.array)): text = text.tostring() endIfDef() self.text = asString(text) self.textIndex = 0 self.textLen = len(self.text) self.token = None self.tokenVal = None self.nextToken = None self.nextTokenVal = None if (identStart == None): identStart = "[a-zA-Z_/]" if (tokenSep == None): tokenSep = "[^a-zA-Z0-9_/:]" self.identStart = re.compile(identStart) self.tokenSep = re.compile(tokenSep) self.getNextToken() # Prime the pump def isEscaped(text, index): if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))): return True return False isEscaped = staticmethod(isEscaped) def findClosingQuote(text, index, char): index = index + 1 while True: endIndex = text.find(char, index) if (endIndex < 1): return -1 if (Tokenizer.isEscaped(text, endIndex)): index = endIndex+1 else: break return endIndex + 1 findClosingQuote = staticmethod(findClosingQuote) def _findClosing(self, char): if (self.textIndex >= self.textLen): raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex)) index = Tokenizer.findClosingQuote(self.text, self.textIndex, char) if (index < 0): raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1)) return index def next(self): self.getNextToken() if (self.token == None): raise StopIteration() return self.tokenVal def getNextToken(self): self.token = self.nextToken self.tokenVal = self.nextTokenVal while (self.textIndex < self.textLen): c = self.text[self.textIndex] if (c not in string.whitespace): if (c == '"' or c == "'" or c == '`'): endIndex = self._findClosing(c) self.nextToken = self.TOKEN_STRING self.nextTokenVal = self.text[self.textIndex:endIndex] self.textIndex = endIndex return elif (self.identStart.search(c)): endMatch = self.tokenSep.search(self.text, self.textIndex+1) if (endMatch): endIndex = endMatch.start() else: endIndex = self.textLen self.nextToken = self.TOKEN_IDENT self.nextTokenVal = self.text[self.textIndex:endIndex] self.textIndex = endIndex return else: self.nextToken = self.TOKEN_OP endIndex = self.textIndex + 1 if (c == '<' or c == '>' or c == '!' or c == '='): if ((endIndex < self.textLen) and (self.text[endIndex] == '=')): endIndex += 1 elif ((c == '%') and (endIndex < self.textLen)): c = self.text[endIndex] if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']): endIndex += 1 ## self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet self.nextTokenVal = self.text[self.textIndex:endIndex] self.textIndex = endIndex return elif not self.ignoreWhitespace: self.nextToken=self.TOKEN_WS self.nextTokenVal="" while c in string.whitespace: self.nextTokenVal+=c self.textIndex+=1 if self.textIndex==len(self.text): break c=self.text[self.textIndex] return self.textIndex += 1 self.nextToken = None self.nextTokenVal = None def isXPathNonVar(var): """Returns true iff var is a string ("foo" or 'foo') or a number.""" if (var.startswith("'") and var.endswith("'")) or \ (var.startswith('"') and var.endswith('"')): return True # list from XPathToCode, below if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \ "like", "not"]: return True try: t=int(var) return True except TypeError, e: pass except ValueError, e: pass return False def xpathToCode(xpaths, convertBracket=True): if ((xpaths == None) or (len(xpaths) < 1)): return "True" if (not isinstance(xpaths, (list, tuple))): xpaths = [xpaths] result = [] for xpath in xpaths: t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False) expr = [] lastToken=None while t.nextToken != None: t.getNextToken() if (t.token == Tokenizer.TOKEN_WS): expr.append(" ") elif (t.token == Tokenizer.TOKEN_OP): if (t.tokenVal == "="): expr.append("==") elif (t.tokenVal == "[" and convertBracket): expr.append("(") elif (t.tokenVal == "]" and convertBracket): expr.append(")") else: expr.append(t.tokenVal) elif (t.token == Tokenizer.TOKEN_IDENT): if (t.tokenVal == "and"): expr.append(" and ") elif (t.tokenVal == "or"): expr.append(" or ") elif (t.tokenVal == "not"): expr.append(" not ") elif (t.tokenVal == "like"): # REVIEW stoens@activegrid.com 02-Nov-05 -- # This is very limited support for like: # typically like queries look like this: "foo like 'blah%'". # So translate this into "foo.startswith(blah)". # We should use a regular expression to support '%'s in # arbitrary places in the string. After 1.1. if t.nextToken and t.nextTokenVal.endswith("%'"): t.getNextToken() # throw away the "like" token last = len(expr) - 1 expr[last] = "%s.startswith(%s')"\ % (expr[last], t.tokenVal[:-2]) else: # old behavior expr.append(t.tokenVal) elif (t.tokenVal == "count"): expr.append("len") elif (t.tokenVal == 'empty'): expr.append('ctx.isEmptyPath') elif (t.tokenVal == 'true'): expr.append(_parseConstantFunction(t, 'True')) elif (t.tokenVal == 'false'): expr.append(_parseConstantFunction(t, 'False')) elif (t.tokenVal == 'null'): expr.append(_parseConstantFunction(t, 'None')) elif (-1!=t.tokenVal.find(':')): serviceDef, args=_parseServiceFunction(t) # XXX handle serviceDef, args being None for i in range(len(args)): args[i]=xpathToCode(args[i], False) jargs="[%s]" % (",".join(args)) # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice" if serviceDef[0]=='dataservice': expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \ (serviceDef, jargs)) else: expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \ (serviceDef, jargs)) else: if (lastToken==')' or lastToken==']'): wasFunc=True else: wasFunc=False if (t.tokenVal.startswith('/')) and not wasFunc: expr.append(XPATH_ROOT_VAR) expr.append(t.tokenVal.replace('/','.')) lastToken=t.tokenVal else: expr.append(t.tokenVal) if (len(expr) == 2 and expr[0]==" "): expr = "".join(expr) result.append(expr) elif (len(expr) > 1): expr = "".join(expr) result.append("(%s)" % expr) elif (len(expr) > 0): result.append(expr[0]) return " and ".join(result) def _parseArgs(t): args=[] argcon="" if t.tokenVal!='(': return [] if t.nextTokenVal==')': t.getNextToken() return [] depth=1 while(depth!=0): if not t.nextToken: raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal) t.getNextToken() if t.tokenVal=='(': depth+=1 if t.tokenVal==')': depth-=1 if depth==0 or (depth==1 and t.tokenVal==','): args.append(argcon) argcon="" else: argcon+=t.tokenVal return args def _parseServiceFunction(t): """Parses what appears to be a service function call into serviceDefs and args lists. Returns None, None if the serviceFunction appears to be invalid. """ if t.nextTokenVal!='(': return t.tokenVal, None serviceDef=t.tokenVal.split(':') t.getNextToken() args=_parseArgs(t) return serviceDef, args def _parseConstantFunction(t, outputValue): firstVal = t.tokenVal if t.nextTokenVal != '(': return firstVal t.getNextToken() if t.nextTokenVal != ')': return "%s%s" % (firstVal, '(') t.getNextToken() return outputValue def parseDSPredicate(ctx, str, vars, valueList=None): from activegrid.util.utillang import evalCode from activegrid.util.utillang import ObjAsDict if valueList == None: valueList = [] indexVar=0 oldIndexVar=0 sourceStr=str inlinedPredicate=[] qualifications=[] while True: oldIndexVar = indexVar dollarCurlForm = False quoted = False indexVar = sourceStr.find("bpws:getVariableData", indexVar) if indexVar == -1: indexVar = sourceStr.find("${", oldIndexVar) if indexVar == -1: break dollarCurlForm = True if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"): quoted = True if not dollarCurlForm: openParen = sourceStr.find("(", indexVar) if openParen == -1: break closeParen = sourceStr.find(")", openParen) if closeParen == -1: break else: openParen = indexVar+1 closeParen = sourceStr.find("}", openParen) if closeParen == -1: break varRef = sourceStr[openParen+1: closeParen] if varRef.startswith('"') or varRef.startswith("'"): varRef = varRef[1:] if varRef.endswith('"') or varRef.endswith("'"): varRef = varRef[:-1] if isinstance(vars, dict) or isinstance(vars, ObjAsDict): varRefCode = xpathToCode(varRef) value = evalCode(varRefCode, vars) else: value = ctx.evalPath(vars, varRef) inlinedPredicate.append(sourceStr[oldIndexVar:indexVar]) if quoted: inlinedPredicate.append("%s" % value) else: inlinedPredicate.append('%s') valueList.append(value) indexVar = closeParen+1 inlinedPredicate.append(sourceStr[oldIndexVar:]) qualifications.append(''.join(inlinedPredicate)) return qualifications, valueList