#!/usr/local/bin/python2.3
"""
Parser for PQF (Prefix Query Format) directly into an RPN structure.

PQF docs: http://www.indexdata.dk/yaz/doc/tools.html

NB: This does not implement /everything/ in PQF.  In particular, an
attribute list placed in front of an operator is not carried over to the
operands, e.g.

    @attr 2=3 @and @attr 1=4 title @attr 1=1003 author

(where 2 should be 3 for all subsequent clauses) is not supported.
"""

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
from PyZ3950 import z3950, oids, asn1
from PyZ3950.zdefs import make_attr
from types import IntType, StringType, ListType
from PyZ3950.CQLParser import CQLshlex


# Attribute sets given as dotted OIDs are recognised by this prefix.
_Z3950_OID_PREFIX = "1.2.840.10003."


def _resolve_attrset(name):
    """Return the OID for an attribute set named by dotted OID or by name.

    A value starting with "1.2.840.10003." is converted component by
    component into an ``asn1.OidVal``; anything else is upper-cased and
    looked up in ``oids.oids['Z3950']['ATTRS']``.

    Raises KeyError for an unknown name and ValueError for a dotted OID
    with a non-integer component.
    """
    if name.startswith(_Z3950_OID_PREFIX):
        return asn1.OidVal([int(part) for part in name.split('.')])
    return oids.oids['Z3950']['ATTRS'][name.upper()]['oid']


class PQFParser:
    """Parser that turns a stream of PQF tokens into nested RPN tuples.

    The parser keeps one token of lookahead: ``currentToken`` is the token
    being processed and ``nextToken`` is the one that follows it.
    """

    lexer = None
    currentToken = None
    nextToken = None

    def __init__(self, l):
        """Store the lexer ``l`` and load the first token into ``nextToken``."""
        self.lexer = l
        self.fetch_token()

    def fetch_token(self):
        """ Read ahead one token """
        tok = self.lexer.get_token()
        self.currentToken = self.nextToken
        self.nextToken = tok

    def is_boolean(self):
        """Return true if the current token is @and, @or, @not or @prox."""
        return self.currentToken.lower() in ('@and', '@or', '@not', '@prox')

    def defaultClause(self, t):
        """Wrap term ``t`` in the default clause ("anywhere =").

        The clause carries BIB1 attributes 1=1016 and 2=3.
        """
        clause = z3950.AttributesPlusTerm()
        attrs = [(oids.Z3950_ATTRS_BIB1, 1, 1016),
                 (oids.Z3950_ATTRS_BIB1, 2, 3)]
        clause.attributes = [make_attr(*e) for e in attrs]
        clause.term = t
        return ('op', ('attrTerm', clause))

    # Grammar fns

    def query(self):
        """Parse a complete query and return ``('type_1', RPNQuery)``.

        Raises ValueError if tokens remain after the query structure has
        been consumed.
        """
        attrset = self.top_set()
        qst = self.query_struct()

        # Pull in a (hopefully) null token
        self.fetch_token()
        if self.currentToken:
            # Nope, unprocessed tokens remain
            raise ValueError("unprocessed tokens remain at %r"
                             % (self.currentToken,))

        rpnq = z3950.RPNQuery()
        if attrset:
            rpnq.attributeSet = attrset
        else:
            # No @attrset given: fall back to BIB1.
            rpnq.attributeSet = oids.Z3950_ATTRS_BIB1_ov
        rpnq.rpn = qst
        return ('type_1', rpnq)

    def top_set(self):
        """Consume a leading ``@attrset <set>`` and return its OID, or None."""
        if self.nextToken != '@attrset':
            return None
        # First fetch makes '@attrset' current, second the set itself.
        self.fetch_token()
        self.fetch_token()
        return _resolve_attrset(self.currentToken)

    # This totally ignores the BNF, but does the 'right' thing
    def query_struct(self):
        """Parse one query structure starting at the next token.

        Handles an attribute list plus term, a boolean/proximity operator
        with two operands, ``@set``, a ``{ ... }`` group, or a bare term
        (which gets the default clause).

        Raises ValueError if the input ends where a query structure is
        expected or a ``{`` group is not closed by ``}``.
        """
        self.fetch_token()
        if not self.currentToken:
            # Truncated input such as "@and foo" or an empty query.
            raise ValueError("unexpected end of query")

        if self.currentToken == '@attr':
            attrs = []
            while self.currentToken == '@attr':
                attrs.append(self.attr_spec())
                self.fetch_token()
            t = self.term()
            # Now we have attrs + term
            clause = z3950.AttributesPlusTerm()
            clause.attributes = [make_attr(*e) for e in attrs]
            clause.term = t
            return ('op', ('attrTerm', clause))
        elif self.is_boolean():
            # @operator query query
            return self.complex()
        elif self.currentToken == '@set':
            return self.result_set()
        elif self.currentToken == "{":
            # Parens
            s = self.query_struct()
            if self.nextToken != "}":
                raise ValueError("expected '}' but found %r"
                                 % (self.nextToken,))
            self.fetch_token()
            return s
        else:
            t = self.term()
            return self.defaultClause(t)

    def term(self):
        """Parse a term at the current token and return ``(type, text)``.

        An optional ``@term <type>`` prefix selects the term type (general,
        string, numeric or external); the default is 'general'.  One pair
        of enclosing double quotes is stripped from the term text.

        Raises KeyError for an unknown ``@term`` type and ValueError if the
        input ends before the term.
        """
        # Need to split to allow attrlist then @term
        term_type = 'general'
        if self.currentToken == '@term':
            self.fetch_token()
            types = {'general': 'general',
                     'string': 'characterString',
                     'numeric': 'numeric',
                     'external': 'external'}
            term_type = types[self.currentToken.lower()]
            self.fetch_token()

        if not self.currentToken:
            raise ValueError("unexpected end of query: term expected")

        if self.currentToken[0] == '"' and self.currentToken[-1] == '"':
            term = self.currentToken[1:-1]
        else:
            term = self.currentToken
        return (term_type, term)

    def result_set(self):
        """Parse the name following ``@set`` into a resultSet operand."""
        self.fetch_token()
        return ('op', ('resultSet', self.currentToken))

    def attr_spec(self):
        """Parse ``[attrset] type=value`` following an ``@attr`` token.

        Returns ``(attrset, type, value)`` where ``attrset`` is an OID or
        None, ``type`` is an int and ``value`` is an int when it consists
        only of digits, otherwise the original string.

        Raises ValueError if the specification is not of the form
        ``<digits>=<value>`` and KeyError for an unknown attribute set name.
        """
        # @attr is CT
        self.fetch_token()
        if self.currentToken.find('=') == -1:
            # No '=' in this token, so it names the attribute set.
            attrset = _resolve_attrset(self.currentToken)
            self.fetch_token()
        else:
            attrset = None
        # May raise ValueError when the token does not split in two.
        (atype, val) = self.currentToken.split('=')
        if not atype.isdigit():
            raise ValueError("attribute type must be numeric: %r" % (atype,))
        atype = int(atype)
        if val.isdigit():
            val = int(val)
        return (attrset, atype, val)

    def complex(self):
        """Parse ``@operator query query`` into ``('rpnRpnOp', RpnRpnOp)``."""
        op = z3950.RpnRpnOp()
        op.op = self.boolean()
        op.rpn1 = self.query_struct()
        op.rpn2 = self.query_struct()
        return ('rpnRpnOp', op)

    def boolean(self):
        """Parse the operator at the current token.

        Returns ``(name, None)`` for and/or, ``('and-not', None)`` for
        @not, and ``('prox', ProximityOperator)`` for @prox, which is
        followed by six parameters:

            @prox exclusion distance ordered relation which-code unit-code

        Raises ValueError if a numeric proximity parameter is missing or
        not made of digits, or if which-code does not start with 'k' or 'p'.
        """
        name = self.currentToken[1:].lower()
        if name == 'not':
            return ('and-not', None)
        if name != 'prox':
            return (name, None)

        params = []
        for _ in range(6):
            self.fetch_token()
            params.append(self.currentToken)
        (exclusion, distance, ordered, relation, which, unit) = params

        for value in (exclusion, distance, ordered, relation, unit):
            # "not value" also covers input that ends before all six
            # parameters were read.
            if not value or not value.isdigit():
                raise ValueError("invalid @prox parameter: %r" % (value,))

        prox = z3950.ProximityOperator()
        prox.relationType = int(relation)
        # Convert via int first: bool() of any non-empty string, "0"
        # included, is True.
        prox.exclusion = bool(int(exclusion))
        prox.distance = int(distance)
        prox.ordered = bool(int(ordered))

        # NOTE(review): the Z39.50 ASN.1 names this field proximityUnitCode;
        # confirm that 'unit' is the attribute PyZ3950 expects.
        code = (which or '')[:1]
        if code == 'k':
            prox.unit = ('known', int(unit))
        elif code == 'p':
            prox.unit = ('private', int(unit))
        else:
            raise ValueError("invalid @prox which-code: %r" % (which,))
        return (name, prox)


def parse(q):
    """Parse the PQF string ``q`` and return ``('type_1', RPNQuery)``.

    Raises ValueError for malformed queries; KeyError may propagate for
    unknown attribute set names or ``@term`` types.
    """
    query = StringIO(q)
    lexer = CQLshlex(query)
    # Override CQL's wordchars list to include /=><()
    lexer.wordchars += "!@#$%^&*-+[];,.?|~`:\\><=/'()"

    parser = PQFParser(lexer)
    return parser.query()


def rpn2pqf(rpn):
    """Turn an RPN structure into its PQF equivalent string.

    ``rpn`` is a ``(tag, value)`` pair whose tag is 'type_1', 'rpnRpnOp'
    or 'op'.  Any other tag, or an 'op' that is neither 'attrTerm' nor
    'resultSet', yields None.

    NOTE: for a proximity operator only the bare "@prox" keyword is
    written; its parameters are not serialised.
    """
    tag = rpn[0]
    q = rpn[1]
    if tag == 'type_1':
        # Top level
        if q.attributeSet:
            query = '@attrset %s ' % ('.'.join(map(str, q.attributeSet.lst)))
        else:
            query = ""
        rest = rpn2pqf(q.rpn)
        return "%s%s" % (query, rest)
    elif tag == 'rpnRpnOp':
        # boolean
        if q.op[0] in ['and', 'or']:
            query = ['@', q.op[0], ' ']
        elif q.op[0] == 'and-not':
            query = ['@not ']
        else:
            # XXX proximity parameters are not written out
            query = ['@prox']
        query.append(' ')
        query.append(rpn2pqf(q.rpn1))
        query.append(' ')
        query.append(rpn2pqf(q.rpn2))
        return ''.join(query)
    elif tag == 'op':
        if q[0] == 'attrTerm':
            query = []
            for a in q[1].attributes:
                if a.attributeValue[0] == 'numeric':
                    val = str(a.attributeValue[1])
                else:
                    val = a.attributeValue[1].list[0][1]
                query.append("@attr %i=%s " % (a.attributeType, val))
            query.append('"%s" ' % (q[1].term[1]))
            return ''.join(query)
        elif q[0] == 'resultSet':
            return "@set %s" % (q[1])