"""CQL utility functions and subclasses""" from CQLParser import * from types import ListType, IntType from SRWDiagnostics import * from PyZ3950 import z3950, asn1, oids from PyZ3950.zdefs import make_attr asn1.register_oid (oids.Z3950_QUERY_CQL, asn1.GeneralString) class ZCQLConfig: contextSets = {'dc' : 'info:srw/cql-context-set/1/dc-v1.1', 'cql' : 'info:srw/cql-context-set/1/cql-v1.1', 'bath' : 'http://zing.z3950.org/cql/bath/2.0/', 'zthes' : 'http://zthes.z3950.org/cql/1.0/', 'ccg' : 'http://srw.cheshire3.org/contextSets/ccg/1.1/ ', 'rec' : 'info:srw/cql-context-set/2/rec-1.0', 'net' : 'info:srw/cql-context-set/2/net-1.0'} dc = {'title' : 4, 'subject' : 21, 'creator' : 1003, 'author' : 1003, 'editor' : 1020, 'contributor' : 1018, 'publisher' : 1018, 'description' : 62, 'date' : 30, 'resourceType' : 1031, 'type' : 1031, 'format' : 1034, 'identifier' : 12, 'source' : 1019, 'language' : 54, 'relation' : 1016, 'coverage' : 1016, 'rights' : 1016 } cql = {'anywhere' : 1016, 'serverChoice' : 1016} # The common bib1 points bib1 = {"personal_name" : 1, "corporate_name" : 2, "conference_name" : 3, "title" : 4, "title_series" : 5, "title_uniform" : 6, "isbn" : 7, "issn" : 8, "lccn" : 9, "local_number" : 12, "dewey_number" : 13, "lccn" : 16, "local_classification" : 20, "subject" : 21, "subject_lc" : 27, "subject_local" : 29, "date" : 30, "date_publication" : 31, "date_acquisition" : 32, "local_call_number" : 53, "abstract" : 62, "note" : 63, "record_type" : 1001, "name" : 1002, "author" : 1003, "author_personal" : 1004, "identifier" : 1007, "text_body" : 1010, "date_modified" : 1012, "date_added" : 1011, "concept_text" : 1014, "any" : 1016, "default" : 1017, "publisher" : 1018, "record_source" : 1019, "editor" : 1020, "docid" : 1032, "anywhere" : 1035, "sici" : 1037 } exp1 = {"explainCategory" :1, "humanStringLanguage" : 2, "databaseName" : 3, "serverName" : 4, "attributeSetOID" : 5, "recordSyntaxOID" : 6, "tagSetOID" : 7, "extendedServiceOID" : 8, "dateAdded" : 9, "dateChanged" : 10, "dateExpires" : 11, "elementSetName" : 12, "processingContext" : 13, "processingName" : 14, "termListName" : 15, "schemaOID" : 16, "producer" : 17, "supplier" : 18, "availability" : 19, "proprietary" : 20, "userFee" : 21, "variantSetOID" : 22, "unitSystem" : 23, "keyword" : 24, "explainDatabase" : 25, "processingOID" : 26 } xd1 = {"title" : 1, "subject" : 2, "name" : 3, "description" : 4, "date" : 5, "type" : 6, "format" : 7, "identifier" : 8, "source" : 9, "langauge" : 10, "relation" : 11, "coverage" : 12, "rights" : 13} util = {"record_date" : 1, "record_agent" : 2, "record_language" : 3, "control_number" : 4, "cost" : 5, "record_syntax" : 6, "database_schema" : 7, "score" : 8, "rank" : 9, "result_set_position" : 10, "all" : 11, "anywhere" : 12, "server_choice" : 13, "wildcard" : 14, "wildpath" : 15} defaultAttrSet = z3950.Z3950_ATTRS_BIB1_ov def __init__(self): self.util1 = self.util self.xd = self.xd1 def attrsToCql(self, attrs): hash = {} for c in attrs: if (not c[0]): c[0] = self.defaultAttrSet hash[(c[0], c[1])] = c[2] bib1 = z3950.Z3950_ATTRS_BIB1_ov use = hash.get((bib1, 1), 4) rel = hash.get((bib1, 2), 3) posn = hash.get((bib1, 3), None) struct = hash.get((bib1, 4), None) trunc = hash.get((bib1, 5), None) comp = hash.get((bib1, 6), None) index = None if (not isinstance(use, int)): index = indexType(use) else: for v in self.dc.items(): if use == v[1]: index = indexType("dc.%s" % (v[0])) break if not index: for v in self.bib1.items(): if (use == v[1]): index = indexType("bib1.%s" % (v[0])) break if not index: index = indexType("bib1.%i" % (use)) relations = ['', '<', '<=', '=', '>=', '>', '<>'] if (comp == 3): relation = relationType("exact") elif (rel > 6): if struct in [2, 6]: relation = relationType('any') else: relation = relationType('=') else: relation = relationType(relations[rel]) if (rel == 100): relation.modifiers.append(modifierClauseType('phonetic')) elif (rel == 101): relation.modifiers.append(modifierClauseType('stem')) elif (rel == 102): relation.modifiers.append(modifierClauseType('relevant')) if (struct in [2, 6]): relation.modifiers.append(modifierClauseType('word')) elif (struct in [4, 5, 100]): relation.modifiers.append(modifierClauseType('date')) elif (struct == 109): relation.modifiers.append(modifierClauseType('number')) elif (struct in [1, 108]): relation.modifiers.append(modifierClauseType('string')) elif (struct == 104): relation.modifiers.append(modifierClauseType('uri')) return (index, relation) zConfig = ZCQLConfig() def rpn2cql(rpn, config=zConfig, attrSet=None): if rpn[0] == 'op': # single search clause op = rpn[1] type = op[0] if type == 'attrTerm': attrs = op[1].attributes term = op[1].term combs = [] for comb in attrs: if hasattr(comb, 'attributeSet'): attrSet = comb.attributeSet if hasattr(comb, 'attributeType'): aType = comb.attributeType else: # Broken! aType = 1 vstruct = comb.attributeValue if (vstruct[0] == 'numeric'): aValue = vstruct[1] else: # Complex attr value vstruct = vstruct[1] if (hasattr(vstruct, 'list')): aValue = vstruct.list[0][1] else: # semanticAction? aValue = vstruct.semanticAction[0][1] combs.append([attrSet, aType, aValue]) # Now let config do its thing (index, relation) = config.attrsToCql(combs) return searchClauseType(index, relation, termType(term[1])) elif type == 'resultSet': return searchClauseType(indexType('cql.resultSetId'), relationType('='), termType(op[0])) elif rpn[0] == 'rpnRpnOp': triple = rpn[1] bool = triple.op lhs = triple.rpn1 rhs = triple.rpn2 ctrip = tripleType() ctrip.leftOperation = rpn2cql(lhs, config) ctrip.rightOperand = rpn2cql(rhs, config) ctrip.boolean = booleanType(bool[0]) if bool[0] == 'prox': distance = bool[1].distance order = bool[1].ordered if order: order = "ordered" else: order = "unordered" relation = bool[1].relationType rels = ["", "<", "<=", "=", ">=", ">", "<>"] relation = rels[relation] unit = bool[1].proximityUnitCode units = ["", "character", "word", "sentence", "paragraph", "section", "chapter", "document", "element", "subelement", "elementType", "byte"] if unit[0] == "known": unit = units[unit[1]] mods = [cql.modifierClauseType('distance', relation, str(distance)), cql.modifierClauseType('word', '=', unit), cql.modifierClauseType(order)] ctrip.boolean.modifiers = mods return ctrip elif rpn[0] == 'type_1': q = rpn[1] return rpn2cql(q.rpn, config, q.attributeSet) class CSearchClause(SearchClause): def convertMetachars(self, t): "Convert SRW meta characters in to Cheshire's meta characters" # Fail on ?, ^ or * not at the end. if (count(t, "?") != count(t, "\\?")): diag = Diagnostic28() diag.details = "? Unsupported" raise diag elif (count(t, "^") != count(t, "\\^")): diag = Diagnostic31() diag.details = "^ Unsupported" raise diag elif (count(t, "*") != count(t, "\\*")): if t[-1] != "*" or t[-2] == "\\": diag = Diagnostic28() diag.details = "Non trailing * unsupported" raise diag else: t[-1] = "#" t = replace(t, "\\^", "^") t = replace(t, "\\?", "?") t = replace(t, "\\*", "*") return t def toRPN(self, top=None): if not top: top = self if (self.relation.value in ['any', 'all']): # Need to split this into and/or tree if (self.relation.value == 'any'): bool = " or " else: bool = " and " words = self.term.value.split() self.relation.value = '=' # Add 'word' relationModifier self.relation.modifiers.append(CModifierClause('cql.word')) # Create CQL, parse it, walk new tree idxrel = "%s %s" % (self.index.toCQL(), self.relation.toCQL()) text = [] for w in words: text.append('%s "%s"' % (idxrel, w)) cql = bool.join(text) tree = parse(cql) tree.prefixes = self.prefixes tree.parent = self.parent tree.config = self.config return tree.toRPN(top) else: # attributes, term # AttributeElement: attributeType, attributeValue # attributeValue ('numeric', n) or ('complex', struct) if (self.index.value == 'resultsetid'): return ('op', ('resultSet', self.term.value)) clause = z3950.AttributesPlusTerm() attrs = self.index.toRPN(top) if (self.term.value.isdigit()): self.relation.modifiers.append(CModifierClause('cql.number')) relattrs = self.relation.toRPN(top) attrs.update(relattrs) butes =[] for e in attrs.iteritems(): butes.append((e[0][0], e[0][1], e[1])) clause.attributes = [make_attr(*e) for e in butes] clause.term = self.term.toRPN(top) return ('op', ('attrTerm', clause)) class CBoolean(Boolean): def toRPN(self, top): op = self.value if (self.value == 'not'): op = 'and-not' elif (self.value == 'prox'): # Create ProximityOperator prox = z3950.ProximityOperator() # distance, ordered, proximityUnitCode, relationType u = self['unit'] try: units = ["", "character", "word", "sentence", "paragraph", "section", "chapter", "document", "element", "subelement", "elementType", "byte"] if (u.value in units): prox.unit = ('known', units.index(u.value)) else: # Uhhhh..... prox.unit = ('private', int(u.value)) except: prox.unit = ('known', 2) d = self['distance'] try: prox.distance = int(d.value) except: if (prox.unit == ('known', 2)): prox.distance = 1 else: prox.distance = 0 try: rels = ["", "<", "<=", "=", ">=", ">", "<>"] prox.relationType = rels.index(d.comparison) except: prox.relationType = 2 prox.ordered = bool(self['ordered']) return ('op', ('prox', prox)) return (op, None) class CTriple(Triple): def toRPN(self, top=None): """rpnRpnOp""" if not top: top = self op = z3950.RpnRpnOp() op.rpn1 = self.leftOperand.toRPN(top) op.rpn2 = self.rightOperand.toRPN(top) op.op = self.boolean.toRPN(top) return ('rpnRpnOp', op) class CIndex(Index): def toRPN(self, top): self.resolvePrefix() pf = self.prefix if (not pf and self.prefixURI): # We have a default for k in zConfig.contextSets: if zConfig.contextSets[k] == self.prefixURI: pf = k break # Default BIB1 set = oids.oids['Z3950']['ATTRS']['BIB1']['oid'] if (hasattr(top, 'config') and top.config): config = top.config # Check SRW Configuration cql = config.contextSetNamespaces['cql'] index = self.value if self.prefixURI == cql and self.value == "serverchoice": # Have to resolve our prefixes etc, so create an index object to do it index = config.defaultIndex cidx = CIndex(index) cidx.config = config cidx.parent = config cidx.resolvePrefix() pf = cidx.prefix index = cidx.value if config.indexHash.has_key(pf): if config.indexHash[pf].has_key(index): idx = config.indexHash[pf][index] # Need to map from this list to RPN list attrs = {} for i in idx: set = asn1.OidVal(map(int, i[0].split('.'))) type = int(i[1]) if (i[2].isdigit()): val = int(i[2]) else: val = i[2] attrs[(set, type)] = val return attrs else: diag = Diagnostic16() diag.details = index diag.message = "Unknown index" raise diag else: diag = Diagnostic15() diag.details = pf diag.message = "Unknown context set" raise diag elif (hasattr(zConfig, pf)): mp = getattr(zConfig, pf) if (mp.has_key(self.value)): val = mp[self.value] else: val = self.value elif (oids.oids['Z3950']['ATTRS'].has_key(pf.upper())): set = oids.oids['Z3950']['ATTRS'][pf.upper()]['oid'] if (self.value.isdigit()): # bib1.1018 val = int(self.value) else: # complex attribute for bib1 val = self.value else: print "Can't resolve %s" % pf raise(ValueError) return {(set, 1) : val} class CRelation(Relation): def toRPN(self, top): rels = ['', '<', '<=', '=', '>=', '>', '<>'] set = z3950.Z3950_ATTRS_BIB1_ov vals = [None, None, None, None, None, None, None] if self.value in rels: vals[2] = rels.index(self.value) elif self.value in ['exact', 'scr']: vals[2] = 3 elif (self.value == 'within'): vals[2] = 104 if self['relevant']: vals[2] = 102 elif self['stem']: vals[2] = 101 elif self['phonetic']: vals[2] = 100 if self['number']: vals[4] = 109 vals[5] = 100 elif self['date']: vals[4] = 5 elif self['word']: vals[4] = 2 if self.value == 'exact': vals[3] = 1 vals[5] = 100 # vals[6] = 3 else: vals[3] = 3 # vals[6] = 1 attrs = {} for x in range(1,7): if vals[x]: attrs[(z3950.Z3950_ATTRS_BIB1_ov, x)] = vals[x] return attrs class CTerm(Term): def toRPN(self, top): return ('general', self.value) class CModifierClause(ModifierClause): pass class CModifierType(ModifierType): pass