545 lines
17 KiB
Python
545 lines
17 KiB
Python
|
|
"""CQL utility functions and subclasses"""
|
|
|
|
from CQLParser import *
|
|
from types import ListType, IntType
|
|
from SRWDiagnostics import *
|
|
|
|
from PyZ3950 import z3950, asn1, oids
|
|
from PyZ3950.zdefs import make_attr
|
|
|
|
# Register the CQL query OID with the asn1 module, typed as a GeneralString.
# NOTE(review): presumably this is what lets CQL queries be encoded in
# Z39.50 PDUs -- confirm against PyZ3950.asn1.
asn1.register_oid (oids.Z3950_QUERY_CQL, asn1.GeneralString)
|
|
|
|
class ZCQLConfig:
    """Lookup tables between CQL index names and Z39.50 attribute values.

    Each table is a class attribute named after a CQL prefix; CIndex.toRPN
    finds a table with ``getattr(zConfig, prefix)``, so the attribute names
    are part of the interface.  attrsToCql performs the reverse mapping for
    the ``dc`` and ``bib1`` tables.
    """

    # CQL context-set short name -> context-set identifier.
    contextSets = {
        'dc': 'info:srw/cql-context-set/1/dc-v1.1',
        'cql': 'info:srw/cql-context-set/1/cql-v1.1',
        'bath': 'http://zing.z3950.org/cql/bath/2.0/',
        'zthes': 'http://zthes.z3950.org/cql/1.0/',
        # The identifier used to carry a trailing space, which made it
        # impossible to match against a resolved prefix URI.
        'ccg': 'http://srw.cheshire3.org/contextSets/ccg/1.1/',
        'rec': 'info:srw/cql-context-set/2/rec-1.0',
        'net': 'info:srw/cql-context-set/2/net-1.0',
    }

    # dc index name -> numeric use attribute value.
    dc = {
        'title': 4,
        'subject': 21,
        'creator': 1003,
        'author': 1003,
        'editor': 1020,
        'contributor': 1018,
        'publisher': 1018,
        'description': 62,
        'date': 30,
        'resourceType': 1031,
        'type': 1031,
        'format': 1034,
        'identifier': 12,
        'source': 1019,
        'language': 54,
        'relation': 1016,
        'coverage': 1016,
        'rights': 1016,
    }

    cql = {
        'anywhere': 1016,
        'serverChoice': 1016,
    }

    # The common bib1 points
    bib1 = {
        "personal_name": 1,
        "corporate_name": 2,
        "conference_name": 3,
        "title": 4,
        "title_series": 5,
        "title_uniform": 6,
        "isbn": 7,
        "issn": 8,
        "lccn": 9,
        "local_number": 12,
        "dewey_number": 13,
        # Was a second "lccn" key, which silently replaced the 9 entry
        # above.  Bib-1 use attribute 16 is the LC call number.
        "lc_call_number": 16,
        "local_classification": 20,
        "subject": 21,
        "subject_lc": 27,
        "subject_local": 29,
        "date": 30,
        "date_publication": 31,
        "date_acquisition": 32,
        "local_call_number": 53,
        "abstract": 62,
        "note": 63,
        "record_type": 1001,
        "name": 1002,
        "author": 1003,
        "author_personal": 1004,
        "identifier": 1007,
        "text_body": 1010,
        "date_modified": 1012,
        "date_added": 1011,
        "concept_text": 1014,
        "any": 1016,
        "default": 1017,
        "publisher": 1018,
        "record_source": 1019,
        "editor": 1020,
        "docid": 1032,
        "anywhere": 1035,
        "sici": 1037,
    }

    # NOTE(review): presumably the Exp-1 attribute set -- confirm.
    exp1 = {
        "explainCategory": 1,
        "humanStringLanguage": 2,
        "databaseName": 3,
        "serverName": 4,
        "attributeSetOID": 5,
        "recordSyntaxOID": 6,
        "tagSetOID": 7,
        "extendedServiceOID": 8,
        "dateAdded": 9,
        "dateChanged": 10,
        "dateExpires": 11,
        "elementSetName": 12,
        "processingContext": 13,
        "processingName": 14,
        "termListName": 15,
        "schemaOID": 16,
        "producer": 17,
        "supplier": 18,
        "availability": 19,
        "proprietary": 20,
        "userFee": 21,
        "variantSetOID": 22,
        "unitSystem": 23,
        "keyword": 24,
        "explainDatabase": 25,
        "processingOID": 26,
    }

    xd1 = {
        "title": 1,
        "subject": 2,
        "name": 3,
        "description": 4,
        "date": 5,
        "type": 6,
        "format": 7,
        "identifier": 8,
        "source": 9,
        # Key was misspelled "langauge".
        "language": 10,
        "relation": 11,
        "coverage": 12,
        "rights": 13,
    }

    util = {
        "record_date": 1,
        "record_agent": 2,
        "record_language": 3,
        "control_number": 4,
        "cost": 5,
        "record_syntax": 6,
        "database_schema": 7,
        "score": 8,
        "rank": 9,
        "result_set_position": 10,
        "all": 11,
        "anywhere": 12,
        "server_choice": 13,
        "wildcard": 14,
        "wildpath": 15,
    }

    # Attribute set assumed for attributes that do not name one.
    defaultAttrSet = z3950.Z3950_ATTRS_BIB1_ov

    def __init__(self):
        # Make the util and xd1 tables reachable under alternative prefixes.
        self.util1 = self.util
        self.xd = self.xd1

    def _nameForValue(self, table, value):
        """Return a name in *table* mapped to *value*, or None.

        Several names can share one value (e.g. creator/author); whichever
        the dictionary yields first is returned.
        """
        for name, number in table.items():
            if number == value:
                return name
        return None

    def attrsToCql(self, attrs):
        """Translate Z39.50 attributes into a CQL (index, relation) pair.

        attrs -- iterable of [attributeSet, attributeType, attributeValue]
                 sequences; a false attributeSet means defaultAttrSet.
                 The sequences are not modified.

        Only bib1 attributes of type 1 (use), 2 (relation), 4 (structure)
        and 6 (completeness) are consulted.  A missing use attribute
        defaults to 4 and a missing relation to 3.

        Returns a tuple (indexType instance, relationType instance).
        """
        byKey = {}
        for attr in attrs:
            # Resolve the default locally instead of writing into the
            # caller's list (which also failed outright for tuples).
            attrSet = attr[0] or self.defaultAttrSet
            byKey[(attrSet, attr[1])] = attr[2]

        bib1Set = z3950.Z3950_ATTRS_BIB1_ov
        use = byKey.get((bib1Set, 1), 4)
        rel = byKey.get((bib1Set, 2), 3)
        struct = byKey.get((bib1Set, 4), None)
        comp = byKey.get((bib1Set, 6), None)

        if not isinstance(use, int):
            # Non-numeric use value: taken verbatim as the index name.
            index = indexType(use)
        else:
            name = self._nameForValue(self.dc, use)
            if name is not None:
                index = indexType("dc.%s" % (name))
            else:
                name = self._nameForValue(self.bib1, use)
                if name is not None:
                    index = indexType("bib1.%s" % (name))
                else:
                    # Unknown point: fall back to the raw number.
                    index = indexType("bib1.%i" % (use))

        relations = ['', '<', '<=', '=', '>=', '>', '<>']
        wordStructures = [2, 6]
        if comp == 3:
            relation = relationType("exact")
        elif rel > 6:
            # Values above 6 have no comparison symbol of their own; they
            # are expressed as modifiers below.
            if struct in wordStructures:
                relation = relationType('any')
            else:
                relation = relationType('=')
        else:
            relation = relationType(relations[rel])

        if rel == 100:
            relation.modifiers.append(modifierClauseType('phonetic'))
        elif rel == 101:
            relation.modifiers.append(modifierClauseType('stem'))
        elif rel == 102:
            relation.modifiers.append(modifierClauseType('relevant'))

        if struct in wordStructures:
            relation.modifiers.append(modifierClauseType('word'))
        elif struct in [4, 5, 100]:
            relation.modifiers.append(modifierClauseType('date'))
        elif struct == 109:
            relation.modifiers.append(modifierClauseType('number'))
        elif struct in [1, 108]:
            relation.modifiers.append(modifierClauseType('string'))
        elif struct == 104:
            relation.modifiers.append(modifierClauseType('uri'))

        return (index, relation)
|
|
|
|
# Shared module-level configuration instance: it is the default `config`
# argument of rpn2cql and is consulted directly by CIndex.toRPN.
zConfig = ZCQLConfig()
|
|
|
|
def rpn2cql(rpn, config=zConfig, attrSet=None):
    """Convert an RPN query structure into a CQL parse tree.

    rpn     -- a (tag, value) pair whose tag is 'op', 'rpnRpnOp' or
               'type_1'.
    config  -- object providing attrsToCql(); defaults to zConfig.
    attrSet -- attribute set applied to attribute elements that do not
               name their own; taken from the query for 'type_1' input.

    Returns a searchClauseType for a single operand, a tripleType for a
    boolean combination, and None for any tag or operand type that is not
    recognised.
    """
    tag = rpn[0]

    if tag == 'op':
        # single search clause
        operand = rpn[1]
        operandType = operand[0]

        if operandType == 'attrTerm':
            clause = operand[1]
            combs = []
            for comb in clause.attributes:
                # An element's own attribute set overrides the query-level
                # one for that element only; it must not leak into the
                # elements that follow.
                elemSet = getattr(comb, 'attributeSet', attrSet)
                if hasattr(comb, 'attributeType'):
                    aType = comb.attributeType
                else:
                    # Broken!
                    aType = 1
                vstruct = comb.attributeValue
                if vstruct[0] == 'numeric':
                    aValue = vstruct[1]
                else:
                    # Complex attr value
                    complexValue = vstruct[1]
                    if hasattr(complexValue, 'list'):
                        aValue = complexValue.list[0][1]
                    else:
                        # semanticAction?
                        aValue = complexValue.semanticAction[0][1]
                combs.append([elemSet, aType, aValue])
            # Now let config do its thing
            (index, relation) = config.attrsToCql(combs)
            return searchClauseType(index, relation,
                                    termType(clause.term[1]))

        elif operandType == 'resultSet':
            # operand is ('resultSet', id): the id is the second item, the
            # first is only the tag.
            return searchClauseType(indexType('cql.resultSetId'),
                                    relationType('='),
                                    termType(operand[1]))

    elif tag == 'rpnRpnOp':
        triple = rpn[1]
        operator = triple.op
        opName = operator[0]
        # CBoolean.toRPN emits 'and-not' for CQL 'not'; map it back.
        # 'and_not' is accepted too in case the ASN.1 layer spells the
        # choice with an underscore -- TODO confirm.
        if opName in ('and-not', 'and_not'):
            opName = 'not'

        ctrip = tripleType()
        # Keep passing attrSet down so nested operands still see the
        # query-level attribute set.
        ctrip.leftOperand = rpn2cql(triple.rpn1, config, attrSet)
        ctrip.rightOperand = rpn2cql(triple.rpn2, config, attrSet)
        ctrip.boolean = booleanType(opName)

        if opName == 'prox':
            prox = operator[1]
            if prox.ordered:
                order = "ordered"
            else:
                order = "unordered"

            rels = ["", "<", "<=", "=", ">=", ">", "<>"]
            relation = rels[prox.relationType]

            units = ["", "character", "word", "sentence", "paragraph",
                     "section", "chapter", "document", "element",
                     "subelement", "elementType", "byte"]
            unit = prox.proximityUnitCode
            if unit[0] == "known":
                unit = units[unit[1]]
            else:
                # Private unit codes are carried as their number, which is
                # what CBoolean.toRPN parses back with int().
                unit = str(unit[1])

            # The modifier holding the unit is named 'unit' (that is the
            # key CBoolean.toRPN looks up).
            mods = [modifierClauseType('distance', relation,
                                       str(prox.distance)),
                    modifierClauseType('unit', '=', unit),
                    modifierClauseType(order)]
            ctrip.boolean.modifiers = mods
        return ctrip

    elif tag == 'type_1':
        q = rpn[1]
        return rpn2cql(q.rpn, config, q.attributeSet)
|
|
|
|
|
|
|
|
|
|
class CSearchClause(SearchClause):
    """Search clause that can render itself as a Z39.50 RPN operand."""

    def convertMetachars(self, t):
        "Convert SRW meta characters in to Cheshire's meta characters"
        # Fail on ?, ^ or * not at the end.
        if t.count("?") != t.count("\\?"):
            diag = Diagnostic28()
            diag.details = "? Unsupported"
            raise diag
        if t.count("^") != t.count("\\^"):
            diag = Diagnostic31()
            diag.details = "^ Unsupported"
            raise diag

        unescapedStars = t.count("*") - t.count("\\*")
        if unescapedStars:
            # Exactly one unescaped * is allowed and it must be the final
            # character.  endswith() is used so a one-character term does
            # not index out of range.
            trailing = t.endswith("*") and not t.endswith("\\*")
            if unescapedStars > 1 or not trailing:
                diag = Diagnostic28()
                diag.details = "Non trailing * unsupported"
                raise diag
            # Strings are immutable, so rebuild rather than assign t[-1].
            t = t[:-1] + "#"

        # Unescape the remaining (escaped) meta characters.
        t = t.replace("\\^", "^")
        t = t.replace("\\?", "?")
        t = t.replace("\\*", "*")
        return t

    def toRPN(self, top=None):
        """Return this clause as an ('op', ...) RPN operand.

        top -- root of the tree being converted; defaults to this clause.

        'any' and 'all' relations are rewritten as an or/and tree of
        single-word clauses, re-parsed, and that tree's RPN is returned.
        Note that this rewrites self.relation in place (value becomes '='
        and a 'cql.word' modifier is appended).
        """
        if not top:
            top = self

        if self.relation.value in ['any', 'all']:
            # Need to split this into and/or tree
            if self.relation.value == 'any':
                joiner = " or "
            else:
                joiner = " and "
            words = self.term.value.split()
            self.relation.value = '='
            # Add 'word' relationModifier
            self.relation.modifiers.append(CModifierClause('cql.word'))

            # Create CQL, parse it, walk new tree
            idxrel = "%s %s" % (self.index.toCQL(), self.relation.toCQL())
            text = ['%s "%s"' % (idxrel, w) for w in words]
            tree = parse(joiner.join(text))
            tree.prefixes = self.prefixes
            tree.parent = self.parent
            tree.config = self.config
            return tree.toRPN(top)

        # attributes, term
        # AttributeElement: attributeType, attributeValue
        # attributeValue ('numeric', n) or ('complex', struct)
        if self.index.value == 'resultsetid':
            return ('op', ('resultSet', self.term.value))

        clause = z3950.AttributesPlusTerm()
        attrs = self.index.toRPN(top)
        if self.term.value.isdigit():
            # All-digit terms are flagged as numbers before the relation
            # is converted.
            self.relation.modifiers.append(CModifierClause('cql.number'))
        attrs.update(self.relation.toRPN(top))

        # Keys are (attributeSet, attributeType); values the attribute value.
        clause.attributes = [make_attr(key[0], key[1], value)
                             for (key, value) in attrs.items()]
        clause.term = self.term.toRPN(top)

        return ('op', ('attrTerm', clause))
|
|
|
|
|
|
class CBoolean(Boolean):
    """Boolean that can render itself as an RPN operator."""

    def toRPN(self, top):
        """Return the operator as a (name, value) pair.

        'not' becomes ('and-not', None); 'prox' becomes
        ('prox', ProximityOperator); anything else is (self.value, None).
        The pair is what CTriple.toRPN stores as the op of an RpnRpnOp and
        what rpn2cql reads back from it.
        """
        if self.value == 'not':
            return ('and-not', None)
        if self.value != 'prox':
            return (self.value, None)

        # Create ProximityOperator
        prox = z3950.ProximityOperator()
        # distance, ordered, proximityUnitCode, relationType

        units = ["", "character", "word", "sentence", "paragraph",
                 "section", "chapter", "document", "element",
                 "subelement", "elementType", "byte"]
        u = self['unit']
        try:
            if u.value in units:
                unitCode = ('known', units.index(u.value))
            else:
                # Uhhhh.....
                unitCode = ('private', int(u.value))
        except (AttributeError, TypeError, ValueError):
            # No unit modifier, or not a number: default to 'word'.
            unitCode = ('known', 2)
        # The field is proximityUnitCode (see the field list above and
        # rpn2cql, which reads it back); it used to be stored as 'unit'.
        prox.proximityUnitCode = unitCode

        d = self['distance']
        try:
            prox.distance = int(d.value)
        except (AttributeError, TypeError, ValueError):
            if unitCode == ('known', 2):
                prox.distance = 1
            else:
                prox.distance = 0

        rels = ["", "<", "<=", "=", ">=", ">", "<>"]
        try:
            prox.relationType = rels.index(d.comparison)
        except (AttributeError, ValueError):
            # No distance modifier or unknown comparison: default to '<='.
            prox.relationType = 2

        prox.ordered = bool(self['ordered'])
        # Returned bare, like the other operators: the extra ('op', ...)
        # wrapper belonged to operands, not to operators.
        return ('prox', prox)
|
|
|
|
class CTriple(Triple):
    """Triple (left operand, boolean, right operand) convertible to RPN."""

    def toRPN(self, top=None):
        """rpnRpnOp"""
        # The outermost node acts as the root when none is passed in.
        root = top or self

        node = z3950.RpnRpnOp()
        node.rpn1 = self.leftOperand.toRPN(root)
        node.rpn2 = self.rightOperand.toRPN(root)
        node.op = self.boolean.toRPN(root)
        return ('rpnRpnOp', node)
|
|
|
|
|
|
class CIndex(Index):
    """Index that can render itself as Z39.50 attributes."""

    def toRPN(self, top):
        """Return a dict mapping (attributeSet, attributeType) to a value.

        top -- root of the tree being converted.  If it has a true
               ``config`` attribute the index is resolved through that
               configuration; otherwise the zConfig tables and the
               Z39.50 attribute-set registry in ``oids`` are used.

        Raises Diagnostic15 (unknown context set) or Diagnostic16 (unknown
        index) when a configuration is present, and ValueError when the
        prefix cannot be resolved without one.
        """
        self.resolvePrefix()
        pf = self.prefix
        if not pf and self.prefixURI:
            # We have a default: find the short name for its URI.
            for (name, uri) in zConfig.contextSets.items():
                if uri == self.prefixURI:
                    pf = name
                    break

        config = getattr(top, 'config', None)
        if config:
            return self._attrsFromConfig(config, pf)

        # Default BIB1
        attrSet = oids.oids['Z3950']['ATTRS']['BIB1']['oid']
        knownSets = oids.oids['Z3950']['ATTRS']

        # Only dictionaries on zConfig are mapping tables; a prefix that
        # happens to name a method or other attribute is not one.
        table = None
        if pf:
            table = getattr(zConfig, pf, None)

        if isinstance(table, dict):
            # Unknown names within a known table are passed through as-is.
            # NOTE(review): the attribute set stays BIB1 here even for
            # tables such as exp1/xd1/util -- confirm that is intended.
            val = table.get(self.value, self.value)
        elif pf and pf.upper() in knownSets:
            attrSet = knownSets[pf.upper()]['oid']
            if self.value.isdigit():
                # bib1.1018
                val = int(self.value)
            else:
                # complex attribute for bib1
                val = self.value
        else:
            # Also reached when no prefix could be determined at all,
            # which used to fail with a TypeError/AttributeError instead.
            raise ValueError("Can't resolve %s" % pf)

        return {(attrSet, 1): val}

    def _attrsFromConfig(self, config, pf):
        """Resolve this index through an SRW configuration object."""
        # Check SRW Configuration
        cqlURI = config.contextSetNamespaces['cql']
        index = self.value
        if self.prefixURI == cqlURI and self.value == "serverchoice":
            # Have to resolve our prefixes etc, so create an index object
            # to do it
            cidx = CIndex(config.defaultIndex)
            cidx.config = config
            cidx.parent = config
            cidx.resolvePrefix()
            pf = cidx.prefix
            index = cidx.value

        if pf not in config.indexHash:
            diag = Diagnostic15()
            diag.details = pf
            diag.message = "Unknown context set"
            raise diag
        if index not in config.indexHash[pf]:
            diag = Diagnostic16()
            diag.details = index
            diag.message = "Unknown index"
            raise diag

        # Need to map from this list to RPN list.  Each entry is
        # (dotted attribute-set OID, attribute type, attribute value).
        attrs = {}
        for entry in config.indexHash[pf][index]:
            attrSet = asn1.OidVal([int(part) for part in entry[0].split('.')])
            attrType = int(entry[1])
            if entry[2].isdigit():
                val = int(entry[2])
            else:
                val = entry[2]
            attrs[(attrSet, attrType)] = val
        return attrs
|
|
|
|
|
|
class CRelation(Relation):
    """Relation that can render itself as bib1 attributes."""

    def toRPN(self, top):
        """Return {(bib1 attribute set, type): value} for this relation.

        Attribute types 2, 3, 4 and 5 may be produced; a type is left out
        when its value is unset or zero.
        """
        bib1 = z3950.Z3950_ATTRS_BIB1_ov
        symbols = ['', '<', '<=', '=', '>=', '>', '<>']

        # Type 2: from the relation itself, overridden by a modifier.
        relationAttr = None
        if self.value in symbols:
            relationAttr = symbols.index(self.value)
        elif self.value in ['exact', 'scr']:
            relationAttr = 3
        elif self.value == 'within':
            relationAttr = 104
        for (modifier, code) in (('relevant', 102), ('stem', 101),
                                 ('phonetic', 100)):
            if self[modifier]:
                relationAttr = code
                break

        # Types 4 and 5: from the first matching modifier.
        structureAttr = None
        truncationAttr = None
        if self['number']:
            structureAttr = 109
            truncationAttr = 100
        elif self['date']:
            structureAttr = 5
        elif self['word']:
            structureAttr = 2

        # Type 3 (and type 5 for 'exact').
        if self.value == 'exact':
            positionAttr = 1
            truncationAttr = 100
        else:
            positionAttr = 3

        candidates = ((2, relationAttr), (3, positionAttr),
                      (4, structureAttr), (5, truncationAttr))
        attrs = {}
        for (attrType, attrValue) in candidates:
            if attrValue:
                attrs[(bib1, attrType)] = attrValue
        return attrs
|
|
|
|
|
|
class CTerm(Term):
    """Term that can render itself as an RPN term."""

    def toRPN(self, top):
        """Return the term as a ('general', value) pair; top is unused."""
        rpnTerm = ('general', self.value)
        return rpnTerm
|
|
|
|
class CModifierClause(ModifierClause):
    """ModifierClause subclass; adds nothing to the base class."""
|
|
|
|
class CModifierType(ModifierType):
    """ModifierType subclass; adds nothing to the base class."""
|
|
|
|
|
|
|
|
|
|
|