This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.

545 lines
17 KiB
Raw Normal View History

"""CQL utility functions and subclasses"""
from CQLParser import *
from types import ListType, IntType
from SRWDiagnostics import *
from PyZ3950 import z3950, asn1, oids
from PyZ3950.zdefs import make_attr
asn1.register_oid (oids.Z3950_QUERY_CQL, asn1.GeneralString)
class ZCQLConfig:
    """Lookup tables and helpers for mapping CQL indexes to Z39.50 attributes.

    Each class-level dictionary is named after a context-set prefix and maps
    an index name to the attribute value used for it.  ``attrsToCql`` does
    the reverse mapping (attribute values back to an index and relation)
    using the ``dc`` and ``bib1`` tables.
    """

    # Short context-set prefix -> identifying URI.
    contextSets = {'dc' : 'info:srw/cql-context-set/1/dc-v1.1',
                   'cql' : 'info:srw/cql-context-set/1/cql-v1.1',
                   'bath' : '',
                   'zthes' : '',
                   'ccg' : ' ',
                   'rec' : 'info:srw/cql-context-set/2/rec-1.0',
                   'net' : 'info:srw/cql-context-set/2/net-1.0'}

    dc = {'title' : 4,
          'subject' : 21,
          'creator' : 1003,
          'author' : 1003,
          'editor' : 1020,
          'contributor' : 1018,
          'publisher' : 1018,
          'description' : 62,
          'date' : 30,
          'resourceType' : 1031,
          'type' : 1031,
          'format' : 1034,
          'identifier' : 12,
          'source' : 1019,
          'language' : 54,
          'relation' : 1016,
          'coverage' : 1016,
          'rights' : 1016
          }

    cql = {'anywhere' : 1016,
           'serverChoice' : 1016}

    # The common bib1 points
    bib1 = {"personal_name" : 1,
            "corporate_name" : 2,
            "conference_name" : 3,
            "title" : 4,
            "title_series" : 5,
            "title_uniform" : 6,
            "isbn" : 7,
            "issn" : 8,
            "lccn" : 9,
            "local_number" : 12,
            "dewey_number" : 13,
            # This entry used to be a second "lccn" key, which silently
            # replaced the value 9 above.  Bib-1 use attribute 16 is the
            # LC call number, so it gets its own name.
            "lc_call_number" : 16,
            "local_classification" : 20,
            "subject" : 21,
            "subject_lc" : 27,
            "subject_local" : 29,
            "date" : 30,
            "date_publication" : 31,
            "date_acquisition" : 32,
            "local_call_number" : 53,
            "abstract" : 62,
            "note" : 63,
            "record_type" : 1001,
            "name" : 1002,
            "author" : 1003,
            "author_personal" : 1004,
            "identifier" : 1007,
            "text_body" : 1010,
            "date_modified" : 1012,
            "date_added" : 1011,
            "concept_text" : 1014,
            "any" : 1016,
            "default" : 1017,
            "publisher" : 1018,
            "record_source" : 1019,
            "editor" : 1020,
            "docid" : 1032,
            "anywhere" : 1035,
            "sici" : 1037
            }

    exp1 = {"explainCategory" :1,
            "humanStringLanguage" : 2,
            "databaseName" : 3,
            "serverName" : 4,
            "attributeSetOID" : 5,
            "recordSyntaxOID" : 6,
            "tagSetOID" : 7,
            "extendedServiceOID" : 8,
            "dateAdded" : 9,
            "dateChanged" : 10,
            "dateExpires" : 11,
            "elementSetName" : 12,
            "processingContext" : 13,
            "processingName" : 14,
            "termListName" : 15,
            "schemaOID" : 16,
            "producer" : 17,
            "supplier" : 18,
            "availability" : 19,
            "proprietary" : 20,
            "userFee" : 21,
            "variantSetOID" : 22,
            "unitSystem" : 23,
            "keyword" : 24,
            "explainDatabase" : 25,
            "processingOID" : 26
            }

    xd1 = {"title" : 1,
           "subject" : 2,
           "name" : 3,
           "description" : 4,
           "date" : 5,
           "type" : 6,
           "format" : 7,
           "identifier" : 8,
           "source" : 9,
           "language" : 10,
           # Historical misspelling, kept so existing queries still resolve.
           "langauge" : 10,
           "relation" : 11,
           "coverage" : 12,
           "rights" : 13}

    util = {"record_date" : 1,
            "record_agent" : 2,
            "record_language" : 3,
            "control_number" : 4,
            "cost" : 5,
            "record_syntax" : 6,
            "database_schema" : 7,
            "score" : 8,
            "rank" : 9,
            "result_set_position" : 10,
            "all" : 11,
            "anywhere" : 12,
            "server_choice" : 13,
            "wildcard" : 14,
            "wildpath" : 15}

    # Attribute set assumed for attributes that do not name one.
    defaultAttrSet = z3950.Z3950_ATTRS_BIB1_ov

    def __init__(self):
        # Expose two tables under a second attribute name each.
        self.util1 = self.util
        self.xd = self.xd1

    def attrsToCql(self, attrs):
        """Turn Z39.50 attribute triples into a CQL index and relation.

        attrs is a sequence of (attributeSet, attributeType, attributeValue)
        entries; a false attributeSet means ``defaultAttrSet``.  The input
        entries are not modified.

        Only bib1 attribute types 1 (looked up as ``use``), 2 (``rel``),
        4 (``struct``) and 6 (``comp``) influence the result.

        Returns an ``(index, relation)`` tuple built with ``indexType`` and
        ``relationType``.
        """
        lookup = {}
        for attr in attrs:
            attr_set = attr[0] or self.defaultAttrSet
            lookup[(attr_set, attr[1])] = attr[2]

        bib1 = z3950.Z3950_ATTRS_BIB1_ov
        use = lookup.get((bib1, 1), 4)
        rel = lookup.get((bib1, 2), 3)
        struct = lookup.get((bib1, 4), None)
        comp = lookup.get((bib1, 6), None)

        # Resolve the index: prefer a dc name, then a bib1 name, then the
        # raw number.  When several names share a value the last one seen
        # while iterating the table is used.
        index = None
        if not isinstance(use, int):
            index = indexType(use)
        else:
            dc_names = [name for name, value in self.dc.items() if value == use]
            if dc_names:
                index = indexType("dc.%s" % (dc_names[-1]))
            if not index:
                bib1_names = [name for name, value in self.bib1.items()
                              if value == use]
                if bib1_names:
                    index = indexType("bib1.%s" % (bib1_names[-1]))
            if not index:
                index = indexType("bib1.%i" % (use))

        relations = ['', '<', '<=', '=', '>=', '>', '<>']
        if comp == 3:
            relation = relationType("exact")
        elif rel > 6:
            # Not one of the plain comparison operators.
            if struct in [2, 6]:
                relation = relationType('any')
            else:
                relation = relationType('=')
        else:
            relation = relationType(relations[rel])

        # NOTE(review): the statements inside the rel/struct branches were
        # missing from the reviewed copy of this file.  The names for
        # rel 100-102 and struct 2/5/109 mirror what CRelation.toRPN emits
        # for the same modifiers; 'string' and 'uri' are assumed -- confirm
        # against the upstream source.  modifierClauseType is presumed to
        # come from the CQLParser star import, like relationType.
        rel_modifier = {100: 'phonetic', 101: 'stem', 102: 'relevant'}.get(rel)
        if rel_modifier:
            relation.modifiers.append(modifierClauseType(rel_modifier))

        if struct in [2, 6]:
            struct_modifier = 'word'
        elif struct in [4, 5, 100]:
            struct_modifier = 'date'
        elif struct == 109:
            struct_modifier = 'number'
        elif struct in [1, 108]:
            struct_modifier = 'string'
        elif struct == 104:
            struct_modifier = 'uri'
        else:
            struct_modifier = None
        if struct_modifier:
            relation.modifiers.append(modifierClauseType(struct_modifier))

        return (index, relation)
# Shared module-level configuration: the default `config` argument of
# rpn2cql() and the table source consulted by CIndex.toRPN().
zConfig = ZCQLConfig()
def rpn2cql(rpn, config=zConfig, attrSet=None):
    """Convert a Z39.50 RPN query structure into a CQL parse tree.

    rpn is a ``(tag, value)`` pair whose tag is 'op', 'rpnRpnOp' or
    'type_1'.  config must provide ``attrsToCql()``.  attrSet is the
    attribute set used for attribute elements that do not name their own.

    Returns a ``searchClauseType`` for a single operand, a ``tripleType``
    for a boolean node, and None for any tag or operand kind that is not
    handled here.
    """
    tag = rpn[0]
    if tag == 'op':
        # single search clause
        operand = rpn[1]
        kind = operand[0]
        if kind == 'attrTerm':
            term = operand[1].term
            combs = []
            for comb in operand[1].attributes:
                # An element-level attribute set applies to that element
                # only; it must not leak into the following elements.
                elemSet = getattr(comb, 'attributeSet', None) or attrSet
                # Broken input without a type is treated as type 1.
                aType = getattr(comb, 'attributeType', 1)
                vstruct = comb.attributeValue
                if vstruct[0] == 'numeric':
                    aValue = vstruct[1]
                else:
                    # Complex attr value
                    complexVal = vstruct[1]
                    if hasattr(complexVal, 'list'):
                        aValue = complexVal.list[0][1]
                    else:
                        # semanticAction?
                        aValue = complexVal.semanticAction[0][1]
                combs.append([elemSet, aType, aValue])
            # Now let config do its thing
            (index, relation) = config.attrsToCql(combs)
            return searchClauseType(index, relation, termType(term[1]))
        elif kind == 'resultSet':
            # operand is ('resultSet', <id>): the id is element 1, element 0
            # is only the tag.
            return searchClauseType(indexType('cql.resultSetId'),
                                    relationType('='),
                                    termType(operand[1]))
    elif tag == 'rpnRpnOp':
        triple = rpn[1]
        oper = triple.op
        ctrip = tripleType()
        # CTriple.toRPN reads leftOperand/rightOperand, so set those names.
        ctrip.leftOperand = rpn2cql(triple.rpn1, config, attrSet)
        ctrip.rightOperand = rpn2cql(triple.rpn2, config, attrSet)
        ctrip.boolean = booleanType(oper[0])
        if oper[0] == 'prox':
            prox = oper[1]
            distance = prox.distance
            if prox.ordered:
                order = "ordered"
            else:
                order = "unordered"
            rels = ["", "<", "<=", "=", ">=", ">", "<>"]
            relation = rels[prox.relationType]
            unit = prox.proximityUnitCode
            units = ["", "character", "word", "sentence", "paragraph", "section", "chapter", "document", "element", "subelement", "elementType", "byte"]
            if unit[0] == "known":
                unit = units[unit[1]]
            # NOTE(review): `cql` is not bound anywhere in the visible part
            # of this module -- confirm it is defined elsewhere in the file.
            mods = [cql.modifierClauseType('distance', relation, str(distance)),
                    cql.modifierClauseType('word', '=', unit),
                    cql.modifierClauseType(order)]
            ctrip.boolean.modifiers = mods
        return ctrip
    elif tag == 'type_1':
        q = rpn[1]
        return rpn2cql(q.rpn, config, q.attributeSet)
class CSearchClause(SearchClause):
    """Search clause that can convert itself into a Z39.50 RPN operand."""

    def convertMetachars(self, t):
        "Convert SRW meta characters in to Cheshire's meta characters"
        # Fail on ?, ^ or * not at the end.
        if t.count("?") != t.count("\\?"):
            diag = Diagnostic28()
            diag.details = "? Unsupported"
            raise diag
        if t.count("^") != t.count("\\^"):
            diag = Diagnostic31()
            diag.details = "^ Unsupported"
            raise diag
        unescapedStars = t.count("*") - t.count("\\*")
        if unescapedStars:
            # Exactly one unescaped '*' is allowed, and only as the final
            # character of the term.
            trailing = t.endswith("*") and not t.endswith("\\*")
            if unescapedStars != 1 or not trailing:
                diag = Diagnostic28()
                diag.details = "Non trailing * unsupported"
                raise diag
            # Strings are immutable, so rebuild instead of assigning t[-1].
            t = t[:-1] + "#"
        # Escaped meta characters become their literal selves.
        t = t.replace("\\^", "^")
        t = t.replace("\\?", "?")
        t = t.replace("\\*", "*")
        return t

    def toRPN(self, top=None):
        """Return this clause as an ``('op', ...)`` RPN operand.

        top is the root of the tree being converted and defaults to this
        clause.  'any'/'all' relations are rewritten into an or/and tree of
        '=' clauses, re-parsed, and the new tree is converted instead (this
        changes ``self.relation.value`` to '=').
        """
        if not top:
            top = self

        if self.relation.value in ['any', 'all']:
            # Need to split this into and/or tree
            if self.relation.value == 'any':
                joiner = " or "
            else:
                joiner = " and "
            words = self.term.value.split()
            self.relation.value = '='
            # Add 'word' relationModifier
            # NOTE(review): the statement for this step was missing from the
            # reviewed copy; the call below is a reconstruction -- confirm
            # the constructor argument against the upstream source.
            self.relation.modifiers.append(CModifierClause('cql.word'))
            # Create CQL, parse it, walk new tree
            idxrel = "%s %s" % (self.index.toCQL(), self.relation.toCQL())
            query = joiner.join(['%s "%s"' % (idxrel, w) for w in words])
            tree = parse(query)
            tree.prefixes = self.prefixes
            tree.parent = self.parent
            tree.config = self.config
            return tree.toRPN(top)

        # attributes, term
        # AttributeElement: attributeType, attributeValue
        # attributeValue ('numeric', n) or ('complex', struct)
        if self.index.value == 'resultsetid':
            return ('op', ('resultSet', self.term.value))

        clause = z3950.AttributesPlusTerm()
        attrs = self.index.toRPN(top)
        if self.term.value.isdigit():
            # NOTE(review): the body of this branch was missing from the
            # reviewed copy; a 'number' modifier is what CRelation.toRPN
            # tests for -- confirm against the upstream source.
            self.relation.modifiers.append(CModifierClause('cql.number'))
        # The relation attributes were computed but never merged in.
        attrs.update(self.relation.toRPN(top))
        clause.attributes = [make_attr(key[0], key[1], value)
                             for key, value in attrs.items()]
        clause.term = self.term.toRPN(top)
        return ('op', ('attrTerm', clause))
class CBoolean(Boolean):
    """Boolean operator that can convert itself into a Z39.50 RPN operator."""

    def toRPN(self, top):
        """Return this boolean as an ``(operatorName, value)`` pair.

        'not' is emitted as 'and-not'.  'prox' carries a
        ``z3950.ProximityOperator`` built from the 'unit', 'distance' and
        'ordered' modifiers; every other operator carries None.
        """
        if self.value == 'prox':
            # Create ProximityOperator
            prox = z3950.ProximityOperator()
            # distance, ordered, proximityUnitCode, relationType
            units = ["", "character", "word", "sentence", "paragraph", "section", "chapter", "document", "element", "subelement", "elementType", "byte"]
            u = self['unit']
            try:
                if u.value in units:
                    unitCode = ('known', units.index(u.value))
                else:
                    # Uhhhh.....
                    unitCode = ('private', int(u.value))
            except (AttributeError, TypeError, ValueError):
                # No usable unit modifier: fall back to known unit 2.
                unitCode = ('known', 2)
            # rpn2cql reads this field as proximityUnitCode.
            prox.proximityUnitCode = unitCode

            d = self['distance']
            try:
                prox.distance = int(d.value)
            except (AttributeError, TypeError, ValueError):
                if unitCode == ('known', 2):
                    prox.distance = 1
                else:
                    prox.distance = 0
            rels = ["", "<", "<=", "=", ">=", ">", "<>"]
            try:
                prox.relationType = rels.index(d.comparison)
            except (AttributeError, ValueError):
                prox.relationType = 2

            prox.ordered = bool(self['ordered'])
            # Same (name, value) shape as the other operators below;
            # rpn2cql tests op[0] == 'prox' and reads op[1].
            return ('prox', prox)

        if self.value == 'not':
            return ('and-not', None)
        return (self.value, None)
class CTriple(Triple):
    """Boolean triple that can convert itself into a Z39.50 RpnRpnOp."""

    def toRPN(self, top=None):
        """Return ``('rpnRpnOp', op)`` for this triple.

        top is the root of the tree being converted; it defaults to this
        triple and is passed on to both operands and the boolean.
        """
        if not top:
            top = self
        op = z3950.RpnRpnOp()
        op.rpn1 = self.leftOperand.toRPN(top)
        op.rpn2 = self.rightOperand.toRPN(top)
        op.op = self.boolean.toRPN(top)
        return ('rpnRpnOp', op)
class CIndex(Index):
    """CQL index that can resolve itself to Z39.50 attributes."""

    def toRPN(self, top):
        """Return a dict mapping ``(attributeSet, attributeType)`` to a value.

        When ``top.config`` is set the index is resolved through
        ``config.indexHash``; otherwise the prefix is looked up first among
        the tables on the module-level ``zConfig`` and then among the Z39.50
        attribute sets known to ``oids``.

        Raises Diagnostic15 for an unknown context set and Diagnostic16 for
        an unknown index (configured lookups), or ValueError when the prefix
        cannot be resolved at all.
        """
        pf = self.prefix
        if not pf and self.prefixURI:
            # We have a default
            for short, uri in zConfig.contextSets.items():
                if uri == self.prefixURI:
                    pf = short

        # Default BIB1
        attrSet = oids.oids['Z3950']['ATTRS']['BIB1']['oid']

        if hasattr(top, 'config') and top.config:
            config = top.config
            # Check SRW Configuration
            cqlURI = config.contextSetNamespaces['cql']
            index = self.value
            if self.prefixURI == cqlURI and self.value == "serverchoice":
                # Have to resolve our prefixes etc, so create an index object to do it
                cidx = CIndex(config.defaultIndex)
                cidx.config = config
                cidx.parent = config
                pf = cidx.prefix
                index = cidx.value

            if not config.indexHash.has_key(pf):
                diag = Diagnostic15()
                diag.details = pf
                diag.message = "Unknown context set"
                raise diag
            if not config.indexHash[pf].has_key(index):
                diag = Diagnostic16()
                diag.details = index
                diag.message = "Unknown index"
                raise diag

            # Need to map from this list to RPN list
            attrs = {}
            for entry in config.indexHash[pf][index]:
                entrySet = asn1.OidVal([int(part) for part in entry[0].split('.')])
                entryType = int(entry[1])
                if entry[2].isdigit():
                    val = int(entry[2])
                else:
                    val = entry[2]
                attrs[(entrySet, entryType)] = val
            return attrs

        # The `pf and` guards keep a missing prefix from crashing in
        # hasattr()/upper(); it is reported as unresolvable instead.
        if pf and hasattr(zConfig, pf):
            table = getattr(zConfig, pf)
            if table.has_key(self.value):
                val = table[self.value]
            else:
                val = self.value
        elif pf and oids.oids['Z3950']['ATTRS'].has_key(pf.upper()):
            attrSet = oids.oids['Z3950']['ATTRS'][pf.upper()]['oid']
            if self.value.isdigit():
                # bib1.1018
                val = int(self.value)
            else:
                # complex attribute for bib1
                val = self.value
        else:
            # Nothing to return here: `val` was never bound on this path.
            print("Can't resolve %s" % pf)
            raise ValueError("Can't resolve %s" % pf)

        return {(attrSet, 1) : val}
class CRelation(Relation):
    """CQL relation that can convert itself into bib1 attributes."""

    def toRPN(self, top):
        """Return a dict mapping ``(bib1 set, attributeType)`` to a value.

        Attribute type 2 comes from the relation itself and may be replaced
        by a 'relevant', 'stem' or 'phonetic' modifier; type 4 (and 5 for
        'number') comes from a 'number', 'date' or 'word' modifier; type 3
        (and 5 for 'exact') depends on whether the relation is 'exact'.
        Types left unset are omitted from the result.
        """
        rels = ['', '<', '<=', '=', '>=', '>', '<>']
        bib1 = z3950.Z3950_ATTRS_BIB1_ov
        # Indexed by attribute type; slot 0 is unused.
        vals = [None] * 7

        if self.value in rels:
            vals[2] = rels.index(self.value)
        elif self.value in ['exact', 'scr']:
            vals[2] = 3
        elif self.value == 'within':
            vals[2] = 104

        if self['relevant']:
            vals[2] = 102
        elif self['stem']:
            vals[2] = 101
        elif self['phonetic']:
            vals[2] = 100

        if self['number']:
            vals[4] = 109
            vals[5] = 100
        elif self['date']:
            vals[4] = 5
        elif self['word']:
            vals[4] = 2

        if self.value == 'exact':
            vals[3] = 1
            vals[5] = 100
            # vals[6] = 3
        else:
            vals[3] = 3
            # vals[6] = 1

        attrs = {}
        for attrType in range(1, 7):
            if vals[attrType]:
                attrs[(bib1, attrType)] = vals[attrType]
        return attrs
class CTerm(Term):
    """CQL term that can convert itself into a Z39.50 term."""

    def toRPN(self, top):
        """Return the term as a ``('general', value)`` pair; top is unused."""
        return ('general', self.value)
class CModifierClause(ModifierClause):
    """ModifierClause subclass; no members are defined in the visible source."""
class CModifierType(ModifierType):