Projects/worblehat-old
Projects
/
worblehat-old
Archived
12
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
worblehat-old/python/PyZ3950/CQLUtils.py

545 lines
17 KiB
Python
Raw Normal View History

"""CQL utility functions and subclasses"""
from CQLParser import *
from types import ListType, IntType
from SRWDiagnostics import *
from PyZ3950 import z3950, asn1, oids
from PyZ3950.zdefs import make_attr
# Module-level side effect: register the CQL query OID with the asn1 module,
# associating it with the GeneralString type.
asn1.register_oid (oids.Z3950_QUERY_CQL, asn1.GeneralString)
class ZCQLConfig:
    """Lookup tables and helpers for mapping between CQL and Z39.50 attributes.

    The class-level dictionaries are looked up by context-set prefix (the
    attribute name on this object) and then by index name, yielding the
    numeric attribute value.  attrsToCql() performs the reverse translation
    for the 'dc' and 'bib1' tables.
    """

    # Short context-set prefix -> context-set identifier.
    # NOTE(review): the 'ccg' identifier ends with a space; it is kept
    # byte-for-byte because it is compared against prefix URIs at runtime --
    # confirm whether the trailing space is intentional.
    contextSets = {'dc' : 'info:srw/cql-context-set/1/dc-v1.1',
                   'cql' : 'info:srw/cql-context-set/1/cql-v1.1',
                   'bath' : 'http://zing.z3950.org/cql/bath/2.0/',
                   'zthes' : 'http://zthes.z3950.org/cql/1.0/',
                   'ccg' : 'http://srw.cheshire3.org/contextSets/ccg/1.1/ ',
                   'rec' : 'info:srw/cql-context-set/2/rec-1.0',
                   'net' : 'info:srw/cql-context-set/2/net-1.0'}

    # dc.* index name -> use attribute value
    dc = {'title' : 4,
          'subject' : 21,
          'creator' : 1003,
          'author' : 1003,
          'editor' : 1020,
          'contributor' : 1018,
          'publisher' : 1018,
          'description' : 62,
          'date' : 30,
          'resourceType' : 1031,
          'type' : 1031,
          'format' : 1034,
          'identifier' : 12,
          'source' : 1019,
          'language' : 54,
          'relation' : 1016,
          'coverage' : 1016,
          'rights' : 1016
          }

    # cql.* index name -> use attribute value
    cql = {'anywhere' : 1016,
           'serverChoice' : 1016}

    # The common bib1 points
    # The original literal listed "lccn" twice (9 and 16); the second entry
    # silently replaced the first, so use attribute 9 had no name at all.
    # The second entry is now spelled "lc_call_number" so both survive.
    bib1 = {"personal_name" : 1,
            "corporate_name" : 2,
            "conference_name" : 3,
            "title" : 4,
            "title_series" : 5,
            "title_uniform" : 6,
            "isbn" : 7,
            "issn" : 8,
            "lccn" : 9,
            "local_number" : 12,
            "dewey_number" : 13,
            "lc_call_number" : 16,
            "local_classification" : 20,
            "subject" : 21,
            "subject_lc" : 27,
            "subject_local" : 29,
            "date" : 30,
            "date_publication" : 31,
            "date_acquisition" : 32,
            "local_call_number" : 53,
            "abstract" : 62,
            "note" : 63,
            "record_type" : 1001,
            "name" : 1002,
            "author" : 1003,
            "author_personal" : 1004,
            "identifier" : 1007,
            "text_body" : 1010,
            "date_modified" : 1012,
            "date_added" : 1011,
            "concept_text" : 1014,
            "any" : 1016,
            "default" : 1017,
            "publisher" : 1018,
            "record_source" : 1019,
            "editor" : 1020,
            "docid" : 1032,
            "anywhere" : 1035,
            "sici" : 1037
            }

    # exp1.* index name -> attribute value
    exp1 = {"explainCategory" :1,
            "humanStringLanguage" : 2,
            "databaseName" : 3,
            "serverName" : 4,
            "attributeSetOID" : 5,
            "recordSyntaxOID" : 6,
            "tagSetOID" : 7,
            "extendedServiceOID" : 8,
            "dateAdded" : 9,
            "dateChanged" : 10,
            "dateExpires" : 11,
            "elementSetName" : 12,
            "processingContext" : 13,
            "processingName" : 14,
            "termListName" : 15,
            "schemaOID" : 16,
            "producer" : 17,
            "supplier" : 18,
            "availability" : 19,
            "proprietary" : 20,
            "userFee" : 21,
            "variantSetOID" : 22,
            "unitSystem" : 23,
            "keyword" : 24,
            "explainDatabase" : 25,
            "processingOID" : 26
            }

    # xd1.* index name -> attribute value
    # "langauge" was a misspelling of "language"; the correct key is added
    # and the old spelling is kept as an alias so existing lookups still work.
    xd1 = {"title" : 1,
           "subject" : 2,
           "name" : 3,
           "description" : 4,
           "date" : 5,
           "type" : 6,
           "format" : 7,
           "identifier" : 8,
           "source" : 9,
           "language" : 10,
           "langauge" : 10,
           "relation" : 11,
           "coverage" : 12,
           "rights" : 13}

    # util.* index name -> attribute value
    util = {"record_date" : 1,
            "record_agent" : 2,
            "record_language" : 3,
            "control_number" : 4,
            "cost" : 5,
            "record_syntax" : 6,
            "database_schema" : 7,
            "score" : 8,
            "rank" : 9,
            "result_set_position" : 10,
            "all" : 11,
            "anywhere" : 12,
            "server_choice" : 13,
            "wildcard" : 14,
            "wildpath" : 15}

    # Attribute set assumed for attributes that do not name one.
    defaultAttrSet = z3950.Z3950_ATTRS_BIB1_ov

    def __init__(self):
        # Expose the util and xd1 tables under alternative prefix names.
        self.util1 = self.util
        self.xd = self.xd1

    def _nameForUse(self, table, use):
        """Return the first name in table whose value equals use, else None."""
        for (name, value) in table.items():
            if value == use:
                return name
        return None

    def attrsToCql(self, attrs):
        """Translate Z39.50 attributes into a CQL index and relation.

        attrs is a sequence of (attributeSet, attributeType, attributeValue)
        triples.  A false attributeSet means self.defaultAttrSet.  The input
        is not modified (the original implementation wrote the default set
        back into the caller's lists).

        Only attributes of the BIB1 set are interpreted: type 1 (use,
        default 4), type 2 (relation, default 3), type 4 (structure) and
        type 6 (completeness).

        Returns an (index, relation) tuple built with indexType and
        relationType.
        """
        lookup = {}
        for attr in attrs:
            attrSet = attr[0] or self.defaultAttrSet
            lookup[(attrSet, attr[1])] = attr[2]

        bib1Set = z3950.Z3950_ATTRS_BIB1_ov
        use = lookup.get((bib1Set, 1), 4)
        rel = lookup.get((bib1Set, 2), 3)
        struct = lookup.get((bib1Set, 4), None)
        comp = lookup.get((bib1Set, 6), None)

        # Index: a non-integer use value is taken verbatim as the index name;
        # otherwise prefer a dc name, then a bib1 name, then the bare number.
        if not isinstance(use, int):
            index = indexType(use)
        else:
            name = self._nameForUse(self.dc, use)
            if name is not None:
                index = indexType("dc.%s" % (name))
            else:
                name = self._nameForUse(self.bib1, use)
                if name is not None:
                    index = indexType("bib1.%s" % (name))
                else:
                    index = indexType("bib1.%i" % (use))

        # Relation: values 1-6 map onto the comparison operators; anything
        # above 6 becomes '=' (or 'any' for word/word-list structure).
        relations = ['', '<', '<=', '=', '>=', '>', '<>']
        if comp == 3:
            relation = relationType("exact")
        elif rel > 6:
            if struct in [2, 6]:
                relation = relationType('any')
            else:
                relation = relationType('=')
        else:
            relation = relationType(relations[rel])

        # Relation attribute values that are expressed as CQL modifiers.
        if rel == 100:
            relation.modifiers.append(modifierClauseType('phonetic'))
        elif rel == 101:
            relation.modifiers.append(modifierClauseType('stem'))
        elif rel == 102:
            relation.modifiers.append(modifierClauseType('relevant'))

        # Structure attribute values that are expressed as CQL modifiers.
        if struct in [2, 6]:
            relation.modifiers.append(modifierClauseType('word'))
        elif struct in [4, 5, 100]:
            relation.modifiers.append(modifierClauseType('date'))
        elif struct == 109:
            relation.modifiers.append(modifierClauseType('number'))
        elif struct in [1, 108]:
            relation.modifiers.append(modifierClauseType('string'))
        elif struct == 104:
            relation.modifiers.append(modifierClauseType('uri'))

        return (index, relation)
# Shared module-level configuration instance; it is the default `config`
# argument of rpn2cql and is consulted directly by CIndex.toRPN.
zConfig = ZCQLConfig()
def rpn2cql(rpn, config=zConfig, attrSet=None):
    """Convert a Z39.50 RPN structure into a CQL parse tree.

    rpn is a (tag, value) pair:
      ('op', ('attrTerm', apt))   -> search clause built via config.attrsToCql
      ('op', ('resultSet', id))   -> search clause on cql.resultSetId
      ('rpnRpnOp', triple)        -> triple of the two converted operands
      ('type_1', query)           -> converts query.rpn using query.attributeSet

    config supplies attrsToCql(); attrSet is the attribute set applied to
    attribute elements that do not name their own.

    Returns a searchClauseType or tripleType instance, or None when the
    structure is not recognised.
    """
    kind = rpn[0]

    if kind == 'op':
        # single search clause
        operand = rpn[1]
        operandKind = operand[0]
        if operandKind == 'attrTerm':
            attrs = operand[1].attributes
            term = operand[1].term
            combs = []
            for comb in attrs:
                # An element's own attribute set applies to that element
                # only; it must not leak onto the elements that follow.
                elemSet = getattr(comb, 'attributeSet', attrSet)
                if hasattr(comb, 'attributeType'):
                    aType = comb.attributeType
                else:
                    # Broken!
                    aType = 1
                vstruct = comb.attributeValue
                if vstruct[0] == 'numeric':
                    aValue = vstruct[1]
                else:
                    # Complex attr value
                    vstruct = vstruct[1]
                    if hasattr(vstruct, 'list'):
                        aValue = vstruct.list[0][1]
                    else:
                        # semanticAction?
                        aValue = vstruct.semanticAction[0][1]
                combs.append([elemSet, aType, aValue])
            # Now let config do its thing
            (index, relation) = config.attrsToCql(combs)
            return searchClauseType(index, relation, termType(term[1]))
        elif operandKind == 'resultSet':
            # operand is ('resultSet', id): the id is element 1, element 0 is
            # only the tag.
            return searchClauseType(indexType('cql.resultSetId'),
                                    relationType('='),
                                    termType(operand[1]))

    elif kind == 'rpnRpnOp':
        triple = rpn[1]
        operator = triple.op
        ctrip = tripleType()
        # Keep the enclosing attribute set when descending into operands.
        ctrip.leftOperand = rpn2cql(triple.rpn1, config, attrSet)
        ctrip.rightOperand = rpn2cql(triple.rpn2, config, attrSet)
        boolName = operator[0]
        if boolName == 'and-not':
            # CBoolean.toRPN emits CQL 'not' as 'and-not'; undo that here.
            boolName = 'not'
        ctrip.boolean = booleanType(boolName)
        if operator[0] == 'prox':
            prox = operator[1]
            distance = prox.distance
            if prox.ordered:
                order = "ordered"
            else:
                order = "unordered"
            rels = ["", "<", "<=", "=", ">=", ">", "<>"]
            relation = rels[prox.relationType]
            units = ["", "character", "word", "sentence", "paragraph",
                     "section", "chapter", "document", "element",
                     "subelement", "elementType", "byte"]
            unit = prox.proximityUnitCode
            if unit[0] == "known":
                unit = units[unit[1]]
            else:
                # Private unit code: emit the number, which CBoolean.toRPN
                # parses back with int().
                unit = str(unit[1])
            # The unit modifier is named 'unit' because that is the name
            # CBoolean.toRPN looks up.
            mods = [modifierClauseType('distance', relation, str(distance)),
                    modifierClauseType('unit', '=', unit),
                    modifierClauseType(order)]
            ctrip.boolean.modifiers = mods
        return ctrip

    elif kind == 'type_1':
        q = rpn[1]
        return rpn2cql(q.rpn, config, q.attributeSet)
class CSearchClause(SearchClause):
    """Search clause that can render itself as a Z39.50 RPN operand."""

    def convertMetachars(self, t):
        """Convert SRW meta characters in to Cheshire's meta characters.

        A single unescaped trailing '*' becomes '#'.  Escaped '^', '?' and
        '*' lose their backslash.  Returns the converted string.

        Raises Diagnostic28 for an unescaped '?' or for any unescaped '*'
        that is not the final character, and Diagnostic31 for an unescaped
        '^'.
        """
        # Fail on ?, ^ or * not at the end.
        if t.count("?") != t.count("\\?"):
            diag = Diagnostic28()
            diag.details = "? Unsupported"
            raise diag
        if t.count("^") != t.count("\\^"):
            diag = Diagnostic31()
            diag.details = "^ Unsupported"
            raise diag
        unescapedStars = t.count("*") - t.count("\\*")
        if unescapedStars:
            # Exactly one unescaped star is allowed and it must be the last
            # character.  endswith() is also safe on a one-character term.
            trailing = t.endswith("*") and not t.endswith("\\*")
            if unescapedStars > 1 or not trailing:
                diag = Diagnostic28()
                diag.details = "Non trailing * unsupported"
                raise diag
            # Strings are immutable, so build a new one.
            t = t[:-1] + "#"
        t = t.replace("\\^", "^")
        t = t.replace("\\?", "?")
        t = t.replace("\\*", "*")
        return t

    def toRPN(self, top=None):
        """Return this clause as an ('op', ...) RPN operand.

        top is the root of the tree being converted (defaults to self) and
        is passed on to the index, relation and term.

        For the 'any' and 'all' relations the term is split on whitespace
        and re-parsed as an or/and tree of '=' clauses, whose toRPN() result
        is returned.  A 'resultsetid' index yields
        ('op', ('resultSet', term)); everything else yields
        ('op', ('attrTerm', AttributesPlusTerm)).

        Side effects: the relation's value and modifier list may be changed.
        """
        if not top:
            top = self

        if self.relation.value in ['any', 'all']:
            # Need to split this into and/or tree
            if self.relation.value == 'any':
                joiner = " or "
            else:
                joiner = " and "
            words = self.term.value.split()
            self.relation.value = '='
            # Add 'word' relationModifier
            self.relation.modifiers.append(CModifierClause('cql.word'))

            # Create CQL, parse it, walk new tree
            idxrel = "%s %s" % (self.index.toCQL(), self.relation.toCQL())
            query = joiner.join(['%s "%s"' % (idxrel, w) for w in words])
            tree = parse(query)
            tree.prefixes = self.prefixes
            tree.parent = self.parent
            tree.config = self.config
            return tree.toRPN(top)

        # attributes, term
        # AttributeElement: attributeType, attributeValue
        # attributeValue ('numeric', n) or ('complex', struct)
        if self.index.value == 'resultsetid':
            return ('op', ('resultSet', self.term.value))

        clause = z3950.AttributesPlusTerm()
        attrs = self.index.toRPN(top)
        if self.term.value.isdigit():
            self.relation.modifiers.append(CModifierClause('cql.number'))
        # Relation attributes override index attributes on the same key.
        attrs.update(self.relation.toRPN(top))
        clause.attributes = [make_attr(key[0], key[1], value)
                             for (key, value) in attrs.items()]
        clause.term = self.term.toRPN(top)
        return ('op', ('attrTerm', clause))
class CBoolean(Boolean):
    """Boolean operator that can render itself as a Z39.50 RPN operator."""

    def toRPN(self, top):
        """Return this boolean as a (name, value) operator pair.

        'not' becomes ('and-not', None); 'prox' becomes
        ('prox', ProximityOperator); any other value is returned as
        (value, None).  This is the shape rpn2cql reads back from
        RpnRpnOp.op.

        For 'prox' the operator is filled from the 'unit', 'distance' and
        'ordered' modifiers.  Missing or unparsable modifiers fall back to:
        unit ('known', 2); distance 1 for that unit, else 0; relation 2.
        """
        if self.value == 'not':
            return ('and-not', None)
        if self.value != 'prox':
            return (self.value, None)

        # Create ProximityOperator
        # distance, ordered, proximityUnitCode, relationType
        prox = z3950.ProximityOperator()

        units = ["", "character", "word", "sentence", "paragraph",
                 "section", "chapter", "document", "element", "subelement",
                 "elementType", "byte"]
        unitMod = self['unit']
        try:
            if unitMod.value in units:
                unitCode = ('known', units.index(unitMod.value))
            else:
                # Uhhhh.....
                unitCode = ('private', int(unitMod.value))
        except (AttributeError, TypeError, ValueError):
            # No unit modifier, or a value that is neither a known name nor
            # a number.
            unitCode = ('known', 2)
        # The field is proximityUnitCode (see the field list above and the
        # reader in rpn2cql), not 'unit'.
        prox.proximityUnitCode = unitCode

        distMod = self['distance']
        try:
            prox.distance = int(distMod.value)
        except (AttributeError, TypeError, ValueError):
            if unitCode == ('known', 2):
                prox.distance = 1
            else:
                prox.distance = 0

        rels = ["", "<", "<=", "=", ">=", ">", "<>"]
        try:
            relCode = rels.index(distMod.comparison)
        except (AttributeError, ValueError):
            relCode = 0
        # Index 0 is the empty-string placeholder, not a real relation, so
        # it takes the same default as a missing comparison.
        prox.relationType = relCode or 2

        prox.ordered = bool(self['ordered'])
        return ('prox', prox)
class CTriple(Triple):
    """Boolean triple that can render itself as a Z39.50 RPN node."""

    def toRPN(self, top=None):
        """rpnRpnOp

        Convert both operands and the boolean, passing along top (or this
        triple when top is not given), and return ('rpnRpnOp', RpnRpnOp).
        """
        root = top or self
        node = z3950.RpnRpnOp()
        node.rpn1 = self.leftOperand.toRPN(root)
        node.rpn2 = self.rightOperand.toRPN(root)
        node.op = self.boolean.toRPN(root)
        return ('rpnRpnOp', node)
class CIndex(Index):
    """Index that can resolve itself to Z39.50 attributes."""

    def toRPN(self, top):
        """Return {(attributeSet, attributeType): value} for this index.

        When top carries a true `config` attribute, the index is resolved
        through config.indexHash[prefix][index] (with cql.serverchoice
        replaced by config.defaultIndex) and every configured attribute is
        returned.  Otherwise a single type-1 attribute is returned, resolved
        through the zConfig table named by the prefix or, failing that,
        through the Z3950 ATTRS table in oids.

        Raises Diagnostic15 for an unknown context set and Diagnostic16 for
        an unknown index (config path), and ValueError when the prefix
        cannot be resolved at all.
        """
        self.resolvePrefix()
        prefix = self.prefix
        if not prefix and self.prefixURI:
            # We have a default
            for name in zConfig.contextSets:
                if zConfig.contextSets[name] == self.prefixURI:
                    prefix = name
                    break

        # Default BIB1
        attrSet = oids.oids['Z3950']['ATTRS']['BIB1']['oid']

        config = getattr(top, 'config', None)
        if config:
            # Check SRW Configuration
            cqlURI = config.contextSetNamespaces['cql']
            index = self.value
            if self.prefixURI == cqlURI and self.value == "serverchoice":
                # Have to resolve our prefixes etc, so create an index object to do it
                cidx = CIndex(config.defaultIndex)
                cidx.config = config
                cidx.parent = config
                cidx.resolvePrefix()
                prefix = cidx.prefix
                index = cidx.value

            if prefix not in config.indexHash:
                diag = Diagnostic15()
                diag.details = prefix
                diag.message = "Unknown context set"
                raise diag
            if index not in config.indexHash[prefix]:
                diag = Diagnostic16()
                diag.details = index
                diag.message = "Unknown index"
                raise diag

            # Need to map from this list to RPN list
            attrs = {}
            for entry in config.indexHash[prefix][index]:
                entrySet = asn1.OidVal([int(part) for part in entry[0].split('.')])
                entryType = int(entry[1])
                if entry[2].isdigit():
                    entryValue = int(entry[2])
                else:
                    entryValue = entry[2]
                attrs[(entrySet, entryType)] = entryValue
            return attrs

        if not prefix:
            # Nothing to look up; fail with the same exception type as any
            # other unresolvable prefix instead of erroring inside hasattr().
            raise ValueError("Can't resolve %s" % prefix)

        if hasattr(zConfig, prefix):
            table = getattr(zConfig, prefix)
            # Unknown names are passed through unchanged as the value.
            # NOTE(review): attrSet stays BIB1 here even for prefixes such
            # as exp1/xd1/util -- confirm that is intended.
            val = table.get(self.value, self.value)
        elif prefix.upper() in oids.oids['Z3950']['ATTRS']:
            attrSet = oids.oids['Z3950']['ATTRS'][prefix.upper()]['oid']
            if self.value.isdigit():
                # bib1.1018
                val = int(self.value)
            else:
                # complex attribute for bib1
                val = self.value
        else:
            # Report the problem on the exception rather than on stdout.
            raise ValueError("Can't resolve %s" % prefix)

        return {(attrSet, 1) : val}
class CRelation(Relation):
    """Relation that can render itself as BIB1 attribute values."""

    def toRPN(self, top):
        """Return {(BIB1 attribute set, attributeType): value} for this relation.

        Attribute types written: 2 (relation), 3 (position), 4 (structure)
        and 5 (truncation).  Types whose value ends up false are omitted.
        top is accepted for interface symmetry and not used.
        """
        bib1Set = z3950.Z3950_ATTRS_BIB1_ov
        comparisons = ['', '<', '<=', '=', '>=', '>', '<>']
        name = self.value

        # Type 2: from the relation name, then overridden by a modifier.
        relationAttr = None
        if name in comparisons:
            relationAttr = comparisons.index(name)
        elif name in ['exact', 'scr']:
            relationAttr = 3
        elif name == 'within':
            relationAttr = 104
        for (modifier, code) in (('relevant', 102),
                                 ('stem', 101),
                                 ('phonetic', 100)):
            if self[modifier]:
                relationAttr = code
                break

        # Types 4 and 5: driven by the number/date/word modifiers.
        structureAttr = None
        truncationAttr = None
        if self['number']:
            structureAttr = 109
            truncationAttr = 100
        elif self['date']:
            structureAttr = 5
        elif self['word']:
            structureAttr = 2

        # Type 3 (and type 5 again) depend on whether the relation is 'exact'.
        if name == 'exact':
            positionAttr = 1
            truncationAttr = 100
        else:
            positionAttr = 3

        attrs = {}
        for (attrType, attrValue) in ((2, relationAttr),
                                      (3, positionAttr),
                                      (4, structureAttr),
                                      (5, truncationAttr)):
            if attrValue:
                attrs[(bib1Set, attrType)] = attrValue
        return attrs
class CTerm(Term):
    """Term that can render itself as a Z39.50 RPN term."""

    def toRPN(self, top):
        """Return the term as a ('general', value) pair; top is not used."""
        termValue = self.value
        return ('general', termValue)
class CModifierClause(ModifierClause):
    """ModifierClause subclass used by this module; adds no behaviour of its own."""
class CModifierType(ModifierType):
    """ModifierType subclass defined by this module; adds no behaviour of its own."""