988 lines
32 KiB
Python
988 lines
32 KiB
Python
|
#!/usr/bin/python
|
||
|
|
||
|
# Author: Rob Sanderson (azaroth@liv.ac.uk)
|
||
|
# Distributed and Usable under the GPL
|
||
|
# Version: 1.7
|
||
|
# Most Recent Changes: contexts, new modifier style for 1.1
|
||
|
#
|
||
|
# With thanks to Adam from IndexData and Mike Taylor for their valuable input
|
||
|
|
||
|
from shlex import shlex
|
||
|
from xml.sax.saxutils import escape
|
||
|
from xml.dom.minidom import Node, parseString
|
||
|
from PyZ3950.SRWDiagnostics import *
|
||
|
# Don't use cStringIO as it borks Unicode (apparently)
|
||
|
from StringIO import StringIO
|
||
|
import types
|
||
|
|
||
|
# Parsing strictness flags (0 = off, 1 = on).
# errorOnEmptyTerm is read by Term.__init__, errorOnQuotedIdentifier by
# PrefixedObject.__init__, errorOnDuplicatePrefix by
# PrefixableObject.addPrefix and CQLParser.prefixes, and
# fullResultSetNameCheck by Triple.getResultSetId.
errorOnEmptyTerm = 0  # index = "" (often meaningless)
errorOnQuotedIdentifier = 0  # "/foo/bar" = "" (unnecessary BNF restriction)
errorOnDuplicatePrefix = 0  # >a=b >a=c "" (impossible due to BNF)
fullResultSetNameCheck = 1  # srw.rsn=foo and srw.rsn=foo (mutant!!)

# Base values for CQL: the index and relation used by CQLParser.clause
# when a clause consists of a bare term.
serverChoiceRelation = "scr"
serverChoiceIndex = "cql.serverchoice"

# Comparison operators accepted inside a modifier (see CQLParser.modifiers).
order = ['=', '>', '>=', '<', '<=', '<>']
# Token that introduces a modifier after a relation or boolean.
modifierSeparator = "/"
# Lower-cased boolean operator keywords.
booleans = ['and', 'or', 'not', 'prox']

# Prefixes that always resolve to these identifiers
# (checked first by PrefixableObject.resolvePrefix).
reservedPrefixes = {
    "srw": "http://www.loc.gov/zing/cql/srw-indexes/v1.0/",
    "cql": "info:srw/cql-context-set/1/cql-v1.1",
}

# Namespace written on the root element of generated XCQL.
XCQLNamespace = "http://www.loc.gov/zing/cql/xcql/"

# End of 'configurable' stuff
|
||
|
|
||
|
class PrefixableObject:
    """Root object for triple and searchClause

    Holds a mapping of prefix name -> identifier plus links to a parent
    node and an optional config object, both of which are consulted when
    a prefix cannot be resolved locally.
    """
    # Class-level defaults; __init__ gives every instance its own values.
    prefixes = {}
    parent = None
    config = None

    def __init__(self):
        self.prefixes = {}
        self.parent = None
        self.config = None

    def toXCQL(self, depth=0):
        """Return the <prefixes> XCQL fragment for this object's prefixes.

        depth -- nesting level; each line is indented by one space per level.
        """
        # Just generate our prefixes
        space = " " * depth
        xml = ['%s<prefixes>\n' % (space)]
        for p in self.prefixes.keys():
            xml.append("%s <prefix>\n%s <name>%s</name>\n%s <identifier>%s</identifier>\n%s </prefix>\n" % (space, space, escape(p), space, escape(self.prefixes[p]), space))
        xml.append("%s</prefixes>\n" % (space))
        return ''.join(xml)

    def addPrefix(self, name, identifier):
        """Map prefix `name` to `identifier` on this object.

        Raises Diagnostic45 (details = name) when errorOnDuplicatePrefix is
        set and the name is already mapped here or is a reserved prefix.
        """
        if errorOnDuplicatePrefix and (name in self.prefixes or name in reservedPrefixes):
            # Maybe error
            diag = Diagnostic45()
            diag.details = name
            raise diag
        self.prefixes[name] = identifier

    def resolvePrefix(self, name):
        """Return the identifier for prefix `name`, or None if unknown.

        Lookup order: reserved prefixes, this object's own prefixes, the
        parent chain, then the config object.
        """
        # Climb tree
        if name in reservedPrefixes:
            return reservedPrefixes[name]
        elif name in self.prefixes:
            return self.prefixes[name]
        elif self.parent is not None:
            return self.parent.resolvePrefix(name)
        elif self.config is not None:
            # Config is some sort of server config which specifies defaults
            return self.config.resolvePrefix(name)
        else:
            # Top of tree, no config, no resolution->Unknown indexset
            # For client we need to allow no prefix?
            return None
|
||
|
|
||
|
|
||
|
class PrefixedObject:
    """Root object for relation, relationModifier and index

    Stores a lower-cased identifier split into an optional `prefix` (the
    part before the '.') and a `value` (the part after it).
    """
    prefix = ""
    prefixURI = ""
    value = ""
    parent = None

    def __init__(self, val):
        """Normalise `val` and split it into prefix and value.

        A value wrapped in double quotes has the quotes removed, unless
        errorOnQuotedIdentifier is set, in which case Diagnostic14 is raised.
        """
        # All prefixed things are case insensitive
        val = val.lower()
        if val and val[0] == '"' and val[-1] == '"':
            if errorOnQuotedIdentifier:
                diag = Diagnostic14()
                diag.details = val
                raise diag
            else:
                val = val[1:-1]
        self.value = val
        self.splitValue()

    def __str__(self):
        if (self.prefix):
            return "%s.%s" % (self.prefix, self.value)
        else:
            return self.value

    def splitValue(self):
        """Split self.value at its '.' into self.prefix and self.value.

        Raises Diagnostic15 when the value holds more than one '.' or
        starts with '.' (an empty prefix).
        """
        f = self.value.find(".")
        if (self.value.count('.') > 1):
            diag = Diagnostic15()
            diag.details = "Multiple '.' characters: %s" % (self.value)
            raise diag
        elif (f == 0):
            diag = Diagnostic15()
            # The original referenced an undefined name here (irt.index),
            # which turned this diagnostic into a NameError.
            diag.details = "Null indexset: %s" % (self.value)
            raise diag
        elif f >= 0:
            self.prefix = self.value[:f].lower()
            self.value = self.value[f+1:].lower()

    def resolvePrefix(self):
        """Return the identifier for self.prefix, caching it in prefixURI.

        The lookup is delegated to self.parent.resolvePrefix, so a parent
        must have been assigned before the first call.
        """
        if (not self.prefixURI):
            self.prefixURI = self.parent.resolvePrefix(self.prefix)
        return self.prefixURI
|
||
|
|
||
|
class ModifiableObject:
    """Mixin giving subscript access to an object's modifiers."""
    # Treat modifiers as keys on boolean/relation?
    # Class-level fallback only; Relation and Boolean assign their own list.
    modifiers = []

    def __getitem__(self, k):
        """Return a modifier by position or by type name, else None.

        k -- an int index into self.modifiers, or a string compared with
             both str(m.type) (prefixed form) and m.type.value (bare name).
        """
        if isinstance(k, int):
            try:
                return self.modifiers[k]
            except IndexError:
                return None
        for m in self.modifiers:
            if (str(m.type) == k or m.type.value == k):
                return m
        return None
|
||
|
|
||
|
class Triple (PrefixableObject):
    """Object to represent a CQL triple

    A triple joins two operands (search clauses or nested triples) with a
    boolean.
    """
    leftOperand = None
    boolean = None
    rightOperand = None

    def toXCQL(self, depth=0):
        "Create the XCQL representation of the object"
        space = " " * depth
        if (depth == 0):
            # Only the root element carries the namespace declaration
            xml = ['<triple xmlns="%s">\n' % (XCQLNamespace)]
        else:
            xml = ['%s<triple>\n' % (space)]

        if self.prefixes:
            xml.append(PrefixableObject.toXCQL(self, depth+1))

        xml.append(self.boolean.toXCQL(depth+1))
        xml.append("%s <leftOperand>\n" % (space))
        xml.append(self.leftOperand.toXCQL(depth+2))
        xml.append("%s </leftOperand>\n" % (space))
        xml.append("%s <rightOperand>\n" % (space))
        xml.append(self.rightOperand.toXCQL(depth+2))
        xml.append("%s </rightOperand>\n" % (space))
        xml.append("%s</triple>\n" % (space))
        return ''.join(xml)

    def toCQL(self):
        """Return the parenthesised CQL for this triple.

        Prefix declarations, if any, are written first inside the parentheses.
        """
        body = "%s %s %s" % (self.leftOperand.toCQL(), self.boolean.toCQL(), self.rightOperand.toCQL())
        if not self.prefixes:
            return "(%s)" % (body)

        txt = []
        for p in self.prefixes.keys():
            if (p != ''):
                txt.append('>%s="%s"' % (p, self.prefixes[p]))
            else:
                txt.append('>"%s"' % (self.prefixes[p]))
        return "(%s %s)" % (' '.join(txt), body)

    def getResultSetId(self, top=None):
        """Return the single result set id this whole query refers to.

        Called with top=None on the root triple, it returns the id string
        when every clause in the tree names the same result set and ""
        otherwise. Recursive calls (top given) return the list of ids
        collected from their operands.
        """
        topLevel = top is None

        if fullResultSetNameCheck == 0 or self.boolean.value in ['not', 'prox']:
            if topLevel:
                return ""
            # A nested call must contribute an entry: the parent extend()s
            # with our result, and extending with "" added nothing, which
            # let a not/prox sub-query go unnoticed.
            return [""]

        if topLevel:
            top = self

        # Iterate over operands and build a list
        rsList = []
        for operand in (self.leftOperand, self.rightOperand):
            if isinstance(operand, Triple):
                rsList.extend(operand.getResultSetId(top))
            else:
                rsList.append(operand.getResultSetId(top))

        if not topLevel:
            return rsList

        # Check all elements are the same, if so we're a fubar form of present
        if (len(rsList) == rsList.count(rsList[0])):
            return rsList[0]
        return ""
|
||
|
|
||
|
class SearchClause (PrefixableObject):
    "Object to represent a CQL searchClause"
    index = None
    relation = None
    term = None

    def __init__(self, ind, rel, t):
        """Build a clause from its index, relation and term objects.

        Each of the three has its `parent` attribute set to this clause.
        """
        PrefixableObject.__init__(self)
        self.index = ind
        self.relation = rel
        self.term = t
        ind.parent = self
        rel.parent = self
        t.parent = self

    def toXCQL(self, depth=0):
        "Produce XCQL version of the object"
        space = " " * depth
        if (depth == 0):
            # Only the root element carries the namespace declaration
            xml = ['<searchClause xmlns="%s">\n' % (XCQLNamespace)]
        else:
            xml = ['%s<searchClause>\n' % (space)]

        if self.prefixes:
            xml.append(PrefixableObject.toXCQL(self, depth+1))

        xml.append(self.index.toXCQL(depth+1))
        xml.append(self.relation.toXCQL(depth+1))
        xml.append(self.term.toXCQL(depth+1))
        xml.append("%s</searchClause>\n" % (space))
        return ''.join(xml)

    def toCQL(self):
        """Return the CQL text: prefix declarations, index, relation, "term".

        Double quotes inside the term are backslash-escaped so the quoted
        term can be parsed again; Term removes that escaping when it reads
        a quoted term.
        """
        text = []
        for p in self.prefixes.keys():
            if (p != ''):
                text.append('>%s="%s"' % (p, self.prefixes[p]))
            else:
                text.append('>"%s"' % (self.prefixes[p]))
        term = ('%s' % (self.term,)).replace('"', '\\"')
        text.append('%s %s "%s"' % (self.index, self.relation.toCQL(), term))
        return ' '.join(text)

    def getResultSetId(self, top=None):
        """Return the term value if the index is cql.resultsetid, else "".

        `top` is accepted for signature compatibility with
        Triple.getResultSetId and is not used.
        """
        idx = self.index
        idx.resolvePrefix()
        if (idx.prefixURI == reservedPrefixes['cql'] and idx.value.lower() == 'resultsetid'):
            return self.term.value
        else:
            return ""
|
||
|
|
||
|
class Index(PrefixedObject):
    "Object to represent a CQL index"

    def toXCQL(self, depth=0):
        """Return the <index> element, indented by `depth` spaces.

        The XCQL namespace declaration is added only at depth 0.
        """
        ns = ""
        if depth == 0:
            ns = ' xmlns="%s"' % (XCQLNamespace)
        indent = " " * depth
        return "%s<index%s>%s</index>\n" % (indent, ns, escape(str(self)))

    def toCQL(self):
        "Return the index as CQL text (same as str())"
        return str(self)
|
||
|
|
||
|
class Relation(PrefixedObject, ModifiableObject):
    "Object to represent a CQL relation"

    def __init__(self, rel, mods=None):
        """Create a relation from its text and optional modifier list.

        rel  -- relation text; a 'prefix.' part overrides the default
                prefix "cql".
        mods -- list of modifier objects; each gets this relation as its
                parent. Defaults to a fresh empty list per instance (the
                original shared one default list between all relations).
        """
        if mods is None:
            mods = []
        # Default prefix; PrefixedObject.__init__ replaces it if rel has one
        self.prefix = "cql"
        PrefixedObject.__init__(self, rel)
        self.modifiers = mods
        for m in mods:
            m.parent = self

    def toXCQL(self, depth=0):
        "Create XCQL representation of object"
        if (depth == 0):
            ns = ' xmlns="%s"' % (XCQLNamespace)
        else:
            ns = ""

        space = " " * depth

        xml = ["%s<relation%s>\n" % (space, ns)]
        xml.append("%s <value>%s</value>\n" % (space, escape(self.value)))
        if self.modifiers:
            xml.append("%s <modifiers>\n" % (space))
            for m in self.modifiers:
                xml.append(m.toXCQL(depth+2))
            xml.append("%s </modifiers>\n" % (space))
        xml.append("%s</relation>\n" % (space))
        return ''.join(xml)

    def toCQL(self):
        "Return the relation value followed by its modifiers, '/'-separated"
        txt = [self.value]
        txt.extend(map(str, self.modifiers))
        return '/'.join(txt)
|
||
|
|
||
|
class Term:
    """Object to represent a CQL term

    The validated term text is stored in `value`.
    """
    value = ""

    def __init__(self, v):
        """Validate the raw term token `v` and store it in self.value.

        Surrounding double quotes are removed and \\" inside a quoted term
        is unescaped.

        Raises:
          Diagnostic25 -- v is a bare relation or separator token
          Diagnostic32 -- v consists only of anchor ('^') characters
          Diagnostic27 -- the term is empty and errorOnEmptyTerm is set
          Diagnostic26 -- a backslash is not followed by ?, \\, * or ^
        """
        if (v != ""):
            # Unquoted literal
            if v in ['>=', '<=', '>', '<', '<>', "/", '=']:
                diag = Diagnostic25()
                # Report the offending token; self.value is still "" here
                diag.details = v
                raise diag

            # Check existence of meaningful term
            if not v.strip("^"):
                diag = Diagnostic32()
                diag.details = "Only anchoring charater(s) in term: " + v
                raise diag

            # Unescape quotes
            if (v[0] == '"' and v[-1] == '"'):
                v = v[1:-1]
                v = v.replace('\\"', '"')

            if (not v and errorOnEmptyTerm):
                diag = Diagnostic27()
                raise diag

            # Check for badly placed \s
            idx = v.find("\\")
            while (idx > -1):
                # A trailing backslash escapes nothing, so it is rejected too
                if idx + 1 >= len(v) or v[idx+1] not in ['?', '\\', '*', '^']:
                    diag = Diagnostic26()
                    diag.details = v
                    raise diag
                # Skip the escaped character as well, so the second half of
                # an escaped backslash is not read as a new escape.
                idx = v.find("\\", idx + 2)

        elif (errorOnEmptyTerm):
            diag = Diagnostic27()
            raise diag

        self.value = v

    def __str__(self):
        return self.value

    def toXCQL(self, depth=0):
        """Return the <term> element, indented by `depth` spaces.

        The XCQL namespace declaration is added only at depth 0.
        """
        if (depth == 0):
            ns = ' xmlns="%s"' % (XCQLNamespace)
        else:
            ns = ""
        return "%s<term%s>%s</term>\n" % (" "*depth, ns, escape(self.value))
|
||
|
|
||
|
class Boolean(ModifiableObject):
    "Object to represent a CQL boolean"
    value = ""
    parent = None

    def __init__(self, bool, mods=None):
        """Create a boolean operator.

        bool -- operator text, stored unchanged in self.value
        mods -- list of modifier objects. Defaults to a fresh empty list
                per instance (the original shared one default list
                between all booleans).
        """
        if mods is None:
            mods = []
        self.value = bool
        self.modifiers = mods
        self.parent = None

    def toXCQL(self, depth=0):
        "Create XCQL representation of object"
        space = " " * depth
        xml = ["%s<boolean>\n" % (space)]
        xml.append("%s <value>%s</value>\n" % (space, escape(self.value)))
        if self.modifiers:
            xml.append("%s <modifiers>\n" % (space))
            for m in self.modifiers:
                xml.append(m.toXCQL(depth+2))
            xml.append("%s </modifiers>\n" % (space))
        xml.append("%s</boolean>\n" % (space))
        return ''.join(xml)

    def toCQL(self):
        "Return the operator followed by its modifiers, '/'-separated"
        txt = [self.value]
        for m in self.modifiers:
            txt.append(m.toCQL())
        return '/'.join(txt)

    def resolvePrefix(self, name):
        "Delegate prefix resolution to the parent"
        return self.parent.resolvePrefix(name)
|
||
|
|
||
|
class ModifierType(PrefixedObject):
    # Same as index, but we'll XCQLify in ModifierClause
    # Prefix used when the modifier name carries none of its own.
    prefix = "cql"
    parent = None
|
||
|
|
||
|
class ModifierClause:
    "Object to represent a relation modifier"
    parent = None
    type = None
    comparison = ""
    value = ""

    def __init__(self, type, comp="", val=""):
        """Create a modifier from its type name, comparison and value.

        The type name is wrapped in a ModifierType whose parent is this
        clause.
        """
        modtype = ModifierType(type)
        modtype.parent = self
        self.type = modtype
        self.comparison = comp
        self.value = val

    def __str__(self):
        # Without a value only the type is written
        name = str(self.type)
        if not self.value:
            return "%s" % (name)
        return "%s%s%s" % (name, self.comparison, self.value)

    def toXCQL(self, depth=0):
        """Return the <modifier> element, indented by `depth` spaces.

        A modifier without a value is written on a single line holding only
        its <type>.
        """
        outer = " " * depth
        if not self.value:
            return "%s<modifier><type>%s</type></modifier>\n" % (outer, escape(str(self.type)))
        inner = " " * (depth+1)
        return "%s<modifier>\n%s<type>%s</type>\n%s<comparison>%s</comparison>\n%s<value>%s</value>\n%s</modifier>\n" % (outer, inner, escape(str(self.type)), inner, escape(self.comparison), inner, escape(self.value), outer)

    def toCQL(self):
        "Return the modifier as CQL text (same as str())"
        return str(self)

    def resolvePrefix(self, name):
        # Need to skip parent, which has its own resolvePrefix
        # eg boolean or relation, neither of which is prefixable
        owner = self.parent.parent
        return owner.resolvePrefix(name)
|
||
|
|
||
|
|
||
|
|
||
|
# Requires changes for: <= >= <>, and escaped \" in "
|
||
|
# From shlex.py (std library for 2.2+)
|
||
|
class CQLshlex(shlex):
    """shlex with additions for CQL parsing

    Adds the two-character operators <=, >= and <>, escaped quotes inside
    quoted strings, and a wider set of word characters.
    """
    # NOTE(review): shlex.__init__ assigns self.quotes and self.commenters
    # on the instance, so these two class-level values look shadowed by the
    # stdlib defaults -- confirm before relying on them.
    quotes = '"'
    commenters = ""
    # One-token lookahead buffer used by read_token
    nextToken = ""

    def __init__(self, thing):
        shlex.__init__(self, thing)
        self.wordchars += "!@#$%^&*-+{}[];,.?|~`:\\"
        self.wordchars += ''.join(map(chr, range(128,254)))

    def _push_back(self, char):
        "Put `char` at the front of the pushback queue (list or deque)"
        if hasattr(self.pushback, 'appendleft'):
            self.pushback.appendleft(char)
        else:
            self.pushback.insert(0, char)

    def read_token(self):
        """Read a token from the input stream (no pushback or inclusions)

        self.state drives the scan: ' ' between tokens, 'a' inside a word,
        '<' after a leading < or >, a quote character inside a quoted
        string, and None once end of input has been reached.

        Raises Diagnostic14 if the input ends inside a quoted string.
        """

        while 1:
            if (self.nextToken != ""):
                # Resume with the character stashed by the previous call
                self.token = self.nextToken
                self.nextToken = ""
                # Bah. SUPER ugly non portable
                if self.token == "/":
                    self.state = ' '
                    break

            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("shlex: in state  %s  I see character: %s" % (repr(self.state), repr(nextchar)))

            if self.state is None:
                self.token = ''  # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token:
                        break  # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    self.token = nextchar
                    self.state = nextchar
                elif nextchar in ['<', '>']:
                    self.token = nextchar
                    self.state = '<'
                else:
                    self.token = nextchar
                    if self.token:
                        break  # emit current token
                    else:
                        continue
            elif self.state == '<':
                # Only accumulate <=, >= or <>

                if self.token == ">" and nextchar == "=":
                    self.token = self.token + nextchar
                    self.state = ' '
                    break
                elif self.token == "<" and nextchar in ['>', '=']:
                    self.token = self.token + nextchar
                    self.state = ' '
                    break
                elif not nextchar:
                    self.state = None
                    break
                elif nextchar == "/":
                    self.state = "/"
                    self.nextToken = "/"
                    break
                elif nextchar in self.wordchars:
                    self.state = 'a'
                    self.nextToken = nextchar
                    break
                elif nextchar in self.quotes:
                    self.state = nextchar
                    self.nextToken = nextchar
                    break
                else:
                    # NOTE(review): nextchar is discarded here, which is
                    # harmless for whitespace but also drops punctuation.
                    self.state = ' '
                    break

            elif self.state in self.quotes:
                self.token = self.token + nextchar
                # Allow escaped quotes
                if nextchar == self.state and self.token[-2] != '\\':
                    self.state = ' '
                    break
                elif not nextchar:  # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # Override SHLEX's ValueError to throw diagnostic
                    diag = Diagnostic14()
                    diag.details = self.token[:-1]
                    raise diag
            elif self.state == 'a':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token:
                        break  # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars or nextchar in self.quotes:
                    self.token = self.token + nextchar
                elif nextchar in ['>', '<']:
                    self.nextToken = nextchar
                    self.state = '<'
                    break
                else:
                    # Punctuation ends the word and is returned as the next
                    # token. The original concatenated a list with
                    # self.pushback, which fails when shlex keeps a deque.
                    self._push_back(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break  # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result
|
||
|
|
||
|
class CQLParser:
    """Token parser to create object structure for CQL

    Keeps a one-token lookahead: `currentToken` is the token being
    processed and `nextToken` the one after it.
    """
    parser = ""
    currentToken = ""
    nextToken = ""

    def __init__(self, p):
        """ Initialise with shlex parser """
        self.parser = p
        self.fetch_token()  # Fetches to next
        self.fetch_token()  # Fetches to curr

    def is_boolean(self, token):
        "Is the token a boolean"
        token = token.lower()
        return token in booleans

    def fetch_token(self):
        """ Read ahead one token """
        tok = self.parser.get_token()
        self.currentToken = self.nextToken
        self.nextToken = tok

    def prefixes(self):
        """Create prefixes dictionary

        Consumes every leading '>name="identifier"' or '>"identifier"'
        declaration and returns {lower-cased name: identifier}; an unnamed
        declaration is stored under "".

        Raises Diagnostic45 for a repeated name when errorOnDuplicatePrefix
        is set.
        """
        prefs = {}
        while (self.currentToken == ">"):
            # Strip off maps
            self.fetch_token()
            if self.nextToken == "=":
                # Named map
                name = self.currentToken
                self.fetch_token()  # = is current
                self.fetch_token()  # id is current
                identifier = self.currentToken
                self.fetch_token()
            else:
                name = ""
                identifier = self.currentToken
                self.fetch_token()
            # Normalise before the duplicate check: names are stored
            # lower-cased, so the raw name could miss an existing entry.
            name = name.lower()
            if (errorOnDuplicatePrefix and name in prefs):
                # Error condition
                diag = Diagnostic45()
                diag.details = name
                raise diag
            if len(identifier) > 1 and identifier[0] == '"' and identifier[-1] == '"':
                identifier = identifier[1:-1]
            prefs[name] = identifier

        return prefs

    def query(self):
        """ Parse query

        Reads sub-queries joined by booleans into a left-nested chain of
        triples, attaches any leading prefix declarations to the result and
        returns it.
        """
        prefs = self.prefixes()
        left = self.subQuery()
        while self.currentToken and self.is_boolean(self.currentToken):
            boolobject = self.boolean()
            right = self.subQuery()
            # Setup Left Object
            trip = tripleType()
            trip.leftOperand = left
            trip.boolean = boolobject
            trip.rightOperand = right
            left.parent = trip
            right.parent = trip
            boolobject.parent = trip
            left = trip

        for p in prefs.keys():
            left.addPrefix(p, prefs[p])
        return left

    def subQuery(self):
        """ Find either query or clause

        Raises Diagnostic13 when an opening parenthesis is not closed.
        """
        if self.currentToken == "(":
            self.fetch_token()  # Skip (
            result = self.query()
            if self.currentToken == ")":
                self.fetch_token()  # Skip )
            else:
                diag = Diagnostic13()
                diag.details = self.currentToken
                raise diag
        else:
            prefs = self.prefixes()
            if (prefs):
                result = self.query()
                for p in prefs.keys():
                    result.addPrefix(p, prefs[p])
            else:
                result = self.clause()
        return result

    def clause(self):
        """ Find searchClause

        Handles 'index relation term', a bare term (given the server-choice
        index and relation), or a clause preceded by prefix declarations.

        Raises Diagnostic10 when the tokens fit none of these forms or the
        query ends where a term is expected.
        """
        nextIsBoolean = self.is_boolean(self.nextToken)
        if not nextIsBoolean and not (self.nextToken in [')', '(', '']):

            index = indexType(self.currentToken)
            self.fetch_token()  # Skip Index
            rel = self.relation()
            if (self.currentToken == ''):
                diag = Diagnostic10()
                diag.details = "Expected Term, got end of query."
                raise diag
            term = termType(self.currentToken)
            self.fetch_token()  # Skip Term

            irt = searchClauseType(index, rel, term)

        elif self.currentToken and (nextIsBoolean or self.nextToken in [')', '']):

            irt = searchClauseType(indexType(serverChoiceIndex), relationType(serverChoiceRelation), termType(self.currentToken))
            self.fetch_token()

        elif self.currentToken == ">":
            prefs = self.prefixes()
            # iterate to get object
            result = self.clause()
            for p in prefs.keys():
                result.addPrefix(p, prefs[p])
            return result

        else:
            diag = Diagnostic10()
            diag.details = "Expected Boolean or Relation but got: " + self.currentToken
            raise diag

        return irt

    def modifiers(self):
        """Parse '/modifier' sequences and return the list of clauses.

        Each modifier is a lower-cased name optionally followed by a
        comparison from `order` and a value. Raises Diagnostic20 when a
        separator is directly followed by another separator.
        """
        mods = []
        while (self.currentToken == modifierSeparator):
            self.fetch_token()
            mod = self.currentToken
            mod = mod.lower()
            if (mod == modifierSeparator):
                diag = Diagnostic20()
                diag.details = "Null modifier"
                raise diag
            self.fetch_token()
            comp = self.currentToken
            if (comp in order):
                self.fetch_token()
                value = self.currentToken
                self.fetch_token()
            else:
                comp = ""
                value = ""
            # Built through the module-level factory alias, like the other
            # node types created by this parser.
            mods.append(modifierClauseType(mod, comp, value))
        return mods

    def boolean(self):
        """ Find boolean

        Raises Diagnostic37 when the current token is not a boolean.
        """
        self.currentToken = self.currentToken.lower()
        if self.currentToken in booleans:
            boolobject = booleanType(self.currentToken)
            self.fetch_token()
            boolobject.modifiers = self.modifiers()
            for b in boolobject.modifiers:
                b.parent = boolobject

        else:
            diag = Diagnostic37()
            diag.details = self.currentToken
            raise diag

        return boolobject

    def relation(self):
        """ Find relation """
        self.currentToken = self.currentToken.lower()
        rel = relationType(self.currentToken)
        self.fetch_token()
        rel.modifiers = self.modifiers()
        for r in rel.modifiers:
            r.parent = rel

        return rel
|
||
|
|
||
|
|
||
|
|
||
|
class XCQLParser:
    """ Parser for XCQL using some very simple DOM

    Builds the same object tree as CQLParser from DOM elements. Nodes are
    created through the module-level factory aliases and constructed with
    the arguments the classes in this file require.
    NOTE(review): replacement classes from CQLUtils are assumed to keep
    those constructor signatures -- confirm.
    """

    def firstChildElement(self, elem):
        """ Find first child which is an Element """
        for c in elem.childNodes:
            if c.nodeType == Node.ELEMENT_NODE:
                return c
        return None

    def firstChildData(self, elem):
        """ Find first child which is Data """
        for c in elem.childNodes:
            if c.nodeType == Node.TEXT_NODE:
                return c
        return None

    def _childElements(self, elem):
        "Return the element children of elem, skipping text and comments"
        return [c for c in elem.childNodes if c.nodeType == Node.ELEMENT_NODE]

    def _text(self, elem):
        "Return the first text child of elem, or '' for an empty element"
        node = self.firstChildData(elem)
        if node is None:
            return ""
        return node.data

    def _operand(self, elem):
        "Parse the searchClause or triple inside an operand element"
        child = self.firstChildElement(elem)
        if child is None:
            raise ValueError(elem.localName)
        if child.localName == "searchClause":
            return self.searchClause(child)
        return self.triple(child)

    def _modifiers(self, elem):
        "Parse a <modifiers> element into a list of modifier clauses"
        mods = []
        for m in self._childElements(elem):
            if m.localName != "modifier":
                continue
            mtype = ""
            comp = ""
            value = ""
            for part in self._childElements(m):
                if part.localName == "type":
                    mtype = self._text(part)
                elif part.localName == "comparison":
                    comp = self._text(part)
                elif part.localName == "value":
                    value = self._text(part).lower()
            mods.append(modifierClauseType(mtype, comp, value))
        return mods

    def searchClause(self, elem):
        """ Process a <searchClause>

        Raises ValueError for an unknown child element, or when index,
        relation or term is missing.
        """
        index = None
        relation = None
        term = None
        prefixes = None
        for c in self._childElements(elem):
            if c.localName == "index":
                index = indexType(self._text(c).lower())
            elif c.localName == "term":
                term = termType(self._text(c))
            elif c.localName == "relation":
                relation = self.relation(c)
            elif c.localName == "prefixes":
                prefixes = self.prefixes(c)
            else:
                raise ValueError(c.localName)
        if index is None or relation is None or term is None:
            raise ValueError("searchClause requires index, relation and term")
        sc = searchClauseType(index, relation, term)
        if prefixes is not None:
            sc.prefixes = prefixes
        return sc

    def triple(self, elem):
        """ Process a <triple>

        Operands and the boolean get the new triple as their parent, as
        CQLParser.query does. Any child other than boolean, prefixes or
        leftOperand is treated as the right operand.
        """
        trip = tripleType()
        for c in self._childElements(elem):
            if c.localName == "boolean":
                trip.boolean = self.boolean(c)
                trip.boolean.parent = trip
            elif c.localName == "prefixes":
                trip.prefixes = self.prefixes(c)
            elif c.localName == "leftOperand":
                trip.leftOperand = self._operand(c)
                trip.leftOperand.parent = trip
            else:
                trip.rightOperand = self._operand(c)
                trip.rightOperand.parent = trip
        return trip

    def relation(self, elem):
        """ Process a <relation> """
        value = ""
        mods = []
        for c in self._childElements(elem):
            if c.localName == "value":
                value = self._text(c).lower()
            elif c.localName == "modifiers":
                mods = self._modifiers(c)
        rel = relationType(value, mods)
        # Set explicitly in case the relation class does not link them
        for m in mods:
            m.parent = rel
        return rel

    def boolean(self, elem):
        "Process a <boolean>"
        value = ""
        mods = []
        for c in self._childElements(elem):
            if c.localName == "value":
                value = self._text(c).lower()
            elif c.localName == "modifiers":
                mods = self._modifiers(c)
        boolobject = booleanType(value, mods)
        for m in mods:
            m.parent = boolobject
        return boolobject

    def prefixes(self, elem):
        "Process <prefixes>"
        prefs = {}
        for c in self._childElements(elem):
            # prefix
            name = ""
            identifier = ""
            for c2 in self._childElements(c):
                if c2.localName == "name":
                    name = self._text(c2).lower()
                elif c2.localName == "identifier":
                    identifier = self._text(c2)
            prefs[name] = identifier
        return prefs
|
||
|
|
||
|
|
||
|
def xmlparse(s):
    """ API. Return a searchClause/triple object from XML string """
    doc = parseString(s)
    # documentElement is the root element even when the document starts
    # with a comment or processing instruction, which firstChild would
    # return instead.
    q = xcqlparse(doc.documentElement)
    return q
|
||
|
|
||
|
def xcqlparse(query):
    """ API. Return a searchClause/triple object from XML DOM objects"""
    # Requires only properties of objects so we don't care how they're generated
    handler = XCQLParser()
    # Anything that is not a searchClause is processed as a triple
    if query.localName != "searchClause":
        return handler.triple(query)
    return handler.searchClause(query)
|
||
|
|
||
|
|
||
|
def parse(query):
    """ API. Return a searchClause/triple object from CQL string

    Raises Diagnostic10 when the query cannot be encoded as UTF-8 or when
    tokens are left over after a complete query has been parsed.
    """

    try:
        query = query.encode("utf-8")
    except (UnicodeError, AttributeError):
        # Not encodable, or not a string at all
        diag = Diagnostic10()
        diag.details = "Cannot parse non utf-8 characters"
        raise diag

    q = StringIO(query)
    lexer = CQLshlex(q)
    parser = CQLParser(lexer)
    result = parser.query()
    if parser.currentToken != '':
        diag = Diagnostic10()
        diag.details = "Unprocessed tokens remain: " + repr(parser.currentToken)
        raise diag
    return result
|
||
|
|
||
|
|
||
|
# Assign our objects to generate
|
||
|
# Factory aliases: the parsers create nodes through these names, so they can
# be rebound to substitute other classes.
tripleType = Triple
booleanType = Boolean
relationType = Relation
searchClauseType = SearchClause
modifierClauseType = ModifierClause
modifierTypeType = ModifierType
indexType = Index
termType = Term

try:
    from CQLUtils import *
    # Look every replacement up before rebinding anything, so a failure
    # part-way cannot leave a mix of base and C* types.
    _cqlUtilsTypes = (CTriple, CBoolean, CRelation, CSearchClause,
                      CModifierClause, CModifierType, CIndex, CTerm)
except Exception:
    # Nested scopes. Utils needs our classes to parent
    # We need its classes to build (maybe)
    # Best effort: any ordinary error keeps the defaults above, while
    # KeyboardInterrupt and SystemExit are no longer swallowed.
    pass
else:
    (tripleType, booleanType, relationType, searchClauseType,
     modifierClauseType, modifierTypeType, indexType,
     termType) = _cqlUtilsTypes
|
||
|
|
||
|
|
||
|
# Command-line use: read one CQL query from stdin and print its XCQL form,
# or the diagnostic raised while parsing it.
if (__name__ == "__main__"):
    import sys
    s = sys.stdin.readline()
    try:
        q = parse(s)
    except SRWDiagnostic:
        # sys.exc_info() fetches the exception without version-specific
        # except syntax
        diag = sys.exc_info()[1]
        # Print a full version, not just str()
        print("Diagnostic Generated.")
        print(" Code: " + str(diag.code))
        print(" Details: " + str(diag.details))
        print(" Message: " + str(diag.message))
    else:
        # Drop the trailing newline that toXCQL appends
        print(q.toXCQL()[:-1])
|
||
|
|