Projects/worblehat-old
Projects
/
worblehat-old
Archived
12
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
worblehat-old/python/PyZ3950/zoom.py

966 lines
36 KiB
Python
Raw Normal View History

#!/usr/bin/env python
"""Implements the ZOOM 1.4 API (http://zoom.z3950.org/api)
for Z39.50.
Some global notes on the binding (these will only make sense when read
after the API document):
Get/Set Option is implemented as member attribute access or
assignment. Implementations are encouraged to throw an AttributeError
for unsupported (or, possibly, mistyped) attributes. (Production
applications are encouraged to catch such errors.)
All errors are reported as exceptions deriving from ZoomError (or, at
least, it's a bug if they aren't). Bib1Err is defined as part of the
binding; all the rest are specific to this implementation.
ResultSet provides a sequence interface, with standard Python
iteration, indexing, and slicing. So if rs is a ResultSet, use len
(rs) for Get_Size and rs[i] for Get_Record, or iterate with for r in
rs: foo(r). Any attempt to access a record for which the server
returned a surrogate diagnostic will raise the appropriate Bib1Err
exception.
For Record, Render_Record is implemented as Python __str__. The
'syntax' member contains the string-format record syntax, and the
'data' member contains the raw data.
The following query types are supported:
- "CCL", ISO 8777, (http://www.indexdata.dk/yaz/doc/tools.tkl#CCL)
- "S-CCL", the same, but interpreted on the server side
- "CQL", the Common Query Language, (http://www.loc.gov/z3950/agency/zing/cql/)
- "S-CQL", the same, but interpreted on the server side
- "PQF", Index Data's Prefix Query Format, (http://www.indexdata.dk/yaz/doc/tools.tkl#PQF)
- "C2", Cheshire II query syntax, (http://cheshire.berkeley.edu/cheshire2.html#zfind)
- "ZSQL", Z-SQL, see (http://archive.dstc.edu.au/DDU/projects/Z3950/Z+SQL/)
- "CQL-TREE", a general-purpose escape allowing any object with a toRPN method to be used, e.g. the CQL tree objects
ScanSet, like ResultSet, has a sequence interface. The i-th element
is a dictionary. See the ScanSet documentation for supported keys.
Sample usage:
from PyZ3950 import zoom
conn = zoom.Connection ('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'
query = zoom.Query ('CCL', 'ti="1066 and all that"')
res = conn.search (query)
for r in res:
print str(r)
conn.close ()
I hope everything else is clear from the docstrings and the abstract
API: let me know if that's wrong, and I'll try to do better.
For some purposes (I think the only one is writing Z39.50 servers),
you may want to use the functions in the z3950 module instead. """
from __future__ import nested_scopes
__author__ = 'Aaron Lav (asl2@pobox.com)'
__version__ = '1.0' # XXX
import getopt
import sys
# TODO:
# finish lang/charset (requires charset normalization, confer w/ Adam)
# implement piggyback
# implement schema (Non useful)
# implement setname (Impossible?)
from PyZ3950 import z3950
from PyZ3950 import ccl
from PyZ3950 import asn1
from PyZ3950 import zmarc
from PyZ3950 import bib1msg
from PyZ3950 import grs1
from PyZ3950 import oids
# Azaroth 2003-12-04:
from PyZ3950 import CQLParser, SRWDiagnostics, pqf
from PyZ3950 import c2query as c2
asn1.register_oid (oids.Z3950_QUERY_SQL, z3950.SQLQuery)
def my_enumerate (l): # replace w/ enumerate when we go to Python 2.3
return zip (range (len (l)), l)
trace_extract = 0
"""trace extracting records from search/present reqs"""
class ZoomError (Exception):
"""Base class for all errors reported from this module"""
pass
class ConnectionError(ZoomError):
"""Exception for TCP error"""
pass
class ClientNotImplError (ZoomError):
"""Exception for ZOOM client-side functionality not implemented (bug
author)"""
pass
class ServerNotImplError (ZoomError):
"""Exception for function not implemented on server"""
pass
class QuerySyntaxError (ZoomError):
"""Exception for query not parsable by client"""
pass
class ProtocolError (ZoomError):
"""Exception for malformatted server response"""
pass
class UnexpectedCloseError (ProtocolError):
"""Exception for unexpected (z3950, not tcp) close from server"""
pass
class UnknownRecSyn (ZoomError):
"""Exception for unknown record syntax returned from server"""
pass
class Bib1Err (ZoomError):
"""Exception for BIB-1 error"""
def __init__ (self, condition, message, addtlInfo):
self.condition = condition
self.message = message
self.addtlInfo = addtlInfo
ZoomError.__init__ (self)
def __str__ (self):
return "Bib1Err: %d %s %s" % (self.condition, self.message, self.addtlInfo)
class _ErrHdlr:
"""Error-handling services"""
err_attrslist = ['errCode','errMsg', 'addtlInfo']
def err (self, condition, addtlInfo, oid):
"""Translate condition + oid to message, save, and raise exception"""
self.errCode = condition
self.errMsg = bib1msg.lookup_errmsg (condition, oid)
self.addtlInfo = addtlInfo
raise Bib1Err (self.errCode, self.errMsg, self.addtlInfo)
def err_diagrec (self, diagrec):
(typ, data) = diagrec
if typ == 'externallyDefined':
raise ClientNotImplErr ("Unknown external diagnostic" + str (data))
addinfo = data.addinfo [1] # don't care about v2 vs v3
self.err (data.condition, addinfo, data.diagnosticSetId)
_record_type_dict = {}
"""Map oid to renderer, field-counter, and field-getter functions"""
def _oid_to_key (oid):
for (k,v) in _record_type_dict.items ():
if v.oid == oid:
return k
raise UnknownRecSyn (oid)
def _extract_attrs (obj, attrlist):
kw = {}
for key in attrlist:
if hasattr (obj, key):
kw[key] = getattr (obj, key)
return kw
class _AttrCheck:
"""Prevent typos"""
attrlist = []
not_implement_attrs = []
def __setattr__ (self, attr, val):
"""Ensure attr is in attrlist (list of allowed attributes), or
private (begins w/ '_'), or begins with 'X-' (reserved for users)"""
if attr[0] == '_' or attr in self.attrlist or attr[0:2] == 'X-':
self.__dict__[attr] = val
elif (attr in self.not_implement_attrs):
raise ClientNotImplError(attr)
else:
raise AttributeError (attr, val)
class Connection(_AttrCheck, _ErrHdlr):
"""Connection object"""
not_implement_attrs = ['piggyback',
'schema',
'proxy',
'async']
search_attrs = ['smallSetUpperBound',
'largeSetLowerBound',
'mediumSetPresentNumber',
'smallSetElementSetNames',
'mediumSetElementSetNames']
init_attrs = ['user',
'password',
'group',
'maximumRecordSize',
'preferredMessageSize',
'lang',
'charset',
'implementationId',
'implementationName',
'implementationVersion'
]
scan_zoom_to_z3950 = {
# translate names from ZOOM spec to Z39.50 spec names
'stepSize' : 'stepSize',
'numberOfEntries' : 'numberOfTermsRequested',
'responsePosition' : 'preferredPositionInResponse'
}
attrlist = search_attrs + init_attrs + scan_zoom_to_z3950.keys () + [
'databaseName',
'namedResultSets',
'preferredRecordSyntax', # these three inheritable by RecordSet
'elementSetName',
'presentChunk',
'targetImplementationId',
'targetImplementationName',
'targetImplementationVersion',
'host',
'port',
] + _ErrHdlr.err_attrslist
_queryTypes = ['S-CQL', 'S-CCL', 'RPN', 'ZSQL']
_cli = None
host = ""
port = 0
# and now, some defaults
namedResultSets = 1
elementSetName = 'F'
preferredRecordSyntax = 'USMARC'
preferredMessageSize = 0x100000
maximumRecordSize = 0x100000
stepSize = 0
numberOfEntries = 20 # for SCAN
responsePosition = 1
databaseName = 'Default'
implementationId = 'PyZ3950'
implementationName = 'PyZ3950 1.0/ZOOM v1.4'
implementationVersion = '1.0'
lang = None
charset = None
user = None
password = None
group = None
presentChunk = 20 # for result sets
def __init__(self, host, port, connect=1, **kw):
"""Establish connection to hostname:port. kw contains initial
values for options, and is useful for options which affect
the InitializeRequest. Currently supported values:
user Username for authentication
password Password for authentication
group Group for authentication
maximumRecordSize Maximum size in bytes of one record
preferredMessageSize Maximum size in bytes for response
lang 3 letter language code
charset Character set
implementationId Id for client implementation
implementationName Name for client implementation
implementationVersion Version of client implementation
"""
self.host = host
self.port = port
self._resultSetCtr = 0
for (k,v) in kw.items ():
setattr (self, k, v)
if (connect):
self.connect()
def connect(self):
self._resultSetCtr += 1
self._lastConnectCtr = self._resultSetCtr
# Bump counters first, since even if we didn't reconnect
# this time, we could have, and so any use of old connections
# is an error. (Old cached-and-accessed data is OK to use:
# cached but not-yet-accessed data is probably an error, but
# a not-yet-caught error.)
if self._cli <> None and self._cli.sock <> None:
return
initkw = {}
for attr in self.init_attrs:
initkw[attr] = getattr(self, attr)
if (self.namedResultSets):
options = ['namedResultSets']
else:
options = []
initkw ['ConnectionError'] = ConnectionError
initkw ['ProtocolError'] = ProtocolError
initkw ['UnexpectedCloseError'] = UnexpectedCloseError
self._cli = z3950.Client (self.host, self.port,
optionslist = options, **initkw)
self.namedResultSets = self._cli.get_option ('namedResultSets')
self.targetImplementationId = getattr (self._cli.initresp, 'implementationId', None)
self.targetImplementationName = getattr (self._cli.initresp, 'implementationName', None)
self.targetImplementationVersion = getattr (self._cli.initresp, 'implementationVersion', None)
if (hasattr (self._cli.initresp, 'userInformationField')):
# weird. U of Chicago returns an EXTERNAL with nothing
# but 'encoding', ('octet-aligned', '2545') filled in.
if (hasattr (self._cli.initresp.userInformationField,
'direct_reference') and
self._cli.initresp.userInformationField.direct_reference ==
oids.Z3950_USR_PRIVATE_OCLC_INFO_ov):
# see http://www.oclc.org/support/documentation/firstsearch/z3950/fs_z39_config_guide/ for docs
oclc_info = self._cli.initresp.userInformationField.encoding [1]
# the docs are a little unclear, but I presume we're
# supposed to report failure whenever a failReason is given.
if hasattr (oclc_info, 'failReason'):
raise UnexpectedCloseError ('OCLC_Info ',
oclc_info.failReason,
getattr (oclc_info, 'text',
' no text given '))
def search (self, query):
"""Search, taking Query object, returning ResultSet"""
if (not self._cli):
self.connect()
assert (query.typ in self._queryTypes)
dbnames = self.databaseName.split ('+')
self._cli.set_dbnames (dbnames)
cur_rsn = self._make_rsn ()
recv = self._cli.search_2 (query.query,
rsn = cur_rsn,
**_extract_attrs (self, self.search_attrs))
self._resultSetCtr += 1
rs = ResultSet (self, recv, cur_rsn, self._resultSetCtr)
return rs
# and 'Error Code', 'Error Message', and 'Addt'l Info' methods still
# eeded
def scan (self, query):
if (not self._cli):
self.connect()
self._cli.set_dbnames ([self.databaseName])
kw = {}
for k, xl in self.scan_zoom_to_z3950.items ():
if hasattr (self, k):
kw [xl] = getattr (self, k)
return ScanSet (self._cli.scan (query.query, **kw))
def _make_rsn (self):
"""Return result set name"""
if self.namedResultSets:
return "rs%d" % self._resultSetCtr
else:
return z3950.default_resultSetName
def close (self):
"""Close connection"""
self._cli.close ()
def sort (self, sets, keys):
""" Sort sets by keys, return resultset interface """
if (not self._cli):
self.connect()
# XXX This should probably be shuffled down into z3950.py
sortrelations = ['ascending', 'descending', 'ascendingByFrequency', 'descendingByFrequency']
req = z3950.SortRequest()
req.inputResultSetNames = []
for s in sets:
s._check_stale ()
req.inputResultSetNames.append(s._resultSetName)
cur_rsn = self._make_rsn()
req.sortedResultSetName = cur_rsn
zkeys = []
for k in keys:
zk = z3950.SortKeySpec()
zk.sortRelation = sortrelations.index(k.relation)
zk.caseSensitivity = k.caseInsensitive
if (k.missingValueAction):
zk.missingValueAction = (k.missingValueAction, None)
if (k.missingValueData):
zk.missingValueAction = ('missingValueData', k.missingValueData)
value = k.sequence
if (k.type == 'accessPoint'):
if (value.typ <> 'RPN'):
raise ValueError # XXX
l = z3950.SortKey['sortAttributes']()
l.id = value.query[1].attributeSet
l.list = value.query[1].rpn[1][1].attributes
seq = ('sortAttributes', l)
elif (k.type == 'private'):
seq = ('privateSortKey', value)
elif (k.type == 'elementSetName'):
spec = z3950.Specification()
spec.elementSpec = ('elementSetName', value)
seq = ('elementSpec', spec)
else:
raise ValueError # XXX
spec = ('generic', seq)
zk.sortElement = spec
zkeys.append(zk)
req.sortSequence = zkeys
recv = self._cli.transact(('sortRequest', req), 'sortResponse')
self._resultSetCtr += 1
if (hasattr(recv, 'diagnostics')):
diag = recv.diagnostics[0][1]
self.err(diag.condition, diag.addinfo, diag.diagnosticSetId)
if (not hasattr(recv, 'resultCount')):
# First guess: sum of all input sets
recv.resultCount = 0
for set in sets:
recv.resultCount += len(set)
# Check for addInfo to override
try:
val = recv.otherInfo[0].information[1]
if (val[:14] == 'Result-count: '):
recv.resultCount = int(val[14:])
except:
pass
rs = ResultSet (self, recv, cur_rsn, self._resultSetCtr)
return rs
class SortKey(_AttrCheck):
attrlist = ['relation', 'caseInsensitive', 'missingValueAction', 'missingValueData', 'type', 'sequence']
relation = "ascending"
caseInsensitive = 1
missingValueAction = ""
missingValueData = ""
type = "accessPoint"
sequence = ""
def __init__ (self, **kw):
for k in kw.keys():
setattr(self, k, kw[k])
class Query:
def __init__ (self, typ, query):
"""Creates Query object.
Supported query types: CCL, S-CCL, CQL, S-CQL, PQF, C2, ZSQL, CQL-TREE
"""
typ = typ.upper()
# XXX maybe replace if ... elif ... with dict mapping querytype to func
if typ == 'CCL':
self.typ = 'RPN'
try:
self.query = ccl.mk_rpn_query (query)
except ccl.QuerySyntaxError, err:
print "zoom raising", str (err), " for", query
raise QuerySyntaxError (str(err))
elif typ == 'S-CCL': # server-side ccl
self.typ = typ
self.query = ('type-2', query)
elif typ == 'S-CQL': # server-side cql
self.typ = typ
xq = asn1.EXTERNAL()
xq.direct_reference = oids.Z3950_QUERY_CQL_ov
xq.encoding = ('single-ASN1-type', query)
self.query = ('type_104', xq)
elif typ == 'CQL': # CQL to RPN transformation
self.typ = 'RPN'
try:
q = CQLParser.parse(query)
rpnq = z3950.RPNQuery()
# XXX Allow Attribute Architecture somehow?
rpnq.attributeSet = oids.Z3950_ATTRS_BIB1_ov
rpnq.rpn = q.toRPN()
self.query = ('type_1', rpnq)
except SRWDiagnostics.SRWDiagnostic, err:
raise err
except:
raise QuerySyntaxError
elif typ == 'PQF': # PQF to RPN transformation
self.typ = 'RPN'
try:
self.query = pqf.parse(query)
except:
raise QuerySyntaxError
elif typ == 'C2': # Cheshire2 Syntax
self.typ = 'RPN'
try:
q = c2.parse(query)
self.query = q[0]
except:
raise QuerySyntaxError
elif typ == 'ZSQL': # External SQL
self.typ = typ
xq = asn1.EXTERNAL()
xq.direct_reference = oids.Z3950_QUERY_SQL_ov
q = z3950.SQLQuery()
q.queryExpression = query
xq.encoding = ('single-ASN1-type', q)
self.query = ('type_104', xq)
elif typ == 'CQL-TREE': # Tree to RPN
self.typ = 'RPN'
try:
rpnq = z3950.RPNQuery()
# XXX Allow Attribute Architecture
rpnq.attributeSet = oids.Z3950_ATTRS_BIB1_ov
rpnq.rpn = query.toRPN()
self.query = ('type_1', rpnq)
except SRWDiagnostics.SRWDiagnostic, err:
raise err
except:
raise QuerySyntaxError
else:
raise ClientNotImplError ('%s queries not supported' % typ)
class ResultSet(_AttrCheck, _ErrHdlr):
"""Cache results, presenting read-only sequence interface. If
a surrogate diagnostic is returned for the i-th record, an
appropriate exception will be raised on access to the i-th
element (either access by itself or as part of a slice)."""
inherited_elts = ['elementSetName', 'preferredRecordSyntax',
'presentChunk']
attrlist = inherited_elts + _ErrHdlr.err_attrslist
not_implement_attrs = ['piggyback',
'schema']
def __init__ (self, conn, searchResult, resultSetName, ctr):
"""Only for creation by Connection object"""
self._conn = conn # needed for 'option inheritance', see ZOOM spec
self._searchResult = searchResult
self._resultSetName = resultSetName
self._records = {}
self._ctr = ctr
# _records is a dict indexed by preferredRecordSyntax of
# dicts indexed by elementSetName of lists of records
self._ensure_recs ()
# whether there are any records or not, there may be
# nonsurrogate diagnostics. _extract_recs will get them.
if hasattr (self._searchResult, 'records'):
self._extract_recs (self._searchResult.records, 0)
def __getattr__ (self, key):
"""Forward attribute access to Connection if appropriate"""
if self.__dict__.has_key (key):
return self.__dict__[key]
if key in self.inherited_elts:
return getattr (self._conn, key) # may raise AttributeError
raise AttributeError (key)
def _make_keywords (self):
"""Set up dict of parms for present request"""
kw = {}
# need for translation here from preferredRecordSyntax to recsyn
# is kinda pointless
if hasattr (self, 'preferredRecordSyntax'):
try:
kw['recsyn'] = _record_type_dict [
self.preferredRecordSyntax].oid
except KeyError, err:
raise ClientNotImplError ('Unknown record syntax ' +
self.preferredRecordSyntax)
if hasattr (self, 'elementSetName'):
kw['esn'] = ('genericElementSetName', self.elementSetName)
return kw
def __len__ (self):
"""Get number of records"""
return self._searchResult.resultCount
def _pin (self, i):
"""Handle negative indices"""
if i < 0:
return i + len (self)
return i
def _ensure_recs (self):
if not self._records.has_key (self.preferredRecordSyntax):
self._records [self.preferredRecordSyntax] = {}
self._records [self.preferredRecordSyntax][
self.elementSetName] = [None] * len (self)
if not self._records[self.preferredRecordSyntax].has_key (
self.elementSetName):
self._records [self.preferredRecordSyntax][
self.elementSetName] = [None] * len (self)
def _get_rec (self, i):
return self._records [self.preferredRecordSyntax][
self.elementSetName][i]
def _check_stale (self):
if self._ctr < self._conn._lastConnectCtr:
raise ConnectionError ('Stale result set used')
# XXX is this right?
if (not self._conn.namedResultSets) and \
self._ctr <> self._conn._resultSetCtr:
raise ServerNotImplError ('Multiple Result Sets')
# XXX or this?
def _ensure_present (self, i):
self._ensure_recs ()
if self._get_rec (i) == None:
self._check_stale ()
maxreq = self.presentChunk
if maxreq == 0: # get everything at once
lbound = i
count = len (self) - lbound
else:
lbound = (i / maxreq) * maxreq
count = min (maxreq, len (self) - lbound)
kw = self._make_keywords ()
if self._get_rec (lbound) == None:
presentResp = self._conn._cli.present (
start = lbound + 1, # + 1 b/c 1-based
count = count,
rsn = self._resultSetName,
**kw)
if not hasattr (presentResp, 'records'):
raise ProtocolError (str (presentResp))
self._extract_recs (presentResp.records, lbound)
# Maybe there was too much data to fit into
# range (lbound, lbound + count). If so, try
# retrieving just one record. XXX could try
# retrieving more, up to next cache bdary.
if i <> lbound and self._get_rec (i) == None:
presentResp = self._conn._cli.present (
start = i + 1,
count = 1,
rsn = self._resultSetName,
**kw)
self._extract_recs (presentResp.records, i)
rec = self._records [self.preferredRecordSyntax][
self.elementSetName][i]
if rec <> None and rec.is_surrogate_diag ():
rec.raise_exn ()
def __getitem__ (self, i):
"""Ensure item is present, and return a Record"""
i = self._pin (i)
if i >= len (self):
raise IndexError
self._ensure_present (i)
return self._records [self.preferredRecordSyntax][
self.elementSetName][i]
def __getslice__(self, i, j):
i = self._pin (i)
j = self._pin (j)
if j > len (self):
j = len (self)
for k in range (i, j):
self._ensure_present (k)
if len (self._records) == 0: # XXX is this right?
return []
return self._records[self.preferredRecordSyntax][
self.elementSetName] [i:j]
def _extract_recs (self, records, lbound):
(typ, recs) = records
if trace_extract:
print "Extracting", len (recs), "starting at", lbound
if typ == 'nonSurrogateDiagnostic':
self.err (recs.condition, "", recs.diagnosticSetId)
elif typ == 'multipleNonSurDiagnostics':
# see Zoom mailing list discussion of 2002/7/24 to justify
# ignoring all but first error.
diagRec = recs [0]
self.err_diagrec (diagRec)
if (typ <> 'responseRecords'):
raise ProtocolError ("Bad records typ " + str (typ) + str (recs))
for i,r in my_enumerate (recs):
r = recs [i]
dbname = getattr (r, 'name', '')
(typ, data) = r.record
if (typ == 'surrogateDiagnostic'):
rec = SurrogateDiagnostic (data)
elif typ == 'retrievalRecord':
oid = data.direct_reference
dat = data.encoding
(typ, dat) = dat
if (oid == oids.Z3950_RECSYN_USMARC_ov):
if typ <> 'octet-aligned':
raise ProtocolError (
"Weird record EXTERNAL MARC type: " + typ)
rec = Record (oid, dat, dbname)
else:
raise ProtocolError ("Bad typ %s data %s" %
(str (typ), str(data)))
self._records[self.preferredRecordSyntax][
self.elementSetName][lbound + i] = rec
def delete (self): # XXX or can I handle this w/ a __del__ method?
"""Delete result set"""
res = self._conn._cli.delete (self._resultSetName)
if res == None: return # server doesn't support Delete
# XXX should I throw an exn for delete errors? Probably.
# and 'Error Code', 'Error Message', and 'Addt'l Info' methods
def sort(self, keys):
return self._conn.sort([self], keys)
class SurrogateDiagnostic(_ErrHdlr):
"""Represent surrogate diagnostic. Raise appropriate exception
on access to syntax or data, or when raise_exn method is called.
Currently, RecordSet relies on the return from is_surrogate_diag (),
and calls raise_exn based on that."""
def __init__ (self, diagrec):
self.diagrec = diagrec
def is_surrogate_diag (self):
return 1
def raise_exn (self):
self.err_diagrec (self.diagrec)
def __getattr__ (self, attr):
if attr == 'data' or attr == 'syntax':
self.raise_exn ()
return _ErrHdlr.__getattr (self, attr)
class Record:
"""Represent retrieved record. 'syntax' attribute is a string,
'data' attribute is the data, which is:
USMARC -- raw MARC data
SUTRS -- a string (possibly in the future unicode)
XML -- ditto
GRS-1 -- a tree (see grs1.py for details)
EXPLAIN -- a hard-to-describe format (contact me if you're actually \
using this)
OPAC -- ditto
Other representations are not yet defined."""
def __init__ (self, oid, data, dbname):
"""Only for use by ResultSet"""
self.syntax = _oid_to_key (oid)
self._rt = _record_type_dict [self.syntax]
self.data = self._rt.preproc (data)
self.databaseName = dbname
def is_surrogate_diag (self):
return 0
def get_fieldcount (self):
"""Get number of fields"""
return self._rt.fieldcount (self.data)
def get_field (self,spec):
"""Get field"""
return self._rt.field (self.data, spec)
def __str__ (self):
"""Render printably"""
s = self._rt.renderer (self.data)
return 'Rec: ' + str (self.syntax) + " " + s
class _RecordType:
"""Map syntax string to OID and per-syntax utility functions"""
def __init__ (self, name, oid, renderer = lambda v:v,
fieldcount = lambda v:1, field = None, preproc = lambda v:v):
"""Register syntax"""
self.oid = oid
self.renderer = renderer
self.fieldcount = fieldcount
self.field = field
self.preproc = preproc
_record_type_dict [name] = self
# XXX do I want an OPAC class? Probably, and render_OPAC should be
# a member function.
def render_OPAC (opac_data):
s_list = []
biblio_oid = opac_data.bibliographicRecord.direct_reference
if (biblio_oid == z3950.Z3950_RECSYN_USMARC_ov):
bib_marc = zmarc.MARC (opac_data.bibliographicRecord.encoding [1])
s_list.append ("Bibliographic %s\n" % (str (bib_marc),) )
else:
s_list.append ("Unknown bibliographicRecord OID: " + str(biblio_oid))
for i, hd in my_enumerate (opac_data.holdingsData):
typ, data = hd
s_list.append ('Holdings %d:' % (i,))
if typ == 'holdingsAndCirc':
def render (item, level = 1):
s_list = []
if isinstance (item, asn1.StructBase):
for attr, val in item.__dict__.items ():
if attr [0] <> '_':
s_list.append ("%s%s: %s" % (
"\t" * level, attr, "\n".join(render (val, level + 1))))
elif (isinstance (item, type ([])) and len (item) > 0
and isinstance (item [0], asn1.StructBase)):
s_list.append ("") # generate newline
for i, v in my_enumerate (item):
s_list.append ("\t" * (level + 1) + str (i))
s_list += render (v, level + 1)
else:
s_list.append (repr (item))
return s_list
s_list.append ("\n".join (render (data)))
elif typ == 'marcHoldingsRecord':
hold_oid = data.direct_reference
if hold_oid == z3950.Z3950_RECSYN_USMARC_ov:
holdings_marc = zmarc.MARC (data.encoding [1])
s_list.append ("Holdings %s\n" % (str (holdings_marc),))
else:
s_list.append ("Unknown holdings OID: " + str (hold_oid))
else:
s_list.append ("Unknown holdings type: " + typ)
# shouldn't happen unless z39.50 definition is extended
return "\n".join (s_list)
_RecordType ('USMARC', z3950.Z3950_RECSYN_USMARC_ov,
renderer = lambda v: str(zmarc.MARC(v)))
_RecordType ('UKMARC', z3950.Z3950_RECSYN_UKMARC_ov,
renderer = lambda v: str(zmarc.MARC(v)))
_RecordType ('SUTRS', z3950.Z3950_RECSYN_SUTRS_ov)
_RecordType ('XML', z3950.Z3950_RECSYN_MIME_XML_ov)
_RecordType ('SGML', z3950.Z3950_RECSYN_MIME_SGML_ov)
_RecordType ('GRS-1', z3950.Z3950_RECSYN_GRS1_ov,
renderer = lambda v: str (v),
preproc = grs1.preproc)
_RecordType ('OPAC', z3950.Z3950_RECSYN_OPAC_ov, renderer = render_OPAC)
_RecordType ('EXPLAIN', z3950.Z3950_RECSYN_EXPLAIN_ov,
renderer = lambda v: str (v))
class ScanSet (_AttrCheck, _ErrHdlr):
"""Hold result of scan.
"""
zoom_to_z3950 = { # XXX need to provide more processing for attrs, alt
'freq' : 'globalOccurrences',
'display': 'displayTerm',
'attrs' : 'suggestedAttributes',
'alt' : 'alternativeTerm',
'other' : 'otherTermInfo'}
attrlist = _ErrHdlr.err_attrslist
def __init__ (self, scanresp):
"""For internal use only!"""
self._scanresp = scanresp
if hasattr (scanresp.entries, 'nonsurrogateDiagnostics'):
self.err_diagrec (scanresp.entries.nonsurrogateDiagnostics[0])
# Note that specification says that both entries and
# nonsurrogate diags can be present. This code will always
# raise the exn, and will need to be changed if both are needed.
def __len__ (self):
"""Return number of entries"""
return self._scanresp.numberOfEntriesReturned
def _get_rec (self, i):
if (not hasattr(self._scanresp.entries, 'entries')):
raise IndexError
t = self._scanresp.entries.entries[i]
if t[0] == 'termInfo':
return t[1]
else:
# Only way asserts can fail here is if someone changes
# the Z39.50 ASN.1 definitions.
assert (t[0] == 'surrogateDiagnostic')
diagRec = t[1]
if diagRec [0] == 'externallyDefined':
raise ClientNotImplError (
'Scan unknown surrogate diagnostic type: ' +
str (diagRec))
assert (diagRec[0] == 'defaultFormat')
defDiagFmt = diagRec [1]
self.err (defDiagFmt.condition, defDiagFmt.addinfo,
defDiagFmt.diagnosticSetId)
def get_term (self, i):
"""Return term. Note that get_{term,field,fields} can throw an
exception if the i'th term is a surrogate diagnostic."""
return self._get_rec (i).term
def get_field (self, field, i):
"""Returns value of field:
term: term
freq: integer
display: string
attrs: currently z3950 structure, should be string of attributes
alt: currently z3950 structure, should be [string of attrs, term]
other: currently z3950 structure, dunno what the best Python representation would be
"""
f = self.zoom_to_z3950 [field]
r = self._get_rec (i)
return r.__dict__[f]
def get_fields (self, i):
"""Return a dictionary mapping ZOOM's field names to values
present in the response. (Like get_field, but for all fields.)"""
r = self._get_rec (i)
d = {}
for k,v in self.zoom_to_z3950.items ():
val = getattr (r, v, None)
if val <> None:
d[k] = val
d["term"] = self.get_term (i)
return d
def _pin (self, i):
if i < 0:
return i + len (self)
return i
def __getitem__ (self, i):
return self.get_fields (self._pin (i))
def __getslice__ (self, i, j):
i = self._pin (i)
j = self._pin (j)
if j > len (self):
j = len (self)
return [self.get_fields (k) for k in range (i,j)]
if __name__ == '__main__':
optlist, args = getopt.getopt (sys.argv[1:], 'h:q:t:f:a:e:v:')
host = 'LC'
query = ''
qtype = 'CCL'
fmts = ['USMARC']
esns = ['F']
validation = None
for (opt, val) in optlist:
if opt == '-h':
host = val
elif opt == '-q':
query = val
elif opt == '-t':
qtype = val
elif opt == '-f':
fmts = val.split (',')
elif opt == '-e':
esns = val.split (',')
elif opt == '-v':
validation = val.split (',')
rv = z3950.host_dict.get (host)
if rv == None:
(name, port, dbname) = host.split (':')
port = int (port)
else:
(name, port, dbname) = rv
conn = Connection (name, port)
conn.databaseName = dbname
conn.preferredRecordSyntax = fmts [0]
def run_one (q):
try:
query = Query (qtype, q)
res = conn.search (query)
for esn in esns:
for syn in fmts:
print "Syntax", syn, "Esn", esn
res.preferredRecordSyntax = syn
if esn <> 'NONE':
res.elementSetName = esn
try:
for r in res:
print str(r)
except ZoomError, err:
print "Zoom exception", err.__class__, err
# res.delete ()
# Looks as if Oxford will close the connection if a delete is sent,
# despite claiming delete support (verified with yaz client, too).
except ZoomError, err:
print "Zoom exception", err.__class__, err
if query == '':
while 1:
q_str = raw_input ('CCL query: ')
if q_str == '': break
run_one (q_str)
else:
run_one (query)
conn.close ()