worblehat-old/python/PyZ3950/zoom.py

#!/usr/bin/env python

"""Implements the ZOOM 1.4 API (http://zoom.z3950.org/api)
for Z39.50.

Some global notes on the binding (these will only make sense when read
after the API document):

Get/Set Option is implemented as member attribute access or
assignment.  Implementations are encouraged to throw an AttributeError
for unsupported (or, possibly, mistyped) attributes.  (Production
applications are encouraged to catch such errors.)

All errors are reported as exceptions deriving from ZoomError (or, at
least, it's a bug if they aren't).  Bib1Err is defined as part of the
binding; all the rest are specific to this implementation.

ResultSet provides a sequence interface, with standard Python
iteration, indexing, and slicing.  So if rs is a ResultSet, use len
(rs) for Get_Size and rs[i] for Get_Record, or iterate with for r in
rs: foo(r).  Any attempt to access a record for which the server
returned a surrogate diagnostic will raise the appropriate Bib1Err
exception.

For Record, Render_Record is implemented as Python __str__.  The
'syntax' member contains the string-format record syntax, and the
'data' member contains the raw data.

The following query types are supported:
- "CCL", ISO 8777, (http://www.indexdata.dk/yaz/doc/tools.tkl#CCL)
- "S-CCL", the same, but interpreted on the server side
- "CQL", the Common Query Language, (http://www.loc.gov/z3950/agency/zing/cql/)
- "S-CQL", the same, but interpreted on the server side
- "PQF", Index Data's Prefix Query Format, (http://www.indexdata.dk/yaz/doc/tools.tkl#PQF)
- "C2", Cheshire II query syntax, (http://cheshire.berkeley.edu/cheshire2.html#zfind)
- "ZSQL", Z-SQL, see (http://archive.dstc.edu.au/DDU/projects/Z3950/Z+SQL/)
- "CQL-TREE", a general-purpose escape allowing any object with a toRPN method to be used, e.g. the CQL tree objects

ScanSet, like ResultSet, has a sequence interface.  The i-th element
is a dictionary.  See the ScanSet documentation for supported keys.

Sample usage:
    from PyZ3950 import zoom
    conn = zoom.Connection ('z3950.loc.gov', 7090)
    conn.databaseName = 'VOYAGER'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query ('CCL', 'ti="1066 and all that"')
    res = conn.search (query)
    for r in res:
            print str(r)
    conn.close ()
I hope everything else is clear from the docstrings and the abstract
API: let me know if that's wrong, and I'll try to do better.

For some purposes (I think the only one is writing Z39.50 servers),
you may want to use the functions in the z3950 module instead.  """

from __future__ import nested_scopes    

__author__ = 'Aaron Lav (asl2@pobox.com)'
__version__ = '1.0' # XXX

import getopt
import sys 

# TODO:
# finish lang/charset (requires charset normalization, confer w/ Adam)
# implement piggyback   
# implement schema    (Non useful)
# implement setname   (Impossible?)

from PyZ3950 import z3950
from PyZ3950 import ccl
from PyZ3950 import asn1
from PyZ3950 import zmarc
from PyZ3950 import bib1msg
from PyZ3950 import grs1
from PyZ3950 import oids

# Azaroth 2003-12-04:
from PyZ3950 import CQLParser, SRWDiagnostics, pqf
from PyZ3950 import c2query as c2
asn1.register_oid (oids.Z3950_QUERY_SQL, z3950.SQLQuery)


def my_enumerate (l): # replace w/ enumerate when we go to Python 2.3
    return zip (range (len (l)), l)

trace_extract = 0
"""trace extracting records from search/present reqs"""

class ZoomError (Exception):
    """Base class for all errors reported from this module"""
    pass

class ConnectionError(ZoomError):
    """Exception for TCP error"""
    pass

class ClientNotImplError (ZoomError):
    """Exception for ZOOM client-side functionality not implemented (bug
       author)"""
    pass

class ServerNotImplError (ZoomError):
    """Exception for function not implemented on server"""
    pass

class QuerySyntaxError (ZoomError):
    """Exception for query not parsable by client"""
    pass

class ProtocolError (ZoomError):
    """Exception for malformatted server response"""
    pass

class UnexpectedCloseError (ProtocolError):
    """Exception for unexpected (z3950, not tcp) close from server"""
    pass

class UnknownRecSyn (ZoomError):
    """Exception for unknown record syntax returned from server"""
    pass

class Bib1Err (ZoomError):
    """Exception for BIB-1 error"""
    def __init__ (self, condition, message, addtlInfo):
        self.condition = condition
        self.message = message
        self.addtlInfo = addtlInfo
        ZoomError.__init__ (self)
    def __str__ (self):
        return "Bib1Err: %d %s %s" % (self.condition, self.message, self.addtlInfo)


class _ErrHdlr:
    """Error-handling services"""
    err_attrslist = ['errCode','errMsg', 'addtlInfo']
    def err (self, condition, addtlInfo, oid):
        """Translate condition + oid to message, save, and raise exception"""
        self.errCode = condition
        self.errMsg  = bib1msg.lookup_errmsg (condition, oid)
        self.addtlInfo = addtlInfo
        raise Bib1Err (self.errCode, self.errMsg, self.addtlInfo)
    def err_diagrec (self, diagrec):
        (typ, data) = diagrec
        if typ == 'externallyDefined':
            raise ClientNotImplErr ("Unknown external diagnostic" + str (data))
        addinfo = data.addinfo [1] # don't care about v2 vs v3
        self.err (data.condition, addinfo, data.diagnosticSetId)
    

_record_type_dict = {}
"""Map oid to renderer, field-counter, and field-getter functions"""

def _oid_to_key (oid):
    for (k,v) in _record_type_dict.items ():
        if v.oid == oid:
            return k
    raise UnknownRecSyn (oid)

def _extract_attrs (obj, attrlist):
    kw = {}
    for key in attrlist:
        if hasattr (obj, key):
            kw[key] = getattr (obj, key)
    return kw

class _AttrCheck:
    """Prevent typos"""
    attrlist = []
    not_implement_attrs = []
    def __setattr__ (self, attr, val):
        """Ensure attr is in attrlist (list of allowed attributes), or
        private (begins w/ '_'), or begins with 'X-' (reserved for users)"""
        if attr[0] == '_' or attr in self.attrlist or attr[0:2] == 'X-':
            self.__dict__[attr] = val
        elif (attr in self.not_implement_attrs):
            raise ClientNotImplError(attr)
        else:
            raise AttributeError (attr, val)
    
class Connection(_AttrCheck, _ErrHdlr):
    """Connection object"""

    not_implement_attrs = ['piggyback',
                        'schema',
                        'proxy',
                        'async']
    search_attrs = ['smallSetUpperBound',
                'largeSetLowerBound',
                'mediumSetPresentNumber',
                'smallSetElementSetNames',
                'mediumSetElementSetNames']
    init_attrs = ['user',
                  'password',
                  'group',
                  'maximumRecordSize',
                  'preferredMessageSize',
                  'lang',
                  'charset',
                  'implementationId',
                  'implementationName',
                  'implementationVersion'
                  ]
    scan_zoom_to_z3950 = {
        # translate names from ZOOM spec to Z39.50 spec names
        'stepSize' : 'stepSize',
        'numberOfEntries' : 'numberOfTermsRequested',
        'responsePosition' : 'preferredPositionInResponse'
        }

    attrlist = search_attrs + init_attrs + scan_zoom_to_z3950.keys () + [
        'databaseName',
        'namedResultSets',
        'preferredRecordSyntax', # these three inheritable by RecordSet
        'elementSetName',
        'presentChunk',
        'targetImplementationId',
        'targetImplementationName',
        'targetImplementationVersion',
        'host',
        'port',

        ] + _ErrHdlr.err_attrslist

    _queryTypes = ['S-CQL', 'S-CCL', 'RPN', 'ZSQL']
    _cli = None
    host = ""
    port = 0

    # and now, some defaults
    namedResultSets = 1
    elementSetName = 'F' 
    preferredRecordSyntax = 'USMARC'
    preferredMessageSize = 0x100000
    maximumRecordSize = 0x100000
    stepSize = 0
    numberOfEntries = 20 # for SCAN
    responsePosition = 1
    databaseName = 'Default'
    implementationId = 'PyZ3950'
    implementationName = 'PyZ3950 1.0/ZOOM v1.4'
    implementationVersion = '1.0'
    lang = None
    charset = None
    user = None
    password = None
    group = None
    presentChunk = 20 # for result sets

    def __init__(self, host, port, connect=1, **kw):
        """Establish connection to hostname:port.  kw contains initial
        values for options, and is useful for options which affect
        the InitializeRequest.  Currently supported values:
        
        user                   Username for authentication
        password               Password for authentication
        group                  Group for authentication
        maximumRecordSize      Maximum size in bytes of one record
        preferredMessageSize   Maximum size in bytes for response
        lang                   3 letter language code
        charset                Character set
        implementationId       Id for client implementation
        implementationName     Name for client implementation
        implementationVersion  Version of client implementation
        
        """

        self.host = host
        self.port = port
        self._resultSetCtr = 0
        for (k,v) in kw.items ():
            setattr (self, k, v)
        if (connect):
            self.connect()

    def connect(self):
        self._resultSetCtr += 1
        self._lastConnectCtr = self._resultSetCtr
        
        # Bump counters first, since even if we didn't reconnect
        # this time, we could have, and so any use of old connections
        # is an error.  (Old cached-and-accessed data is OK to use:
        # cached but not-yet-accessed data is probably an error, but
        # a not-yet-caught error.)
        
        if self._cli <> None and self._cli.sock <> None:
            return
        
        initkw = {}
        for attr in self.init_attrs:
            initkw[attr] = getattr(self, attr)
        if (self.namedResultSets):
            options = ['namedResultSets']
        else:
            options = []
        initkw ['ConnectionError'] = ConnectionError
        initkw ['ProtocolError'] = ProtocolError
        initkw ['UnexpectedCloseError'] = UnexpectedCloseError
        self._cli = z3950.Client (self.host, self.port,
                                  optionslist = options, **initkw)
        self.namedResultSets = self._cli.get_option ('namedResultSets')
        self.targetImplementationId = getattr (self._cli.initresp, 'implementationId', None)
        self.targetImplementationName = getattr (self._cli.initresp, 'implementationName', None)
        self.targetImplementationVersion  = getattr (self._cli.initresp, 'implementationVersion', None)
        if (hasattr (self._cli.initresp, 'userInformationField')):
            # weird.  U of Chicago returns an EXTERNAL with nothing
            # but 'encoding', ('octet-aligned', '2545') filled in.
            if (hasattr (self._cli.initresp.userInformationField,
                         'direct_reference') and
                self._cli.initresp.userInformationField.direct_reference ==
                oids.Z3950_USR_PRIVATE_OCLC_INFO_ov):
# see http://www.oclc.org/support/documentation/firstsearch/z3950/fs_z39_config_guide/ for docs                
                oclc_info = self._cli.initresp.userInformationField.encoding [1]
                # the docs are a little unclear, but I presume we're
                # supposed to report failure whenever a failReason is given.
                
                if hasattr (oclc_info, 'failReason'):
                    raise UnexpectedCloseError ('OCLC_Info ',
                                                oclc_info.failReason,
                                                getattr (oclc_info, 'text',
                                                         ' no text given '))
            
        
    def search (self, query):
        """Search, taking Query object, returning ResultSet"""
        if (not self._cli):
            self.connect()
        assert (query.typ in self._queryTypes)
        dbnames = self.databaseName.split ('+')
        self._cli.set_dbnames (dbnames)
        cur_rsn = self._make_rsn ()
        recv = self._cli.search_2 (query.query,
                                   rsn = cur_rsn,
                                   **_extract_attrs (self, self.search_attrs))
        self._resultSetCtr += 1
        rs = ResultSet (self, recv, cur_rsn, self._resultSetCtr)
        return rs        
    # and 'Error Code', 'Error Message', and 'Addt'l Info' methods still
    # eeded
    def scan (self, query):
        if (not self._cli):
            self.connect()
        self._cli.set_dbnames ([self.databaseName])
        kw = {}
        for k, xl in self.scan_zoom_to_z3950.items ():
            if hasattr (self, k):
                kw [xl] = getattr (self, k)
        return ScanSet (self._cli.scan (query.query, **kw))
    def _make_rsn (self):
        """Return result set name"""
        if self.namedResultSets:
            return "rs%d" % self._resultSetCtr
        else:
            return z3950.default_resultSetName
    def close (self):
        """Close connection"""
        self._cli.close ()
        
    def sort (self, sets, keys):
        """ Sort sets by keys, return resultset interface """
        if (not self._cli):
            self.connect()

        # XXX This should probably be shuffled down into z3950.py
        sortrelations = ['ascending', 'descending', 'ascendingByFrequency', 'descendingByFrequency']

        req = z3950.SortRequest()
        req.inputResultSetNames = []
        for s in sets:
            s._check_stale ()
            req.inputResultSetNames.append(s._resultSetName)
        cur_rsn = self._make_rsn()
        req.sortedResultSetName = cur_rsn

        zkeys = []
        for k in keys:
            zk = z3950.SortKeySpec()
            zk.sortRelation = sortrelations.index(k.relation)
            zk.caseSensitivity = k.caseInsensitive
            if (k.missingValueAction):
                zk.missingValueAction = (k.missingValueAction, None)
            if (k.missingValueData):
                zk.missingValueAction = ('missingValueData', k.missingValueData)
            value = k.sequence
            if (k.type == 'accessPoint'):
                if (value.typ <> 'RPN'):
                    raise ValueError # XXX
                l = z3950.SortKey['sortAttributes']()
                l.id = value.query[1].attributeSet
                l.list = value.query[1].rpn[1][1].attributes
                seq = ('sortAttributes', l)
            elif (k.type == 'private'):
                seq = ('privateSortKey', value)
            elif (k.type == 'elementSetName'):
                spec = z3950.Specification()
                spec.elementSpec = ('elementSetName', value)
                seq = ('elementSpec', spec)
            else:
                raise ValueError # XXX
            spec = ('generic', seq)
            zk.sortElement = spec
            zkeys.append(zk)
        req.sortSequence = zkeys
        recv = self._cli.transact(('sortRequest', req), 'sortResponse')

        self._resultSetCtr += 1
        if (hasattr(recv, 'diagnostics')):
            diag = recv.diagnostics[0][1]
            self.err(diag.condition, diag.addinfo, diag.diagnosticSetId)            

        if (not hasattr(recv, 'resultCount')):
            # First guess: sum of all input sets
            recv.resultCount = 0
            for set in sets:
                recv.resultCount += len(set)
            # Check for addInfo to override
            try:
                val = recv.otherInfo[0].information[1]
                if (val[:14] == 'Result-count: '):
                    recv.resultCount = int(val[14:])
            except:
                pass

        rs = ResultSet (self, recv, cur_rsn, self._resultSetCtr)
        return rs


class SortKey(_AttrCheck):
    attrlist = ['relation', 'caseInsensitive', 'missingValueAction', 'missingValueData', 'type', 'sequence']
    relation = "ascending"
    caseInsensitive = 1
    missingValueAction = ""
    missingValueData = ""
    type = "accessPoint"
    sequence = ""

    def __init__ (self, **kw):
        for k in kw.keys():
            setattr(self, k, kw[k])

class Query:
    def __init__ (self, typ, query):
        """Creates Query object.
Supported query types:  CCL, S-CCL, CQL, S-CQL, PQF, C2, ZSQL, CQL-TREE
"""
        typ = typ.upper()
# XXX maybe replace if ... elif ...  with dict mapping querytype to func
        if typ == 'CCL':
           self.typ = 'RPN'
           try:
               self.query = ccl.mk_rpn_query (query)
           except ccl.QuerySyntaxError, err:
               print "zoom raising", str (err), " for", query
               raise QuerySyntaxError (str(err))
        elif typ == 'S-CCL': # server-side ccl
            self.typ = typ
            self.query =  ('type-2', query)
        elif typ == 'S-CQL': # server-side cql
            self.typ = typ
            xq = asn1.EXTERNAL()
            xq.direct_reference = oids.Z3950_QUERY_CQL_ov
            xq.encoding = ('single-ASN1-type', query)
            self.query = ('type_104', xq)
        elif typ == 'CQL': # CQL to RPN transformation
            self.typ = 'RPN'
            try:
                q = CQLParser.parse(query)
                rpnq = z3950.RPNQuery()
                # XXX Allow Attribute Architecture somehow?
                rpnq.attributeSet = oids.Z3950_ATTRS_BIB1_ov
                rpnq.rpn = q.toRPN()
                self.query = ('type_1', rpnq)
            except SRWDiagnostics.SRWDiagnostic, err:
                raise err
            except:
                raise QuerySyntaxError
        elif typ == 'PQF':  # PQF to RPN transformation
            self.typ = 'RPN'
            try:
                self.query = pqf.parse(query)
            except:
                raise QuerySyntaxError

        elif typ == 'C2': # Cheshire2 Syntax
            self.typ = 'RPN'
            try:
                q = c2.parse(query)
                self.query = q[0]
            except:
                raise QuerySyntaxError
        elif typ == 'ZSQL': # External SQL
            self.typ = typ
            xq = asn1.EXTERNAL()
            xq.direct_reference = oids.Z3950_QUERY_SQL_ov
            q = z3950.SQLQuery()
            q.queryExpression = query
            xq.encoding = ('single-ASN1-type', q)
            self.query = ('type_104', xq)
        elif typ == 'CQL-TREE': # Tree to RPN
            self.typ = 'RPN'
            try:
                rpnq = z3950.RPNQuery()
                # XXX Allow Attribute Architecture
                rpnq.attributeSet = oids.Z3950_ATTRS_BIB1_ov
                rpnq.rpn = query.toRPN()
                self.query = ('type_1', rpnq)
            except SRWDiagnostics.SRWDiagnostic, err:
                raise err
            except:
                raise QuerySyntaxError
        else:
            raise ClientNotImplError ('%s queries not supported' % typ)


class ResultSet(_AttrCheck, _ErrHdlr):
    """Cache results, presenting read-only sequence interface.  If
    a surrogate diagnostic is returned for the i-th record, an
    appropriate exception will be raised on access to the i-th
    element (either access by itself or as part of a slice)."""

    inherited_elts = ['elementSetName', 'preferredRecordSyntax',
                      'presentChunk']
    attrlist = inherited_elts + _ErrHdlr.err_attrslist
    not_implement_attrs = ['piggyback',
                        'schema']

    def __init__ (self, conn, searchResult, resultSetName, ctr):
        """Only for creation by Connection object"""
        self._conn = conn # needed for 'option inheritance', see ZOOM spec
        self._searchResult = searchResult
        self._resultSetName = resultSetName
        self._records = {}
        self._ctr = ctr
        # _records is a dict indexed by preferredRecordSyntax of
        # dicts indexed by elementSetName of lists of records
        self._ensure_recs ()
        
        # whether there are any records or not, there may be
        # nonsurrogate diagnostics.  _extract_recs will get them.
        if hasattr (self._searchResult, 'records'):
            self._extract_recs (self._searchResult.records, 0)
    def __getattr__ (self, key):
        """Forward attribute access to Connection if appropriate"""
        if self.__dict__.has_key (key):
            return self.__dict__[key]
        if key in self.inherited_elts:
            return getattr (self._conn, key) # may raise AttributeError
        raise AttributeError (key)
    def _make_keywords (self):
        """Set up dict of parms for present request"""
        kw = {}
        # need for translation here from preferredRecordSyntax to recsyn
        # is kinda pointless
        if hasattr (self, 'preferredRecordSyntax'):
            try:
                kw['recsyn'] = _record_type_dict [
                    self.preferredRecordSyntax].oid
            except KeyError, err:
                raise ClientNotImplError ('Unknown record syntax ' +
                                          self.preferredRecordSyntax)
        if hasattr (self, 'elementSetName'):
            kw['esn'] = ('genericElementSetName', self.elementSetName)
        return kw
    def __len__ (self):
        """Get number of records"""
        return self._searchResult.resultCount
    def _pin (self, i):
        """Handle negative indices"""
        if i < 0:
            return i + len (self)
        return i
    def _ensure_recs (self):
        if not self._records.has_key (self.preferredRecordSyntax):
            self._records [self.preferredRecordSyntax] = {}
            self._records [self.preferredRecordSyntax][
                self.elementSetName] = [None] * len (self)
        if not self._records[self.preferredRecordSyntax].has_key (
            self.elementSetName):
            self._records [self.preferredRecordSyntax][
                self.elementSetName] = [None] * len (self)

    def _get_rec (self, i):
        return self._records [self.preferredRecordSyntax][
            self.elementSetName][i]

    def _check_stale (self):
        if self._ctr < self._conn._lastConnectCtr:
            raise ConnectionError ('Stale result set used')
        # XXX is this right?
        if (not self._conn.namedResultSets) and \
           self._ctr <> self._conn._resultSetCtr:
            raise ServerNotImplError ('Multiple Result Sets')
        # XXX or this?
    
    def _ensure_present (self, i):
        self._ensure_recs ()
        if self._get_rec (i) == None:
            self._check_stale ()
            maxreq = self.presentChunk
            if maxreq == 0: # get everything at once
                lbound = i
                count = len (self) - lbound
            else:
                lbound = (i / maxreq) * maxreq
                count = min (maxreq, len (self) - lbound)
            kw = self._make_keywords ()
            if self._get_rec (lbound) == None:
                presentResp = self._conn._cli.present (
                    start = lbound + 1,  # + 1 b/c 1-based
                    count = count,
                    rsn = self._resultSetName,
                    **kw)
                if not hasattr (presentResp, 'records'):
                    raise ProtocolError (str (presentResp))
                self._extract_recs (presentResp.records, lbound)
            # Maybe there was too much data to fit into
            # range (lbound, lbound + count).  If so, try
            # retrieving just one record. XXX could try
            # retrieving more, up to next cache bdary.
            if i <> lbound and self._get_rec (i) == None:
                presentResp  = self._conn._cli.present (
                    start = i + 1,
                    count = 1,
                    rsn = self._resultSetName,
                    **kw)
                self._extract_recs (presentResp.records, i)
        rec = self._records [self.preferredRecordSyntax][
            self.elementSetName][i]
        if rec <> None and rec.is_surrogate_diag ():
            rec.raise_exn ()
    def __getitem__ (self, i):
        """Ensure item is present, and return a Record"""
        i = self._pin (i)
        if i >= len (self):
            raise IndexError
        self._ensure_present (i)
        return self._records [self.preferredRecordSyntax][
            self.elementSetName][i]
    def __getslice__(self, i, j):
        i = self._pin (i)
        j = self._pin (j)
        if j > len (self):
            j = len (self)
        for k in range (i, j):
            self._ensure_present (k)
        if len (self._records) == 0: # XXX is this right?
            return []
        return self._records[self.preferredRecordSyntax][
            self.elementSetName] [i:j]
    def _extract_recs (self, records, lbound):
        (typ, recs) = records
        if trace_extract:
            print "Extracting", len (recs), "starting at", lbound
        if typ == 'nonSurrogateDiagnostic':
            self.err (recs.condition, "", recs.diagnosticSetId)
        elif typ == 'multipleNonSurDiagnostics':
            # see Zoom mailing list discussion of 2002/7/24 to justify
            # ignoring all but first error.
            diagRec = recs [0]
            self.err_diagrec (diagRec)
        if (typ <> 'responseRecords'):
            raise ProtocolError ("Bad records typ " + str (typ) + str (recs))
        for i,r in my_enumerate (recs):
            r = recs [i]
            dbname = getattr (r, 'name', '')
            (typ, data) = r.record
            if (typ == 'surrogateDiagnostic'):
                rec = SurrogateDiagnostic (data)

            elif typ == 'retrievalRecord':
                oid = data.direct_reference
                dat = data.encoding
                (typ, dat) = dat
                if (oid == oids.Z3950_RECSYN_USMARC_ov):
                    if typ <> 'octet-aligned':
                        raise ProtocolError (
                            "Weird record EXTERNAL MARC type: " + typ)
                rec = Record (oid, dat, dbname)
            else:
                raise ProtocolError ("Bad typ %s data %s" %
                                     (str (typ), str(data)))
            self._records[self.preferredRecordSyntax][
                self.elementSetName][lbound + i] = rec
    def delete (self): # XXX or can I handle this w/ a __del__ method?
        """Delete result set"""
        res = self._conn._cli.delete (self._resultSetName)
        if res == None: return # server doesn't support Delete
        # XXX should I throw an exn for delete errors?  Probably.

    # and 'Error Code', 'Error Message', and 'Addt'l Info' methods

    def sort(self, keys):
        return self._conn.sort([self], keys)


class SurrogateDiagnostic(_ErrHdlr):
    """Represent surrogate diagnostic.  Raise appropriate exception
    on access to syntax or data, or when raise_exn method is called.
    Currently, RecordSet relies on the return from is_surrogate_diag (),
    and calls raise_exn based on that."""
    def __init__ (self, diagrec):
        self.diagrec = diagrec
    def is_surrogate_diag (self):
        return 1
    def raise_exn (self):
        self.err_diagrec (self.diagrec)
    def __getattr__ (self, attr):
        if attr == 'data' or attr == 'syntax':
            self.raise_exn ()
        return _ErrHdlr.__getattr (self, attr)

class Record:
    """Represent retrieved record.  'syntax' attribute is a string,
      'data' attribute is the data, which is:
      
      USMARC   -- raw MARC data
      SUTRS    -- a string (possibly in the future unicode)
      XML      -- ditto
      GRS-1    -- a tree (see grs1.py for details)
      EXPLAIN  -- a hard-to-describe format (contact me if you're actually \
using this)
      OPAC     -- ditto
      
      Other representations are not yet defined."""
    def __init__ (self, oid, data, dbname):
        """Only for use by ResultSet"""
        self.syntax = _oid_to_key (oid)
        self._rt = _record_type_dict [self.syntax]
        self.data = self._rt.preproc (data)
        self.databaseName = dbname
    def is_surrogate_diag (self):
        return 0
    def get_fieldcount (self):
        """Get number of fields"""
        return self._rt.fieldcount (self.data)
    def get_field (self,spec):
        """Get field"""
        return self._rt.field (self.data, spec)
    def __str__ (self):
        """Render printably"""
        s = self._rt.renderer (self.data)
        return 'Rec: ' + str (self.syntax) + " " + s

class _RecordType:
    """Map syntax string to OID and per-syntax utility functions"""
    def __init__ (self, name, oid, renderer = lambda v:v,
                  fieldcount = lambda v:1, field = None, preproc = lambda v:v):
        """Register syntax"""
        self.oid = oid
        self.renderer = renderer
        self.fieldcount = fieldcount
        self.field = field
        self.preproc = preproc
        _record_type_dict [name] = self

# XXX do I want an OPAC class?  Probably, and render_OPAC should be
# a member function.


def render_OPAC (opac_data):
    s_list = []
    biblio_oid = opac_data.bibliographicRecord.direct_reference
    if (biblio_oid == z3950.Z3950_RECSYN_USMARC_ov):
        bib_marc = zmarc.MARC (opac_data.bibliographicRecord.encoding [1])
        s_list.append ("Bibliographic %s\n" % (str (bib_marc),) )
    else:
        s_list.append ("Unknown bibliographicRecord OID: " + str(biblio_oid))
    for i, hd in my_enumerate (opac_data.holdingsData):
        typ, data = hd
        s_list.append ('Holdings %d:' % (i,))
        if typ == 'holdingsAndCirc':
            def render (item, level = 1):
                s_list = []
                if isinstance (item, asn1.StructBase):
                    for attr, val in item.__dict__.items ():
                        if attr [0] <> '_':
                            s_list.append ("%s%s: %s" % (
                                "\t" * level, attr, "\n".join(render (val, level + 1))))
                elif (isinstance (item, type ([])) and len (item) > 0
                      and isinstance (item [0], asn1.StructBase)):
                    s_list.append ("") # generate newline
                    for i, v in my_enumerate (item):
                        s_list.append ("\t" * (level + 1) + str (i))
                        s_list += render (v, level + 1)
                else:
                    s_list.append (repr (item))
                return s_list
            s_list.append ("\n".join (render (data)))
        elif typ == 'marcHoldingsRecord':
            hold_oid = data.direct_reference
            if hold_oid == z3950.Z3950_RECSYN_USMARC_ov:
                holdings_marc = zmarc.MARC (data.encoding [1])
                s_list.append ("Holdings %s\n" % (str (holdings_marc),))
            else:
                s_list.append ("Unknown holdings OID: " + str (hold_oid))
        else:
            s_list.append ("Unknown holdings type: " + typ)
            # shouldn't happen unless z39.50 definition is extended
    return "\n".join (s_list)

_RecordType ('USMARC', z3950.Z3950_RECSYN_USMARC_ov,
            renderer = lambda v: str(zmarc.MARC(v)))
_RecordType ('UKMARC', z3950.Z3950_RECSYN_UKMARC_ov,
            renderer = lambda v: str(zmarc.MARC(v)))
_RecordType ('SUTRS', z3950.Z3950_RECSYN_SUTRS_ov)
_RecordType ('XML', z3950.Z3950_RECSYN_MIME_XML_ov)
_RecordType ('SGML', z3950.Z3950_RECSYN_MIME_SGML_ov)
_RecordType ('GRS-1', z3950.Z3950_RECSYN_GRS1_ov,
             renderer = lambda v: str (v),
             preproc = grs1.preproc)
_RecordType ('OPAC', z3950.Z3950_RECSYN_OPAC_ov, renderer = render_OPAC)
_RecordType ('EXPLAIN', z3950.Z3950_RECSYN_EXPLAIN_ov,
             renderer = lambda v: str (v))

class ScanSet (_AttrCheck, _ErrHdlr):
    """Hold result of scan.
    """
    zoom_to_z3950 = { # XXX need to provide more processing for attrs, alt 
            'freq'   : 'globalOccurrences',
            'display':  'displayTerm',
            'attrs'  :  'suggestedAttributes',
            'alt'    :  'alternativeTerm',
            'other'  :  'otherTermInfo'}
    attrlist = _ErrHdlr.err_attrslist

    def __init__ (self, scanresp):
        """For internal use only!"""
        self._scanresp = scanresp
        if hasattr (scanresp.entries, 'nonsurrogateDiagnostics'):
            self.err_diagrec (scanresp.entries.nonsurrogateDiagnostics[0])
        # Note that specification says that both entries and
        # nonsurrogate diags can be present.  This code will always
        # raise the exn, and will need to be changed if both are needed.

    def __len__ (self):
        """Return number of entries"""
        return self._scanresp.numberOfEntriesReturned
    def _get_rec (self, i):
        if (not hasattr(self._scanresp.entries, 'entries')):
            raise IndexError
        t = self._scanresp.entries.entries[i]
        if t[0] == 'termInfo':
            return t[1]
        else:
            # Only way asserts can fail here is if someone changes
            # the Z39.50 ASN.1 definitions.
            assert (t[0] == 'surrogateDiagnostic')
            diagRec = t[1]
            if diagRec [0] == 'externallyDefined':
                raise ClientNotImplError (
                    'Scan unknown surrogate diagnostic type: ' +
                                          str (diagRec))
            assert (diagRec[0] == 'defaultFormat')
            defDiagFmt = diagRec [1]
            self.err (defDiagFmt.condition, defDiagFmt.addinfo,
                      defDiagFmt.diagnosticSetId)
    def get_term (self, i):
        """Return term.  Note that get_{term,field,fields} can throw an
        exception if the i'th term is a surrogate diagnostic."""
        return self._get_rec (i).term
    def get_field (self, field, i):
        """Returns value of field:
        term: term
        freq: integer
        display: string
        attrs: currently z3950 structure, should be string of attributes
        alt: currently z3950 structure, should be [string of attrs, term] 
        other: currently z3950 structure, dunno what the best Python representation would be
        """
        f = self.zoom_to_z3950 [field]
        r = self._get_rec (i)
        return r.__dict__[f]
    def get_fields (self, i):
        """Return a dictionary mapping ZOOM's field names to values
        present in the response.  (Like get_field, but for all fields.)"""
        r = self._get_rec (i)
        d = {}
        for k,v in self.zoom_to_z3950.items ():
            val = getattr (r, v, None)
            if val <> None:
                d[k] = val
        d["term"] = self.get_term (i)
        return d
    def _pin (self, i):
        if i < 0:
            return i + len (self)
        return i
    def __getitem__ (self, i):
        return self.get_fields (self._pin (i))
    def __getslice__ (self, i, j):
        i = self._pin (i)
        j = self._pin (j)
        if j > len (self):
            j = len (self)
        return [self.get_fields (k) for k in range (i,j)]


if __name__ == '__main__':
    optlist, args = getopt.getopt (sys.argv[1:], 'h:q:t:f:a:e:v:')
    host = 'LC'
    query = ''
    qtype = 'CCL'
    fmts = ['USMARC']
    esns = ['F']
    validation = None
    for (opt, val) in optlist:
        if opt == '-h':
            host = val
        elif opt == '-q':
            query = val
        elif opt == '-t':
            qtype = val
        elif opt == '-f':
            fmts = val.split (',')
        elif opt == '-e':
            esns = val.split (',')
        elif opt == '-v':
            validation = val.split (',')
            
    rv = z3950.host_dict.get (host)
    if rv == None:
        (name, port, dbname) = host.split (':')
        port = int (port)
    else:
        (name, port, dbname) = rv
    
    conn = Connection (name, port)
    conn.databaseName = dbname

    conn.preferredRecordSyntax = fmts [0]
    def run_one (q):
        try:
            query = Query (qtype, q)
            res = conn.search (query)
            for esn in esns:
                for syn in fmts:
                    print "Syntax", syn, "Esn", esn
                    res.preferredRecordSyntax = syn
                    if esn <> 'NONE':
                        res.elementSetName = esn
                    try:
                        for r in res:
                            print str(r)
                    except ZoomError, err:
                        print "Zoom exception", err.__class__, err
#           res.delete ()
# Looks as if Oxford will close the connection if a delete is sent,
# despite claiming delete support (verified with yaz client, too).
        except ZoomError, err:
            print "Zoom exception", err.__class__, err

                    
    if query == '':
        while 1:
            q_str = raw_input ('CCL query: ')
            if q_str == '': break
            run_one (q_str)
    else:
        run_one (query)
    conn.close ()