341 lines
12 KiB
Python
341 lines
12 KiB
Python
#!/usr/bin/env python
|
|
|
|
import codecs
|
|
|
|
from PyZ3950.z3950_2001 import *
|
|
from PyZ3950.oids import *
|
|
|
|
|
|
# Hand every (OID, ASN.1 type) pair below to asn1.register_oid, in the
# order listed.  The loop temporaries are removed afterwards so they do
# not become part of this module's namespace.
for _oid, _asn_type in (
        # Z3950_RECSYN_* object identifiers
        (Z3950_RECSYN_GRS1, GenericRecord),
        (Z3950_RECSYN_SUTRS, asn1.GeneralString),
        (Z3950_RECSYN_EXPLAIN, Explain_Record),
        (Z3950_RECSYN_OPAC, OPACRecord),
        # Z3950_ES_* object identifiers
        (Z3950_ES_PERSISTRS, PersistentResultSet),
        (Z3950_ES_PERSISTQRY, PersistentQuery),
        (Z3950_ES_PERIODQRY, PeriodicQuerySchedule),
        (Z3950_ES_ITEMORDER, ItemOrder),
        (Z3950_ES_DBUPDATE, Update),
        (Z3950_ES_DBUPDATE_REV_1, Update_updrev1),
        (Z3950_ES_EXPORTSPEC, ExportSpecification),
        (Z3950_ES_EXPORTINV, ExportInvocation),
        # Z3950_USR_* / Z3950_NEG_* object identifiers
        (Z3950_USR_SEARCHRES1, SearchInfoReport),
        (Z3950_USR_INFO1, OtherInformation),
        (Z3950_NEG_CHARSET3, CharSetandLanguageNegotiation_3),
        (Z3950_USR_PRIVATE_OCLC_INFO, OCLC_UserInformation),
):
    asn1.register_oid(_oid, _asn_type)
del _oid, _asn_type
|
|
|
|
# below here is subject to change without notice, as I try to
|
|
# figure out the appropriate balance between convenience and flexibility
|
|
|
|
# When true, the charset-negotiation helpers in this module print
# diagnostic output.
trace_charset = 0

# Fallback implementationVersion used when building an Initialize request.
impl_vers = "1.0 beta"  # XXX

# XXX haven't been assigned an official implementation id yet; apply for one.
implementationId = 'PyZ39.50 - contact asl2@pobox.com'
|
|
|
|
def make_attr(set=None, atype=None, val=None, valType=None):
    """Build and return an AttributeElement.

    set     -- assigned to ``attributeSet``; left unset when None
    atype   -- assigned to ``attributeType``
    val     -- the attribute value
    valType -- 'numeric' stores ``('numeric', val)``; any other tag stores
               a complex value whose list is ``[(valType, val)]``.  When
               None it is inferred: 'numeric' if val is an int, otherwise
               'string'.
    """
    # NOTE: ``set`` shadows the builtin, but it is part of the keyword
    # interface callers may rely on, so it keeps its name.
    ae = AttributeElement()
    if set is not None:
        ae.attributeSet = set
    ae.attributeType = atype
    if valType == 'numeric' or (valType is None and isinstance(val, int)):
        ae.attributeValue = ('numeric', val)
    else:
        cattr = AttributeElement['attributeValue']['complex']()
        if valType is None:
            valType = 'string'
        cattr.list = [(valType, val)]
        ae.attributeValue = ('complex', cattr)
    return ae
|
|
|
|
# Record-syntax OIDs for which a charset switcher gets registered (see
# register_retrieval_record_oids).  The list exists to support
# recordsInSelectedCharSets == 0 while character set negotiation is in
# effect.  We don't simply iterate over Z3950_RECSYN because many of
# those syntaxes are carried in OCTET STRINGs and are thus immune to
# negotiation -- but maybe we should anyway.
retrievalRecord_oids = [
    Z3950_RECSYN_EXPLAIN_ov,
    Z3950_RECSYN_SUTRS_ov,
    Z3950_RECSYN_OPAC_ov,
    Z3950_RECSYN_SUMMARY_ov,
    Z3950_RECSYN_GRS1_ov,
    Z3950_RECSYN_ES_ov,
    Z3950_RECSYN_FRAGMENT_ov,
    Z3950_RECSYN_SQL_ov,
]
|
|
|
|
|
|
def register_retrieval_record_oids(ctx, new_codec_name='ascii'):
    """Register a charset switcher on *ctx* for each retrieval-record OID.

    The codec named by *new_codec_name* is looked up once, up front.  The
    callback registered for every entry of ``retrievalRecord_oids`` calls
    ``ctx.push_codec()`` and then installs that codec for
    ``asn1.GeneralString`` via ``ctx.set_codec``.
    """
    codec = codecs.lookup(new_codec_name)

    def switch_codec():
        ctx.push_codec()
        ctx.set_codec(asn1.GeneralString, codec)

    for record_oid in retrievalRecord_oids:
        ctx.register_charset_switcher(record_oid, switch_codec)
|
|
|
|
# Maps ISO 10646 transfer-syntax OIDs to Python codec names.
iso_10646_oid_to_name = {
    # XXX ucs-2 should differ from utf-16, in that ucs-2 forbids any
    # characters not in the BMP, whereas utf-16 is a 16-bit encoding which
    # encodes those characters into multiple 16-bit units
    UNICODE_PART1_XFERSYN_UCS2_ov: 'utf-16',

    # XXX no python support for this encoding?
    # UNICODE_PART1_XFERSYN_UCS4_ov: 'ucs-4',

    UNICODE_PART1_XFERSYN_UTF16_ov: 'utf-16',
    UNICODE_PART1_XFERSYN_UTF8_ov: 'utf-8',
}
|
|
|
|
def try_get_iso10646_oid(charset_name):
    """Reverse-look-up *charset_name* in ``iso_10646_oid_to_name``.

    Returns the OID whose mapped name equals *charset_name* exactly, or
    None when no entry matches.
    """
    # XXX the UCS2 and UTF16 OIDs both map to 'utf-16', so which of the
    # two is returned for that name depends on dict iteration order.
    # ``items()`` (rather than ``iteritems()``) works on Python 2 and 3.
    for oid, name in iso_10646_oid_to_name.items():
        if name == charset_name:
            return oid
    return None
|
|
|
|
def asn_charset_to_name(charset_tup):
    """Translate a (tag, value) charset choice into a charset name.

    Two tags are understood:

    'iso10646' -- ``value.encodingLevel`` is looked up in
                  ``iso_10646_oid_to_name``
    'private'  -- value is itself a (tag, external) pair; a name is
                  produced only for the 'externallySpecified' form whose
                  ``direct_reference`` is
                  Z3950_NEG_PRIVATE_INDEXDATA_CHARSETNAME_ov and whose
                  encoding is tagged 'octet-aligned'

    Returns the charset name, or None if none could be determined.
    """
    # The trace output is formatted into one string so the same text is
    # printed whether ``print`` is a statement (Python 2) or a function.
    if trace_charset:
        print("asn_charset_to_name %s" % (charset_tup,))
    charset_name = None
    typ, charset = charset_tup
    if typ == 'iso10646':
        charset_name = iso_10646_oid_to_name.get(charset.encodingLevel)
    elif typ == 'private':
        spectyp, val = charset
        if spectyp == 'externallySpecified':
            oid = getattr(val, 'direct_reference', None)
            if oid == Z3950_NEG_PRIVATE_INDEXDATA_CHARSETNAME_ov:
                enctyp, encval = val.encoding
                if enctyp == 'octet-aligned':
                    charset_name = encval
    if trace_charset:
        print("returning charset %s" % (charset_name,))
    return charset_name
|
|
|
|
|
|
def charset_to_asn(charset_name):
    """Build the (tag, value) charset choice for *charset_name*.

    If ``try_get_iso10646_oid`` knows the name, returns
    ``('iso10646', Iso10646_3)`` with ``encodingLevel`` set to that OID.
    Otherwise the name is wrapped in an EXTERNAL whose direct reference is
    Z3950_NEG_PRIVATE_INDEXDATA_CHARSETNAME_ov and returned as
    ``('private', ('externallySpecified', external))``.
    """
    oid = try_get_iso10646_oid(charset_name)
    if oid is not None:
        iso10646 = Iso10646_3()
        iso10646.encodingLevel = oid
        return ('iso10646', iso10646)

    ext = asn1.EXTERNAL()
    ext.direct_reference = Z3950_NEG_PRIVATE_INDEXDATA_CHARSETNAME_ov
    ext.encoding = ('octet-aligned', charset_name)
    return ('private', ('externallySpecified', ext))
|
|
|
|
class CharsetNegotReq:
    """Origin-side charset/language negotiation proposal.

    Holds the proposed values as plain Python data and converts them to
    and from the ``('proposal', OriginProposal_3)`` form.
    """

    def __init__ (self, charset_list = None, lang_list = None,
                  records_in_charsets = None):
        """charset_list is a list of character set names, either ISO10646
        (UTF-8 or UTF-16), or private.  We support Index Data's semantics
        for private character sets (see
        http://www.indexdata.dk/pipermail/yazlist/2003-March/000504.html), so
        you can pass any character set name for which Python has a codec
        installed (but please don't use rot13 in production).  Note that
        there should be at most one of each of (ISO10646, private).  (No, I
        don't know why, but it says so in the ASN.1 definition comments.)

        lang_list is a list of language codes, as defined in ANSI Z39.53-1994
        (see, e.g., http://xml.coverpages.org/nisoLang3-1994.html).

        records_in_charsets governs whether charset negotiation applies to
        records, as well.

        Any of these parameters can be None, since the corresponding
        elements in the ASN.1 are OPTIONAL.
        """
        self.charset_list = charset_list
        self.lang_list = lang_list
        self.records_in_charsets = records_in_charsets

    def __str__ (self):
        return "Charset negot request %s %s %s" % (
            str (self.charset_list), str (self.lang_list),
            str (self.records_in_charsets))

    def pack_proposal (self):
        """Return ``('proposal', OriginProposal_3)`` built from this object.

        Attributes that are None are left unset on the proposal.
        """
        origin_prop = OriginProposal_3 ()
        if self.charset_list is not None:
            origin_prop.proposedCharSets = [
                charset_to_asn (charset_name)
                for charset_name in self.charset_list]
        if self.lang_list is not None:
            origin_prop.proposedlanguages = self.lang_list
        if self.records_in_charsets is not None:
            origin_prop.recordsInSelectedCharSets = (
                self.records_in_charsets)
        return ('proposal', origin_prop)

    def unpack_proposal (self, csn):
        """Populate this object from a ``('proposal', value)`` pair.

        charset_list is replaced only when the proposal carries
        proposedCharSets; entries whose name cannot be determined are
        dropped.  lang_list is replaced only when proposedlanguages is
        present.  records_in_charsets is always overwritten (with None if
        the proposal lacks recordsInSelectedCharSets).
        """
        (tag, proposal) = csn
        assert (tag == 'proposal')
        pcs = getattr (proposal, 'proposedCharSets', None)
        if pcs is not None:
            if trace_charset:
                # single formatted argument: prints the same text whether
                # print is a statement (Python 2) or a function (Python 3)
                print ("pcs %s" % (pcs,))
            self.charset_list = []
            for charset in pcs:
                charset_name = asn_charset_to_name (charset)
                if charset_name is not None:
                    self.charset_list.append (charset_name)

        lang = getattr (proposal, 'proposedlanguages', None)
        if lang is not None:
            self.lang_list = lang
        self.records_in_charsets = getattr (
            proposal, 'recordsInSelectedCharSets', None)
|
|
|
|
|
|
class CharsetNegotResp:
    """Target-side answer to a charset/language negotiation proposal.

    Holds the selected values as plain Python data and converts them to
    and from the ``('response', TargetResponse_3)`` form.
    """

    def __init__ (self, charset = None, lang = None,
                  records_in_charsets = None):
        """charset is the selected character set name, lang the selected
        language, and records_in_charsets the recordsInSelectedCharSets
        value.  Any of them may be None.
        """
        self.charset = charset
        self.lang = lang
        self.records_in_charsets = records_in_charsets

    def __str__ (self):
        return "CharsetNegotResp: %s %s %s" % (
            str (self.charset), str (self.lang),
            str (self.records_in_charsets))

    def unpack_negot_resp (self, neg_resp):
        """Populate this object from a ``('response', value)`` pair.

        All three attributes are overwritten; each becomes None when the
        corresponding element is absent from the response.
        """
        typ, val = neg_resp
        assert (typ == 'response')
        self.charset = None
        scs = getattr (val, 'selectedCharSets', None)
        if scs is not None:
            self.charset = asn_charset_to_name (scs)
        self.lang = getattr (val, 'selectedLanguage', None)
        self.records_in_charsets = getattr (
            val, 'recordsInSelectedCharSets', None)

    def pack_negot_resp (self):
        """Return ``('response', TargetResponse_3)`` built from this object.

        Attributes that are None are left unset on the response.
        """
        resp = TargetResponse_3 ()
        if self.charset is not None:
            resp.selectedCharSets = charset_to_asn (self.charset)
        if self.lang is not None:
            resp.selectedLanguage = self.lang
        if self.records_in_charsets is not None:
            resp.recordsInSelectedCharSets = self.records_in_charsets
        return ('response', resp)
|
|
|
|
|
|
def get_charset_negot(init):
    """Extract the charset negotiation record from an Initialize PDU.

    *init* can be either an InitializeRequest or an InitializeResponse.
    Returns None when the 'negotiation' option is off or no matching
    record is found.  Otherwise returns the value of the first
    other-information element that is 'externallyDefinedInfo', references
    Z3950_NEG_CHARSET3_ov and is encoded as 'single-ASN1-type'.

    The elements are taken from ``init.otherInfo`` when that attribute
    exists; failing that, from ``init.userInformationField`` provided its
    direct reference is Z3950_USR_INFO1_ov.
    """
    if trace_charset:
        print(init)
    if not init.options['negotiation']:
        return None

    otherInfo = []
    if hasattr(init, 'otherInfo'):
        otherInfo = init.otherInfo
    elif hasattr(init, 'userInformationField'):
        ui = init.userInformationField
        if ui.direct_reference == Z3950_USR_INFO1_ov:
            # the encoding tag itself is not inspected here
            _, otherInfo = ui.encoding

    for oi in otherInfo:
        if trace_charset:
            print(oi)
        typ, val = oi.information
        if typ == 'externallyDefinedInfo':
            if val.direct_reference == Z3950_NEG_CHARSET3_ov:
                typ, val = val.encoding
                if typ == 'single-ASN1-type':
                    return val

    return None
|
|
|
|
|
|
def set_charset_negot(init, val, v3_flag):
    """Attach the charset negotiation record *val* to an Initialize PDU.

    *init* can be either an InitializeRequest or an InitializeResponse.
    *val* is wrapped in an EXTERNAL referencing Z3950_NEG_CHARSET3_ov and
    placed in a one-element other-information list.  When *v3_flag* is
    true the list is stored as ``init.otherInfo``; otherwise it is wrapped
    in a second EXTERNAL referencing Z3950_USR_INFO1_ov and stored as
    ``init.userInformationField``.
    """
    negot = asn1.EXTERNAL()
    negot.direct_reference = Z3950_NEG_CHARSET3_ov
    negot.encoding = ('single-ASN1-type', val)

    OtherInfoElt = OtherInformation[0]
    oi_elt = OtherInfoElt()
    oi_elt.information = ('externallyDefinedInfo', negot)
    other_info = [oi_elt]
    if trace_charset:
        # single formatted argument: prints the same text whether print
        # is a statement (Python 2) or a function (Python 3)
        print("%s %s" % (v3_flag, oi_elt))

    if v3_flag:
        init.otherInfo = other_info
    else:
        ui = asn1.EXTERNAL()
        ui.direct_reference = Z3950_USR_INFO1_ov
        ui.encoding = ('single-ASN1-type', other_info)  # XXX test this
        # see http://lcweb.loc.gov/z3950/agency/defns/user-1.html
        init.userInformationField = ui
|
|
|
|
|
|
# Default message size: 0x10000 bytes.
def_msg_size = 1 << 16
|
|
|
|
# rethink optionslist. Maybe we should just turn on all the
|
|
# bits the underlying code supports? We do need to be able to
|
|
# turn off multiple result sets for testing (see tests/test2.py),
|
|
# but that doesn't have to be the default.
|
|
def make_initreq(optionslist=None, authentication=None, v3=0,
                 negotiate_charset=0, preferredMessageSize=0x100000,
                 maximumRecordSize=0x100000, implementationId="",
                 implementationName="", implementationVersion=""):
    """Build and return an InitializeRequest.

    optionslist       -- extra option names to switch on, or None
    authentication    -- sequence of up to (userId, password, groupId);
                         None entries are skipped and extra entries are
                         silently ignored.  None means no idAuthentication.
    v3                -- value for the 'version_3' protocol bit (versions
                         1 and 2 are always set)
    negotiate_charset -- value for the 'negotiation' option
    preferredMessageSize, maximumRecordSize
                      -- stored as preferredMessageSize and
                         exceptionalRecordSize respectively
    implementationId, implementationName, implementationVersion
                      -- identification strings; an empty value selects
                         the module default ('PyZ3950' for the name,
                         ``impl_vers`` for the version)
    """
    # see http://lcweb.loc.gov/z3950/agency/wisdom/unicode.html
    InitReq = InitializeRequest()
    InitReq.protocolVersion = ProtocolVersion()
    InitReq.protocolVersion['version_1'] = 1
    InitReq.protocolVersion['version_2'] = 1
    InitReq.protocolVersion['version_3'] = v3

    InitReq.options = Options()
    if optionslist is not None:
        for o in optionslist:
            InitReq.options[o] = 1
    # These are always requested, whatever optionslist says.
    for always_on in ('search', 'present', 'delSet', 'scan', 'sort',
                      'extendedServices', 'dedup'):
        InitReq.options[always_on] = 1
    # XXX can negotiate other stuff, too
    InitReq.options['negotiation'] = negotiate_charset

    # Preferred and Exceptional msg sizes are pretty arbitrary --
    # we dynamically allocate no matter what
    InitReq.preferredMessageSize = preferredMessageSize
    InitReq.exceptionalRecordSize = maximumRecordSize

    if not implementationId:
        # The keyword argument shadows the module-level default of the
        # same name, and the ``impl_id`` name this fallback used to read
        # is not defined in this module (NameError on the default call
        # path).  Look the default up in the module namespace instead,
        # still honouring ``impl_id`` should something provide it.
        module_ns = globals()
        implementationId = module_ns.get('impl_id',
                                         module_ns['implementationId'])
    InitReq.implementationId = implementationId
    InitReq.implementationName = implementationName or 'PyZ3950'
    InitReq.implementationVersion = implementationVersion or impl_vers

    if authentication is not None:
        class UP: pass
        up = UP()
        upAttrList = ['userId', 'password', 'groupId']
        for val, attr in zip(authentication, upAttrList):  # silently truncate
            if val is not None:
                setattr(up, attr, val)
        InitReq.idAuthentication = ('idPass', up)

    return InitReq
|
|
|
|
def make_sreq(query, dbnames, rsn, **kw):
    """Build and return a SearchRequest.

    query   -- stored as ``query``
    dbnames -- stored as ``databaseNames``
    rsn     -- stored as ``resultSetName``
    **kw    -- any further attributes to set on the request; these are
               applied last and so override the values set here
    """
    request = SearchRequest()
    request.smallSetUpperBound = 0
    request.largeSetLowerBound = 1
    request.mediumSetPresentNumber = 0
    # as per http://lcweb.loc.gov/z3950/lcserver.html, Jun 07 2001,
    # to work around Endeavor bugs in 1.13
    request.replaceIndicator = 1
    request.resultSetName = rsn
    request.databaseNames = dbnames
    request.query = query
    for attr_name, attr_value in kw.items():
        setattr(request, attr_name, attr_value)
    return request
|