faset over fra Z3950 til google books
This commit is contained in:
202
python/gdata/tlslite/utils/xmltools.py
Normal file
202
python/gdata/tlslite/utils/xmltools.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""Helper functions for XML.
|
||||
|
||||
This module has misc. helper functions for working with XML DOM nodes."""
|
||||
|
||||
from compat import *
|
||||
import os
|
||||
import re
|
||||
|
||||
# Pick the XML parsing backend for this platform: minidom on regular
# Python, the javax.xml.parsers document builder when running on Jython.
if os.name != "java":
    from xml.dom import minidom
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML string s with minidom and return the document."""
        return minidom.parseString(s)
else:
    # Only for Jython
    from javax.xml.parsers import *
    import java

    # One module-level builder, created at import time and used by every
    # parseDocument() call.
    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML string s with the Java builder and return the document."""
        xmlBytes = java.lang.String(s).getBytes()
        return builder.parse(java.io.ByteArrayInputStream(xmlBytes))
|
||||
|
||||
def parseAndStripWhitespace(s):
    """Parse the XML string s and return its whitespace-stripped root element.

    The string is parsed with parseDocument(); the resulting root element is
    then passed to stripWhitespace(), which edits the tree in place.

    Raises SyntaxError if parsing fails (the message is the text of the
    underlying error) or if stripWhitespace() rejects the tree.
    """
    import sys

    try:
        element = parseDocument(s).documentElement
    except (KeyboardInterrupt, SystemExit):
        # An interrupt or an interpreter exit is not a malformed document;
        # let it through instead of disguising it as a SyntaxError.
        raise
    except BaseException:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause because the binding syntax differs between Python
        # versions ("except X, e" vs "except X as e").
        raise SyntaxError(str(sys.exc_info()[1]))
    stripWhitespace(element)
    return element
|
||||
|
||||
#Walks a DOM tree and removes the whitespace text nodes found beside child
#elements, as long as that whitespace is correctly tab-ified
def stripWhitespace(element, tab=0):
    """Remove indentation text nodes beneath element, in place.

    tab is the nesting depth of element. Whitespace before each child
    element must be a newline followed by tab+1 tab characters; whitespace
    before element's closing tag must be a newline followed by tab tabs.
    Child elements are processed recursively with tab+1.

    Raises SyntaxError if element has no children, if its only child is an
    element, if a text node beside child elements is not the expected
    indentation, or if a node is neither a text node nor an element.
    """
    element.normalize()

    closingIndent = "\n" + ("\t" * tab)
    childIndent = closingIndent + "\t"

    # Read .length rather than calling len(): len() on the child list
    # doesn't work in Jython.
    childCount = element.childNodes.length

    # Zero children aren't allowed (i.e. <empty/>).
    # This makes writing output simpler, and matches Canonical XML.
    if childCount == 0:
        raise SyntaxError("Empty XML elements not allowed")

    # A single child must be text content.
    if childCount == 1:
        only = element.firstChild
        if only.nodeType == only.TEXT_NODE:
            # Text that is exactly the closing indentation means the
            # element holds nothing else, so drop it; other text is kept.
            if only.data == closingIndent:
                element.removeChild(only)
            return
        if only.nodeType == only.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        raise SyntaxError("Unexpected node type in XML document")

    # Otherwise there are several children: recurse into the elements and
    # validate-then-remove the text nodes between them.
    node = element.firstChild
    while node:
        # Remember the successor first, since node may be removed below.
        following = node.nextSibling
        if node.nodeType == node.ELEMENT_NODE:
            stripWhitespace(node, tab + 1)
        elif node.nodeType == node.TEXT_NODE:
            if node == element.lastChild:
                expected = closingIndent
            else:
                expected = childIndent
            if node.data != expected:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            element.removeChild(node)
        else:
            raise SyntaxError("Unexpected node type in XML document")
        node = following
|
||||
|
||||
|
||||
def checkName(element, name):
    """Check that element is an element node and, optionally, has tag name.

    element -- the DOM node to check.
    name    -- the required tag name, or None to accept any tag name.

    Raises SyntaxError if element is not an element node, or if name is
    given and differs from element.tagName. Returns None otherwise.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Missing element: '%s'" % name)

    # None means the caller does not care which tag this is.
    if name is not None and element.tagName != name:
        raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))
|
||||
|
||||
def getChild(element, index, name=None):
    """Return the child of element at position index, checking its name.

    element -- the parent; must be an element node.
    index   -- position in element.childNodes.
    name    -- tag name the child must have, or None to accept any tag
               (the child must still be an element; see checkName()).

    Raises SyntaxError if element is not an element node, if there is no
    child at index, or if checkName() rejects the child.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChild()")

    child = element.childNodes.item(index)
    # The missing-child case is detected by item() yielding None.
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    checkName(child, name)
    return child
|
||||
|
||||
def getChildIter(element, index):
    """Return a cursor over element's child nodes, starting at position index.

    The returned object offers two methods:
      next()     -- return the child at the current position and advance;
                    once past the last child, return None without advancing.
      checkEnd() -- raise SyntaxError unless the current position is exactly
                    the number of children (i.e. all were consumed).
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            children = self.element.childNodes
            # Use .length, not len(): len() on the child list doesn't work
            # in Jython.
            if self.index < children.length:
                retVal = children.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)
    return ChildIter(element, index)
|
||||
|
||||
def getChildOrNone(element, index):
    """Return the child node of element at position index, if there is one.

    Unlike getChild(), no name check is made and a missing child is not an
    error: whatever element.childNodes.item(index) yields is returned
    as-is (None when there is no child at that position).

    Raises SyntaxError if element is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        # Name this function in the message, not getChild().
        raise SyntaxError("Wrong node type in getChildOrNone()")
    return element.childNodes.item(index)
|
||||
|
||||
def getLastChild(element, index, name=None):
    """Return the child at position index, requiring it to be the last child.

    element -- the parent; must be an element node.
    index   -- position in element.childNodes.
    name    -- tag name the child must have, or None to accept any tag
               (the child must still be an element; see checkName()).

    Raises SyntaxError if element is not an element node, if there is no
    child at index, if that child is not element.lastChild, or if
    checkName() rejects the child.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getLastChild()")

    child = element.childNodes.item(index)
    # The missing-child case is detected by item() yielding None.
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    if child != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)
    checkName(child, name)
    return child
|
||||
|
||||
#Regular expressions for syntax-checking attribute and element content.
#They are applied with re.match(), which only anchors the start of the
#value; the trailing \Z anchors the end so the whole value must match.
#Raw strings keep the backslashes intact for the regex engine.
nsRegEx = r"http://trevp\.net/cryptoID\Z"
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keyRegEx = r"[A-Z]\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#A number from 0 to (1 billion)-1. The alternation is wrapped in a
#non-capturing group so that \Z applies to both branches (otherwise any
#value merely starting with "0" would match); group 1 is unchanged.
notAfterDeltaRegEx = r"(?:0|([1-9][0-9]{0,8}))\Z"
#Wrapped and end-anchored for the same reason; groups 1 and 2 are unchanged.
booleanRegEx = r"(?:(true)|(false))\Z"
|
||||
|
||||
def getReqAttribute(element, attrName, regEx=""):
    """Read, validate and consume a mandatory attribute of element.

    element  -- must be an element node.
    attrName -- name of the attribute to read.
    regEx    -- pattern the value is checked against with re.match();
                the default "" accepts any value.

    On success the attribute is removed from element and its value is
    returned converted with str().

    Raises SyntaxError if element is not an element node, if the attribute
    value is empty or absent, or if the value does not match regEx.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getReqAttribute()")

    attrValue = element.getAttribute(attrName)
    if not attrValue:
        raise SyntaxError("Missing Attribute: " + attrName)

    if re.match(regEx, attrValue) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, attrValue))

    # The attribute is only removed once it has passed validation.
    element.removeAttribute(attrName)

    #de-unicode it; this is needed for bsddb, for example
    return str(attrValue)
|
||||
|
||||
def getAttribute(element, attrName, regEx=""):
    """Read, validate and consume an optional attribute of element.

    element  -- must be an element node.
    attrName -- name of the attribute to read.
    regEx    -- pattern a non-empty value is checked against with
                re.match(); the default "" accepts any value.

    If the value is empty it is returned (via str()) without validation and
    nothing is removed. Otherwise the value is validated, the attribute is
    removed from element, and the value is returned converted with str().

    Raises SyntaxError if element is not an element node, or if a non-empty
    value does not match regEx.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    attrValue = element.getAttribute(attrName)
    if not attrValue:
        # Nothing to validate or consume.
        return str(attrValue)

    if re.match(regEx, attrValue) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, attrValue))
    element.removeAttribute(attrName)

    #de-unicode it; this is needed for bsddb, for example
    return str(attrValue)
|
||||
|
||||
def checkNoMoreAttributes(element):
    """Check that element has no attributes left.

    Raises SyntaxError if element is not an element node or if
    element.attributes still holds at least one attribute.
    """
    isElement = element.nodeType == element.ELEMENT_NODE
    if not isElement:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    remaining = element.attributes.length
    if remaining != 0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)
|
||||
|
||||
def getText(element, regEx=""):
    """Return the text held by element's first child, after validating it.

    element -- node whose first child is expected to be a text node.
    regEx   -- pattern the text is checked against with re.match();
               the default "" accepts any text.

    Only element.firstChild is examined. The text is returned converted
    with str().

    Raises SyntaxError if element has no children, if the first child is
    not a text node, or if the text does not match regEx.
    """
    textNode = element.firstChild
    if textNode is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if textNode.nodeType != textNode.TEXT_NODE:
        raise SyntaxError("Non-text node: '%s'" % element.tagName)
    if not re.match(regEx, textNode.data):
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, textNode.data))
    #de-unicode it; this is needed for bsddb, for example
    return str(textNode.data)
|
||||
|
||||
#Function for adding tabs to a string
def indent(s, steps, ch="\t"):
    """Prefix every line of s with ch repeated steps times.

    s     -- the text to indent; may span several lines.
    steps -- how many copies of ch to put in front of each line.
    ch    -- the indentation unit (a tab by default).

    Every line gets the prefix, blank lines included. If s ends with a
    newline, nothing is added after that final newline. An empty s, or an
    empty prefix (steps of 0 or less, or an empty ch), returns s unchanged.
    """
    tabs = ch * steps
    if not s or not tabs:
        # Guarding here avoids indexing an empty string, and avoids the
        # s[:-0] slice below, which would throw the whole string away.
        return s

    endsWithNewline = s.endswith("\n")
    s = tabs + s.replace("\n", "\n" + tabs)
    if endsWithNewline:
        # The replace() also indented the (empty) text after the final
        # newline; trim that trailing prefix off again.
        s = s[:-len(tabs)]
    return s
|
||||
|
||||
def escape(s):
    """Return s escaped for use as XML character data.

    Delegates to xml.sax.saxutils.escape(), which replaces &, < and >.
    """
    # Imported here because the module-level import of saxutils only runs
    # on the non-Jython branch, which left this function without the name
    # when running under Jython.
    from xml.sax import saxutils
    return saxutils.escape(s)
|
Reference in New Issue
Block a user