546 lines
20 KiB
Python
546 lines
20 KiB
Python
#!/usr/bin/env python
|
|
#
|
|
# Copyright (C) 2008 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
# This module is used for version 2 of the Google Data APIs.
|
|
|
|
|
|
__author__ = 'j.s@google.com (Jeff Scudder)'
|
|
|
|
|
|
import inspect
|
|
try:
|
|
from xml.etree import cElementTree as ElementTree
|
|
except ImportError:
|
|
try:
|
|
import cElementTree as ElementTree
|
|
except ImportError:
|
|
try:
|
|
from xml.etree import ElementTree
|
|
except ImportError:
|
|
from elementtree import ElementTree
|
|
|
|
|
|
try:
|
|
from xml.dom.minidom import parseString as xmlString
|
|
except ImportError:
|
|
xmlString = None
|
|
|
|
STRING_ENCODING = 'utf-8'
|
|
|
|
|
|
class XmlElement(object):
|
|
"""Represents an element node in an XML document.
|
|
|
|
The text member is a UTF-8 encoded str or unicode.
|
|
"""
|
|
_qname = None
|
|
_other_elements = None
|
|
_other_attributes = None
|
|
# The rule set contains mappings for XML qnames to child members and the
|
|
# appropriate member classes.
|
|
_rule_set = None
|
|
_members = None
|
|
text = None
|
|
|
|
def __init__(self, text=None, *args, **kwargs):
|
|
if ('_members' not in self.__class__.__dict__
|
|
or self.__class__._members is None):
|
|
self.__class__._members = tuple(self.__class__._list_xml_members())
|
|
for member_name, member_type in self.__class__._members:
|
|
if member_name in kwargs:
|
|
setattr(self, member_name, kwargs[member_name])
|
|
else:
|
|
if isinstance(member_type, list):
|
|
setattr(self, member_name, [])
|
|
else:
|
|
setattr(self, member_name, None)
|
|
self._other_elements = []
|
|
self._other_attributes = {}
|
|
if text is not None:
|
|
self.text = text
|
|
|
|
def _list_xml_members(cls):
|
|
"""Generator listing all members which are XML elements or attributes.
|
|
|
|
The following members would be considered XML members:
|
|
foo = 'abc' - indicates an XML attribute with the qname abc
|
|
foo = SomeElement - indicates an XML child element
|
|
foo = [AnElement] - indicates a repeating XML child element, each instance
|
|
will be stored in a list in this member
|
|
foo = ('att1', '{http://example.com/namespace}att2') - indicates an XML
|
|
attribute which has different parsing rules in different versions of
|
|
the protocol. Version 1 of the XML parsing rules will look for an
|
|
attribute with the qname 'att1' but verion 2 of the parsing rules will
|
|
look for a namespaced attribute with the local name of 'att2' and an
|
|
XML namespace of 'http://example.com/namespace'.
|
|
"""
|
|
members = []
|
|
for pair in inspect.getmembers(cls):
|
|
if not pair[0].startswith('_') and pair[0] != 'text':
|
|
member_type = pair[1]
|
|
if (isinstance(member_type, tuple) or isinstance(member_type, list)
|
|
or isinstance(member_type, (str, unicode))
|
|
or (inspect.isclass(member_type)
|
|
and issubclass(member_type, XmlElement))):
|
|
members.append(pair)
|
|
return members
|
|
|
|
_list_xml_members = classmethod(_list_xml_members)
|
|
|
|
def _get_rules(cls, version):
|
|
"""Initializes the _rule_set for the class which is used when parsing XML.
|
|
|
|
This method is used internally for parsing and generating XML for an
|
|
XmlElement. It is not recommended that you call this method directly.
|
|
|
|
Returns:
|
|
A tuple containing the XML parsing rules for the appropriate version.
|
|
|
|
The tuple looks like:
|
|
(qname, {sub_element_qname: (member_name, member_class, repeating), ..},
|
|
{attribute_qname: member_name})
|
|
|
|
To give a couple of concrete example, the atom.data.Control _get_rules
|
|
with version of 2 will return:
|
|
('{http://www.w3.org/2007/app}control',
|
|
{'{http://www.w3.org/2007/app}draft': ('draft',
|
|
<class 'atom.data.Draft'>,
|
|
False)},
|
|
{})
|
|
Calling _get_rules with version 1 on gdata.data.FeedLink will produce:
|
|
('{http://schemas.google.com/g/2005}feedLink',
|
|
{'{http://www.w3.org/2005/Atom}feed': ('feed',
|
|
<class 'gdata.data.GDFeed'>,
|
|
False)},
|
|
{'href': 'href', 'readOnly': 'read_only', 'countHint': 'count_hint',
|
|
'rel': 'rel'})
|
|
"""
|
|
# Initialize the _rule_set to make sure there is a slot available to store
|
|
# the parsing rules for this version of the XML schema.
|
|
# Look for rule set in the class __dict__ proxy so that only the
|
|
# _rule_set for this class will be found. By using the dict proxy
|
|
# we avoid finding rule_sets defined in superclasses.
|
|
# The four lines below provide support for any number of versions, but it
|
|
# runs a bit slower then hard coding slots for two versions, so I'm using
|
|
# the below two lines.
|
|
#if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
|
|
# cls._rule_set = []
|
|
#while len(cls.__dict__['_rule_set']) < version:
|
|
# cls._rule_set.append(None)
|
|
# If there is no rule set cache in the class, provide slots for two XML
|
|
# versions. If and when there is a version 3, this list will need to be
|
|
# expanded.
|
|
if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
|
|
cls._rule_set = [None, None]
|
|
# If a version higher than 2 is requested, fall back to version 2 because
|
|
# 2 is currently the highest supported version.
|
|
if version > 2:
|
|
return cls._get_rules(2)
|
|
# Check the dict proxy for the rule set to avoid finding any rule sets
|
|
# which belong to the superclass. We only want rule sets for this class.
|
|
if cls._rule_set[version-1] is None:
|
|
# The rule set for each version consists of the qname for this element
|
|
# ('{namespace}tag'), a dictionary (elements) for looking up the
|
|
# corresponding class member when given a child element's qname, and a
|
|
# dictionary (attributes) for looking up the corresponding class member
|
|
# when given an XML attribute's qname.
|
|
elements = {}
|
|
attributes = {}
|
|
if ('_members' not in cls.__dict__ or cls._members is None):
|
|
cls._members = tuple(cls._list_xml_members())
|
|
for member_name, target in cls._members:
|
|
if isinstance(target, list):
|
|
# This member points to a repeating element.
|
|
elements[_get_qname(target[0], version)] = (member_name, target[0],
|
|
True)
|
|
elif isinstance(target, tuple):
|
|
# This member points to a versioned XML attribute.
|
|
if version <= len(target):
|
|
attributes[target[version-1]] = member_name
|
|
else:
|
|
attributes[target[-1]] = member_name
|
|
elif isinstance(target, (str, unicode)):
|
|
# This member points to an XML attribute.
|
|
attributes[target] = member_name
|
|
elif issubclass(target, XmlElement):
|
|
# This member points to a single occurance element.
|
|
elements[_get_qname(target, version)] = (member_name, target, False)
|
|
version_rules = (_get_qname(cls, version), elements, attributes)
|
|
cls._rule_set[version-1] = version_rules
|
|
return version_rules
|
|
else:
|
|
return cls._rule_set[version-1]
|
|
|
|
_get_rules = classmethod(_get_rules)
|
|
|
|
def get_elements(self, tag=None, namespace=None, version=1):
|
|
"""Find all sub elements which match the tag and namespace.
|
|
|
|
To find all elements in this object, call get_elements with the tag and
|
|
namespace both set to None (the default). This method searches through
|
|
the object's members and the elements stored in _other_elements which
|
|
did not match any of the XML parsing rules for this class.
|
|
|
|
Args:
|
|
tag: str
|
|
namespace: str
|
|
version: int Specifies the version of the XML rules to be used when
|
|
searching for matching elements.
|
|
|
|
Returns:
|
|
A list of the matching XmlElements.
|
|
"""
|
|
matches = []
|
|
ignored1, elements, ignored2 = self.__class__._get_rules(version)
|
|
if elements:
|
|
for qname, element_def in elements.iteritems():
|
|
member = getattr(self, element_def[0])
|
|
if member:
|
|
if _qname_matches(tag, namespace, qname):
|
|
if element_def[2]:
|
|
# If this is a repeating element, copy all instances into the
|
|
# result list.
|
|
matches.extend(member)
|
|
else:
|
|
matches.append(member)
|
|
for element in self._other_elements:
|
|
if _qname_matches(tag, namespace, element._qname):
|
|
matches.append(element)
|
|
return matches
|
|
|
|
GetElements = get_elements
|
|
# FindExtensions and FindChildren are provided for backwards compatibility
|
|
# to the atom.AtomBase class.
|
|
# However, FindExtensions may return more results than the v1 atom.AtomBase
|
|
# method does, because get_elements searches both the expected children
|
|
# and the unexpected "other elements". The old AtomBase.FindExtensions
|
|
# method searched only "other elements" AKA extension_elements.
|
|
FindExtensions = get_elements
|
|
FindChildren = get_elements
|
|
|
|
def get_attributes(self, tag=None, namespace=None, version=1):
|
|
"""Find all attributes which match the tag and namespace.
|
|
|
|
To find all attributes in this object, call get_attributes with the tag
|
|
and namespace both set to None (the default). This method searches
|
|
through the object's members and the attributes stored in
|
|
_other_attributes which did not fit any of the XML parsing rules for this
|
|
class.
|
|
|
|
Args:
|
|
tag: str
|
|
namespace: str
|
|
version: int Specifies the version of the XML rules to be used when
|
|
searching for matching attributes.
|
|
|
|
Returns:
|
|
A list of XmlAttribute objects for the matching attributes.
|
|
"""
|
|
matches = []
|
|
ignored1, ignored2, attributes = self.__class__._get_rules(version)
|
|
if attributes:
|
|
for qname, attribute_def in attributes.iteritems():
|
|
if isinstance(attribute_def, (list, tuple)):
|
|
attribute_def = attribute_def[0]
|
|
member = getattr(self, attribute_def)
|
|
# TODO: ensure this hasn't broken existing behavior.
|
|
#member = getattr(self, attribute_def[0])
|
|
if member:
|
|
if _qname_matches(tag, namespace, qname):
|
|
matches.append(XmlAttribute(qname, member))
|
|
for qname, value in self._other_attributes.iteritems():
|
|
if _qname_matches(tag, namespace, qname):
|
|
matches.append(XmlAttribute(qname, value))
|
|
return matches
|
|
|
|
GetAttributes = get_attributes
|
|
|
|
def _harvest_tree(self, tree, version=1):
|
|
"""Populates object members from the data in the tree Element."""
|
|
qname, elements, attributes = self.__class__._get_rules(version)
|
|
for element in tree:
|
|
if elements and element.tag in elements:
|
|
definition = elements[element.tag]
|
|
# If this is a repeating element, make sure the member is set to a
|
|
# list.
|
|
if definition[2]:
|
|
if getattr(self, definition[0]) is None:
|
|
setattr(self, definition[0], [])
|
|
getattr(self, definition[0]).append(_xml_element_from_tree(element,
|
|
definition[1], version))
|
|
else:
|
|
setattr(self, definition[0], _xml_element_from_tree(element,
|
|
definition[1], version))
|
|
else:
|
|
self._other_elements.append(_xml_element_from_tree(element, XmlElement,
|
|
version))
|
|
for attrib, value in tree.attrib.iteritems():
|
|
if attributes and attrib in attributes:
|
|
setattr(self, attributes[attrib], value)
|
|
else:
|
|
self._other_attributes[attrib] = value
|
|
if tree.text:
|
|
self.text = tree.text
|
|
|
|
def _to_tree(self, version=1, encoding=None):
|
|
new_tree = ElementTree.Element(_get_qname(self, version))
|
|
self._attach_members(new_tree, version, encoding)
|
|
return new_tree
|
|
|
|
def _attach_members(self, tree, version=1, encoding=None):
|
|
"""Convert members to XML elements/attributes and add them to the tree.
|
|
|
|
Args:
|
|
tree: An ElementTree.Element which will be modified. The members of
|
|
this object will be added as child elements or attributes
|
|
according to the rules described in _expected_elements and
|
|
_expected_attributes. The elements and attributes stored in
|
|
other_attributes and other_elements are also added a children
|
|
of this tree.
|
|
version: int Ingnored in this method but used by VersionedElement.
|
|
encoding: str (optional)
|
|
"""
|
|
qname, elements, attributes = self.__class__._get_rules(version)
|
|
encoding = encoding or STRING_ENCODING
|
|
# Add the expected elements and attributes to the tree.
|
|
if elements:
|
|
for tag, element_def in elements.iteritems():
|
|
member = getattr(self, element_def[0])
|
|
# If this is a repeating element and there are members in the list.
|
|
if member and element_def[2]:
|
|
for instance in member:
|
|
instance._become_child(tree, version)
|
|
elif member:
|
|
member._become_child(tree, version)
|
|
if attributes:
|
|
for attribute_tag, member_name in attributes.iteritems():
|
|
value = getattr(self, member_name)
|
|
if value:
|
|
tree.attrib[attribute_tag] = value
|
|
# Add the unexpected (other) elements and attributes to the tree.
|
|
for element in self._other_elements:
|
|
element._become_child(tree, version)
|
|
for key, value in self._other_attributes.iteritems():
|
|
# I'm not sure if unicode can be used in the attribute name, so for now
|
|
# we assume the encoding is correct for the attribute name.
|
|
if not isinstance(value, unicode):
|
|
value = value.decode(encoding)
|
|
tree.attrib[key] = value
|
|
if self.text:
|
|
if isinstance(self.text, unicode):
|
|
tree.text = self.text
|
|
else:
|
|
tree.text = self.text.decode(encoding)
|
|
|
|
def to_string(self, version=1, encoding=None, pretty_print=None):
|
|
"""Converts this object to XML."""
|
|
|
|
tree_string = ElementTree.tostring(self._to_tree(version, encoding))
|
|
|
|
if pretty_print and xmlString is not None:
|
|
return xmlString(tree_string).toprettyxml()
|
|
|
|
return tree_string
|
|
|
|
ToString = to_string
|
|
|
|
def __str__(self):
|
|
return self.to_string()
|
|
|
|
def _become_child(self, tree, version=1):
|
|
"""Adds a child element to tree with the XML data in self."""
|
|
new_child = ElementTree.Element('')
|
|
tree.append(new_child)
|
|
new_child.tag = _get_qname(self, version)
|
|
self._attach_members(new_child, version)
|
|
|
|
def __get_extension_elements(self):
|
|
return self._other_elements
|
|
|
|
def __set_extension_elements(self, elements):
|
|
self._other_elements = elements
|
|
|
|
extension_elements = property(__get_extension_elements,
|
|
__set_extension_elements,
|
|
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
|
|
|
|
def __get_extension_attributes(self):
|
|
return self._other_attributes
|
|
|
|
def __set_extension_attributes(self, attributes):
|
|
self._other_attributes = attributes
|
|
|
|
extension_attributes = property(__get_extension_attributes,
|
|
__set_extension_attributes,
|
|
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
|
|
|
|
def _get_tag(self, version=1):
|
|
qname = _get_qname(self, version)
|
|
return qname[qname.find('}')+1:]
|
|
|
|
def _get_namespace(self, version=1):
|
|
qname = _get_qname(self, version)
|
|
if qname.startswith('{'):
|
|
return qname[1:qname.find('}')]
|
|
else:
|
|
return None
|
|
|
|
def _set_tag(self, tag):
|
|
if isinstance(self._qname, tuple):
|
|
self._qname = self._qname.copy()
|
|
if self._qname[0].startswith('{'):
|
|
self._qname[0] = '{%s}%s' % (self._get_namespace(1), tag)
|
|
else:
|
|
self._qname[0] = tag
|
|
else:
|
|
if self._qname.startswith('{'):
|
|
self._qname = '{%s}%s' % (self._get_namespace(), tag)
|
|
else:
|
|
self._qname = tag
|
|
|
|
def _set_namespace(self, namespace):
|
|
if isinstance(self._qname, tuple):
|
|
self._qname = self._qname.copy()
|
|
if namespace:
|
|
self._qname[0] = '{%s}%s' % (namespace, self._get_tag(1))
|
|
else:
|
|
self._qname[0] = self._get_tag(1)
|
|
else:
|
|
if namespace:
|
|
self._qname = '{%s}%s' % (namespace, self._get_tag(1))
|
|
else:
|
|
self._qname = self._get_tag(1)
|
|
|
|
tag = property(_get_tag, _set_tag,
|
|
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
|
|
|
|
namespace = property(_get_namespace, _set_namespace,
|
|
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
|
|
|
|
# Provided for backwards compatibility to atom.ExtensionElement
|
|
children = extension_elements
|
|
attributes = extension_attributes
|
|
|
|
|
|
def _get_qname(element, version):
|
|
if isinstance(element._qname, tuple):
|
|
if version <= len(element._qname):
|
|
return element._qname[version-1]
|
|
else:
|
|
return element._qname[-1]
|
|
else:
|
|
return element._qname
|
|
|
|
|
|
def _qname_matches(tag, namespace, qname):
|
|
"""Logic determines if a QName matches the desired local tag and namespace.
|
|
|
|
This is used in XmlElement.get_elements and XmlElement.get_attributes to
|
|
find matches in the element's members (among all expected-and-unexpected
|
|
elements-and-attributes).
|
|
|
|
Args:
|
|
expected_tag: string
|
|
expected_namespace: string
|
|
qname: string in the form '{xml_namespace}localtag' or 'tag' if there is
|
|
no namespace.
|
|
|
|
Returns:
|
|
boolean True if the member's tag and namespace fit the expected tag and
|
|
namespace.
|
|
"""
|
|
# If there is no expected namespace or tag, then everything will match.
|
|
if qname is None:
|
|
member_tag = None
|
|
member_namespace = None
|
|
else:
|
|
if qname.startswith('{'):
|
|
member_namespace = qname[1:qname.index('}')]
|
|
member_tag = qname[qname.index('}') + 1:]
|
|
else:
|
|
member_namespace = None
|
|
member_tag = qname
|
|
return ((tag is None and namespace is None)
|
|
# If there is a tag, but no namespace, see if the local tag matches.
|
|
or (namespace is None and member_tag == tag)
|
|
# There was no tag, but there was a namespace so see if the namespaces
|
|
# match.
|
|
or (tag is None and member_namespace == namespace)
|
|
# There was no tag, and the desired elements have no namespace, so check
|
|
# to see that the member's namespace is None.
|
|
or (tag is None and namespace == ''
|
|
and member_namespace is None)
|
|
# The tag and the namespace both match.
|
|
or (tag == member_tag
|
|
and namespace == member_namespace)
|
|
# The tag matches, and the expected namespace is the empty namespace,
|
|
# check to make sure the member's namespace is None.
|
|
or (tag == member_tag and namespace == ''
|
|
and member_namespace is None))
|
|
|
|
|
|
def parse(xml_string, target_class=None, version=1, encoding=None):
|
|
"""Parses the XML string according to the rules for the target_class.
|
|
|
|
Args:
|
|
xml_string: str or unicode
|
|
target_class: XmlElement or a subclass. If None is specified, the
|
|
XmlElement class is used.
|
|
version: int (optional) The version of the schema which should be used when
|
|
converting the XML into an object. The default is 1.
|
|
encoding: str (optional) The character encoding of the bytes in the
|
|
xml_string. Default is 'UTF-8'.
|
|
"""
|
|
if target_class is None:
|
|
target_class = XmlElement
|
|
if isinstance(xml_string, unicode):
|
|
if encoding is None:
|
|
xml_string = xml_string.encode(STRING_ENCODING)
|
|
else:
|
|
xml_string = xml_string.encode(encoding)
|
|
tree = ElementTree.fromstring(xml_string)
|
|
return _xml_element_from_tree(tree, target_class, version)
|
|
|
|
|
|
Parse = parse
|
|
xml_element_from_string = parse
|
|
XmlElementFromString = xml_element_from_string
|
|
|
|
|
|
def _xml_element_from_tree(tree, target_class, version=1):
|
|
if target_class._qname is None:
|
|
instance = target_class()
|
|
instance._qname = tree.tag
|
|
instance._harvest_tree(tree, version)
|
|
return instance
|
|
# TODO handle the namespace-only case
|
|
# Namespace only will be used with Google Spreadsheets rows and
|
|
# Google Base item attributes.
|
|
elif tree.tag == _get_qname(target_class, version):
|
|
instance = target_class()
|
|
instance._harvest_tree(tree, version)
|
|
return instance
|
|
return None
|
|
|
|
|
|
class XmlAttribute(object):
|
|
|
|
def __init__(self, qname, value):
|
|
self._qname = qname
|
|
self.value = value
|
|
|