#!/usr/bin/env python
#
#    Copyright (C) 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# This module is used for version 2 of the Google Data APIs.


__author__ = 'j.s@google.com (Jeff Scudder)'


import inspect
try:
  from xml.etree import cElementTree as ElementTree
except ImportError:
  try:
    import cElementTree as ElementTree
  except ImportError:
    try:
      from xml.etree import ElementTree
    except ImportError:
      from elementtree import ElementTree

try:
  from xml.dom.minidom import parseString as xmlString
except ImportError:
  xmlString = None


STRING_ENCODING = 'utf-8'

# Text/bytes aliases so the module runs on interpreters with and without the
# `unicode` builtin.
try:
  _TEXT_TYPE = unicode
  _BYTES_TYPE = str
except NameError:
  _TEXT_TYPE = str
  _BYTES_TYPE = bytes
_STRING_TYPES = (str, _TEXT_TYPE)


class XmlElement(object):
  """Represents an element node in an XML document.

  Subclasses declare their XML mapping with class attributes (see
  _list_xml_members). Children and attributes found while parsing which do
  not match a declared member are kept in _other_elements and
  _other_attributes.

  The text member is a UTF-8 encoded str or unicode.
  """
  # Either a single qname string ('{namespace}tag' or 'tag') or a tuple with
  # one qname per XML schema version.
  _qname = None
  _other_elements = None
  _other_attributes = None
  # The rule set contains mappings for XML qnames to child members and the
  # appropriate member classes.
  _rule_set = None
  # Per-class cache of the (member_name, member_type) pairs.
  _members = None
  text = None

  def __init__(self, text=None, *args, **kwargs):
    """Creates an element, setting declared members from keyword arguments.

    Args:
      text: The text content of the element (optional).
      **kwargs: Values for declared XML members, keyed by member name.
          Members which are not supplied default to [] for repeating
          elements and to None otherwise.
    """
    # Look in the class __dict__ so that a cache belonging to a superclass is
    # not mistaken for this class's own member list.
    if ('_members' not in self.__class__.__dict__
        or self.__class__._members is None):
      self.__class__._members = tuple(self.__class__._list_xml_members())
    for member_name, member_type in self.__class__._members:
      if member_name in kwargs:
        setattr(self, member_name, kwargs[member_name])
      elif isinstance(member_type, list):
        setattr(self, member_name, [])
      else:
        setattr(self, member_name, None)
    self._other_elements = []
    self._other_attributes = {}
    if text is not None:
      self.text = text

  @classmethod
  def _list_xml_members(cls):
    """Lists all members which are XML elements or attributes.

    The following members would be considered XML members:
    foo = 'abc' - indicates an XML attribute with the qname abc
    foo = SomeElement - indicates an XML child element
    foo = [AnElement] - indicates a repeating XML child element, each instance
        will be stored in a list in this member
    foo = ('att1', '{http://example.com/namespace}att2') - indicates an XML
        attribute which has different parsing rules in different versions of
        the protocol. Version 1 of the XML parsing rules will look for an
        attribute with the qname 'att1' but version 2 of the parsing rules
        will look for a namespaced attribute with the local name of 'att2'
        and an XML namespace of 'http://example.com/namespace'.

    Members whose name starts with an underscore, and the text member, are
    never treated as XML members.

    Returns:
      A list of (member_name, member_type) pairs.
    """
    members = []
    for pair in inspect.getmembers(cls):
      member_name, member_type = pair[0], pair[1]
      if member_name.startswith('_') or member_name == 'text':
        continue
      if (isinstance(member_type, (tuple, list) + _STRING_TYPES)
          or (inspect.isclass(member_type)
              and issubclass(member_type, XmlElement))):
        members.append(pair)
    return members

  @classmethod
  def _get_rules(cls, version):
    """Initializes the _rule_set for the class which is used when parsing XML.

    This method is used internally for parsing and generating XML for an
    XmlElement. It is not recommended that you call this method directly.

    Args:
      version: int The version of the XML rules requested. Versions higher
          than 2 fall back to version 2.

    Returns:
      A tuple containing the XML parsing rules for the appropriate version.

      The tuple looks like:
      (qname, {sub_element_qname: (member_name, member_class, repeating), ..},
       {attribute_qname: member_name})

      For example, a class with the qname
      '{http://www.w3.org/2007/app}control' and a single non-repeating child
      member named draft would produce:
      ('{http://www.w3.org/2007/app}control',
       {'{http://www.w3.org/2007/app}draft': ('draft', <member class>,
                                              False)},
       {})
    """
    # Look for the rule set in the class __dict__ proxy so that only the
    # _rule_set for this class will be found; rule sets defined in
    # superclasses must not be reused. Two fixed slots (one per supported
    # version) are used because that is a bit faster than growing a list on
    # demand. If and when there is a version 3, this list will need to be
    # expanded.
    if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
      cls._rule_set = [None, None]
    # If a version higher than 2 is requested, fall back to version 2 because
    # 2 is currently the highest supported version.
    if version > 2:
      return cls._get_rules(2)
    if cls._rule_set[version-1] is not None:
      return cls._rule_set[version-1]
    # The rule set for each version consists of the qname for this element
    # ('{namespace}tag'), a dictionary (elements) for looking up the
    # corresponding class member when given a child element's qname, and a
    # dictionary (attributes) for looking up the corresponding class member
    # when given an XML attribute's qname.
    elements = {}
    attributes = {}
    if '_members' not in cls.__dict__ or cls._members is None:
      cls._members = tuple(cls._list_xml_members())
    for member_name, target in cls._members:
      if isinstance(target, list):
        # This member points to a repeating element.
        elements[_get_qname(target[0], version)] = (member_name, target[0],
                                                    True)
      elif isinstance(target, tuple):
        # This member points to a versioned XML attribute. Versions beyond
        # the end of the tuple use the newest (last) qname.
        if version <= len(target):
          attributes[target[version-1]] = member_name
        else:
          attributes[target[-1]] = member_name
      elif isinstance(target, _STRING_TYPES):
        # This member points to an XML attribute.
        attributes[target] = member_name
      elif issubclass(target, XmlElement):
        # This member points to a single occurrence element.
        elements[_get_qname(target, version)] = (member_name, target, False)
    version_rules = (_get_qname(cls, version), elements, attributes)
    cls._rule_set[version-1] = version_rules
    return version_rules

  def get_elements(self, tag=None, namespace=None, version=1):
    """Find all sub elements which match the tag and namespace.

    To find all elements in this object, call get_elements with the tag and
    namespace both set to None (the default). This method searches through
    the object's members and the elements stored in _other_elements which
    did not match any of the XML parsing rules for this class.

    Args:
      tag: str
      namespace: str
      version: int Specifies the version of the XML rules to be used when
          searching for matching elements.

    Returns:
      A list of the matching XmlElements.
    """
    matches = []
    ignored1, elements, ignored2 = self.__class__._get_rules(version)
    if elements:
      for qname, element_def in elements.items():
        member = getattr(self, element_def[0])
        if member and _qname_matches(tag, namespace, qname):
          if element_def[2]:
            # If this is a repeating element, copy all instances into the
            # result list.
            matches.extend(member)
          else:
            matches.append(member)
    for element in self._other_elements:
      if _qname_matches(tag, namespace, element._qname):
        matches.append(element)
    return matches

  GetElements = get_elements
  # FindExtensions and FindChildren are provided for backwards compatibility
  # to the atom.AtomBase class.
  # However, FindExtensions may return more results than the v1 atom.AtomBase
  # method does, because get_elements searches both the expected children
  # and the unexpected "other elements". The old AtomBase.FindExtensions
  # method searched only "other elements" AKA extension_elements.
  FindExtensions = get_elements
  FindChildren = get_elements

  def get_attributes(self, tag=None, namespace=None, version=1):
    """Find all attributes which match the tag and namespace.

    To find all attributes in this object, call get_attributes with the tag
    and namespace both set to None (the default). This method searches
    through the object's members and the attributes stored in
    _other_attributes which did not fit any of the XML parsing rules for this
    class.

    Expected attributes whose member value is empty or None are skipped.

    Args:
      tag: str
      namespace: str
      version: int Specifies the version of the XML rules to be used when
          searching for matching attributes.

    Returns:
      A list of XmlAttribute objects for the matching attributes.
    """
    matches = []
    ignored1, ignored2, attributes = self.__class__._get_rules(version)
    if attributes:
      for qname, attribute_def in attributes.items():
        # _get_rules stores plain member names; a list or tuple is tolerated
        # here and its first item is used as the member name.
        # TODO: ensure this hasn't broken existing behavior.
        if isinstance(attribute_def, (list, tuple)):
          attribute_def = attribute_def[0]
        member = getattr(self, attribute_def)
        if member and _qname_matches(tag, namespace, qname):
          matches.append(XmlAttribute(qname, member))
    for qname, value in self._other_attributes.items():
      if _qname_matches(tag, namespace, qname):
        matches.append(XmlAttribute(qname, value))
    return matches

  GetAttributes = get_attributes

  def _harvest_tree(self, tree, version=1):
    """Populates object members from the data in the tree Element.

    Args:
      tree: An ElementTree element whose children, attributes and text are
          copied into this object.
      version: int The version of the XML parsing rules to use.
    """
    qname, elements, attributes = self.__class__._get_rules(version)
    for element in tree:
      if elements and element.tag in elements:
        definition = elements[element.tag]
        child = _xml_element_from_tree(element, definition[1], version)
        if definition[2]:
          # If this is a repeating element, make sure the member is set to a
          # list.
          if getattr(self, definition[0]) is None:
            setattr(self, definition[0], [])
          getattr(self, definition[0]).append(child)
        else:
          setattr(self, definition[0], child)
      else:
        self._other_elements.append(
            _xml_element_from_tree(element, XmlElement, version))
    for attrib, value in tree.attrib.items():
      if attributes and attrib in attributes:
        setattr(self, attributes[attrib], value)
      else:
        self._other_attributes[attrib] = value
    if tree.text:
      self.text = tree.text

  def _to_tree(self, version=1, encoding=None):
    """Builds a new ElementTree element holding this object's XML data."""
    new_tree = ElementTree.Element(_get_qname(self, version))
    self._attach_members(new_tree, version, encoding)
    return new_tree

  def _attach_members(self, tree, version=1, encoding=None):
    """Convert members to XML elements/attributes and add them to the tree.

    Args:
      tree: An ElementTree.Element which will be modified. The members of
          this object will be added as child elements or attributes
          according to the rules returned by _get_rules. The elements and
          attributes stored in _other_attributes and _other_elements are
          also added as children of this tree.
      version: int The version of the XML rules used to look up qnames.
      encoding: str (optional) The encoding used to decode byte string
          values. Defaults to STRING_ENCODING.
    """
    qname, elements, attributes = self.__class__._get_rules(version)
    encoding = encoding or STRING_ENCODING
    # Add the expected elements and attributes to the tree.
    # NOTE: _become_child takes no encoding argument, so child elements are
    # always serialized with the default encoding.
    if elements:
      for tag, element_def in elements.items():
        member = getattr(self, element_def[0])
        # If this is a repeating element and there are members in the list.
        if member and element_def[2]:
          for instance in member:
            instance._become_child(tree, version)
        elif member:
          member._become_child(tree, version)
    if attributes:
      for attribute_tag, member_name in attributes.items():
        value = getattr(self, member_name)
        if value:
          # Decode byte strings, as is done for the text and for the
          # unexpected attributes below.
          if isinstance(value, _BYTES_TYPE):
            value = value.decode(encoding)
          tree.attrib[attribute_tag] = value
    # Add the unexpected (other) elements and attributes to the tree.
    for element in self._other_elements:
      element._become_child(tree, version)
    for key, value in self._other_attributes.items():
      # I'm not sure if unicode can be used in the attribute name, so for now
      # we assume the encoding is correct for the attribute name.
      if not isinstance(value, _TEXT_TYPE):
        value = value.decode(encoding)
      tree.attrib[key] = value
    if self.text:
      if isinstance(self.text, _TEXT_TYPE):
        tree.text = self.text
      else:
        tree.text = self.text.decode(encoding)

  def to_string(self, version=1, encoding=None, pretty_print=None):
    """Converts this object to XML.

    Args:
      version: int The version of the XML rules used to generate the XML.
      encoding: str (optional) The encoding of byte string members of this
          object. Defaults to STRING_ENCODING.
      pretty_print: If true and xml.dom.minidom is available, the result of
          minidom's toprettyxml() is returned instead.

    Returns:
      The value returned by ElementTree.tostring for this element, or the
      pretty printed XML.
    """
    tree_string = ElementTree.tostring(self._to_tree(version, encoding))
    if pretty_print and xmlString is not None:
      return xmlString(tree_string).toprettyxml()
    return tree_string

  ToString = to_string

  def __str__(self):
    xml = self.to_string()
    # ElementTree.tostring may return a byte string which is not a str;
    # __str__ is required to return a str.
    if not isinstance(xml, str):
      xml = xml.decode(STRING_ENCODING)
    return xml

  def _become_child(self, tree, version=1):
    """Adds a child element to tree with the XML data in self."""
    new_child = ElementTree.Element('')
    tree.append(new_child)
    new_child.tag = _get_qname(self, version)
    self._attach_members(new_child, version)

  def __get_extension_elements(self):
    return self._other_elements

  def __set_extension_elements(self, elements):
    self._other_elements = elements

  extension_elements = property(__get_extension_elements,
      __set_extension_elements,
      """Provides backwards compatibility for v1 atom.AtomBase classes.""")

  def __get_extension_attributes(self):
    return self._other_attributes

  def __set_extension_attributes(self, attributes):
    self._other_attributes = attributes

  extension_attributes = property(__get_extension_attributes,
      __set_extension_attributes,
      """Provides backwards compatibility for v1 atom.AtomBase classes.""")

  def _get_tag(self, version=1):
    """Returns the local tag of this element, or None if it has no qname."""
    qname = _get_qname(self, version)
    if qname is None:
      return None
    return qname[qname.find('}')+1:]

  def _get_namespace(self, version=1):
    """Returns the XML namespace of this element, or None if it has none."""
    qname = _get_qname(self, version)
    if qname is not None and qname.startswith('{'):
      return qname[1:qname.find('}')]
    return None

  def _set_tag(self, tag):
    """Changes the local tag of this instance, keeping its namespace.

    For a versioned (tuple) qname only the version 1 entry is changed.
    """
    if isinstance(self._qname, tuple):
      # Tuples are immutable and the class-level tuple is shared between
      # instances, so store a new tuple on this instance.
      if self._qname[0].startswith('{'):
        first = '{%s}%s' % (self._get_namespace(1), tag)
      else:
        first = tag
      self._qname = (first,) + self._qname[1:]
    elif self._qname is not None and self._qname.startswith('{'):
      self._qname = '{%s}%s' % (self._get_namespace(), tag)
    else:
      self._qname = tag

  def _set_namespace(self, namespace):
    """Changes the XML namespace of this instance, keeping its local tag.

    For a versioned (tuple) qname only the version 1 entry is changed. A
    false namespace (None or '') removes the namespace.
    """
    local_tag = self._get_tag(1)
    if namespace:
      # An element which has no qname yet gets an empty local tag so that
      # the namespace is kept until a tag is assigned.
      new_qname = '{%s}%s' % (namespace, local_tag or '')
    else:
      new_qname = local_tag
    if isinstance(self._qname, tuple):
      # Store a new tuple on this instance; see _set_tag.
      self._qname = (new_qname,) + self._qname[1:]
    else:
      self._qname = new_qname

  tag = property(_get_tag, _set_tag,
      """Provides backwards compatibility for v1 atom.AtomBase classes.""")

  namespace = property(_get_namespace, _set_namespace,
      """Provides backwards compatibility for v1 atom.AtomBase classes.""")

  # Provided for backwards compatibility to atom.ExtensionElement
  children = extension_elements
  attributes = extension_attributes


def _get_qname(element, version):
  """Returns the qname of an XmlElement class or instance for a version.

  If the _qname is a tuple, the entry for the requested version is used;
  versions beyond the end of the tuple use the last entry.
  """
  if isinstance(element._qname, tuple):
    if version <= len(element._qname):
      return element._qname[version-1]
    else:
      return element._qname[-1]
  else:
    return element._qname


def _qname_matches(tag, namespace, qname):
  """Logic determines if a QName matches the desired local tag and namespace.

  This is used in XmlElement.get_elements and XmlElement.get_attributes to
  find matches in the element's members (among all expected-and-unexpected
  elements-and-attributes).

  Args:
    tag: string The desired local tag, or None to match any tag.
    namespace: string The desired XML namespace. None matches any namespace
        and the empty string matches only members with no namespace.
    qname: string in the form '{xml_namespace}localtag' or 'tag' if there is
        no namespace. May be None.

  Returns:
    boolean True if the member's tag and namespace fit the expected tag and
    namespace.
  """
  if qname is None:
    member_tag = None
    member_namespace = None
  elif qname.startswith('{'):
    member_namespace = qname[1:qname.index('}')]
    member_tag = qname[qname.index('}') + 1:]
  else:
    member_namespace = None
    member_tag = qname
  # If there is no expected namespace or tag, then everything will match.
  return ((tag is None and namespace is None)
      # If there is a tag, but no namespace, see if the local tag matches.
      or (namespace is None and member_tag == tag)
      # There was no tag, but there was a namespace so see if the namespaces
      # match.
      or (tag is None and member_namespace == namespace)
      # There was no tag, and the desired elements have no namespace, so check
      # to see that the member's namespace is None.
      or (tag is None and namespace == ''
          and member_namespace is None)
      # The tag and the namespace both match.
      or (tag == member_tag
          and namespace == member_namespace)
      # The tag matches, and the expected namespace is the empty namespace,
      # check to make sure the member's namespace is None.
      or (tag == member_tag and namespace == ''
          and member_namespace is None))


def parse(xml_string, target_class=None, version=1, encoding=None):
  """Parses the XML string according to the rules for the target_class.

  Args:
    xml_string: str or unicode
    target_class: XmlElement or a subclass. If None is specified, the
        XmlElement class is used.
    version: int (optional) The version of the schema which should be used when
        converting the XML into an object. The default is 1.
    encoding: str (optional) The character encoding of the bytes in the
        xml_string. Default is 'UTF-8'.

  Returns:
    An instance of target_class, or None if the root tag of the XML does not
    match the qname of the target_class.
  """
  if target_class is None:
    target_class = XmlElement
  if isinstance(xml_string, _TEXT_TYPE):
    # The XML parser is given bytes, so encode text input first.
    if encoding is None:
      xml_string = xml_string.encode(STRING_ENCODING)
    else:
      xml_string = xml_string.encode(encoding)
  tree = ElementTree.fromstring(xml_string)
  return _xml_element_from_tree(tree, target_class, version)


Parse = parse
xml_element_from_string = parse
XmlElementFromString = xml_element_from_string


def _xml_element_from_tree(tree, target_class, version=1):
  """Creates a target_class instance from an ElementTree element.

  Returns:
    A populated instance of target_class. If the target_class declares a
    qname which differs from the tag of the tree, None is returned.
  """
  if target_class._qname is None:
    # A class without a qname accepts any element and remembers its tag.
    instance = target_class()
    instance._qname = tree.tag
    instance._harvest_tree(tree, version)
    return instance
  # TODO handle the namespace-only case
  # Namespace only will be used with Google Spreadsheets rows and
  # Google Base item attributes.
  elif tree.tag == _get_qname(target_class, version):
    instance = target_class()
    instance._harvest_tree(tree, version)
    return instance
  return None


class XmlAttribute(object):
  """Holds the qname and value of one XML attribute."""

  def __init__(self, qname, value):
    self._qname = qname
    self.value = value