blob: a19a70d4103b35681a6196a713f9f707c77af090 [file] [log] [blame]
#!/usr/bin/env python
#
# Copyright (C) 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This module is used for version 2 of the Google Data APIs.
__author__ = 'j.s@google.com (Jeff Scudder)'
import inspect
try:
from xml.etree import cElementTree as ElementTree
except ImportError:
try:
import cElementTree as ElementTree
except ImportError:
try:
from xml.etree import ElementTree
except ImportError:
from elementtree import ElementTree
try:
from xml.dom.minidom import parseString as xmlString
except ImportError:
xmlString = None
STRING_ENCODING = 'utf-8'
class XmlElement(object):
"""Represents an element node in an XML document.
The text member is a UTF-8 encoded str or unicode.
"""
_qname = None
_other_elements = None
_other_attributes = None
# The rule set contains mappings for XML qnames to child members and the
# appropriate member classes.
_rule_set = None
_members = None
text = None
def __init__(self, text=None, *args, **kwargs):
if ('_members' not in self.__class__.__dict__
or self.__class__._members is None):
self.__class__._members = tuple(self.__class__._list_xml_members())
for member_name, member_type in self.__class__._members:
if member_name in kwargs:
setattr(self, member_name, kwargs[member_name])
else:
if isinstance(member_type, list):
setattr(self, member_name, [])
else:
setattr(self, member_name, None)
self._other_elements = []
self._other_attributes = {}
if text is not None:
self.text = text
def _list_xml_members(cls):
"""Generator listing all members which are XML elements or attributes.
The following members would be considered XML members:
foo = 'abc' - indicates an XML attribute with the qname abc
foo = SomeElement - indicates an XML child element
foo = [AnElement] - indicates a repeating XML child element, each instance
will be stored in a list in this member
foo = ('att1', '{http://example.com/namespace}att2') - indicates an XML
attribute which has different parsing rules in different versions of
the protocol. Version 1 of the XML parsing rules will look for an
attribute with the qname 'att1' but verion 2 of the parsing rules will
look for a namespaced attribute with the local name of 'att2' and an
XML namespace of 'http://example.com/namespace'.
"""
members = []
for pair in inspect.getmembers(cls):
if not pair[0].startswith('_') and pair[0] != 'text':
member_type = pair[1]
if (isinstance(member_type, tuple) or isinstance(member_type, list)
or isinstance(member_type, (str, unicode))
or (inspect.isclass(member_type)
and issubclass(member_type, XmlElement))):
members.append(pair)
return members
_list_xml_members = classmethod(_list_xml_members)
def _get_rules(cls, version):
"""Initializes the _rule_set for the class which is used when parsing XML.
This method is used internally for parsing and generating XML for an
XmlElement. It is not recommended that you call this method directly.
Returns:
A tuple containing the XML parsing rules for the appropriate version.
The tuple looks like:
(qname, {sub_element_qname: (member_name, member_class, repeating), ..},
{attribute_qname: member_name})
To give a couple of concrete example, the atom.data.Control _get_rules
with version of 2 will return:
('{http://www.w3.org/2007/app}control',
{'{http://www.w3.org/2007/app}draft': ('draft',
<class 'atom.data.Draft'>,
False)},
{})
Calling _get_rules with version 1 on gdata.data.FeedLink will produce:
('{http://schemas.google.com/g/2005}feedLink',
{'{http://www.w3.org/2005/Atom}feed': ('feed',
<class 'gdata.data.GDFeed'>,
False)},
{'href': 'href', 'readOnly': 'read_only', 'countHint': 'count_hint',
'rel': 'rel'})
"""
# Initialize the _rule_set to make sure there is a slot available to store
# the parsing rules for this version of the XML schema.
# Look for rule set in the class __dict__ proxy so that only the
# _rule_set for this class will be found. By using the dict proxy
# we avoid finding rule_sets defined in superclasses.
# The four lines below provide support for any number of versions, but it
# runs a bit slower then hard coding slots for two versions, so I'm using
# the below two lines.
#if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
# cls._rule_set = []
#while len(cls.__dict__['_rule_set']) < version:
# cls._rule_set.append(None)
# If there is no rule set cache in the class, provide slots for two XML
# versions. If and when there is a version 3, this list will need to be
# expanded.
if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
cls._rule_set = [None, None]
# If a version higher than 2 is requested, fall back to version 2 because
# 2 is currently the highest supported version.
if version > 2:
return cls._get_rules(2)
# Check the dict proxy for the rule set to avoid finding any rule sets
# which belong to the superclass. We only want rule sets for this class.
if cls._rule_set[version-1] is None:
# The rule set for each version consists of the qname for this element
# ('{namespace}tag'), a dictionary (elements) for looking up the
# corresponding class member when given a child element's qname, and a
# dictionary (attributes) for looking up the corresponding class member
# when given an XML attribute's qname.
elements = {}
attributes = {}
if ('_members' not in cls.__dict__ or cls._members is None):
cls._members = tuple(cls._list_xml_members())
for member_name, target in cls._members:
if isinstance(target, list):
# This member points to a repeating element.
elements[_get_qname(target[0], version)] = (member_name, target[0],
True)
elif isinstance(target, tuple):
# This member points to a versioned XML attribute.
if version <= len(target):
attributes[target[version-1]] = member_name
else:
attributes[target[-1]] = member_name
elif isinstance(target, (str, unicode)):
# This member points to an XML attribute.
attributes[target] = member_name
elif issubclass(target, XmlElement):
# This member points to a single occurance element.
elements[_get_qname(target, version)] = (member_name, target, False)
version_rules = (_get_qname(cls, version), elements, attributes)
cls._rule_set[version-1] = version_rules
return version_rules
else:
return cls._rule_set[version-1]
_get_rules = classmethod(_get_rules)
def get_elements(self, tag=None, namespace=None, version=1):
"""Find all sub elements which match the tag and namespace.
To find all elements in this object, call get_elements with the tag and
namespace both set to None (the default). This method searches through
the object's members and the elements stored in _other_elements which
did not match any of the XML parsing rules for this class.
Args:
tag: str
namespace: str
version: int Specifies the version of the XML rules to be used when
searching for matching elements.
Returns:
A list of the matching XmlElements.
"""
matches = []
ignored1, elements, ignored2 = self.__class__._get_rules(version)
if elements:
for qname, element_def in elements.iteritems():
member = getattr(self, element_def[0])
if member:
if _qname_matches(tag, namespace, qname):
if element_def[2]:
# If this is a repeating element, copy all instances into the
# result list.
matches.extend(member)
else:
matches.append(member)
for element in self._other_elements:
if _qname_matches(tag, namespace, element._qname):
matches.append(element)
return matches
GetElements = get_elements
# FindExtensions and FindChildren are provided for backwards compatibility
# to the atom.AtomBase class.
# However, FindExtensions may return more results than the v1 atom.AtomBase
# method does, because get_elements searches both the expected children
# and the unexpected "other elements". The old AtomBase.FindExtensions
# method searched only "other elements" AKA extension_elements.
FindExtensions = get_elements
FindChildren = get_elements
def get_attributes(self, tag=None, namespace=None, version=1):
"""Find all attributes which match the tag and namespace.
To find all attributes in this object, call get_attributes with the tag
and namespace both set to None (the default). This method searches
through the object's members and the attributes stored in
_other_attributes which did not fit any of the XML parsing rules for this
class.
Args:
tag: str
namespace: str
version: int Specifies the version of the XML rules to be used when
searching for matching attributes.
Returns:
A list of XmlAttribute objects for the matching attributes.
"""
matches = []
ignored1, ignored2, attributes = self.__class__._get_rules(version)
if attributes:
for qname, attribute_def in attributes.iteritems():
if isinstance(attribute_def, (list, tuple)):
attribute_def = attribute_def[0]
member = getattr(self, attribute_def)
# TODO: ensure this hasn't broken existing behavior.
#member = getattr(self, attribute_def[0])
if member:
if _qname_matches(tag, namespace, qname):
matches.append(XmlAttribute(qname, member))
for qname, value in self._other_attributes.iteritems():
if _qname_matches(tag, namespace, qname):
matches.append(XmlAttribute(qname, value))
return matches
GetAttributes = get_attributes
def _harvest_tree(self, tree, version=1):
"""Populates object members from the data in the tree Element."""
qname, elements, attributes = self.__class__._get_rules(version)
for element in tree:
if elements and element.tag in elements:
definition = elements[element.tag]
# If this is a repeating element, make sure the member is set to a
# list.
if definition[2]:
if getattr(self, definition[0]) is None:
setattr(self, definition[0], [])
getattr(self, definition[0]).append(_xml_element_from_tree(element,
definition[1], version))
else:
setattr(self, definition[0], _xml_element_from_tree(element,
definition[1], version))
else:
self._other_elements.append(_xml_element_from_tree(element, XmlElement,
version))
for attrib, value in tree.attrib.iteritems():
if attributes and attrib in attributes:
setattr(self, attributes[attrib], value)
else:
self._other_attributes[attrib] = value
if tree.text:
self.text = tree.text
def _to_tree(self, version=1, encoding=None):
new_tree = ElementTree.Element(_get_qname(self, version))
self._attach_members(new_tree, version, encoding)
return new_tree
def _attach_members(self, tree, version=1, encoding=None):
"""Convert members to XML elements/attributes and add them to the tree.
Args:
tree: An ElementTree.Element which will be modified. The members of
this object will be added as child elements or attributes
according to the rules described in _expected_elements and
_expected_attributes. The elements and attributes stored in
other_attributes and other_elements are also added a children
of this tree.
version: int Ingnored in this method but used by VersionedElement.
encoding: str (optional)
"""
qname, elements, attributes = self.__class__._get_rules(version)
encoding = encoding or STRING_ENCODING
# Add the expected elements and attributes to the tree.
if elements:
for tag, element_def in elements.iteritems():
member = getattr(self, element_def[0])
# If this is a repeating element and there are members in the list.
if member and element_def[2]:
for instance in member:
instance._become_child(tree, version)
elif member:
member._become_child(tree, version)
if attributes:
for attribute_tag, member_name in attributes.iteritems():
value = getattr(self, member_name)
if value:
tree.attrib[attribute_tag] = value
# Add the unexpected (other) elements and attributes to the tree.
for element in self._other_elements:
element._become_child(tree, version)
for key, value in self._other_attributes.iteritems():
# I'm not sure if unicode can be used in the attribute name, so for now
# we assume the encoding is correct for the attribute name.
if not isinstance(value, unicode):
value = value.decode(encoding)
tree.attrib[key] = value
if self.text:
if isinstance(self.text, unicode):
tree.text = self.text
else:
tree.text = self.text.decode(encoding)
def to_string(self, version=1, encoding=None, pretty_print=None):
"""Converts this object to XML."""
tree_string = ElementTree.tostring(self._to_tree(version, encoding))
if pretty_print and xmlString is not None:
return xmlString(tree_string).toprettyxml()
return tree_string
ToString = to_string
def __str__(self):
return self.to_string()
def _become_child(self, tree, version=1):
"""Adds a child element to tree with the XML data in self."""
new_child = ElementTree.Element('')
tree.append(new_child)
new_child.tag = _get_qname(self, version)
self._attach_members(new_child, version)
def __get_extension_elements(self):
return self._other_elements
def __set_extension_elements(self, elements):
self._other_elements = elements
extension_elements = property(__get_extension_elements,
__set_extension_elements,
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
def __get_extension_attributes(self):
return self._other_attributes
def __set_extension_attributes(self, attributes):
self._other_attributes = attributes
extension_attributes = property(__get_extension_attributes,
__set_extension_attributes,
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
def _get_tag(self, version=1):
qname = _get_qname(self, version)
if qname:
return qname[qname.find('}')+1:]
return None
def _get_namespace(self, version=1):
qname = _get_qname(self, version)
if qname.startswith('{'):
return qname[1:qname.find('}')]
else:
return None
def _set_tag(self, tag):
if isinstance(self._qname, tuple):
self._qname = self._qname.copy()
if self._qname[0].startswith('{'):
self._qname[0] = '{%s}%s' % (self._get_namespace(1), tag)
else:
self._qname[0] = tag
else:
if self._qname is not None and self._qname.startswith('{'):
self._qname = '{%s}%s' % (self._get_namespace(), tag)
else:
self._qname = tag
def _set_namespace(self, namespace):
tag = self._get_tag(1)
if tag is None:
tag = ''
if isinstance(self._qname, tuple):
self._qname = self._qname.copy()
if namespace:
self._qname[0] = '{%s}%s' % (namespace, tag)
else:
self._qname[0] = tag
else:
if namespace:
self._qname = '{%s}%s' % (namespace, tag)
else:
self._qname = tag
tag = property(_get_tag, _set_tag,
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
namespace = property(_get_namespace, _set_namespace,
"""Provides backwards compatibility for v1 atom.AtomBase classes.""")
# Provided for backwards compatibility to atom.ExtensionElement
children = extension_elements
attributes = extension_attributes
def _get_qname(element, version):
if isinstance(element._qname, tuple):
if version <= len(element._qname):
return element._qname[version-1]
else:
return element._qname[-1]
else:
return element._qname
def _qname_matches(tag, namespace, qname):
"""Logic determines if a QName matches the desired local tag and namespace.
This is used in XmlElement.get_elements and XmlElement.get_attributes to
find matches in the element's members (among all expected-and-unexpected
elements-and-attributes).
Args:
expected_tag: string
expected_namespace: string
qname: string in the form '{xml_namespace}localtag' or 'tag' if there is
no namespace.
Returns:
boolean True if the member's tag and namespace fit the expected tag and
namespace.
"""
# If there is no expected namespace or tag, then everything will match.
if qname is None:
member_tag = None
member_namespace = None
else:
if qname.startswith('{'):
member_namespace = qname[1:qname.index('}')]
member_tag = qname[qname.index('}') + 1:]
else:
member_namespace = None
member_tag = qname
return ((tag is None and namespace is None)
# If there is a tag, but no namespace, see if the local tag matches.
or (namespace is None and member_tag == tag)
# There was no tag, but there was a namespace so see if the namespaces
# match.
or (tag is None and member_namespace == namespace)
# There was no tag, and the desired elements have no namespace, so check
# to see that the member's namespace is None.
or (tag is None and namespace == ''
and member_namespace is None)
# The tag and the namespace both match.
or (tag == member_tag
and namespace == member_namespace)
# The tag matches, and the expected namespace is the empty namespace,
# check to make sure the member's namespace is None.
or (tag == member_tag and namespace == ''
and member_namespace is None))
def parse(xml_string, target_class=None, version=1, encoding=None):
"""Parses the XML string according to the rules for the target_class.
Args:
xml_string: str or unicode
target_class: XmlElement or a subclass. If None is specified, the
XmlElement class is used.
version: int (optional) The version of the schema which should be used when
converting the XML into an object. The default is 1.
encoding: str (optional) The character encoding of the bytes in the
xml_string. Default is 'UTF-8'.
"""
if target_class is None:
target_class = XmlElement
if isinstance(xml_string, unicode):
if encoding is None:
xml_string = xml_string.encode(STRING_ENCODING)
else:
xml_string = xml_string.encode(encoding)
tree = ElementTree.fromstring(xml_string)
return _xml_element_from_tree(tree, target_class, version)
Parse = parse
xml_element_from_string = parse
XmlElementFromString = xml_element_from_string
def _xml_element_from_tree(tree, target_class, version=1):
if target_class._qname is None:
instance = target_class()
instance._qname = tree.tag
instance._harvest_tree(tree, version)
return instance
# TODO handle the namespace-only case
# Namespace only will be used with Google Spreadsheets rows and
# Google Base item attributes.
elif tree.tag == _get_qname(target_class, version):
instance = target_class()
instance._harvest_tree(tree, version)
return instance
return None
class XmlAttribute(object):
def __init__(self, qname, value):
self._qname = qname
self.value = value