worblehat-old/python/gdata/books/__init__.py

#!/usr/bin/python

"""
    Data Models for books.service

    All classes can be instantiated from an xml string using their FromString
    class method.

    Notes:
        * Book.title displays the first dc:title because the returned XML
          repeats that datum as atom:title.
        * There is an undocumented gbs:openAccess element that is not parsed.
"""

__author__ = "James Sams <sams.james@gmail.com>"
__copyright__ = "Apache License v2.0"

import atom
import gdata


# XML namespaces of the elements modelled in this module.
BOOK_SEARCH_NAMESPACE = 'http://schemas.google.com/books/2008'
DC_NAMESPACE = 'http://purl.org/dc/terms'

# Values of the rel attribute on an entry's atom links (see Book.Get*Link).
ANNOTATION_REL = BOOK_SEARCH_NAMESPACE + '/annotation'
INFO_REL = BOOK_SEARCH_NAMESPACE + '/info'
PREVIEW_REL = BOOK_SEARCH_NAMESPACE + '/preview'
THUMBNAIL_REL = BOOK_SEARCH_NAMESPACE + '/thumbnail'

# Category scheme that marks a category as a user label.
LABEL_SCHEME = BOOK_SEARCH_NAMESPACE + '/labels'

# URIs taken by the value attribute of <gbs:viewability>.
FULL_VIEW = BOOK_SEARCH_NAMESPACE + '#view_all_pages'
PARTIAL_VIEW = BOOK_SEARCH_NAMESPACE + '#view_partial'
NO_VIEW = BOOK_SEARCH_NAMESPACE + '#view_no_pages'
UNKNOWN_VIEW = BOOK_SEARCH_NAMESPACE + '#view_unknown'

# URIs taken by the value attribute of <gbs:embeddability>.
EMBEDDABLE = BOOK_SEARCH_NAMESPACE + '#embeddable'
NOT_EMBEDDABLE = BOOK_SEARCH_NAMESPACE + '#not_embeddable'


class _AtomFromString(atom.AtomBase):
    """Base class that gives every model in this module a FromString
    class method for building an instance from an XML string."""

    def FromString(cls, s):
        """Build an instance of cls from the XML string s.

        The parsing itself is delegated to atom.CreateClassFromXMLString.
        """
        parsed = atom.CreateClassFromXMLString(cls, s)
        return parsed

    # Wrapped by hand instead of with decorator syntax, as in the original.
    FromString = classmethod(FromString)


class Creator(_AtomFromString):
    """
    Model of the <dc:creator> element.

    A creator is an author or, more generally, any entity responsible for
    creating the volume: a person, an organization, or a service. For
    anthologies, proceedings, and other edited works the field may instead
    name the editors or other entities that collected the volume's contents.

    The element is a child of <entry>. A volume with several authors or
    contributors may carry several <dc:creator> elements, one per creator
    or contributor.
    """

    _namespace = DC_NAMESPACE
    _tag = 'creator'


class Date(_AtomFromString):
    """
    Model of the <dc:date> element: the publication date of the specific
    volume in question. For a reprint this is the reprint date rather than
    the original publication date.

    The date is encoded according to ISO-8601, more specifically the W3CDTF
    profile:

        YYYY-MM-DDThh:mm:ssTZD  TZD = -hh:mm or +hh:mm

    Usually only the year, or the year and the month, are given.

    The <dc:date> element can appear only as a child of <entry>.
    """

    _namespace = DC_NAMESPACE
    _tag = 'date'


class Description(_AtomFromString):
    """
    Model of the <dc:description> element: text describing a book or a book
    result.

    In a search result feed this may be a search result "snippet" containing
    the words around the user's search term; in a single volume feed it may
    be a synopsis of the book.

    The <dc:description> element can appear only as a child of <entry>.
    """

    _namespace = DC_NAMESPACE
    _tag = 'description'


class Format(_AtomFromString):
    """
    Model of the <dc:format> element: the physical properties of the volume.

    Currently it indicates the number of pages in the book; more information
    may be added to this field in the future.

    This element can appear only as a child of <entry>.
    """

    _namespace = DC_NAMESPACE
    _tag = 'format'


class Identifier(_AtomFromString):
    """
    Model of the <dc:identifier> element: an unambiguous reference to a
    particular book.

    * Every <entry> contains at least one <dc:identifier> child.
    * The first identifier is always the unique string Book Search has
      assigned to the volume (such as s1gVAAAAYAAJ). That ID appears in the
      book's URL in the Book Search GUI and in the URL of the book's single
      item feed.
    * Many books carry further <dc:identifier> elements holding alternate,
      external identifiers such as ISBNs, ISSNs, Library of Congress Control
      Numbers (LCCNs), and OCLC numbers. These are prepended with a
      corresponding namespace prefix (such as "ISBN:").
    * Any <dc:identifier> can be passed to the Dynamic Links, used to
      instantiate an Embedded Viewer, or used to construct static links to
      Book Search.

    The <dc:identifier> element can appear only as a child of <entry>.
    """

    _namespace = DC_NAMESPACE
    _tag = 'identifier'


class Publisher(_AtomFromString):
    """
    Model of the <dc:publisher> element: the name of the entity responsible
    for producing and distributing the volume (usually the specific edition
    of this book). A publisher may be a person, an organization, or a
    service.

    This element can appear only as a child of <entry>. When there is more
    than one publisher, multiple <dc:publisher> elements may appear.
    """

    _namespace = DC_NAMESPACE
    _tag = 'publisher'


class Subject(_AtomFromString):
    """
    Model of the <dc:subject> element: the topic of the book. Usually this
    is a Library of Congress Subject Heading (LCSH) or a Book Industry
    Standards and Communications Subject Heading (BISAC).

    The <dc:subject> element can appear only as a child of <entry>, and an
    entry may hold multiple <dc:subject> elements.
    """

    _namespace = DC_NAMESPACE
    _tag = 'subject'


class Title(_AtomFromString):
    """
    Model of the <dc:title> element: the title of a book as it was
    published. A subtitle, if the book has one, appears as a second
    <dc:title> element in the book result's <entry>.
    """

    _namespace = DC_NAMESPACE
    _tag = 'title'


class Viewability(_AtomFromString):
    """
    Model of the <gbs:viewability> element, which can appear only as a child
    of <entry>.

    Google Book Search respects the user's local copyright restrictions, so
    previews or full views of some books are not available in all locations.
    This element tells whether a book is fully viewable, can be previewed, or
    only has "about the book" information. These are the same three
    "viewability modes" returned by the Dynamic Links API.

    The value attribute is one of the following URIs:

    Full View: http://schemas.google.com/books/2008#view_all_pages
    Limited Preview: http://schemas.google.com/books/2008#view_partial
    Snippet View/No Preview: http://schemas.google.com/books/2008#view_no_pages
    Unknown view: http://schemas.google.com/books/2008#view_unknown
    """

    _namespace = BOOK_SEARCH_NAMESPACE
    _tag = 'viewability'
    # Start from the base attribute map and register the value attribute.
    _attributes = atom.AtomBase._attributes.copy()
    _attributes.update({'value': 'value'})

    def __init__(self, value=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store the value attribute and forward the rest to the base class."""
        self.value = value
        _AtomFromString.__init__(
            self,
            text=text,
            extension_elements=extension_elements,
            extension_attributes=extension_attributes)


class Embeddability(_AtomFromString):
    """
    Model of the <gbs:embeddability> element, which can appear only as a
    child of <entry>.

    Many of the books found on Google Book Search can be embedded on
    third-party sites using the Embedded Viewer; this element tells whether
    a particular book result is available for embedding. By definition, a
    book that cannot be previewed on Book Search cannot be embedded on
    third-party sites.

    The value attribute is one of the following URIs:
    embeddable: http://schemas.google.com/books/2008#embeddable
    not embeddable: http://schemas.google.com/books/2008#not_embeddable
    """

    _namespace = BOOK_SEARCH_NAMESPACE
    _tag = 'embeddability'
    # Start from the base attribute map and register the value attribute.
    _attributes = atom.AtomBase._attributes.copy()
    _attributes.update({'value': 'value'})

    def __init__(self, value=None, text=None, extension_elements=None,
                 extension_attributes=None):
        """Store the value attribute and forward the rest to the base class."""
        self.value = value
        _AtomFromString.__init__(
            self,
            text=text,
            extension_elements=extension_elements,
            extension_attributes=extension_attributes)


class Review(_AtomFromString):
    """
    Model of the <gbs:review> element: a user-generated review for a given
    book. The element currently appears only in the user library and user
    annotation feeds, as a child of <entry>.

    type: text, html, xhtml
    xml:lang: id of the language, a guess, (always two letters?)
    """

    _namespace = BOOK_SEARCH_NAMESPACE
    _tag = 'review'
    # Start from the base attribute map; xml:lang is keyed by its qualified
    # name and exposed as the lang member.
    _attributes = atom.AtomBase._attributes.copy()
    _attributes.update({'type': 'type'})
    _attributes.update({'{http://www.w3.org/XML/1998/namespace}lang': 'lang'})

    def __init__(self, type=None, lang=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store type and lang and forward the rest to the base class."""
        self.type = type
        self.lang = lang
        _AtomFromString.__init__(
            self,
            text=text,
            extension_elements=extension_elements,
            extension_attributes=extension_attributes)


class Rating(_AtomFromString):
    """Model of the rating element in the gdata namespace.

    All attributes must take an integral string between 1 and 5. The min,
    max, and average attributes represent 'community' ratings. The value
    attribute is the rating given by the user of the feed from which the
    item is fetched, who is not necessarily the authenticated user.
    """

    _namespace = gdata.GDATA_NAMESPACE
    _tag = 'rating'
    # Start from the base attribute map and register the four rating
    # attributes, each under a member of the same name.
    _attributes = atom.AtomBase._attributes.copy()
    for _name in ('min', 'max', 'average', 'value'):
        _attributes[_name] = _name
    del _name

    def __init__(self, min=None, max=None, average=None, value=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store the rating attributes and forward the rest to the base class."""
        self.min = min
        self.max = max
        self.average = average
        self.value = value
        _AtomFromString.__init__(
            self,
            text=text,
            extension_elements=extension_elements,
            extension_attributes=extension_attributes)


class Book(_AtomFromString, gdata.GDataEntry):
    """
    Represents an <entry> from either a search, annotation, library, or single
    item feed.

    Note that the dc_title attribute is the proper title of the volume (a list
    of <dc:title> elements); title is an atom element and may not represent
    the full title.
    """

    _tag = 'entry'
    _namespace = atom.ATOM_NAMESPACE
    _children = gdata.GDataEntry._children.copy()
    # Elements registered with a list: the member collects every occurrence.
    for _cls in (Creator, Identifier, Publisher, Subject, Format):
        _children['{%s}%s' % (_cls._namespace, _cls._tag)] = (_cls._tag,
                                                              [_cls])
    # Elements registered as a single instance. Review and Rating may only
    # be present in annotation/library entries.
    for _cls in (Date, Description, Viewability, Embeddability,
                 Review, Rating):
        _children['{%s}%s' % (_cls._namespace, _cls._tag)] = (_cls._tag, _cls)
    # Do not leave the loop variable behind as a class attribute.
    del _cls
    # dc:title is stored under the member name dc_title, so it is kept
    # apart from the entry's title member.
    _children['{%s}%s' % (Title._namespace, Title._tag)] = ('dc_title',
                                                            [Title])

    def to_dict(self):
        """Returns a dictionary of the book's available metadata. If the data
        cannot be discovered, it is not included as a key in the returned dict.
        The possible keys are: annotation, authors, embeddability, date,
        description, format, identifiers, info, preview, publishers, rating,
        review, subjects, thumbnail, title, and viewability.

        Notes:
          * Plural keys will be lists
          * Singular keys will be strings
          * annotation, info, preview and thumbnail are the href values of
            the corresponding atom links.
          * Title, despite usually being a list, joins the title and subtitle
            with a space as a single string.
          * embeddability and viewability only return the portion of the URI
            after #; they are omitted when the element has no value.
          * identifiers is a list of tuples, where the first item of each tuple
            is the type of identifier and the second item is the identifying
            string. The first tuple is always typed 'google_id'. Note that
            while doing dict() on this list may be possible, some items may
            have multiple of the same identifier and converting to a dict may
            result in collisions/dropped data.
          * Rating returns only the user's rating. See Rating class for precise
            definition.
        """
        d = {}
        # Each link is looked up once and reused for both the test and the
        # stored href.
        annotation = self.GetAnnotationLink()
        if annotation:
            d['annotation'] = annotation.href
        if self.creator:
            d['authors'] = [x.text for x in self.creator]
        # A viewability/embeddability element without a value attribute has
        # nothing to split, so it is left out instead of raising.
        if self.embeddability and self.embeddability.value is not None:
            d['embeddability'] = self.embeddability.value.split('#')[-1]
        if self.date:
            d['date'] = self.date.text
        if self.description:
            d['description'] = self.description.text
        if self.format:
            d['format'] = [x.text for x in self.format]
        if self.identifier:
            d['identifiers'] = [('google_id', self.identifier[0].text)]
            for x in self.identifier[1:]:
                if x.text is None:
                    # An empty element carries no identifier to report.
                    continue
                # "ISBN:123" -> ('ISBN', '123'); only the first colon
                # separates the prefix, later colons stay in the value.
                parts = x.text.split(':')
                d['identifiers'].append((parts[0], ':'.join(parts[1:])))
        info = self.GetInfoLink()
        if info:
            d['info'] = info.href
        preview = self.GetPreviewLink()
        if preview:
            d['preview'] = preview.href
        if self.publisher:
            d['publishers'] = [x.text for x in self.publisher]
        if self.rating:
            d['rating'] = self.rating.value
        if self.review:
            d['review'] = self.review.text
        if self.subject:
            d['subjects'] = [x.text for x in self.subject]
        thumbnail = self.GetThumbnailLink()
        if thumbnail:
            d['thumbnail'] = thumbnail.href
        if self.dc_title:
            # Titles without text cannot be joined; skip them.
            titles = [x.text for x in self.dc_title if x.text is not None]
            if titles:
                d['title'] = ' '.join(titles)
        if self.viewability and self.viewability.value is not None:
            d['viewability'] = self.viewability.value.split('#')[-1]
        return d

    def __init__(self, creator=None, date=None,
                description=None, format=None, author=None, identifier=None,
                publisher=None, subject=None, dc_title=None, viewability=None,
                embeddability=None, review=None, rating=None, category=None,
                content=None, contributor=None, atom_id=None, link=None,
                published=None, rights=None, source=None, summary=None,
                title=None, control=None, updated=None, text=None,
                extension_elements=None, extension_attributes=None):
        """Store the book-specific members and pass the remaining arguments
        on to gdata.GDataEntry.__init__.

        Every book-specific member is stored exactly as given (None when
        omitted), except dc_title, which falls back to an empty list.
        """
        self.creator = creator
        self.date = date
        self.description = description
        self.format = format
        self.identifier = identifier
        self.publisher = publisher
        self.subject = subject
        self.dc_title = dc_title or []
        self.viewability = viewability
        self.embeddability = embeddability
        self.review = review
        self.rating = rating
        gdata.GDataEntry.__init__(self, author=author, category=category,
                content=content, contributor=contributor, atom_id=atom_id,
                link=link, published=published, rights=rights, source=source,
                summary=summary, title=title, control=control, updated=updated,
                text=text, extension_elements=extension_elements,
                extension_attributes=extension_attributes)

    def _get_link_by_rel(self, rel):
        """Returns the first link in self.link whose rel equals the given
        rel, or None when there is no such link (or no links at all)."""
        for candidate in self.link or ():
            if candidate.rel == rel:
                return candidate
        return None

    def GetThumbnailLink(self):
        """Returns the atom.Link object representing the thumbnail URI."""
        return self._get_link_by_rel(THUMBNAIL_REL)

    def GetInfoLink(self):
        """
        Returns the atom.Link object representing the human-readable info URI.
        """
        return self._get_link_by_rel(INFO_REL)

    def GetPreviewLink(self):
        """Returns the atom.Link object representing the preview URI."""
        return self._get_link_by_rel(PREVIEW_REL)

    def GetAnnotationLink(self):
        """
        Returns the atom.Link object representing the Annotation URI.
        Note that the use of www.books in the href of this link seems to make
        this information useless. Using books.service.ANNOTATION_FEED and
        BOOK_SERVER to construct your URI seems to work better.
        """
        return self._get_link_by_rel(ANNOTATION_REL)

    def set_rating(self, value):
        """Set user's rating. Must be an integral string between 1 and 5.

        Raises AssertionError for any other value. The error is raised
        explicitly (not through an assert statement) so that the check
        still runs when Python is started with -O.
        """
        if value not in ('1', '2', '3', '4', '5'):
            raise AssertionError(
                "rating must be one of '1', '2', '3', '4', '5'; got %r"
                % (value,))
        if not isinstance(self.rating, Rating):
            self.rating = Rating()
        self.rating.value = value

    def set_review(self, text, type='text', lang='en'):
        """Set user's review text, replacing any existing review."""
        self.review = Review(text=text, type=type, lang=lang)

    def get_label(self):
        """Get the user's label for the item as a string: the term of the
        first category whose scheme is LABEL_SCHEME, or None if there is
        none."""
        for i in self.category:
            if i.scheme == LABEL_SCHEME:
                return i.term
        return None

    def set_label(self, term):
        """Clear pre-existing label for the item and set term as the label."""
        self.remove_label()
        self.category.append(atom.Category(term=term, scheme=LABEL_SCHEME))

    def remove_label(self):
        """Clear the user's label for the item.

        Every category whose scheme is LABEL_SCHEME is removed; the list in
        self.category is modified in place.
        """
        self.category[:] = [c for c in self.category
                            if not (c.scheme == LABEL_SCHEME)]

    def clean_annotations(self):
        """Clear all annotations from an item. Useful for taking an item from
        another user's library/annotation feed and adding it to the
        authenticated user's library without adopting annotations."""
        self.remove_label()
        self.review = None
        self.rating = None

    def get_google_id(self):
        """Get Google's ID of the item: the part of the entry's atom id text
        after the last '/'."""
        return self.id.text.split('/')[-1]


class BookFeed(_AtomFromString, gdata.GDataFeed):
    """Represents a feed of entries from a search; each <entry> child is
    parsed as a Book."""

    _namespace = atom.ATOM_NAMESPACE
    _tag = 'feed'
    # Start from the generic feed's child map and register Book, as a
    # list-valued member, for the entry element.
    _children = gdata.GDataFeed._children.copy()
    _children.update({
        '{%s}%s' % (Book._namespace, Book._tag): (Book._tag, [Book]),
    })


if __name__ == '__main__':
    # When executed as a script, run the doctests stored in the text file
    # 'datamodels.txt'.
    import doctest
    doctest.testfile('datamodels.txt')