worblehat-old/python/gdata/books/service.py

#!/usr/bin/python

"""
    Extend gdata.service.GDataService to support authenticated CRUD ops on
    Books API

    http://code.google.com/apis/books/docs/getting-started.html
    http://code.google.com/apis/books/docs/gdata/developers_guide_protocol.html

    TODO: (here and __init__)
        * search based on label, review, or other annotations (possible?)
        * edit (specifically, Put requests) seem to fail effect a change

    Problems With API:
        * Adding a book with a review to the library adds a note, not a review.
          This does not get included in the returned item. You see this by
          looking at My Library through the website.
        * Editing a review never edits a review (unless it is freshly added, but
          see above). More generally,
        * a Put request with changed annotations (label/rating/review) does NOT
          change the data. Note: Put requests only work on the href from
          GetEditLink (as per the spec). Do not try to PUT to the annotate or
          library feeds, this will cause a 400 Invalid URI Bad Request response.
          Attempting to Post to one of the feeds with the updated annotations
          does not update them. See the following for (hopefully) a follow up:
          google.com/support/forum/p/booksearch-apis/thread?tid=27fd7f68de438fc8
        * Attempts to workaround the edit problem continue to fail. For example,
          removing the item, editing the data, readding the item, gives us only
          our originally added data (annotations). This occurs even if we
          completely shut python down, refetch the book from the public feed,
          and re-add it. There is some kind of persistence going on that I
          cannot change. This is likely due to the annotations being cached in
          the annotation feed and the inability to edit (see Put, above)
        * GetAnnotationLink has www.books.... as the server, but hitting www...
          results in a bad URI error.
        * Spec indicates there may be multiple labels, but there does not seem
          to be a way to get the server to accept multiple labels, nor does the
          web interface have an obvious way to have multiple labels. Multiple
          labels are never returned.
"""

__author__ = "James Sams <sams.james@gmail.com>"
__copyright__ = "Apache License v2.0"

from shlex import split

import gdata.service
try:
    import books
except ImportError:
    import gdata.books as books


BOOK_SERVER       = "books.google.com"
GENERAL_FEED      = "/books/feeds/volumes"
ITEM_FEED         = "/books/feeds/volumes/"
LIBRARY_FEED      = "/books/feeds/users/%s/collections/library/volumes"
ANNOTATION_FEED   = "/books/feeds/users/%s/volumes"
PARTNER_FEED      = "/books/feeds/p/%s/volumes"
BOOK_SERVICE      = "print"
ACCOUNT_TYPE      = "HOSTED_OR_GOOGLE"


class BookService(gdata.service.GDataService):

    def __init__(self, email=None, password=None, source=None,
                server=BOOK_SERVER, account_type=ACCOUNT_TYPE,
                exception_handlers=tuple(), **kwargs):
        """source should be of form 'ProgramCompany - ProgramName - Version'"""

        gdata.service.GDataService.__init__(self, email=email,
                    password=password, service=BOOK_SERVICE, source=source,
                    server=server, **kwargs)
        self.exception_handlers = exception_handlers

    def search(self, q, start_index="1", max_results="10",
                min_viewability="none", feed=GENERAL_FEED,
                converter=books.BookFeed.FromString):
        """
        Query the Public search feed. q is either a search string or a
        gdata.service.Query instance with a query set.

        min_viewability must be "none", "partial", or "full".

        If you change the feed to a single item feed, note that you will
        probably need to change the converter to be Book.FromString
        """

        if not isinstance(q, gdata.service.Query):
            q = gdata.service.Query(text_query=q)
        if feed:
            q.feed = feed
        q['start-index'] = start_index
        q['max-results'] = max_results
        q['min-viewability'] = min_viewability
        return self.Get(uri=q.ToUri(),converter=converter)

    def search_by_keyword(self, q='', feed=GENERAL_FEED, start_index="1",
                max_results="10", min_viewability="none", **kwargs):
        """
            Query the Public Search Feed by keyword. Non-keyword strings can be
            set in q. This is quite fragile. Is there a function somewhere in
            the Google library that will parse a query the same way that Google
            does?

            Legal Identifiers are listed below and correspond to their meaning
            at http://books.google.com/advanced_book_search:
                all_words
                exact_phrase
                at_least_one
                without_words
                title
                author
                publisher
                subject
                isbn
                lccn
                oclc
                seemingly unsupported:
                publication_date: a sequence of two, two tuples:
                    ((min_month,min_year),(max_month,max_year))
                    where month is one/two digit month, year is 4 digit, eg:
                    (('1','2000'),('10','2003')). Lower bound is inclusive,
                    upper bound is exclusive
        """

        for k, v in kwargs.items():
            if not v:
                continue
            k = k.lower()
            if k == 'all_words':
                q = "%s %s" % (q, v)
            elif k == 'exact_phrase':
                q = '%s "%s"' % (q, v.strip('"'))
            elif k == 'at_least_one':
                q = '%s %s' % (q, ' '.join(['OR "%s"' % x for x in split(v)]))
            elif k == 'without_words':
                q = '%s %s' % (q, ' '.join(['-"%s"' % x for x in split(v)]))
            elif k in ('author','title', 'publisher'):
                q = '%s %s' % (q, ' '.join(['in%s:"%s"'%(k,x) for x in split(v)]))
            elif k == 'subject':
                q = '%s %s' % (q, ' '.join(['%s:"%s"' % (k,x) for x in split(v)]))
            elif k == 'isbn':
                q = '%s ISBN%s' % (q, v)
            elif k == 'issn':
                q = '%s ISSN%s' % (q,v)
            elif k == 'oclc':
                q = '%s OCLC%s' % (q,v)
            else:
                raise ValueError("Unsupported search keyword")
        return self.search(q.strip(),start_index=start_index, feed=feed,
                            max_results=max_results,
                            min_viewability=min_viewability)

    def search_library(self, q, id='me', **kwargs):
        """Like search, but in a library feed. Default is the authenticated
        user's feed. Change by setting id."""

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        feed = LIBRARY_FEED % id
        return self.search(q, feed=feed, **kwargs)

    def search_library_by_keyword(self, id='me', **kwargs):
        """Hybrid of search_by_keyword and search_library
        """

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        feed = LIBRARY_FEED % id
        return self.search_by_keyword(feed=feed,**kwargs)

    def search_annotations(self, q, id='me', **kwargs):
        """Like search, but in an annotation feed. Default is the authenticated
        user's feed. Change by setting id."""

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        feed = ANNOTATION_FEED % id
        return self.search(q, feed=feed, **kwargs)

    def search_annotations_by_keyword(self, id='me', **kwargs):
        """Hybrid of search_by_keyword and search_annotations
        """

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        feed = ANNOTATION_FEED % id
        return self.search_by_keyword(feed=feed,**kwargs)

    def add_item_to_library(self, item):
        """Add the item, either an XML string or books.Book instance, to the
        user's library feed"""

        feed = LIBRARY_FEED % 'me'
        return self.Post(data=item, uri=feed, converter=books.Book.FromString)

    def remove_item_from_library(self, item):
        """
        Remove the item, a books.Book instance, from the authenticated user's
        library feed. Using an item retrieved from a public search will fail.
        """

        return self.Delete(item.GetEditLink().href)

    def add_annotation(self, item):
        """
        Add the item, either an XML string or books.Book instance, to the
        user's annotation feed.
        """
        # do not use GetAnnotationLink, results in 400 Bad URI due to www
        return self.Post(data=item, uri=ANNOTATION_FEED % 'me',
                        converter=books.Book.FromString)

    def edit_annotation(self, item):
        """
        Send an edited item, a books.Book instance, to the user's annotation
        feed. Note that whereas extra annotations in add_annotations, minus
        ratings which are immutable once set, are simply added to the item in
        the annotation feed, if an annotation has been removed from the item,
        sending an edit request will remove that annotation. This should not
        happen with add_annotation.
        """

        return self.Put(data=item, uri=item.GetEditLink().href,
                    converter=books.Book.FromString)

    def get_by_google_id(self, id):
        return self.Get(ITEM_FEED + id, converter=books.Book.FromString)

    def get_library(self, id='me',feed=LIBRARY_FEED, start_index="1",
                max_results="100", min_viewability="none",
                converter=books.BookFeed.FromString):
        """
        Return a generator object that will return gbook.Book instances until
        the search feed no longer returns an item from the GetNextLink method.
        Thus max_results is not the maximum number of items that will be
        returned, but rather the number of items per page of searches. This has
        been set high to reduce the required number of network requests.
        """

        q = gdata.service.Query()
        q.feed = feed % id
        q['start-index'] = start_index
        q['max-results'] = max_results
        q['min-viewability'] = min_viewability
        x = self.Get(uri=q.ToUri(), converter=converter)
        while 1:
            for entry in x.entry:
                yield entry
            else:
                l = x.GetNextLink()
                if l: # hope the server preserves our preferences
                    x = self.Get(uri=l.href, converter=converter)
                else:
                    break

    def get_annotations(self, id='me', start_index="1", max_results="100",
                min_viewability="none", converter=books.BookFeed.FromString):
        """
        Like get_library, but for the annotation feed
        """

        return self.get_library(id=id, feed=ANNOTATION_FEED,
                    max_results=max_results, min_viewability = min_viewability,
                    converter=converter)