faset over fra Z3950 til google books

2010-09-23 13:57:37 +00:00
parent d173b9396e
commit 644e79f9b5
221 changed files with 60948 additions and 27273 deletions
--- a/python/gdata/books/service.py
+++ b/python/gdata/books/service.py
@@ -0,0 +1,266 @@
+#!/usr/bin/python
+
+"""
+    Extend gdata.service.GDataService to support authenticated CRUD ops on 
+    Books API
+
+    http://code.google.com/apis/books/docs/getting-started.html
+    http://code.google.com/apis/books/docs/gdata/developers_guide_protocol.html
+
+    TODO: (here and __init__)
+        * search based on label, review, or other annotations (possible?)
+        * edit (specifically, Put requests) seem to fail effect a change
+
+    Problems With API:
+        * Adding a book with a review to the library adds a note, not a review.
+          This does not get included in the returned item. You see this by
+          looking at My Library through the website.
+        * Editing a review never edits a review (unless it is freshly added, but 
+          see above). More generally,
+        * a Put request with changed annotations (label/rating/review) does NOT
+          change the data. Note: Put requests only work on the href from 
+          GetEditLink (as per the spec). Do not try to PUT to the annotate or 
+          library feeds, this will cause a 400 Invalid URI Bad Request response.
+          Attempting to Post to one of the feeds with the updated annotations
+          does not update them. See the following for (hopefully) a follow up:
+          google.com/support/forum/p/booksearch-apis/thread?tid=27fd7f68de438fc8
+        * Attempts to workaround the edit problem continue to fail. For example,
+          removing the item, editing the data, readding the item, gives us only
+          our originally added data (annotations). This occurs even if we
+          completely shut python down, refetch the book from the public feed,
+          and re-add it. There is some kind of persistence going on that I
+          cannot change. This is likely due to the annotations being cached in
+          the annotation feed and the inability to edit (see Put, above)
+        * GetAnnotationLink has www.books.... as the server, but hitting www...
+          results in a bad URI error.
+        * Spec indicates there may be multiple labels, but there does not seem
+          to be a way to get the server to accept multiple labels, nor does the
+          web interface have an obvious way to have multiple labels. Multiple 
+          labels are never returned.
+"""
+
+__author__ = "James Sams <sams.james@gmail.com>"
+__copyright__ = "Apache License v2.0"
+
+from shlex import split
+
+import gdata.service
+try:
+    import books
+except ImportError:
+    import gdata.books as books
+
+
+BOOK_SERVER       = "books.google.com"
+GENERAL_FEED      = "/books/feeds/volumes"
+ITEM_FEED         = "/books/feeds/volumes/"
+LIBRARY_FEED      = "/books/feeds/users/%s/collections/library/volumes"
+ANNOTATION_FEED   = "/books/feeds/users/%s/volumes"
+PARTNER_FEED      = "/books/feeds/p/%s/volumes"
+BOOK_SERVICE      = "print"
+ACCOUNT_TYPE      = "HOSTED_OR_GOOGLE"
+
+
+class BookService(gdata.service.GDataService):
+
+    def __init__(self, email=None, password=None, source=None,
+                server=BOOK_SERVER, account_type=ACCOUNT_TYPE,
+                exception_handlers=tuple(), **kwargs):
+        """source should be of form 'ProgramCompany - ProgramName - Version'"""
+
+        gdata.service.GDataService.__init__(self, email=email, 
+                    password=password, service=BOOK_SERVICE, source=source,
+                    server=server, **kwargs) 
+        self.exception_handlers = exception_handlers
+
+    def search(self, q, start_index="1", max_results="10", 
+                min_viewability="none", feed=GENERAL_FEED,
+                converter=books.BookFeed.FromString):
+        """
+        Query the Public search feed. q is either a search string or a
+        gdata.service.Query instance with a query set.
+        
+        min_viewability must be "none", "partial", or "full".
+        
+        If you change the feed to a single item feed, note that you will 
+        probably need to change the converter to be Book.FromString
+        """
+
+        if not isinstance(q, gdata.service.Query):
+            q = gdata.service.Query(text_query=q)
+        if feed:
+            q.feed = feed
+        q['start-index'] = start_index
+        q['max-results'] = max_results
+        q['min-viewability'] = min_viewability
+        return self.Get(uri=q.ToUri(),converter=converter)
+    
+    def search_by_keyword(self, q='', feed=GENERAL_FEED, start_index="1",
+                max_results="10", min_viewability="none", **kwargs):
+        """
+            Query the Public Search Feed by keyword. Non-keyword strings can be
+            set in q. This is quite fragile. Is there a function somewhere in
+            the Google library that will parse a query the same way that Google
+            does?
+
+            Legal Identifiers are listed below and correspond to their meaning
+            at http://books.google.com/advanced_book_search:
+                all_words 
+                exact_phrase 
+                at_least_one 
+                without_words 
+                title
+                author
+                publisher
+                subject
+                isbn
+                lccn
+                oclc
+                seemingly unsupported:
+                publication_date: a sequence of two, two tuples:
+                    ((min_month,min_year),(max_month,max_year))
+                    where month is one/two digit month, year is 4 digit, eg:
+                    (('1','2000'),('10','2003')). Lower bound is inclusive,
+                    upper bound is exclusive
+        """
+
+        for k, v in kwargs.items():
+            if not v:
+                continue
+            k = k.lower()
+            if k == 'all_words':
+                q = "%s %s" % (q, v)
+            elif k == 'exact_phrase':
+                q = '%s "%s"' % (q, v.strip('"'))
+            elif k == 'at_least_one':
+                q = '%s %s' % (q, ' '.join(['OR "%s"' % x for x in split(v)]))
+            elif k == 'without_words':
+                q = '%s %s' % (q, ' '.join(['-"%s"' % x for x in split(v)]))
+            elif k in ('author','title', 'publisher'):
+                q = '%s %s' % (q, ' '.join(['in%s:"%s"'%(k,x) for x in split(v)]))
+            elif k == 'subject':
+                q = '%s %s' % (q, ' '.join(['%s:"%s"' % (k,x) for x in split(v)]))
+            elif k == 'isbn':
+                q = '%s ISBN%s' % (q, v)
+            elif k == 'issn':
+                q = '%s ISSN%s' % (q,v)
+            elif k == 'oclc':
+                q = '%s OCLC%s' % (q,v)
+            else:
+                raise ValueError("Unsupported search keyword")
+        return self.search(q.strip(),start_index=start_index, feed=feed, 
+                            max_results=max_results, 
+                            min_viewability=min_viewability)
+    
+    def search_library(self, q, id='me', **kwargs):
+        """Like search, but in a library feed. Default is the authenticated
+        user's feed. Change by setting id."""
+
+        if 'feed' in kwargs:
+            raise ValueError("kwarg 'feed' conflicts with library_id")
+        feed = LIBRARY_FEED % id
+        return self.search(q, feed=feed, **kwargs)
+   
+    def search_library_by_keyword(self, id='me', **kwargs):
+        """Hybrid of search_by_keyword and search_library
+        """
+
+        if 'feed' in kwargs:
+            raise ValueError("kwarg 'feed' conflicts with library_id")
+        feed = LIBRARY_FEED % id
+        return self.search_by_keyword(feed=feed,**kwargs)
+    
+    def search_annotations(self, q, id='me', **kwargs): 
+        """Like search, but in an annotation feed. Default is the authenticated
+        user's feed. Change by setting id."""
+
+        if 'feed' in kwargs:
+            raise ValueError("kwarg 'feed' conflicts with library_id")
+        feed = ANNOTATION_FEED % id
+        return self.search(q, feed=feed, **kwargs)
+    
+    def search_annotations_by_keyword(self, id='me', **kwargs):
+        """Hybrid of search_by_keyword and search_annotations
+        """
+
+        if 'feed' in kwargs:
+            raise ValueError("kwarg 'feed' conflicts with library_id")
+        feed = ANNOTATION_FEED % id
+        return self.search_by_keyword(feed=feed,**kwargs)
+    
+    def add_item_to_library(self, item):
+        """Add the item, either an XML string or books.Book instance, to the 
+        user's library feed"""
+
+        feed = LIBRARY_FEED % 'me'
+        return self.Post(data=item, uri=feed, converter=books.Book.FromString)
+    
+    def remove_item_from_library(self, item):
+        """
+        Remove the item, a books.Book instance, from the authenticated user's 
+        library feed. Using an item retrieved from a public search will fail.
+        """
+
+        return self.Delete(item.GetEditLink().href)
+    
+    def add_annotation(self, item):
+        """
+        Add the item, either an XML string or books.Book instance, to the 
+        user's annotation feed.
+        """
+        # do not use GetAnnotationLink, results in 400 Bad URI due to www
+        return self.Post(data=item, uri=ANNOTATION_FEED % 'me', 
+                        converter=books.Book.FromString)
+    
+    def edit_annotation(self, item):
+        """
+        Send an edited item, a books.Book instance, to the user's annotation 
+        feed. Note that whereas extra annotations in add_annotations, minus 
+        ratings which are immutable once set, are simply added to the item in 
+        the annotation feed, if an annotation has been removed from the item, 
+        sending an edit request will remove that annotation. This should not 
+        happen with add_annotation.
+        """
+
+        return self.Put(data=item, uri=item.GetEditLink().href,
+                    converter=books.Book.FromString) 
+    
+    def get_by_google_id(self, id):
+        return self.Get(ITEM_FEED + id, converter=books.Book.FromString)
+    
+    def get_library(self, id='me',feed=LIBRARY_FEED, start_index="1", 
+                max_results="100", min_viewability="none",
+                converter=books.BookFeed.FromString):
+        """
+        Return a generator object that will return gbook.Book instances until
+        the search feed no longer returns an item from the GetNextLink method.
+        Thus max_results is not the maximum number of items that will be
+        returned, but rather the number of items per page of searches. This has
+        been set high to reduce the required number of network requests.
+        """
+
+        q = gdata.service.Query()
+        q.feed = feed % id
+        q['start-index'] = start_index
+        q['max-results'] = max_results
+        q['min-viewability'] = min_viewability
+        x = self.Get(uri=q.ToUri(), converter=converter)
+        while 1:
+            for entry in x.entry:
+                yield entry
+            else:
+                l = x.GetNextLink()
+                if l: # hope the server preserves our preferences
+                    x = self.Get(uri=l.href, converter=converter)
+                else:
+                    break
+
+    def get_annotations(self, id='me', start_index="1", max_results="100",
+                min_viewability="none", converter=books.BookFeed.FromString):
+        """
+        Like get_library, but for the annotation feed
+        """
+
+        return self.get_library(id=id, feed=ANNOTATION_FEED,
+                    max_results=max_results, min_viewability = min_viewability,
+                    converter=converter)