| #!/usr/bin/python |
| |
| """ |
| Extend gdata.service.GDataService to support authenticated CRUD ops on |
| Books API |
| |
| http://code.google.com/apis/books/docs/getting-started.html |
| http://code.google.com/apis/books/docs/gdata/developers_guide_protocol.html |
| |
| TODO: (here and __init__) |
| * search based on label, review, or other annotations (possible?) |
| * edit (specifically, Put requests) seems to fail to effect a change |
| |
| Problems With API: |
| * Adding a book with a review to the library adds a note, not a review. |
| This does not get included in the returned item. You see this by |
| looking at My Library through the website. |
| * Editing a review never edits a review (unless it is freshly added, but |
| see above). More generally, |
| * a Put request with changed annotations (label/rating/review) does NOT |
| change the data. Note: Put requests only work on the href from |
| GetEditLink (as per the spec). Do not try to PUT to the annotate or |
| library feeds, this will cause a 400 Invalid URI Bad Request response. |
| Attempting to Post to one of the feeds with the updated annotations |
| does not update them. See the following for (hopefully) a follow up: |
| google.com/support/forum/p/booksearch-apis/thread?tid=27fd7f68de438fc8 |
| * Attempts to workaround the edit problem continue to fail. For example, |
| removing the item, editing the data, readding the item, gives us only |
| our originally added data (annotations). This occurs even if we |
| completely shut python down, refetch the book from the public feed, |
| and re-add it. There is some kind of persistence going on that I |
| cannot change. This is likely due to the annotations being cached in |
| the annotation feed and the inability to edit (see Put, above) |
| * GetAnnotationLink has www.books.... as the server, but hitting www... |
| results in a bad URI error. |
| * Spec indicates there may be multiple labels, but there does not seem |
| to be a way to get the server to accept multiple labels, nor does the |
| web interface have an obvious way to have multiple labels. Multiple |
| labels are never returned. |
| """ |
| |
| __author__ = "James Sams <sams.james@gmail.com>" |
| __copyright__ = "Apache License v2.0" |
| |
| from shlex import split |
| |
| import gdata.service |
| try: |
| import books |
| except ImportError: |
| import gdata.books as books |
| |
| |
# Default host handed to GDataService as ``server``.
BOOK_SERVER = "books.google.com"

# Every feed path below shares this prefix; the volume collection is reused
# for both the search feed and the single-item feed.
_FEEDS_ROOT = "/books/feeds"
_VOLUMES_PATH = _FEEDS_ROOT + "/volumes"

# Public search feed.
GENERAL_FEED = _VOLUMES_PATH
# Single-volume feed; a volume id is appended directly after the slash.
ITEM_FEED = _VOLUMES_PATH + "/"
# Per-user feeds; the single ``%s`` is filled with a user id (or 'me').
LIBRARY_FEED = _FEEDS_ROOT + "/users/%s/collections/library/volumes"
ANNOTATION_FEED = _FEEDS_ROOT + "/users/%s/volumes"
# Template with one ``%s`` slot; not referenced elsewhere in this module.
PARTNER_FEED = _FEEDS_ROOT + "/p/%s/volumes"

# Service name passed to GDataService when constructing BookService.
BOOK_SERVICE = "print"
# Default value of BookService's ``account_type`` argument.
ACCOUNT_TYPE = "HOSTED_OR_GOOGLE"
| |
| |
class BookService(gdata.service.GDataService):
    """
    GDataService subclass with helpers for the Books feeds.

    Provides searches against the public volume feed and against a user's
    library and annotation feeds, plus add/remove/edit helpers for the
    authenticated user's library and annotations.
    """

    def __init__(self, email=None, password=None, source=None,
                 server=BOOK_SERVER, account_type=ACCOUNT_TYPE,
                 exception_handlers=tuple(), **kwargs):
        """
        Initialise the underlying GDataService for the Books service.

        source should be of form 'ProgramCompany - ProgramName - Version'.

        email, password, source, server and account_type are forwarded to
        gdata.service.GDataService.__init__ together with any extra keyword
        arguments; the service name is always BOOK_SERVICE.
        exception_handlers is stored on the instance unchanged.
        """

        # account_type must be forwarded explicitly: it is a named parameter
        # here, so it never reaches the base class through **kwargs.
        gdata.service.GDataService.__init__(
            self, email=email, password=password,
            account_type=account_type, service=BOOK_SERVICE,
            source=source, server=server, **kwargs)
        self.exception_handlers = exception_handlers

    def search(self, q, start_index="1", max_results="10",
               min_viewability="none", feed=GENERAL_FEED,
               converter=books.BookFeed.FromString):
        """
        Query the Public search feed. q is either a search string or a
        gdata.service.Query instance with a query set.

        min_viewability must be "none", "partial", or "full".

        If you change the feed to a single item feed, note that you will
        probably need to change the converter to be Book.FromString

        Note that a Query instance passed as q is modified in place: its
        feed (when feed is truthy) and its start-index, max-results and
        min-viewability parameters are overwritten before the request.

        Returns whatever self.Get returns for the built URI and converter.
        """

        if isinstance(q, gdata.service.Query):
            query = q
        else:
            query = gdata.service.Query(text_query=q)
        if feed:
            query.feed = feed
        query['start-index'] = start_index
        query['max-results'] = max_results
        query['min-viewability'] = min_viewability
        return self.Get(uri=query.ToUri(), converter=converter)

    def search_by_keyword(self, q='', feed=GENERAL_FEED, start_index="1",
                          max_results="10", min_viewability="none",
                          **kwargs):
        """
        Query the Public Search Feed by keyword. Non-keyword strings can be
        set in q. This is quite fragile. Is there a function somewhere in
        the Google library that will parse a query the same way that Google
        does?

        Legal Identifiers are listed below and correspond to their meaning
        at http://books.google.com/advanced_book_search:
            all_words
            exact_phrase
            at_least_one
            without_words
            title
            author
            publisher
            subject
            isbn
            issn
            lccn
            oclc
        seemingly unsupported:
            publication_date: a sequence of two, two tuples:
                ((min_month,min_year),(max_month,max_year))
                where month is one/two digit month, year is 4 digit, eg:
                (('1','2000'),('10','2003')). Lower bound is inclusive,
                upper bound is exclusive

        Keywords are matched case-insensitively and keywords whose value is
        empty are skipped. Multi-word values (other than all_words and
        exact_phrase) are tokenised with shlex.split, so quoted phrases stay
        together. Clauses are appended in the iteration order of kwargs.

        Raises ValueError for any other keyword that has a non-empty value.
        Returns the result of self.search on the assembled query string.
        """

        for key, value in kwargs.items():
            if not value:
                continue
            key = key.lower()
            if key == 'all_words':
                clause = value
            elif key == 'exact_phrase':
                # avoid doubling quotes the caller already supplied
                clause = '"%s"' % value.strip('"')
            elif key == 'at_least_one':
                clause = ' '.join(['OR "%s"' % w for w in split(value)])
            elif key == 'without_words':
                clause = ' '.join(['-"%s"' % w for w in split(value)])
            elif key in ('author', 'title', 'publisher'):
                clause = ' '.join(['in%s:"%s"' % (key, w)
                                   for w in split(value)])
            elif key == 'subject':
                clause = ' '.join(['%s:"%s"' % (key, w)
                                   for w in split(value)])
            elif key in ('isbn', 'issn', 'lccn', 'oclc'):
                # Identifier searches are the upper-cased keyword followed
                # directly by the number, e.g. ISBN0123456789.
                # NOTE(review): the LCCN prefix follows the same pattern as
                # the others; confirm against the live service.
                clause = '%s%s' % (key.upper(), value)
            else:
                raise ValueError("Unsupported search keyword")
            q = '%s %s' % (q, clause)
        return self.search(q.strip(), start_index=start_index, feed=feed,
                           max_results=max_results,
                           min_viewability=min_viewability)

    def search_library(self, q, id='me', **kwargs):
        """Like search, but in a library feed. Default is the authenticated
        user's feed. Change by setting id.

        Remaining keyword arguments are passed on to search. Raises
        ValueError if 'feed' is among them, since the feed is derived from
        id."""

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        return self.search(q, feed=LIBRARY_FEED % id, **kwargs)

    def search_library_by_keyword(self, id='me', **kwargs):
        """Hybrid of search_by_keyword and search_library

        Keyword arguments are passed on to search_by_keyword. Raises
        ValueError if 'feed' is among them.
        """

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        return self.search_by_keyword(feed=LIBRARY_FEED % id, **kwargs)

    def search_annotations(self, q, id='me', **kwargs):
        """Like search, but in an annotation feed. Default is the
        authenticated user's feed. Change by setting id.

        Remaining keyword arguments are passed on to search. Raises
        ValueError if 'feed' is among them."""

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        return self.search(q, feed=ANNOTATION_FEED % id, **kwargs)

    def search_annotations_by_keyword(self, id='me', **kwargs):
        """Hybrid of search_by_keyword and search_annotations

        Keyword arguments are passed on to search_by_keyword. Raises
        ValueError if 'feed' is among them.
        """

        if 'feed' in kwargs:
            raise ValueError("kwarg 'feed' conflicts with library_id")
        return self.search_by_keyword(feed=ANNOTATION_FEED % id, **kwargs)

    def add_item_to_library(self, item):
        """Add the item, either an XML string or books.Book instance, to the
        user's library feed.

        Posts to the 'me' library feed and returns the result of self.Post
        with books.Book.FromString as converter."""

        return self.Post(data=item, uri=LIBRARY_FEED % 'me',
                         converter=books.Book.FromString)

    def remove_item_from_library(self, item):
        """
        Remove the item, a books.Book instance, from the authenticated user's
        library feed. Using an item retrieved from a public search will fail.

        The request is a Delete on the href of item.GetEditLink().
        """

        return self.Delete(item.GetEditLink().href)

    def add_annotation(self, item):
        """
        Add the item, either an XML string or books.Book instance, to the
        user's annotation feed.

        Returns the result of self.Post with books.Book.FromString as
        converter.
        """
        # do not use GetAnnotationLink, results in 400 Bad URI due to www
        return self.Post(data=item, uri=ANNOTATION_FEED % 'me',
                         converter=books.Book.FromString)

    def edit_annotation(self, item):
        """
        Send an edited item, a books.Book instance, to the user's annotation
        feed. Note that whereas extra annotations in add_annotation, minus
        ratings which are immutable once set, are simply added to the item in
        the annotation feed, if an annotation has been removed from the item,
        sending an edit request will remove that annotation. This should not
        happen with add_annotation.

        The request is a Put to the href of item.GetEditLink(); the result of
        self.Put with books.Book.FromString as converter is returned.
        """

        return self.Put(data=item, uri=item.GetEditLink().href,
                        converter=books.Book.FromString)

    def get_by_google_id(self, id):
        """
        Fetch a single volume: Get on ITEM_FEED with id appended, using
        books.Book.FromString as converter.
        """

        return self.Get(ITEM_FEED + id, converter=books.Book.FromString)

    def get_library(self, id='me', feed=LIBRARY_FEED, start_index="1",
                    max_results="100", min_viewability="none",
                    converter=books.BookFeed.FromString):
        """
        Return a generator object that will yield the entries of each feed
        page until the feed no longer returns a link from the GetNextLink
        method. Thus max_results is not the maximum number of items that
        will be returned, but rather the number of items per page of
        searches. This has been set high to reduce the required number of
        network requests.

        feed is a template with one %s slot that is filled with id.
        """

        query = gdata.service.Query()
        query.feed = feed % id
        query['start-index'] = start_index
        query['max-results'] = max_results
        query['min-viewability'] = min_viewability
        page = self.Get(uri=query.ToUri(), converter=converter)
        while True:
            for entry in page.entry:
                yield entry
            next_link = page.GetNextLink()
            if not next_link:
                break
            # hope the server preserves our preferences
            page = self.Get(uri=next_link.href, converter=converter)

    def get_annotations(self, id='me', start_index="1", max_results="100",
                        min_viewability="none",
                        converter=books.BookFeed.FromString):
        """
        Like get_library, but for the annotation feed
        """

        # start_index is forwarded so the caller's starting offset is honoured
        return self.get_library(id=id, feed=ANNOTATION_FEED,
                                start_index=start_index,
                                max_results=max_results,
                                min_viewability=min_viewability,
                                converter=converter)