| #!/usr/bin/python |
| # |
| # Copyright 2009 Google Inc. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """DocsService extends the GDataService to streamline Google Documents |
| operations. |
| |
| DocsService: Provides methods to query feeds and manipulate items. |
| Extends GDataService. |
| |
| DocumentQuery: Queries a Google Document list feed. |
| |
| DocumentAclQuery: Queries a Google Document Acl feed. |
| """ |
| |
| |
| __author__ = ('api.jfisher (Jeff Fisher), ' |
| 'e.bidelman (Eric Bidelman)') |
| |
| import re |
| import atom |
| import gdata.service |
| import gdata.docs |
| import urllib |
| |
# XML namespaces, schemes and terms used in Google Documents entities.

# Category scheme that carries an entry's kind, and the kind labels
# referenced elsewhere in this module.
DATA_KIND_SCHEME = '%s#kind' % gdata.GDATA_NAMESPACE
DOCUMENT_LABEL = 'document'
SPREADSHEET_LABEL = 'spreadsheet'
PRESENTATION_LABEL = 'presentation'
FOLDER_LABEL = 'folder'
PDF_LABEL = 'pdf'

# Label scheme and the individual label terms built on top of it.
LABEL_SCHEME = '%s/labels' % gdata.GDATA_NAMESPACE
STARRED_LABEL_TERM = '%s#starred' % LABEL_SCHEME
TRASHED_LABEL_TERM = '%s#trashed' % LABEL_SCHEME
HIDDEN_LABEL_TERM = '%s#hidden' % LABEL_SCHEME
MINE_LABEL_TERM = '%s#mine' % LABEL_SCHEME
PRIVATE_LABEL_TERM = '%s#private' % LABEL_SCHEME
SHARED_WITH_DOMAIN_LABEL_TERM = '%s#shared-with-domain' % LABEL_SCHEME
VIEWED_LABEL_TERM = '%s#viewed' % LABEL_SCHEME

# Prefix of the per-user folder category scheme (see
# DocumentQuery.AddNamedFolder, which appends the owner's email).
FOLDERS_SCHEME_PREFIX = '%s/folders/' % gdata.docs.DOCUMENTS_NAMESPACE
| |
# Maps the upper-case file extension of every document type that may be
# uploaded or downloaded to the MIME type used for it.
SUPPORTED_FILETYPES = {
    'CSV': 'text/csv',
    'TSV': 'text/tab-separated-values',
    'TAB': 'text/tab-separated-values',
    'DOC': 'application/msword',
    'DOCX': (
        'application/vnd.openxmlformats-officedocument.'
        'wordprocessingml.document'
    ),
    'ODS': 'application/x-vnd.oasis.opendocument.spreadsheet',
    'ODT': 'application/vnd.oasis.opendocument.text',
    'RTF': 'application/rtf',
    'SXW': 'application/vnd.sun.xml.writer',
    'TXT': 'text/plain',
    'XLS': 'application/vnd.ms-excel',
    'XLSX': (
        'application/vnd.openxmlformats-officedocument.'
        'spreadsheetml.sheet'
    ),
    'PDF': 'application/pdf',
    'PNG': 'image/png',
    'PPT': 'application/vnd.ms-powerpoint',
    'PPS': 'application/vnd.ms-powerpoint',
    'HTM': 'text/html',
    'HTML': 'text/html',
    'ZIP': 'application/zip',
    'SWF': 'application/x-shockwave-flash',
}
| |
| |
class DocsService(gdata.service.GDataService):
    """Client extension for the Google Documents service Document List feed."""

    # Captures a trailing file extension made of three or more ASCII letters.
    __FILE_EXT_PATTERN = re.compile(r'.*\.([a-zA-Z]{3,}$)')
    # Splits a resource id such as 'document:abc' or 'document%3Aabc' into a
    # type label (group 1), the separator (group 2) and the id (group 3).
    __RESOURCE_ID_PATTERN = re.compile(r'^([a-z]*)(:|%3A)([\w-]*)$')

    def __init__(self, email=None, password=None, source=None,
                 server='docs.google.com', additional_headers=None, **kwargs):
        """Creates a client for the Google Documents service.

        Args:
            email: string (optional) The user's email address, used for
                authentication.
            password: string (optional) The user's password.
            source: string (optional) The name of the user's application.
            server: string (optional) The name of the server to which a
                connection will be opened. Default value: 'docs.google.com'.
            additional_headers: (optional) Forwarded unchanged to the
                gdata.service.GDataService constructor.
            **kwargs: The other parameters to pass to the
                gdata.service.GDataService constructor.
        """
        gdata.service.GDataService.__init__(
            self, email=email, password=password, service='writely',
            source=source, server=server,
            additional_headers=additional_headers, **kwargs)
        self.ssl = True

        # State used by Export to route a download into a caller-supplied
        # file-like object instead of a path on disk. Export sets these
        # for the duration of one call; _DownloadFile reads them.
        self.file_handler = None
        self.use_file_handler = False

    def _MakeKindCategory(self, label):
        """Builds the atom.Category that marks an entry as the given kind.

        Args:
            label: string or None The kind label (e.g. DOCUMENT_LABEL).

        Returns:
            An atom.Category using DATA_KIND_SCHEME, or None when label is
            None.
        """
        if label is None:
            return None
        return atom.Category(
            scheme=DATA_KIND_SCHEME,
            term=gdata.docs.DOCUMENTS_NAMESPACE + '#' + label,
            label=label)

    def _MakeContentLinkFromId(self, resource_id):
        """Builds the Export URL for a document, presentation or spreadsheet.

        Args:
            resource_id: string A resource id of the form 'label:id' or
                'label%3Aid'.

        Returns:
            A string with the Export URL (a relative path for documents and
            presentations, an absolute URL for spreadsheets).

        Raises:
            ValueError: if resource_id is malformed or its label is not one
                of document, presentation or spreadsheet.
        """
        match = self.__RESOURCE_ID_PATTERN.match(resource_id)
        if match is None:
            # Without this guard a malformed id surfaced as an
            # AttributeError on None instead of the intended ValueError.
            raise ValueError('Invalid resource id: %s' % resource_id)
        label = match.group(1)
        doc_id = match.group(3)
        if label == DOCUMENT_LABEL:
            return '/feeds/download/documents/Export?docId=%s' % doc_id
        if label == PRESENTATION_LABEL:
            return '/feeds/download/presentations/Export?docId=%s' % doc_id
        if label == SPREADSHEET_LABEL:
            return ('https://spreadsheets.google.com/feeds/download/'
                    'spreadsheets/Export?key=%s' % doc_id)
        raise ValueError('Invalid resource id: %s' % resource_id)

    def _ResolveFolderUri(self, folder_or_uri):
        """Returns the URI that a new entry should be posted to.

        Args:
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder, or a uri to a folder.

        Returns:
            folder_or_uri.content.src when that attribute chain exists,
            folder_or_uri itself when it does not, and
            '/feeds/documents/private/full' when folder_or_uri is falsy.
        """
        if not folder_or_uri:
            return '/feeds/documents/private/full'
        try:
            return folder_or_uri.content.src
        except AttributeError:
            return folder_or_uri

    def _AppendQuery(self, url, query):
        """Appends an already-encoded 'name=value' fragment to url.

        Uses '&' when url already contains a '?', and '?' otherwise, so the
        result stays well formed for URLs without a query string.
        """
        if '?' in url:
            return url + '&' + query
        return url + '?' + query

    def _UploadFile(self, media_source, title, category, folder_or_uri=None):
        """Uploads a file to the Document List feed.

        Args:
            media_source: A gdata.MediaSource object containing the file to
                be uploaded.
            title: string The title of the document on the server after
                being uploaded.
            category: An atom.Category object specifying the appropriate
                document type, or None to send no category.
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder or a uri to a folder to upload to.
                Note: A valid uri for a folder is of the form:
                    /feeds/folders/private/full/folder%3Afolder_id

        Returns:
            A DocumentListEntry containing information about the document
            created on the Google Documents service.
        """
        uri = self._ResolveFolderUri(folder_or_uri)

        entry = gdata.docs.DocumentListEntry()
        entry.title = atom.Title(text=title)
        if category is not None:
            entry.category.append(category)
        return self.Post(entry, uri, media_source=media_source,
                         extra_headers={'Slug': media_source.file_name},
                         converter=gdata.docs.DocumentListEntryFromString)

    def _DownloadFile(self, uri, file_path):
        """Downloads a file.

        The response body is written to self.file_handler (which is flushed
        but left open) when self.use_file_handler is true, and otherwise to
        a new file at file_path opened in binary mode.

        Args:
            uri: string The full Export URL to download the file from.
            file_path: string The full path to save the file to. Not used
                when self.use_file_handler is true.

        Raises:
            RequestError: on error response from server.
        """
        server_response = self.request('GET', uri)
        response_body = server_response.read()
        # Follow at most five consecutive 302 redirects.
        redirects_left = 5
        while server_response.status == 302 and redirects_left > 0:
            server_response = self.request(
                'GET', server_response.getheader('Location'))
            response_body = server_response.read()
            redirects_left -= 1
        if server_response.status != 200:
            raise gdata.service.RequestError({
                'status': server_response.status,
                'reason': server_response.reason,
                'body': response_body})

        if self.use_file_handler:
            # The handler belongs to the caller, so it is never closed here.
            self.file_handler.write(response_body)
            self.file_handler.flush()
        else:
            out_file = open(file_path, 'wb')
            try:
                out_file.write(response_body)
            finally:
                # Close even when the write fails so the descriptor is not
                # leaked.
                out_file.close()

    def MoveIntoFolder(self, source_entry, folder_entry):
        """Moves a document into a folder in the Document List Feed.

        Args:
            source_entry: DocumentListEntry An object representing the
                source document/folder.
            folder_entry: DocumentListEntry An object with a link to the
                destination folder.

        Returns:
            A DocumentListEntry built from the server's response to posting
            the source entry's id to the folder's content src.
        """
        entry = gdata.docs.DocumentListEntry()
        entry.id = source_entry.id
        return self.Post(entry, folder_entry.content.src,
                         converter=gdata.docs.DocumentListEntryFromString)

    def Query(self, uri, converter=gdata.docs.DocumentListFeedFromString):
        """Queries the Document List feed and returns the resulting feed.

        Args:
            uri: string The full URI to be queried. This can contain query
                parameters, a hostname, or simply the relative path to a
                Document List feed. The DocumentQuery object is useful when
                constructing query parameters.
            converter: func (optional) A function which will be executed on
                the retrieved item, generally to render it into a Python
                object. By default the DocumentListFeedFromString function
                is used to return a DocumentListFeed object. This is
                because most feed queries will result in a feed and not a
                single entry.

        Returns:
            Whatever self.Get returns for the given uri and converter.
        """
        return self.Get(uri, converter=converter)

    def QueryDocumentListFeed(self, uri):
        """Retrieves a DocumentListFeed for a Document List feed URI.

        The URI may include query parameters; a DocumentQuery object can be
        used to construct them.

        Args:
            uri: string The URI of the feed being retrieved possibly with
                query parameters.

        Returns:
            A DocumentListFeed object representing the feed returned by the
            server.
        """
        return self.Get(uri, converter=gdata.docs.DocumentListFeedFromString)

    def GetDocumentListEntry(self, uri):
        """Retrieves a particular DocumentListEntry by its unique URI.

        Args:
            uri: string The unique URI of an entry in a Document List feed.

        Returns:
            A DocumentListEntry object representing the retrieved entry.
        """
        return self.Get(uri, converter=gdata.docs.DocumentListEntryFromString)

    def GetDocumentListFeed(self, uri=None):
        """Retrieves a feed containing all of a user's documents.

        Args:
            uri: string (optional) A full URI to query the Document List
                feed. When omitted or empty, the URI of a default
                DocumentQuery is used.

        Returns:
            A DocumentListFeed object representing the feed returned by the
            server.
        """
        if not uri:
            # DocumentQuery is defined later in this module; the name is
            # resolved when this method runs, not when the class is built.
            uri = DocumentQuery().ToUri()
        return self.QueryDocumentListFeed(uri)

    def GetDocumentListAclEntry(self, uri):
        """Retrieves a particular DocumentListAclEntry by its unique URI.

        Args:
            uri: string The unique URI of an entry in a Document List ACL
                feed.

        Returns:
            A DocumentListAclEntry object representing the retrieved entry.
        """
        return self.Get(
            uri, converter=gdata.docs.DocumentListAclEntryFromString)

    def GetDocumentListAclFeed(self, uri):
        """Retrieves the ACL feed of a document.

        Args:
            uri: string The URI of a document's Acl feed to retrieve.

        Returns:
            A DocumentListAclFeed object representing the ACL feed
            returned by the server.
        """
        return self.Get(
            uri, converter=gdata.docs.DocumentListAclFeedFromString)

    def Upload(self, media_source, title, folder_or_uri=None, label=None):
        """Uploads a document inside of a MediaSource object.

        The document is posted to the Document List feed with the given
        title.

        Args:
            media_source: MediaSource The gdata.MediaSource object
                containing a document file to be uploaded.
            title: string The title of the document on the server after
                being uploaded.
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder or a uri to a folder to upload to.
                Note: A valid uri for a folder is of the form:
                    /feeds/folders/private/full/folder%3Afolder_id
            label: optional label describing the type of the document to be
                created.

        Returns:
            A DocumentListEntry containing information about the document
            created on the Google Documents service.
        """
        return self._UploadFile(media_source, title,
                                self._MakeKindCategory(label), folder_or_uri)

    def Download(self, entry_or_id_or_url, file_path, export_format=None,
                 gid=None, extra_params=None):
        """Downloads a document from the Document List.

        Args:
            entry_or_id_or_url: a DocumentListEntry, or the resource id of
                an entry, or a url to download from (such as the content
                src).
            file_path: string The full path to save the file to.
            export_format: the format to convert to, if conversion is
                required.
            gid: grid id, for downloading a single grid of a spreadsheet
            extra_params: a map of any further parameters to control how
                the document is downloaded

        Raises:
            gdata.service.Error: if export_format is given but the URL is
                not an Export URL, or gid is given but the URL does not
                contain 'spreadsheets'.
            RequestError: if the service does not respond with success
        """
        if isinstance(entry_or_id_or_url, gdata.docs.DocumentListEntry):
            url = entry_or_id_or_url.content.src
        elif self.__RESOURCE_ID_PATTERN.match(entry_or_id_or_url):
            url = self._MakeContentLinkFromId(entry_or_id_or_url)
        else:
            url = entry_or_id_or_url

        if export_format is not None:
            if url.find('/Export?') == -1:
                raise gdata.service.Error(
                    'This entry cannot be exported as a different format')
            url = self._AppendQuery(url, 'exportFormat=%s' % export_format)

        if gid is not None:
            if url.find('spreadsheets') == -1:
                raise gdata.service.Error(
                    'grid id param is not valid for this entry')
            url = self._AppendQuery(url, 'gid=%s' % gid)

        if extra_params:
            url = self._AppendQuery(url, urllib.urlencode(extra_params))

        self._DownloadFile(url, file_path)

    def Export(self, entry_or_id_or_url, file_path, gid=None,
               extra_params=None, file_handler=None):
        """Downloads a document from the Document List in a different format.

        Args:
            entry_or_id_or_url: a DocumentListEntry, or the resource id of
                an entry, or a url to download from (such as the content
                src).
            file_path: string The full path to save the file to. The export
                format is inferred from the file extension. When
                file_handler is given nothing is written to this path, but
                its extension still selects the export format.
            gid: grid id, for downloading a single grid of a spreadsheet
            extra_params: a map of any further parameters to control how
                the document is downloaded
            file_handler: (optional) an object with write() and flush()
                methods. When truthy, the downloaded bytes are written to
                it instead of to file_path; it is flushed but not closed.

        Raises:
            RequestError if the service does not respond with success
        """
        ext = None
        if file_path:
            match = self.__FILE_EXT_PATTERN.match(file_path)
            if match:
                ext = match.group(1)

        if file_handler:
            self.file_handler = file_handler
            self.use_file_handler = True
        else:
            self.use_file_handler = False

        try:
            self.Download(entry_or_id_or_url, file_path, ext, gid,
                          extra_params)
        finally:
            # Always drop the per-call handler so that a later Download()
            # does not silently write into a stale (possibly closed) one.
            self.file_handler = None
            self.use_file_handler = False

    def CreateFolder(self, title, folder_or_uri=None):
        """Creates a folder in the Document List feed.

        Args:
            title: string The title of the folder on the server after being
                created.
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder or a uri to a folder to create the
                new folder in.
                Note: A valid uri for a folder is of the form:
                    /feeds/folders/private/full/folder%3Afolder_id

        Returns:
            A DocumentListEntry containing information about the folder
            created on the Google Documents service.
        """
        uri = self._ResolveFolderUri(folder_or_uri)

        folder_entry = gdata.docs.DocumentListEntry()
        folder_entry.title = atom.Title(text=title)
        folder_entry.category.append(self._MakeKindCategory(FOLDER_LABEL))
        return self.Post(folder_entry, uri,
                         converter=gdata.docs.DocumentListEntryFromString)

    def MoveOutOfFolder(self, source_entry):
        """Moves a document out of a folder in the Document List Feed.

        Args:
            source_entry: DocumentListEntry An object representing the
                source document/folder.

        Returns:
            The result of self.Delete on the entry's edit link (described
            by the original documentation as True if the entry was moved
            out).
        """
        return self.Delete(source_entry.GetEditLink().href)

    # Deprecated methods.
    #
    # Each one is wrapped by calling atom.deprecated(...) explicitly on the
    # function object right after its definition.

    def UploadPresentation(self, media_source, title, folder_or_uri=None):
        """Uploads a presentation inside of a MediaSource object.

        This method is deprecated, use Upload instead.

        Args:
            media_source: MediaSource The MediaSource object containing a
                presentation file to be uploaded.
            title: string The title of the presentation on the server after
                being uploaded.
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder or a uri to a folder to upload to.
                Note: A valid uri for a folder is of the form:
                    /feeds/folders/private/full/folder%3Afolder_id

        Returns:
            A DocumentListEntry containing information about the
            presentation created on the Google Documents service.
        """
        return self._UploadFile(
            media_source, title, self._MakeKindCategory(PRESENTATION_LABEL),
            folder_or_uri=folder_or_uri)

    UploadPresentation = atom.deprecated('Please use Upload instead')(
        UploadPresentation)

    def UploadSpreadsheet(self, media_source, title, folder_or_uri=None):
        """Uploads a spreadsheet inside of a MediaSource object.

        This method is deprecated, use Upload instead.

        Args:
            media_source: MediaSource The MediaSource object containing a
                spreadsheet file to be uploaded.
            title: string The title of the spreadsheet on the server after
                being uploaded.
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder or a uri to a folder to upload to.
                Note: A valid uri for a folder is of the form:
                    /feeds/folders/private/full/folder%3Afolder_id

        Returns:
            A DocumentListEntry containing information about the
            spreadsheet created on the Google Documents service.
        """
        return self._UploadFile(
            media_source, title, self._MakeKindCategory(SPREADSHEET_LABEL),
            folder_or_uri=folder_or_uri)

    UploadSpreadsheet = atom.deprecated('Please use Upload instead')(
        UploadSpreadsheet)

    def UploadDocument(self, media_source, title, folder_or_uri=None):
        """Uploads a document inside of a MediaSource object.

        This method is deprecated, use Upload instead.

        Args:
            media_source: MediaSource The gdata.MediaSource object
                containing a document file to be uploaded.
            title: string The title of the document on the server after
                being uploaded.
            folder_or_uri: DocumentListEntry or string (optional) An object
                with a link to a folder or a uri to a folder to upload to.
                Note: A valid uri for a folder is of the form:
                    /feeds/folders/private/full/folder%3Afolder_id

        Returns:
            A DocumentListEntry containing information about the document
            created on the Google Documents service.
        """
        return self._UploadFile(
            media_source, title, self._MakeKindCategory(DOCUMENT_LABEL),
            folder_or_uri=folder_or_uri)

    UploadDocument = atom.deprecated('Please use Upload instead')(
        UploadDocument)

    # Calling any of these functions is the same as calling Export.
    DownloadDocument = atom.deprecated('Please use Export instead')(Export)
    DownloadPresentation = atom.deprecated(
        'Please use Export instead')(Export)
    DownloadSpreadsheet = atom.deprecated('Please use Export instead')(Export)

    # Calling any of these functions is the same as calling MoveIntoFolder.
    MoveDocumentIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
    MovePresentationIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
    MoveSpreadsheetIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
    MoveFolderIntoFolder = atom.deprecated(
        'Please use MoveIntoFolder instead')(MoveIntoFolder)
| |
| |
class DocumentQuery(gdata.service.Query):
    """Object used to construct a URI to query the Google Document List feed."""

    def __init__(self, feed='/feeds/documents', visibility='private',
                 projection='full', text_query=None, params=None,
                 categories=None):
        """Constructor for Document List Query.

        Args:
            feed: string (optional) The path for the feed.
                (e.g. '/feeds/documents')
            visibility: string (optional) The visibility chosen for the
                current feed.
            projection: string (optional) The projection chosen for the
                current feed.
            text_query: string (optional) The contents of the q query
                parameter. This string is URL escaped upon conversion to a
                URI.
            params: dict (optional) Parameter value string pairs which
                become URL params when translated to a URI. These
                parameters are added to the query's items.
            categories: list (optional) List of category strings which
                should be included as query categories. See
                gdata.service.Query for additional documentation.
        """
        self.visibility = visibility
        self.projection = projection
        gdata.service.Query.__init__(self, feed, text_query, params,
                                     categories)

    def ToUri(self):
        """Generates a URI from the query parameters set in the object.

        The visibility and projection are appended to the feed path only
        for the duration of the call; self.feed is restored afterwards,
        even if building the URI raises.

        Returns:
            A string containing the URI used to retrieve entries from the
            Document List feed.
        """
        base_feed = self.feed
        self.feed = '/'.join([base_feed, self.visibility, self.projection])
        try:
            return gdata.service.Query.ToUri(self)
        finally:
            # Restore the caller-visible feed so that repeated calls do not
            # keep appending visibility/projection, including on failure.
            self.feed = base_feed

    def AddNamedFolder(self, email, folder_name):
        """Adds a named folder category, qualified by a schema.

        This function lets you query for documents that are contained
        inside a named folder without fear of collision with other
        categories.

        Args:
            email: string The email of the user who owns the folder.
            folder_name: string The name of the folder.

        Returns:
            The string of the category that was added to the object.
        """
        category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name)
        self.categories.append(category)
        return category

    def RemoveNamedFolder(self, email, folder_name):
        """Removes a named folder category, qualified by a schema.

        Args:
            email: string The email of the user who owns the folder.
            folder_name: string The name of the folder.

        Returns:
            The string of the category that was removed from the object.

        Raises:
            ValueError: if the category is not present (assuming
                self.categories is a list, as it is used with append and
                remove here).
        """
        category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name)
        self.categories.remove(category)
        return category
| |
| |
class DocumentAclQuery(gdata.service.Query):
    """Builds the URI used to query the ACL feed of a single document."""

    def __init__(self, resource_id, feed='/feeds/acl/private/full'):
        """Creates a query for one document's ACL feed.

        Args:
            resource_id: string The resource id.
                (e.g. 'document%3Adocument_id',
                'spreadsheet%3Aspreadsheet_id', etc.)
            feed: string (optional) The path for the feed.
                (e.g. '/feeds/acl/private/full')
        """
        self.resource_id = resource_id
        gdata.service.Query.__init__(self, feed)

    def ToUri(self):
        """Generates a URI from the query parameters set in the object.

        Returns:
            A string made of the URI produced by gdata.service.Query.ToUri,
            a '/' and the resource id, used to retrieve entries from the
            Document ACL feed.
        """
        base_uri = gdata.service.Query.ToUri(self)
        return '%s/%s' % (base_uri, self.resource_id)