| #!/usr/bin/python |
| # |
| # Copyright Google 2007-2008, all rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| |
| import StringIO |
| import gdata |
| import gdata.service |
| import gdata.spreadsheet |
| import gdata.spreadsheet.service |
| import gdata.docs |
| import gdata.docs.service |
| |
| |
| """Make the Google Documents API feel more like using a database. |
| |
| This module contains a client and other classes which make working with the |
| Google Documents List Data API and the Google Spreadsheets Data API look a |
| bit more like working with a heirarchical database. Using the DatabaseClient, |
| you can create or find spreadsheets and use them like a database, with |
| worksheets representing tables and rows representing records. |
| |
| Example Usage: |
| # Create a new database, a new table, and add records. |
| client = gdata.spreadsheet.text_db.DatabaseClient(username='jo@example.com', |
| password='12345') |
| database = client.CreateDatabase('My Text Database') |
| table = database.CreateTable('addresses', ['name','email', |
| 'phonenumber', 'mailingaddress']) |
| record = table.AddRecord({'name':'Bob', 'email':'bob@example.com', |
| 'phonenumber':'555-555-1234', 'mailingaddress':'900 Imaginary St.'}) |
| |
| # Edit a record |
| record.content['email'] = 'bob2@example.com' |
| record.Push() |
| |
| # Delete a table |
| table.Delete |
| |
| Warnings: |
| Care should be exercised when using this module on spreadsheets |
| which contain formulas. This module treats all rows as containing text and |
| updating a row will overwrite any formula with the output of the formula. |
| The intended use case is to allow easy storage of text data in a spreadsheet. |
| |
| Error: Domain specific extension of Exception. |
| BadCredentials: Error raised is username or password was incorrect. |
| CaptchaRequired: Raised if a login attempt failed and a CAPTCHA challenge |
| was issued. |
| DatabaseClient: Communicates with Google Docs APIs servers. |
| Database: Represents a spreadsheet and interacts with tables. |
| Table: Represents a worksheet and interacts with records. |
| RecordResultSet: A list of records in a table. |
| Record: Represents a row in a worksheet allows manipulation of text data. |
| """ |
| |
| |
| __author__ = 'api.jscudder (Jeffrey Scudder)' |
| |
| |
| class Error(Exception): |
| pass |
| |
| |
| class BadCredentials(Error): |
| pass |
| |
| |
| class CaptchaRequired(Error): |
| pass |
| |
| |
| class DatabaseClient(object): |
| """Allows creation and finding of Google Spreadsheets databases. |
| |
| The DatabaseClient simplifies the process of creating and finding Google |
| Spreadsheets and will talk to both the Google Spreadsheets API and the |
| Google Documents List API. |
| """ |
| |
| def __init__(self, username=None, password=None): |
| """Constructor for a Database Client. |
| |
| If the username and password are present, the constructor will contact |
| the Google servers to authenticate. |
| |
| Args: |
| username: str (optional) Example: jo@example.com |
| password: str (optional) |
| """ |
| self.__docs_client = gdata.docs.service.DocsService() |
| self.__spreadsheets_client = ( |
| gdata.spreadsheet.service.SpreadsheetsService()) |
| self.SetCredentials(username, password) |
| |
| def SetCredentials(self, username, password): |
| """Attempts to log in to Google APIs using the provided credentials. |
| |
| If the username or password are None, the client will not request auth |
| tokens. |
| |
| Args: |
| username: str (optional) Example: jo@example.com |
| password: str (optional) |
| """ |
| self.__docs_client.email = username |
| self.__docs_client.password = password |
| self.__spreadsheets_client.email = username |
| self.__spreadsheets_client.password = password |
| if username and password: |
| try: |
| self.__docs_client.ProgrammaticLogin() |
| self.__spreadsheets_client.ProgrammaticLogin() |
| except gdata.service.CaptchaRequired: |
| raise CaptchaRequired('Please visit https://www.google.com/accounts/' |
| 'DisplayUnlockCaptcha to unlock your account.') |
| except gdata.service.BadAuthentication: |
| raise BadCredentials('Username or password incorrect.') |
| |
| def CreateDatabase(self, name): |
| """Creates a new Google Spreadsheet with the desired name. |
| |
| Args: |
| name: str The title for the spreadsheet. |
| |
| Returns: |
| A Database instance representing the new spreadsheet. |
| """ |
| # Create a Google Spreadsheet to form the foundation of this database. |
| # Spreadsheet is created by uploading a file to the Google Documents |
| # List API. |
| virtual_csv_file = StringIO.StringIO(',,,') |
| virtual_media_source = gdata.MediaSource(file_handle=virtual_csv_file, content_type='text/csv', content_length=3) |
| db_entry = self.__docs_client.UploadSpreadsheet(virtual_media_source, name) |
| return Database(spreadsheet_entry=db_entry, database_client=self) |
| |
| def GetDatabases(self, spreadsheet_key=None, name=None): |
| """Finds spreadsheets which have the unique key or title. |
| |
| If querying on the spreadsheet_key there will be at most one result, but |
| searching by name could yield multiple results. |
| |
| Args: |
| spreadsheet_key: str The unique key for the spreadsheet, this |
| usually in the the form 'pk23...We' or 'o23...423.12,,,3'. |
| name: str The title of the spreadsheets. |
| |
| Returns: |
| A list of Database objects representing the desired spreadsheets. |
| """ |
| if spreadsheet_key: |
| db_entry = self.__docs_client.GetDocumentListEntry( |
| r'/feeds/documents/private/full/spreadsheet%3A' + spreadsheet_key) |
| return [Database(spreadsheet_entry=db_entry, database_client=self)] |
| else: |
| title_query = gdata.docs.service.DocumentQuery() |
| title_query['title'] = name |
| db_feed = self.__docs_client.QueryDocumentListFeed(title_query.ToUri()) |
| matching_databases = [] |
| for entry in db_feed.entry: |
| matching_databases.append(Database(spreadsheet_entry=entry, |
| database_client=self)) |
| return matching_databases |
| |
| def _GetDocsClient(self): |
| return self.__docs_client |
| |
| def _GetSpreadsheetsClient(self): |
| return self.__spreadsheets_client |
| |
| |
| class Database(object): |
| """Provides interface to find and create tables. |
| |
| The database represents a Google Spreadsheet. |
| """ |
| |
| def __init__(self, spreadsheet_entry=None, database_client=None): |
| """Constructor for a database object. |
| |
| Args: |
| spreadsheet_entry: gdata.docs.DocumentListEntry The |
| Atom entry which represents the Google Spreadsheet. The |
| spreadsheet's key is extracted from the entry and stored as a |
| member. |
| database_client: DatabaseClient A client which can talk to the |
| Google Spreadsheets servers to perform operations on worksheets |
| within this spreadsheet. |
| """ |
| self.entry = spreadsheet_entry |
| if self.entry: |
| id_parts = spreadsheet_entry.id.text.split('/') |
| self.spreadsheet_key = id_parts[-1].replace('spreadsheet%3A', '') |
| self.client = database_client |
| |
| def CreateTable(self, name, fields=None): |
| """Add a new worksheet to this spreadsheet and fill in column names. |
| |
| Args: |
| name: str The title of the new worksheet. |
| fields: list of strings The column names which are placed in the |
| first row of this worksheet. These names are converted into XML |
| tags by the server. To avoid changes during the translation |
| process I recommend using all lowercase alphabetic names. For |
| example ['somelongname', 'theothername'] |
| |
| Returns: |
| Table representing the newly created worksheet. |
| """ |
| worksheet = self.client._GetSpreadsheetsClient().AddWorksheet(title=name, |
| row_count=1, col_count=len(fields), key=self.spreadsheet_key) |
| return Table(name=name, worksheet_entry=worksheet, |
| database_client=self.client, |
| spreadsheet_key=self.spreadsheet_key, fields=fields) |
| |
| def GetTables(self, worksheet_id=None, name=None): |
| """Searches for a worksheet with the specified ID or name. |
| |
| The list of results should have one table at most, or no results |
| if the id or name were not found. |
| |
| Args: |
| worksheet_id: str The ID of the worksheet, example: 'od6' |
| name: str The title of the worksheet. |
| |
| Returns: |
| A list of length 0 or 1 containing the desired Table. A list is returned |
| to make this method feel like GetDatabases and GetRecords. |
| """ |
| if worksheet_id: |
| worksheet_entry = self.client._GetSpreadsheetsClient().GetWorksheetsFeed( |
| self.spreadsheet_key, wksht_id=worksheet_id) |
| return [Table(name=worksheet_entry.title.text, |
| worksheet_entry=worksheet_entry, database_client=self.client, |
| spreadsheet_key=self.spreadsheet_key)] |
| else: |
| matching_tables = [] |
| query = None |
| if name: |
| query = gdata.spreadsheet.service.DocumentQuery() |
| query.title = name |
| |
| worksheet_feed = self.client._GetSpreadsheetsClient().GetWorksheetsFeed( |
| self.spreadsheet_key, query=query) |
| for entry in worksheet_feed.entry: |
| matching_tables.append(Table(name=entry.title.text, |
| worksheet_entry=entry, database_client=self.client, |
| spreadsheet_key=self.spreadsheet_key)) |
| return matching_tables |
| |
| def Delete(self): |
| """Deletes the entire database spreadsheet from Google Spreadsheets.""" |
| entry = self.client._GetDocsClient().Get( |
| r'http://docs.google.com/feeds/documents/private/full/spreadsheet%3A' + |
| self.spreadsheet_key) |
| self.client._GetDocsClient().Delete(entry.GetEditLink().href) |
| |
| |
| class Table(object): |
| |
| def __init__(self, name=None, worksheet_entry=None, database_client=None, |
| spreadsheet_key=None, fields=None): |
| self.name = name |
| self.entry = worksheet_entry |
| id_parts = worksheet_entry.id.text.split('/') |
| self.worksheet_id = id_parts[-1] |
| self.spreadsheet_key = spreadsheet_key |
| self.client = database_client |
| self.fields = fields or [] |
| if fields: |
| self.SetFields(fields) |
| |
| def LookupFields(self): |
| """Queries to find the column names in the first row of the worksheet. |
| |
| Useful when you have retrieved the table from the server and you don't |
| know the column names. |
| """ |
| if self.entry: |
| first_row_contents = [] |
| query = gdata.spreadsheet.service.CellQuery() |
| query.max_row = '1' |
| query.min_row = '1' |
| feed = self.client._GetSpreadsheetsClient().GetCellsFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id, query=query) |
| for entry in feed.entry: |
| first_row_contents.append(entry.content.text) |
| # Get the next set of cells if needed. |
| next_link = feed.GetNextLink() |
| while next_link: |
| feed = self.client._GetSpreadsheetsClient().Get(next_link.href, |
| converter=gdata.spreadsheet.SpreadsheetsCellsFeedFromString) |
| for entry in feed.entry: |
| first_row_contents.append(entry.content.text) |
| next_link = feed.GetNextLink() |
| # Convert the contents of the cells to valid headers. |
| self.fields = ConvertStringsToColumnHeaders(first_row_contents) |
| |
| def SetFields(self, fields): |
| """Changes the contents of the cells in the first row of this worksheet. |
| |
| Args: |
| fields: list of strings The names in the list comprise the |
| first row of the worksheet. These names are converted into XML |
| tags by the server. To avoid changes during the translation |
| process I recommend using all lowercase alphabetic names. For |
| example ['somelongname', 'theothername'] |
| """ |
| # TODO: If the table already had fields, we might want to clear out the, |
| # current column headers. |
| self.fields = fields |
| i = 0 |
| for column_name in fields: |
| i = i + 1 |
| # TODO: speed this up by using a batch request to update cells. |
| self.client._GetSpreadsheetsClient().UpdateCell(1, i, column_name, |
| self.spreadsheet_key, self.worksheet_id) |
| |
| def Delete(self): |
| """Deletes this worksheet from the spreadsheet.""" |
| worksheet = self.client._GetSpreadsheetsClient().GetWorksheetsFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id) |
| self.client._GetSpreadsheetsClient().DeleteWorksheet( |
| worksheet_entry=worksheet) |
| |
| def AddRecord(self, data): |
| """Adds a new row to this worksheet. |
| |
| Args: |
| data: dict of strings Mapping of string values to column names. |
| |
| Returns: |
| Record which represents this row of the spreadsheet. |
| """ |
| new_row = self.client._GetSpreadsheetsClient().InsertRow(data, |
| self.spreadsheet_key, wksht_id=self.worksheet_id) |
| return Record(content=data, row_entry=new_row, |
| spreadsheet_key=self.spreadsheet_key, worksheet_id=self.worksheet_id, |
| database_client=self.client) |
| |
| def GetRecord(self, row_id=None, row_number=None): |
| """Gets a single record from the worksheet based on row ID or number. |
| |
| Args: |
| row_id: The ID for the individual row. |
| row_number: str or int The position of the desired row. Numbering |
| begins at 1, which refers to the second row in the worksheet since |
| the first row is used for column names. |
| |
| Returns: |
| Record for the desired row. |
| """ |
| if row_id: |
| row_entry = self.client._GetSpreadsheetsClient().GetListFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id, row_id=row_id) |
| return Record(content=None, row_entry=row_entry, |
| spreadsheet_key=self.spreadsheet_key, |
| worksheet_id=self.worksheet_id, database_client=self.client) |
| else: |
| row_query = gdata.spreadsheet.service.ListQuery() |
| row_query.start_index = str(row_number) |
| row_query.max_results = '1' |
| row_feed = self.client._GetSpreadsheetsClient().GetListFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id, query=row_query) |
| if len(row_feed.entry) >= 1: |
| return Record(content=None, row_entry=row_feed.entry[0], |
| spreadsheet_key=self.spreadsheet_key, |
| worksheet_id=self.worksheet_id, database_client=self.client) |
| else: |
| return None |
| |
| def GetRecords(self, start_row, end_row): |
| """Gets all rows between the start and end row numbers inclusive. |
| |
| Args: |
| start_row: str or int |
| end_row: str or int |
| |
| Returns: |
| RecordResultSet for the desired rows. |
| """ |
| start_row = int(start_row) |
| end_row = int(end_row) |
| max_rows = end_row - start_row + 1 |
| row_query = gdata.spreadsheet.service.ListQuery() |
| row_query.start_index = str(start_row) |
| row_query.max_results = str(max_rows) |
| rows_feed = self.client._GetSpreadsheetsClient().GetListFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id, query=row_query) |
| return RecordResultSet(rows_feed, self.client, self.spreadsheet_key, |
| self.worksheet_id) |
| |
| def FindRecords(self, query_string): |
| """Performs a query against the worksheet to find rows which match. |
| |
| For details on query string syntax see the section on sq under |
| http://code.google.com/apis/spreadsheets/reference.html#list_Parameters |
| |
| Args: |
| query_string: str Examples: 'name == john' to find all rows with john |
| in the name column, '(cost < 19.50 and name != toy) or cost > 500' |
| |
| Returns: |
| RecordResultSet with the first group of matches. |
| """ |
| row_query = gdata.spreadsheet.service.ListQuery() |
| row_query.sq = query_string |
| matching_feed = self.client._GetSpreadsheetsClient().GetListFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id, query=row_query) |
| return RecordResultSet(matching_feed, self.client, |
| self.spreadsheet_key, self.worksheet_id) |
| |
| |
| class RecordResultSet(list): |
| """A collection of rows which allows fetching of the next set of results. |
| |
| The server may not send all rows in the requested range because there are |
| too many. Using this result set you can access the first set of results |
| as if it is a list, then get the next batch (if there are more results) by |
| calling GetNext(). |
| """ |
| |
| def __init__(self, feed, client, spreadsheet_key, worksheet_id): |
| self.client = client |
| self.spreadsheet_key = spreadsheet_key |
| self.worksheet_id = worksheet_id |
| self.feed = feed |
| list(self) |
| for entry in self.feed.entry: |
| self.append(Record(content=None, row_entry=entry, |
| spreadsheet_key=spreadsheet_key, worksheet_id=worksheet_id, |
| database_client=client)) |
| |
| def GetNext(self): |
| """Fetches the next batch of rows in the result set. |
| |
| Returns: |
| A new RecordResultSet. |
| """ |
| next_link = self.feed.GetNextLink() |
| if next_link and next_link.href: |
| new_feed = self.client._GetSpreadsheetsClient().Get(next_link.href, |
| converter=gdata.spreadsheet.SpreadsheetsListFeedFromString) |
| return RecordResultSet(new_feed, self.client, self.spreadsheet_key, |
| self.worksheet_id) |
| |
| |
| class Record(object): |
| """Represents one row in a worksheet and provides a dictionary of values. |
| |
| Attributes: |
| custom: dict Represents the contents of the row with cell values mapped |
| to column headers. |
| """ |
| |
| def __init__(self, content=None, row_entry=None, spreadsheet_key=None, |
| worksheet_id=None, database_client=None): |
| """Constructor for a record. |
| |
| Args: |
| content: dict of strings Mapping of string values to column names. |
| row_entry: gdata.spreadsheet.SpreadsheetsList The Atom entry |
| representing this row in the worksheet. |
| spreadsheet_key: str The ID of the spreadsheet in which this row |
| belongs. |
| worksheet_id: str The ID of the worksheet in which this row belongs. |
| database_client: DatabaseClient The client which can be used to talk |
| the Google Spreadsheets server to edit this row. |
| """ |
| self.entry = row_entry |
| self.spreadsheet_key = spreadsheet_key |
| self.worksheet_id = worksheet_id |
| if row_entry: |
| self.row_id = row_entry.id.text.split('/')[-1] |
| else: |
| self.row_id = None |
| self.client = database_client |
| self.content = content or {} |
| if not content: |
| self.ExtractContentFromEntry(row_entry) |
| |
| def ExtractContentFromEntry(self, entry): |
| """Populates the content and row_id based on content of the entry. |
| |
| This method is used in the Record's contructor. |
| |
| Args: |
| entry: gdata.spreadsheet.SpreadsheetsList The Atom entry |
| representing this row in the worksheet. |
| """ |
| self.content = {} |
| if entry: |
| self.row_id = entry.id.text.split('/')[-1] |
| for label, custom in entry.custom.iteritems(): |
| self.content[label] = custom.text |
| |
| def Push(self): |
| """Send the content of the record to spreadsheets to edit the row. |
| |
| All items in the content dictionary will be sent. Items which have been |
| removed from the content may remain in the row. The content member |
| of the record will not be modified so additional fields in the row |
| might be absent from this local copy. |
| """ |
| self.entry = self.client._GetSpreadsheetsClient().UpdateRow(self.entry, self.content) |
| |
| def Pull(self): |
| """Query Google Spreadsheets to get the latest data from the server. |
| |
| Fetches the entry for this row and repopulates the content dictionary |
| with the data found in the row. |
| """ |
| if self.row_id: |
| self.entry = self.client._GetSpreadsheetsClient().GetListFeed( |
| self.spreadsheet_key, wksht_id=self.worksheet_id, row_id=self.row_id) |
| self.ExtractContentFromEntry(self.entry) |
| |
| def Delete(self): |
| self.client._GetSpreadsheetsClient().DeleteRow(self.entry) |
| |
| |
| def ConvertStringsToColumnHeaders(proposed_headers): |
| """Converts a list of strings to column names which spreadsheets accepts. |
| |
| When setting values in a record, the keys which represent column names must |
| fit certain rules. They are all lower case, contain no spaces or special |
| characters. If two columns have the same name after being sanitized, the |
| columns further to the right have _2, _3 _4, etc. appended to them. |
| |
| If there are column names which consist of all special characters, or if |
| the column header is blank, an obfuscated value will be used for a column |
| name. This method does not handle blank column names or column names with |
| only special characters. |
| """ |
| headers = [] |
| for input_string in proposed_headers: |
| # TODO: probably a more efficient way to do this. Perhaps regex. |
| sanitized = input_string.lower().replace('_', '').replace( |
| ':', '').replace(' ', '') |
| # When the same sanitized header appears multiple times in the first row |
| # of a spreadsheet, _n is appended to the name to make it unique. |
| header_count = headers.count(sanitized) |
| if header_count > 0: |
| headers.append('%s_%i' % (sanitized, header_count+1)) |
| else: |
| headers.append(sanitized) |
| return headers |