#!/usr/bin/python # # Copyright 2009 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """DocsService extends the GDataService to streamline Google Documents operations. DocsService: Provides methods to query feeds and manipulate items. Extends GDataService. DocumentQuery: Queries a Google Document list feed. DocumentAclQuery: Queries a Google Document Acl feed. """ __author__ = ('api.jfisher (Jeff Fisher), ' 'e.bidelman (Eric Bidelman)') import re import atom import gdata.service import gdata.docs import urllib # XML Namespaces used in Google Documents entities. DATA_KIND_SCHEME = gdata.GDATA_NAMESPACE + '#kind' DOCUMENT_LABEL = 'document' SPREADSHEET_LABEL = 'spreadsheet' PRESENTATION_LABEL = 'presentation' FOLDER_LABEL = 'folder' PDF_LABEL = 'pdf' LABEL_SCHEME = gdata.GDATA_NAMESPACE + '/labels' STARRED_LABEL_TERM = LABEL_SCHEME + '#starred' TRASHED_LABEL_TERM = LABEL_SCHEME + '#trashed' HIDDEN_LABEL_TERM = LABEL_SCHEME + '#hidden' MINE_LABEL_TERM = LABEL_SCHEME + '#mine' PRIVATE_LABEL_TERM = LABEL_SCHEME + '#private' SHARED_WITH_DOMAIN_LABEL_TERM = LABEL_SCHEME + '#shared-with-domain' VIEWED_LABEL_TERM = LABEL_SCHEME + '#viewed' FOLDERS_SCHEME_PREFIX = gdata.docs.DOCUMENTS_NAMESPACE + '/folders/' # File extensions of documents that are permitted to be uploaded or downloaded. SUPPORTED_FILETYPES = { 'CSV': 'text/csv', 'TSV': 'text/tab-separated-values', 'TAB': 'text/tab-separated-values', 'DOC': 'application/msword', 'DOCX': ('application/vnd.openxmlformats-officedocument.' 'wordprocessingml.document'), 'ODS': 'application/x-vnd.oasis.opendocument.spreadsheet', 'ODT': 'application/vnd.oasis.opendocument.text', 'RTF': 'application/rtf', 'SXW': 'application/vnd.sun.xml.writer', 'TXT': 'text/plain', 'XLS': 'application/vnd.ms-excel', 'XLSX': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'PDF': 'application/pdf', 'PNG': 'image/png', 'PPT': 'application/vnd.ms-powerpoint', 'PPS': 'application/vnd.ms-powerpoint', 'HTM': 'text/html', 'HTML': 'text/html', 'ZIP': 'application/zip', 'SWF': 'application/x-shockwave-flash' } class DocsService(gdata.service.GDataService): """Client extension for the Google Documents service Document List feed.""" __FILE_EXT_PATTERN = re.compile('.*\.([a-zA-Z]{3,}$)') __RESOURCE_ID_PATTERN = re.compile('^([a-z]*)(:|%3A)([\w-]*)$') def __init__(self, email=None, password=None, source=None, server='docs.google.com', additional_headers=None, **kwargs): """Creates a client for the Google Documents service. Args: email: string (optional) The user's email address, used for authentication. password: string (optional) The user's password. source: string (optional) The name of the user's application. server: string (optional) The name of the server to which a connection will be opened. Default value: 'docs.google.com'. **kwargs: The other parameters to pass to gdata.service.GDataService constructor. """ gdata.service.GDataService.__init__( self, email=email, password=password, service='writely', source=source, server=server, additional_headers=additional_headers, **kwargs) def _MakeKindCategory(self, label): if label is None: return None return atom.Category(scheme=DATA_KIND_SCHEME, term=gdata.docs.DOCUMENTS_NAMESPACE + '#' + label, label=label) def _MakeContentLinkFromId(self, resource_id): match = self.__RESOURCE_ID_PATTERN.match(resource_id) label = match.group(1) doc_id = match.group(3) if label == DOCUMENT_LABEL: return '/feeds/download/documents/Export?docId=%s' % doc_id if label == PRESENTATION_LABEL: return '/feeds/download/presentations/Export?docId=%s' % doc_id if label == SPREADSHEET_LABEL: return ('https://spreadsheets.google.com/feeds/download/spreadsheets/' 'Export?key=%s' % doc_id) raise ValueError, 'Invalid resource id: %s' % resource_id def _UploadFile(self, media_source, title, category, folder_or_uri=None): """Uploads a file to the Document List feed. Args: media_source: A gdata.MediaSource object containing the file to be uploaded. title: string The title of the document on the server after being uploaded. category: An atom.Category object specifying the appropriate document type. folder_or_uri: DocumentListEntry or string (optional) An object with a link to a folder or a uri to a folder to upload to. Note: A valid uri for a folder is of the form: /feeds/folders/private/full/folder%3Afolder_id Returns: A DocumentListEntry containing information about the document created on the Google Documents service. """ if folder_or_uri: try: uri = folder_or_uri.content.src except AttributeError: uri = folder_or_uri else: uri = '/feeds/documents/private/full' entry = gdata.docs.DocumentListEntry() entry.title = atom.Title(text=title) if category is not None: entry.category.append(category) entry = self.Post(entry, uri, media_source=media_source, extra_headers={'Slug': media_source.file_name}, converter=gdata.docs.DocumentListEntryFromString) return entry def _DownloadFile(self, uri, file_path): """Downloads a file. Args: uri: string The full Export URL to download the file from. file_path: string The full path to save the file to. Raises: RequestError: on error response from server. """ server_response = self.request('GET', uri) response_body = server_response.read() if server_response.status != 200: raise gdata.service.RequestError, {'status': server_response.status, 'reason': server_response.reason, 'body': response_body} f = open(file_path, 'wb') f.write(response_body) f.flush() f.close() def MoveIntoFolder(self, source_entry, folder_entry): """Moves a document into a folder in the Document List Feed. Args: source_entry: DocumentListEntry An object representing the source document/folder. folder_entry: DocumentListEntry An object with a link to the destination folder. Returns: A DocumentListEntry containing information about the document created on the Google Documents service. """ entry = gdata.docs.DocumentListEntry() entry.id = source_entry.id entry = self.Post(entry, folder_entry.content.src, converter=gdata.docs.DocumentListEntryFromString) return entry def Query(self, uri, converter=gdata.docs.DocumentListFeedFromString): """Queries the Document List feed and returns the resulting feed of entries. Args: uri: string The full URI to be queried. This can contain query parameters, a hostname, or simply the relative path to a Document List feed. The DocumentQuery object is useful when constructing query parameters. converter: func (optional) A function which will be executed on the retrieved item, generally to render it into a Python object. By default the DocumentListFeedFromString function is used to return a DocumentListFeed object. This is because most feed queries will result in a feed and not a single entry. """ return self.Get(uri, converter=converter) def QueryDocumentListFeed(self, uri): """Retrieves a DocumentListFeed by retrieving a URI based off the Document List feed, including any query parameters. A DocumentQuery object can be used to construct these parameters. Args: uri: string The URI of the feed being retrieved possibly with query parameters. Returns: A DocumentListFeed object representing the feed returned by the server. """ return self.Get(uri, converter=gdata.docs.DocumentListFeedFromString) def GetDocumentListEntry(self, uri): """Retrieves a particular DocumentListEntry by its unique URI. Args: uri: string The unique URI of an entry in a Document List feed. Returns: A DocumentListEntry object representing the retrieved entry. """ return self.Get(uri, converter=gdata.docs.DocumentListEntryFromString) def GetDocumentListFeed(self, uri=None): """Retrieves a feed containing all of a user's documents. Args: uri: string A full URI to query the Document List feed. """ if not uri: uri = gdata.docs.service.DocumentQuery().ToUri() return self.QueryDocumentListFeed(uri) def GetDocumentListAclEntry(self, uri): """Retrieves a particular DocumentListAclEntry by its unique URI. Args: uri: string The unique URI of an entry in a Document List feed. Returns: A DocumentListAclEntry object representing the retrieved entry. """ return self.Get(uri, converter=gdata.docs.DocumentListAclEntryFromString) def GetDocumentListAclFeed(self, uri): """Retrieves a feed containing all of a user's documents. Args: uri: string The URI of a document's Acl feed to retrieve. Returns: A DocumentListAclFeed object representing the ACL feed returned by the server. """ return self.Get(uri, converter=gdata.docs.DocumentListAclFeedFromString) def Upload(self, media_source, title, folder_or_uri=None, label=None): """Uploads a document inside of a MediaSource object to the Document List feed with the given title. Args: media_source: MediaSource The gdata.MediaSource object containing a document file to be uploaded. title: string The title of the document on the server after being uploaded. folder_or_uri: DocumentListEntry or string (optional) An object with a link to a folder or a uri to a folder to upload to. Note: A valid uri for a folder is of the form: /feeds/folders/private/full/folder%3Afolder_id label: optional label describing the type of the document to be created. Returns: A DocumentListEntry containing information about the document created on the Google Documents service. """ return self._UploadFile(media_source, title, self._MakeKindCategory(label), folder_or_uri) def Download(self, entry_or_id_or_url, file_path, export_format=None, gid=None, extra_params=None): """Downloads a document from the Document List. Args: entry_or_id_or_url: a DocumentListEntry, or the resource id of an entry, or a url to download from (such as the content src). file_path: string The full path to save the file to. export_format: the format to convert to, if conversion is required. gid: grid id, for downloading a single grid of a spreadsheet extra_params: a map of any further parameters to control how the document is downloaded Raises: RequestError if the service does not respond with success """ if isinstance(entry_or_id_or_url, gdata.docs.DocumentListEntry): url = entry_or_id_or_url.content.src else: if self.__RESOURCE_ID_PATTERN.match(entry_or_id_or_url): url = self._MakeContentLinkFromId(entry_or_id_or_url) else: url = entry_or_id_or_url if export_format is not None: if url.find('/Export?') == -1: raise gdata.service.Error, ('This entry cannot be exported ' 'as a different format') url += '&exportFormat=%s' % export_format if gid is not None: if url.find('spreadsheets') == -1: raise gdata.service.Error, 'grid id param is not valid for this entry' url += '&gid=%s' % gid if extra_params: url += '&' + urllib.urlencode(extra_params) self._DownloadFile(url, file_path) def Export(self, entry_or_id_or_url, file_path, gid=None, extra_params=None): """Downloads a document from the Document List in a different format. Args: entry_or_id_or_url: a DocumentListEntry, or the resource id of an entry, or a url to download from (such as the content src). file_path: string The full path to save the file to. The export format is inferred from the the file extension. gid: grid id, for downloading a single grid of a spreadsheet extra_params: a map of any further parameters to control how the document is downloaded Raises: RequestError if the service does not respond with success """ ext = None match = self.__FILE_EXT_PATTERN.match(file_path) if match: ext = match.group(1) self.Download(entry_or_id_or_url, file_path, ext, gid, extra_params) def CreateFolder(self, title, folder_or_uri=None): """Creates a folder in the Document List feed. Args: title: string The title of the folder on the server after being created. folder_or_uri: DocumentListEntry or string (optional) An object with a link to a folder or a uri to a folder to upload to. Note: A valid uri for a folder is of the form: /feeds/folders/private/full/folder%3Afolder_id Returns: A DocumentListEntry containing information about the folder created on the Google Documents service. """ if folder_or_uri: try: uri = folder_or_uri.content.src except AttributeError: uri = folder_or_uri else: uri = '/feeds/documents/private/full' folder_entry = gdata.docs.DocumentListEntry() folder_entry.title = atom.Title(text=title) folder_entry.category.append(self._MakeKindCategory(FOLDER_LABEL)) folder_entry = self.Post(folder_entry, uri, converter=gdata.docs.DocumentListEntryFromString) return folder_entry def MoveOutOfFolder(self, source_entry): """Moves a document into a folder in the Document List Feed. Args: source_entry: DocumentListEntry An object representing the source document/folder. Returns: True if the entry was moved out. """ return self.Delete(source_entry.GetEditLink().href) # Deprecated methods #@atom.deprecated('Please use Upload instead') def UploadPresentation(self, media_source, title, folder_or_uri=None): """Uploads a presentation inside of a MediaSource object to the Document List feed with the given title. This method is deprecated, use Upload instead. Args: media_source: MediaSource The MediaSource object containing a presentation file to be uploaded. title: string The title of the presentation on the server after being uploaded. folder_or_uri: DocumentListEntry or string (optional) An object with a link to a folder or a uri to a folder to upload to. Note: A valid uri for a folder is of the form: /feeds/folders/private/full/folder%3Afolder_id Returns: A DocumentListEntry containing information about the presentation created on the Google Documents service. """ return self._UploadFile( media_source, title, self._MakeKindCategory(PRESENTATION_LABEL), folder_or_uri=folder_or_uri) UploadPresentation = atom.deprecated('Please use Upload instead')( UploadPresentation) #@atom.deprecated('Please use Upload instead') def UploadSpreadsheet(self, media_source, title, folder_or_uri=None): """Uploads a spreadsheet inside of a MediaSource object to the Document List feed with the given title. This method is deprecated, use Upload instead. Args: media_source: MediaSource The MediaSource object containing a spreadsheet file to be uploaded. title: string The title of the spreadsheet on the server after being uploaded. folder_or_uri: DocumentListEntry or string (optional) An object with a link to a folder or a uri to a folder to upload to. Note: A valid uri for a folder is of the form: /feeds/folders/private/full/folder%3Afolder_id Returns: A DocumentListEntry containing information about the spreadsheet created on the Google Documents service. """ return self._UploadFile( media_source, title, self._MakeKindCategory(SPREADSHEET_LABEL), folder_or_uri=folder_or_uri) UploadSpreadsheet = atom.deprecated('Please use Upload instead')( UploadSpreadsheet) #@atom.deprecated('Please use Upload instead') def UploadDocument(self, media_source, title, folder_or_uri=None): """Uploads a document inside of a MediaSource object to the Document List feed with the given title. This method is deprecated, use Upload instead. Args: media_source: MediaSource The gdata.MediaSource object containing a document file to be uploaded. title: string The title of the document on the server after being uploaded. folder_or_uri: DocumentListEntry or string (optional) An object with a link to a folder or a uri to a folder to upload to. Note: A valid uri for a folder is of the form: /feeds/folders/private/full/folder%3Afolder_id Returns: A DocumentListEntry containing information about the document created on the Google Documents service. """ return self._UploadFile( media_source, title, self._MakeKindCategory(DOCUMENT_LABEL), folder_or_uri=folder_or_uri) UploadDocument = atom.deprecated('Please use Upload instead')( UploadDocument) """Calling any of these functions is the same as calling Export""" DownloadDocument = atom.deprecated('Please use Export instead')(Export) DownloadPresentation = atom.deprecated('Please use Export instead')(Export) DownloadSpreadsheet = atom.deprecated('Please use Export instead')(Export) """Calling any of these functions is the same as calling MoveIntoFolder""" MoveDocumentIntoFolder = atom.deprecated( 'Please use MoveIntoFolder instead')(MoveIntoFolder) MovePresentationIntoFolder = atom.deprecated( 'Please use MoveIntoFolder instead')(MoveIntoFolder) MoveSpreadsheetIntoFolder = atom.deprecated( 'Please use MoveIntoFolder instead')(MoveIntoFolder) MoveFolderIntoFolder = atom.deprecated( 'Please use MoveIntoFolder instead')(MoveIntoFolder) class DocumentQuery(gdata.service.Query): """Object used to construct a URI to query the Google Document List feed""" def __init__(self, feed='/feeds/documents', visibility='private', projection='full', text_query=None, params=None, categories=None): """Constructor for Document List Query Args: feed: string (optional) The path for the feed. (e.g. '/feeds/documents') visibility: string (optional) The visibility chosen for the current feed. projection: string (optional) The projection chosen for the current feed. text_query: string (optional) The contents of the q query parameter. This string is URL escaped upon conversion to a URI. params: dict (optional) Parameter value string pairs which become URL params when translated to a URI. These parameters are added to the query's items. categories: list (optional) List of category strings which should be included as query categories. See gdata.service.Query for additional documentation. Yields: A DocumentQuery object used to construct a URI based on the Document List feed. """ self.visibility = visibility self.projection = projection gdata.service.Query.__init__(self, feed, text_query, params, categories) def ToUri(self): """Generates a URI from the query parameters set in the object. Returns: A string containing the URI used to retrieve entries from the Document List feed. """ old_feed = self.feed self.feed = '/'.join([old_feed, self.visibility, self.projection]) new_feed = gdata.service.Query.ToUri(self) self.feed = old_feed return new_feed def AddNamedFolder(self, email, folder_name): """Adds a named folder category, qualified by a schema. This function lets you query for documents that are contained inside a named folder without fear of collision with other categories. Args: email: string The email of the user who owns the folder. folder_name: string The name of the folder. Returns: The string of the category that was added to the object. """ category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name) self.categories.append(category) return category def RemoveNamedFolder(self, email, folder_name): """Removes a named folder category, qualified by a schema. Args: email: string The email of the user who owns the folder. folder_name: string The name of the folder. Returns: The string of the category that was removed to the object. """ category = '{%s%s}%s' % (FOLDERS_SCHEME_PREFIX, email, folder_name) self.categories.remove(category) return category class DocumentAclQuery(gdata.service.Query): """Object used to construct a URI to query a Document's ACL feed""" def __init__(self, resource_id, feed='/feeds/acl/private/full'): """Constructor for Document ACL Query Args: resource_id: string The resource id. (e.g. 'document%3Adocument_id', 'spreadsheet%3Aspreadsheet_id', etc.) feed: string (optional) The path for the feed. (e.g. '/feeds/acl/private/full') Yields: A DocumentAclQuery object used to construct a URI based on the Document ACL feed. """ self.resource_id = resource_id gdata.service.Query.__init__(self, feed) def ToUri(self): """Generates a URI from the query parameters set in the object. Returns: A string containing the URI used to retrieve entries from the Document ACL feed. """ return '%s/%s' % (gdata.service.Query.ToUri(self), self.resource_id)