worblehat-old/python/gdata/docs/client.py

#!/usr/bin/python
#
# Copyright 2009 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""DocsClient extends gdata.client.GDClient to streamline DocList API calls."""


__author__ = 'e.bidelman (Eric Bidelman)'

import mimetypes
import urllib
import atom.data
import atom.http_core
import gdata.client
import gdata.docs.data
import gdata.gauth


# Feed URI templates
DOCLIST_FEED_URI = '/feeds/default/private/full/'
FOLDERS_FEED_TEMPLATE = DOCLIST_FEED_URI + '%s/contents'
ACL_FEED_TEMPLATE = DOCLIST_FEED_URI + '%s/acl'
REVISIONS_FEED_TEMPLATE = DOCLIST_FEED_URI + '%s/revisions'


class DocsClient(gdata.client.GDClient):
  """Client extension for the Google Documents List API."""

  host = 'docs.google.com'  # default server for the API
  api_version = '3.0'  # default major version for the service.
  auth_service = 'writely'
  auth_scopes = gdata.gauth.AUTH_SCOPES['writely']
  ssl = True

  def __init__(self, auth_token=None, **kwargs):
    """Constructs a new client for the DocList API.

    Args:
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: The other parameters to pass to gdata.client.GDClient constructor.
    """
    gdata.client.GDClient.__init__(self, auth_token=auth_token, **kwargs)

  def get_file_content(self, uri, auth_token=None, **kwargs):
    """Fetches the file content from the specified uri.

    This method is useful for downloading/exporting a file within enviornments
    like Google App Engine, where the user does not have the ability to write
    the file to a local disk.

    Args:
      uri: str The full URL to fetch the file contents from.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.request().

    Returns:
      The binary file content.

    Raises:
      gdata.client.RequestError: on error response from server.
    """
    server_response = self.request('GET', uri, auth_token=auth_token, **kwargs)
    if server_response.status != 200:
      raise  gdata.client.RequestError, {'status': server_response.status,
                                         'reason': server_response.reason,
                                         'body': server_response.read()}
    return server_response.read()

  GetFileContent = get_file_content

  def _download_file(self, uri, file_path, auth_token=None, **kwargs):
    """Downloads a file to disk from the specified URI.

    Note: to download a file in memory, use the GetFileContent() method.

    Args:
      uri: str The full URL to download the file from.
      file_path: str The full path to save the file to.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.get_file_content().

    Raises:
      gdata.client.RequestError: on error response from server.
    """
    f = open(file_path, 'wb')
    try:
      f.write(self.get_file_content(uri, auth_token=auth_token, **kwargs))
    except gdata.client.RequestError, e:
      f.close()
      raise e
    f.flush()
    f.close()

  _DownloadFile = _download_file

  def get_doclist(self, uri=None, limit=None, auth_token=None, **kwargs):
    """Retrieves the main doclist feed containing the user's items.

    Args:
      uri: str (optional) A URI to query the doclist feed.
      limit: int (optional) A maximum cap for the number of results to
          return in the feed. By default, the API returns a maximum of 100
          per page. Thus, if you set limit=5000, you will get <= 5000
          documents (guarenteed no more than 5000), and will need to follow the
          feed's next links (feed.GetNextLink()) to the rest. See
          get_everything(). Similarly, if you set limit=50, only <= 50
          documents are returned. Note: if the max-results parameter is set in
          the uri parameter, it is chosen over a value set for limit.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.get_feed().

    Returns:
      gdata.docs.data.DocList feed.
    """
    if uri is None:
      uri = DOCLIST_FEED_URI

    if isinstance(uri, (str, unicode)):
      uri = atom.http_core.Uri.parse_uri(uri)    

    # Add max-results param if it wasn't included in the uri.
    if limit is not None and not 'max-results' in uri.query:
      uri.query['max-results'] = limit

    return self.get_feed(uri, desired_class=gdata.docs.data.DocList,
                         auth_token=auth_token, **kwargs)

  GetDocList = get_doclist

  def get_doc(self, resource_id, etag=None, auth_token=None, **kwargs):
    """Retrieves a particular document given by its resource id.

    Args:
      resource_id: str The document/item's resource id. Example spreadsheet:
          'spreadsheet%3A0A1234567890'.
      etag: str (optional) The document/item's etag value to be used in a
          conditional GET. See http://code.google.com/apis/documents/docs/3.0/
          developers_guide_protocol.html#RetrievingCached.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.get_entry().

    Returns:
      A gdata.docs.data.DocsEntry object representing the retrieved entry.

    Raises:
      ValueError if the resource_id is not a valid format.
    """
    match = gdata.docs.data.RESOURCE_ID_PATTERN.match(resource_id)
    if match is None:
      raise ValueError, 'Invalid resource id: %s' % resource_id
    return self.get_entry(
        DOCLIST_FEED_URI + resource_id, etag=etag,
        desired_class=gdata.docs.data.DocsEntry,
        auth_token=auth_token, **kwargs)

  GetDoc = get_doc

  def get_everything(self, uri=None, auth_token=None, **kwargs):
    """Retrieves the user's entire doc list.

    The method makes multiple HTTP requests (by following the feed's next links)
    in order to fetch the user's entire document list.

    Args:
      uri: str (optional) A URI to query the doclist feed with.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.GetDocList().

    Returns:
      A list of gdata.docs.data.DocsEntry objects representing the retrieved
      entries.
    """
    if uri is None:
      uri = DOCLIST_FEED_URI

    feed = self.GetDocList(uri=uri, auth_token=auth_token, **kwargs)
    entries = feed.entry

    while feed.GetNextLink() is not None:
      feed = self.GetDocList(
          feed.GetNextLink().href, auth_token=auth_token, **kwargs)
      entries.extend(feed.entry)

    return entries

  GetEverything = get_everything

  def get_acl_permissions(self, resource_id, auth_token=None, **kwargs):
    """Retrieves a the ACL sharing permissions for a document.

    Args:
      resource_id: str The document/item's resource id. Example for pdf:
          'pdf%3A0A1234567890'.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.get_feed().

    Returns:
      A gdata.docs.data.AclFeed object representing the document's ACL entries.

    Raises:
      ValueError if the resource_id is not a valid format.
    """
    match = gdata.docs.data.RESOURCE_ID_PATTERN.match(resource_id)
    if match is None:
      raise ValueError, 'Invalid resource id: %s' % resource_id

    return self.get_feed(
        ACL_FEED_TEMPLATE % resource_id, desired_class=gdata.docs.data.AclFeed,
        auth_token=auth_token, **kwargs)

  GetAclPermissions = get_acl_permissions

  def get_revisions(self, resource_id, auth_token=None, **kwargs):
    """Retrieves the revision history for a document.

    Args:
      resource_id: str The document/item's resource id. Example for pdf:
          'pdf%3A0A1234567890'.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.get_feed().

    Returns:
      A gdata.docs.data.RevisionFeed representing the document's revisions.

    Raises:
      ValueError if the resource_id is not a valid format.
    """
    match = gdata.docs.data.RESOURCE_ID_PATTERN.match(resource_id)
    if match is None:
      raise ValueError, 'Invalid resource id: %s' % resource_id

    return self.get_feed(
        REVISIONS_FEED_TEMPLATE % resource_id,
        desired_class=gdata.docs.data.RevisionFeed, auth_token=auth_token,
        **kwargs)

  GetRevisions = get_revisions

  def create(self, doc_type, title, folder_or_id=None, writers_can_invite=None,
             auth_token=None, **kwargs):
    """Creates a new item in the user's doclist.

    Args:
      doc_type: str The type of object to create. For example: 'document',
          'spreadsheet', 'folder', 'presentation'.
      title: str A title for the document.
      folder_or_id: gdata.docs.data.DocsEntry or str (optional) Folder entry or
          the resouce id of a folder to create the object under. Note: A valid
          resource id for a folder is of the form: folder%3Afolder_id.
      writers_can_invite: bool (optional) False prevents collaborators from
          being able to invite others to edit or view the document.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.post().

    Returns:
      gdata.docs.data.DocsEntry containing information newly created item.
    """
    entry = gdata.docs.data.DocsEntry(title=atom.data.Title(text=title))
    entry.category.append(gdata.docs.data.make_kind_category(doc_type))

    if isinstance(writers_can_invite, gdata.docs.data.WritersCanInvite):
      entry.writers_can_invite = writers_can_invite
    elif isinstance(writers_can_invite, bool):
      entry.writers_can_invite = gdata.docs.data.WritersCanInvite(
          value=str(writers_can_invite).lower())

    uri = DOCLIST_FEED_URI

    if folder_or_id is not None:
      if isinstance(folder_or_id, gdata.docs.data.DocsEntry):
        # Verify that we're uploading the resource into to a folder.
        if folder_or_id.get_document_type() == gdata.docs.data.FOLDER_LABEL:
          uri = folder_or_id.content.src
        else:
          raise gdata.client.Error, 'Trying to upload item to a non-folder.'
      else:
        uri = FOLDERS_FEED_TEMPLATE % folder_or_id

    return self.post(entry, uri, auth_token=auth_token, **kwargs)

  Create = create

  def copy(self, source_entry, title, auth_token=None, **kwargs):
    """Copies a native Google document, spreadsheet, or presentation.

    Note: arbitrary file types and PDFs do not support this feature.

    Args:
      source_entry: gdata.docs.data.DocsEntry An object representing the source
          document/folder.
      title: str A title for the new document.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.post().

    Returns:
      A gdata.docs.data.DocsEntry of the duplicated document.
    """
    entry = gdata.docs.data.DocsEntry(
        title=atom.data.Title(text=title),
        id=atom.data.Id(text=source_entry.GetSelfLink().href))
    return self.post(entry, DOCLIST_FEED_URI, auth_token=auth_token, **kwargs)

  Copy = copy

  def move(self, source_entry, folder_entry=None,
           keep_in_folders=False, auth_token=None, **kwargs):
    """Moves an item into a different folder (or to the root document list).

    Args:
      source_entry: gdata.docs.data.DocsEntry An object representing the source
          document/folder.
      folder_entry: gdata.docs.data.DocsEntry (optional) An object representing
          the destination folder. If None, set keep_in_folders to
          True to remove the item from all parent folders.
      keep_in_folders: boolean (optional) If True, the source entry
          is not removed from any existing parent folders it is in.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.post().

    Returns:
      A gdata.docs.data.DocsEntry of the moved entry or True if just moving the
      item out of all folders (e.g. Move(source_entry)).
    """
    entry = gdata.docs.data.DocsEntry(id=source_entry.id)

    # Remove the item from any folders it is already in.
    if not keep_in_folders:
      for folder in source_entry.InFolders():
        self.delete(
            '%s/contents/%s' % (
                folder.href,
                urllib.quote(source_entry.resource_id.text)),
            force=True)

    # If we're moving the resource into a folder, verify it is a folder entry.
    if folder_entry is not None:
      if folder_entry.get_document_type() == gdata.docs.data.FOLDER_LABEL:
        return self.post(entry, folder_entry.content.src,
                         auth_token=auth_token, **kwargs)
      else:
        raise gdata.client.Error, 'Trying to move item into a non-folder.'

    return True

  Move = move

  def upload(self, media, title, folder_or_uri=None, content_type=None,
             auth_token=None, **kwargs):
    """Uploads a file to Google Docs.

    Args:
      media: A gdata.data.MediaSource object containing the file to be
          uploaded or a string of the filepath.
      title: str The title of the document on the server after being
          uploaded.
      folder_or_uri: gdata.docs.data.DocsEntry or str (optional) An object with
          a link to the folder or the uri to upload the file to.
          Note: A valid uri for a folder is of the form:
                /feeds/default/private/full/folder%3Afolder_id/contents
      content_type: str (optional) The file's mimetype. If not provided, the
          one in the media source object is used or the mimetype is inferred
          from the filename (if media is a string). When media is a filename,
          it is always recommended to pass in a content type.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.post().

    Returns:
      A gdata.docs.data.DocsEntry containing information about uploaded doc.
    """
    uri = None
    if folder_or_uri is not None:
      if isinstance(folder_or_uri, gdata.docs.data.DocsEntry):
        # Verify that we're uploading the resource into to a folder.
        if folder_or_uri.get_document_type() == gdata.docs.data.FOLDER_LABEL:
          uri = folder_or_uri.content.src
        else:
          raise gdata.client.Error, 'Trying to upload item to a non-folder.'
      else:
        uri = folder_or_uri
    else:
      uri = DOCLIST_FEED_URI

    # Create media source if media is a filepath.
    if isinstance(media, (str, unicode)):
      mimetype = mimetypes.guess_type(media)[0]
      if mimetype is None and content_type is None:
        raise ValueError, ("Unknown mimetype. Please pass in the file's "
                           "content_type")
      else:
        media = gdata.data.MediaSource(file_path=media,
                                       content_type=content_type)

    entry = gdata.docs.data.DocsEntry(title=atom.data.Title(text=title))

    return self.post(entry, uri, media_source=media,
                     desired_class=gdata.docs.data.DocsEntry,
                     auth_token=auth_token, **kwargs)

  Upload = upload

  def download(self, entry_or_id_or_url, file_path, extra_params=None,
               auth_token=None, **kwargs):
    """Downloads a file from the Document List to local disk.

    Note: to download a file in memory, use the GetFileContent() method.

    Args:
      entry_or_id_or_url: gdata.docs.data.DocsEntry or string representing a
          resource id or URL to download the document from (such as the content
          src link).
      file_path: str The full path to save the file to.
      extra_params: dict (optional) A map of any further parameters to control
          how the document is downloaded/exported. For example, exporting a
          spreadsheet as a .csv: extra_params={'gid': 0, 'exportFormat': 'csv'}
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self._download_file().

    Raises:
      gdata.client.RequestError if the download URL is malformed or the server's
      response was not successful.
      ValueError if entry_or_id_or_url was a resource id for a filetype
      in which the download link cannot be manually constructed (e.g. pdf).
    """
    if isinstance(entry_or_id_or_url, gdata.docs.data.DocsEntry):
      url = entry_or_id_or_url.content.src
    else:
      if gdata.docs.data.RESOURCE_ID_PATTERN.match(entry_or_id_or_url):
        url = gdata.docs.data.make_content_link_from_resource_id(
            entry_or_id_or_url)
      else:
        url = entry_or_id_or_url

    if extra_params is not None:
      if 'exportFormat' in extra_params and url.find('/Export?') == -1:
        raise gdata.client.Error, ('This entry type cannot be exported '
                                   'as a different format.')

      if 'gid' in extra_params and url.find('spreadsheets') == -1:
        raise gdata.client.Error, 'gid param is not valid for this doc type.'

      url += '&' + urllib.urlencode(extra_params)

    self._download_file(url, file_path, auth_token=auth_token, **kwargs)

  Download = download

  def export(self, entry_or_id_or_url, file_path, gid=None, auth_token=None,
             **kwargs):
    """Exports a document from the Document List in a different format.

    Args:
      entry_or_id_or_url: gdata.docs.data.DocsEntry or string representing a
          resource id or URL to download the document from (such as the content
          src link).
      file_path: str The full path to save the file to.  The export
          format is inferred from the the file extension.
      gid: str (optional) grid id for downloading a single grid of a
          spreadsheet. The param should only be used for .csv and .tsv
          spreadsheet exports.
      auth_token: (optional) gdata.gauth.ClientLoginToken, AuthSubToken, or
          OAuthToken which authorizes this client to edit the user's data.
      kwargs: Other parameters to pass to self.download().

    Raises:
      gdata.client.RequestError if the download URL is malformed or the server's
      response was not successful.
    """
    extra_params = {}

    match = gdata.docs.data.FILE_EXT_PATTERN.match(file_path)
    if match:
      extra_params['exportFormat'] = match.group(1)

    if gid is not None:
      extra_params['gid'] = gid

    self.download(entry_or_id_or_url, file_path, extra_params,
                  auth_token=auth_token, **kwargs)

  Export = export


class DocsQuery(gdata.client.Query):

  def __init__(self, title=None, title_exact=None, opened_min=None,
               opened_max=None, edited_min=None, edited_max=None, owner=None,
               writer=None, reader=None, show_folders=None,
               show_deleted=None, ocr=None, target_language=None,
               source_language=None, convert=None, **kwargs):
    """Constructs a query URL for the Google Documents List  API.

    Args:
      title: str (optional) Specifies the search terms for the title of a
             document. This parameter used without title_exact will only
             submit partial queries, not exact queries.
      title_exact: str (optional) Meaningless without title. Possible values
                   are 'true' and 'false'. Note: Matches are case-insensitive.
      opened_min: str (optional) Lower bound on the last time a document was
                  opened by the current user. Use the RFC 3339 timestamp
                  format. For example: opened_min='2005-08-09T09:57:00-08:00'.
      opened_max: str (optional) Upper bound on the last time a document was
                  opened by the current user. (See also opened_min.)
      edited_min: str (optional) Lower bound on the last time a document was
                  edited by the current user. This value corresponds to the
                  edited.text value in the doc's entry object, which
                  represents changes to the document's content or metadata.
                  Use the RFC 3339 timestamp format. For example:
                  edited_min='2005-08-09T09:57:00-08:00'
      edited_max: str (optional) Upper bound on the last time a document was
                  edited by the user. (See also edited_min.)
      owner: str (optional) Searches for documents with a specific owner. Use
             the email address of the owner. For example:
             owner='user@gmail.com'
      writer: str (optional) Searches for documents which can be written to
              by specific users. Use a single email address or a comma
              separated list of email addresses. For example:
              writer='user1@gmail.com,user@example.com'
      reader: str (optional) Searches for documents which can be read by
              specific users. (See also writer.)
      show_folders: str (optional) Specifies whether the query should return
                    folders as well as documents. Possible values are 'true'
                    and 'false'. Default is false.
      show_deleted: str (optional) Specifies whether the query should return
                    documents which are in the trash as well as other
                    documents. Possible values are 'true' and 'false'.
                    Default is false.
      ocr: str (optional) Specifies whether to attempt OCR on a .jpg, .png, or
           .gif upload. Possible values are 'true' and 'false'. Default is
           false. See OCR in the Protocol Guide: 
           http://code.google.com/apis/documents/docs/3.0/developers_guide_protocol.html#OCR
      target_language: str (optional) Specifies the language to translate a
                       document into. See Document Translation in the Protocol
                       Guide for a table of possible values:
                       http://code.google.com/apis/documents/docs/3.0/developers_guide_protocol.html#DocumentTranslation
      source_language: str (optional) Specifies the source language of the
                       original document. Optional when using the translation
                       service. If not provided, Google will attempt to
                       auto-detect the source language. See Document
                       Translation in the Protocol Guide for a table of
                       possible values (link in target_language).
      convert: str (optional) Used when uploading arbitrary file types to
               specity if document-type uploads should convert to a native
               Google Docs format. Possible values are 'true' and 'false'.
               The default is 'true'.
    """
    gdata.client.Query.__init__(self, **kwargs)
    self.convert = convert
    self.title = title
    self.title_exact = title_exact
    self.opened_min = opened_min
    self.opened_max = opened_max
    self.edited_min = edited_min
    self.edited_max = edited_max
    self.owner = owner
    self.writer = writer
    self.reader = reader
    self.show_folders = show_folders
    self.show_deleted = show_deleted
    self.ocr = ocr
    self.target_language = target_language
    self.source_language = source_language

  def modify_request(self, http_request):
    gdata.client._add_query_param('convert', self.convert, http_request)
    gdata.client._add_query_param('title', self.title, http_request)
    gdata.client._add_query_param('title-exact', self.title_exact,
                                  http_request)
    gdata.client._add_query_param('opened-min', self.opened_min, http_request)
    gdata.client._add_query_param('opened-max', self.opened_max, http_request)
    gdata.client._add_query_param('edited-min', self.edited_min, http_request)
    gdata.client._add_query_param('edited-max', self.edited_max, http_request)
    gdata.client._add_query_param('owner', self.owner, http_request)
    gdata.client._add_query_param('writer', self.writer, http_request)
    gdata.client._add_query_param('reader', self.reader, http_request)
    gdata.client._add_query_param('showfolders', self.show_folders,
                                  http_request)
    gdata.client._add_query_param('showdeleted', self.show_deleted,
                                  http_request)
    gdata.client._add_query_param('ocr', self.ocr, http_request)
    gdata.client._add_query_param('targetLanguage', self.target_language,
                                  http_request)
    gdata.client._add_query_param('sourceLanguage', self.source_language,
                                  http_request)
    gdata.client.Query.modify_request(self, http_request)

  ModifyRequest = modify_request