598 lines
19 KiB
Python
598 lines
19 KiB
Python
#!/usr/bin/env python
|
|
#
|
|
# Copyright (C) 2009 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
# This module is used for version 2 of the Google Data APIs.
|
|
# TODO: add proxy handling.
|
|
|
|
|
|
__author__ = 'j.s@google.com (Jeff Scudder)'
|
|
|
|
|
|
import os
|
|
import StringIO
|
|
import urlparse
|
|
import urllib
|
|
import httplib
|
|
ssl = None
|
|
try:
|
|
import ssl
|
|
except ImportError:
|
|
pass
|
|
|
|
|
|
|
|
class Error(Exception):
    """Base class for the exceptions defined in this module."""


class UnknownSize(Error):
    """Raised when a request body part is added without a known size."""


class ProxyError(Error):
    """Raised when a proxy does not answer a CONNECT request with status 200."""
|
|
|
|
|
|
# Boundary token placed between the parts of a multipart request body and
# advertised in the multipart Content-Type header.
MIME_BOUNDARY = 'END_OF_PART'
|
|
|
|
|
|
def get_headers(http_response):
    """Returns the HTTP headers carried by a server response.

    When the response object offers getheaders, its result is returned
    untouched. Response objects without that method (the docstring of the
    original cites httplib in Python 2.2 and 2.3) are probed one header at a
    time with getheader for a fixed list of well known names.

    Args:
      http_response: An object with a getheaders method, or at least a
          getheader(name, default) method.

    Returns:
      Whatever getheaders returns, or a list of (name, value) tuples holding
      the probed headers which were present.
    """
    if hasattr(http_response, 'getheaders'):
        return http_response.getheaders()

    # Only these names are looked up on responses which cannot list headers.
    probed_names = (
        'location', 'content-type', 'content-length', 'age', 'allow',
        'cache-control', 'content-location', 'content-encoding', 'date',
        'etag', 'expires', 'last-modified', 'pragma', 'server',
        'set-cookie', 'transfer-encoding', 'vary', 'via', 'warning',
        'www-authenticate', 'gdata-version')
    found = []
    for name in probed_names:
        header_value = http_response.getheader(name, None)
        if header_value is None:
            continue
        found.append((name, header_value))
    return found
|
|
|
|
|
|
class HttpRequest(object):
    """Holds the method, URI, headers and body of one HTTP 1.1 request.

    Headers are kept in a plain dict, so the caller is responsible for
    merging repeated field names into a single value as described in
    section 4.2 of RFC 2616.
    """
    method = None
    uri = None

    def __init__(self, uri=None, method=None, headers=None):
        """Builds a request.

        Args:
          uri: The full or partial URI, either as a Uri object or as a string
              (strings are parsed with Uri.parse_uri).
          method: The HTTP method, for example 'GET' or 'POST'.
          headers: dict of strings The HTTP headers to send. The dict is
              stored as-is (not copied) when it is non-empty.
        """
        if headers:
            self.headers = headers
        else:
            self.headers = {}
        self._body_parts = []
        if method is not None:
            self.method = method
        if isinstance(uri, (str, unicode)):
            uri = Uri.parse_uri(uri)
        if not uri:
            uri = Uri()
        self.uri = uri

    def add_body_part(self, data, mime_type, size=None):
        """Appends one part to the request body.

        The first part is sent as a plain body. Adding a second part converts
        the request into a MIME multipart/related message (MIME 1.0, see
        RFC 1341); later parts are slotted in before the closing boundary.
        The Content-Type and Content-Length headers are kept up to date.

        Args:
          data: str or a file-like object holding this part of the body.
          mime_type: str The MIME type describing the data.
          size: int Needed when data is a file-like object. For str data the
              length is computed and this argument is ignored.

        Raises:
          UnknownSize: If no size is available for the part.
        """
        if isinstance(data, str):
            size = len(data)
        if size is None:
            # TODO: support chunked transfer if some of the body is of
            # unknown size.
            raise UnknownSize('Each part of the body must have a known size.')

        # Start from whatever length has been recorded so far.
        running_total = 0
        if 'Content-Length' in self.headers:
            running_total = int(self.headers['Content-Length'])

        parts_so_far = len(self._body_parts)
        if parts_so_far == 0:
            # A lone part is sent as-is, so this is not multipart (yet).
            self.headers['Content-Type'] = mime_type
            running_total = size
            self._body_parts.append(data)
        elif parts_so_far == 1:
            # Second part: wrap the existing single part in a multipart
            # envelope, then append the new part and the closing boundary.
            preamble = 'Media multipart posting'
            separator = '\r\n--%s\r\n' % (MIME_BOUNDARY,)
            self._body_parts[:0] = [preamble, separator]
            # The first part's type moves from the request headers into the
            # multipart payload.
            first_part_header = 'Content-Type: %s\r\n\r\n' % (
                self.headers['Content-Type'],)
            self._body_parts.insert(2, first_part_header)
            self._body_parts.append(separator)
            self.headers['Content-Type'] = (
                'multipart/related; boundary="%s"' % (MIME_BOUNDARY,))
            self.headers['MIME-version'] = '1.0'
            new_part_header = 'Content-Type: %s\r\n\r\n' % (mime_type)
            closing = '\r\n--%s--' % (MIME_BOUNDARY,)
            self._body_parts.extend([new_part_header, data, closing])
            running_total += (
                len(preamble) + len(separator) + len(first_part_header) +
                len(separator) + len(new_part_header) + size + len(closing))
        else:
            # Already multipart: insert just before the closing boundary.
            separator = '\r\n--%s\r\n' % (MIME_BOUNDARY,)
            new_part_header = 'Content-Type: %s\r\n\r\n' % (mime_type)
            self._body_parts[-1:-1] = [separator, new_part_header, data]
            running_total += len(separator) + len(new_part_header) + size
        self.headers['Content-Length'] = str(running_total)
        # I could add an "append_to_body_part" method as well.

    AddBodyPart = add_body_part

    def add_form_inputs(self, form_data,
                        mime_type='application/x-www-form-urlencoded'):
        """URL-encodes form fields and adds them as a body part.

        Args:
          form_data: dict, or sequence of two member tuples, holding the form
              keys and values.
          mime_type: str The MIME type of the form data being sent. Defaults
              to 'application/x-www-form-urlencoded'.
        """
        self.add_body_part(urllib.urlencode(form_data), mime_type)

    AddFormInputs = add_form_inputs

    def _copy(self):
        """Returns a new HttpRequest duplicating this one.

        The Uri, its query dict, the headers dict and the list of body parts
        are new objects; the body parts themselves (including any file-like
        objects) are shared with the original.
        """
        source = self.uri
        duplicate = HttpRequest(
            uri=Uri(source.scheme, source.host, source.port, source.path,
                    source.query.copy()),
            method=self.method,
            headers=self.headers.copy())
        duplicate._body_parts = list(self._body_parts)
        return duplicate

    def _dump(self):
        """Renders the request as text for debugging.

        File-like body parts are shown as a placeholder and never read, so
        the request stays usable afterwards.
        """
        pieces = ['HTTP Request\n method: %s\n url: %s\n headers:\n' % (
            self.method, str(self.uri))]
        for name, value in self.headers.items():
            pieces.append(' %s: %s\n' % (name, value))
        pieces.append(' body sections:\n')
        index = 0
        for part in self._body_parts:
            if isinstance(part, (str, unicode)):
                pieces.append(' %s: %s\n' % (index, part))
            else:
                pieces.append(' %s: <file like object>\n' % index)
            index += 1
        return ''.join(pieces)
|
|
|
|
|
|
def _apply_defaults(http_request):
|
|
if http_request.uri.scheme is None:
|
|
if http_request.uri.port == 443:
|
|
http_request.uri.scheme = 'https'
|
|
else:
|
|
http_request.uri.scheme = 'http'
|
|
|
|
|
|
class Uri(object):
    """A URI as used in HTTP 1.1"""
    scheme = None
    host = None
    port = None
    path = None

    def __init__(self, scheme=None, host=None, port=None, path=None,
                 query=None):
        """Constructor for a URI.

        Args:
          scheme: str This is usually 'http' or 'https'.
          host: str The host name or IP address of the desired server.
          port: int The server's port number.
          path: str The path of the resource following the host. This begins
              with a /, example: '/calendar/feeds/default/allcalendars/full'
          query: dict of strings The URL query parameters. The keys and
              values are both escaped so this dict should contain the
              unescaped values. For example
              {'my key': 'val', 'second': '!!!'} will become
              '?my+key=val&second=%21%21%21' which is appended to the path.
              A value of None produces a bare key with no '='.
        """
        self.query = query or {}
        if scheme is not None:
            self.scheme = scheme
        if host is not None:
            self.host = host
        if port is not None:
            self.port = port
        if path:
            self.path = path

    def _get_query_string(self):
        """Returns the escaped query parameters joined with '&'."""
        param_pairs = []
        for key, value in self.query.items():
            quoted_key = urllib.quote_plus(key)
            if value is None:
                # parse_uri stores None for parameters without a value;
                # emit them as a bare key rather than as 'key=None'.
                param_pairs.append(quoted_key)
            else:
                param_pairs.append(
                    '='.join((quoted_key, urllib.quote_plus(str(value)))))
        return '&'.join(param_pairs)

    def _get_relative_path(self):
        """Returns the path with the query parameters escaped and appended."""
        param_string = self._get_query_string()
        if self.path is None:
            path = '/'
        else:
            path = self.path
        if param_string:
            return '?'.join([path, param_string])
        else:
            return path

    def _to_string(self):
        """Returns the absolute URI as a string.

        A missing scheme is rendered as 'https' when the port is 443 and as
        'http' otherwise; a missing path is rendered as '/'.
        """
        if self.scheme is None and self.port == 443:
            scheme = 'https'
        elif self.scheme is None:
            scheme = 'http'
        else:
            scheme = self.scheme
        if self.port is None:
            return '%s://%s%s' % (scheme, self.host,
                                  self._get_relative_path())
        else:
            return '%s://%s:%s%s' % (scheme, self.host, str(self.port),
                                     self._get_relative_path())

    def __str__(self):
        return self._to_string()

    def modify_request(self, http_request=None):
        """Sets HTTP request components based on the URI.

        Only the members which are set (truthy) on this Uri are copied onto
        the request's uri; the query dict is copied, not shared.

        Args:
          http_request: The request to modify. A new HttpRequest is created
              when this is None.

        Returns:
          The modified (or newly created) request.
        """
        if http_request is None:
            http_request = HttpRequest()
        if http_request.uri is None:
            http_request.uri = Uri()
        # Determine the correct scheme.
        if self.scheme:
            http_request.uri.scheme = self.scheme
        if self.port:
            http_request.uri.port = self.port
        if self.host:
            http_request.uri.host = self.host
        # Set the relative uri path
        if self.path:
            http_request.uri.path = self.path
        if self.query:
            http_request.uri.query = self.query.copy()
        return http_request

    ModifyRequest = modify_request

    def parse_uri(uri_string):
        """Creates a Uri object which corresponds to the URI string.

        This method can accept partial URIs, but it will leave missing
        members of the Uri unset. Query parameters without an '=' are stored
        with a value of None, and empty parameters (as in 'a=1&&b=2') are
        skipped.
        """
        parts = urlparse.urlparse(uri_string)
        uri = Uri()
        if parts[0]:
            uri.scheme = parts[0]
        if parts[1]:
            host_parts = parts[1].split(':')
            if host_parts[0]:
                uri.host = host_parts[0]
            # 'host:' has an empty port; leave the port unset in that case
            # instead of failing in int('').
            if len(host_parts) > 1 and host_parts[1]:
                uri.port = int(host_parts[1])
        if parts[2]:
            uri.path = parts[2]
        if parts[4]:
            for pair in parts[4].split('&'):
                if not pair:
                    continue
                # Split on the first '=' only so that values which contain
                # '=' themselves are kept whole.
                pair_parts = pair.split('=', 1)
                name = urllib.unquote_plus(pair_parts[0])
                if len(pair_parts) > 1:
                    uri.query[name] = urllib.unquote_plus(pair_parts[1])
                else:
                    uri.query[name] = None
        return uri

    parse_uri = staticmethod(parse_uri)

    ParseUri = parse_uri
|
|
|
|
|
|
# Module-level alias so callers can parse a URI string without naming Uri.
parse_uri = Uri.parse_uri
|
|
|
|
|
|
# CamelCase module-level alias for Uri.parse_uri.
ParseUri = Uri.parse_uri
|
|
|
|
|
|
class HttpResponse(object):
    """An in-memory HTTP response with status, reason, headers and a body.

    It offers the getheader, getheaders and read methods, so code written
    against those methods can consume it.
    """
    status = None
    reason = None
    _body = None

    def __init__(self, status=None, reason=None, headers=None, body=None):
        """Builds a response.

        Args:
          status: The status code; the class default of None is kept when
              omitted.
          reason: The reason phrase; None is kept when omitted.
          headers: The response headers; an empty dict is used when this is
              omitted or empty.
          body: Either an object with a read method, which is stored as-is,
              or a value which is wrapped in a StringIO so it can be read.
        """
        if headers:
            self._headers = headers
        else:
            self._headers = {}
        if status is not None:
            self.status = status
        if reason is not None:
            self.reason = reason
        if body is None:
            return
        if not hasattr(body, 'read'):
            # Give plain values a file-like interface.
            body = StringIO.StringIO(body)
        self._body = body

    def getheader(self, name, default=None):
        """Returns the header called name, or default when it is absent."""
        if name not in self._headers:
            return default
        return self._headers[name]

    def getheaders(self):
        """Returns the stored headers object itself (not a copy)."""
        return self._headers

    def read(self, amt=None):
        """Reads from the body.

        Args:
          amt: How much to read; everything which remains is read when this
              is omitted or zero.

        Returns:
          The data read, or None if the response has no body.
        """
        if self._body is None:
            return None
        if amt:
            return self._body.read(amt)
        return self._body.read()
|
|
|
|
|
|
def _dump_response(http_response):
    """Renders a response's status, reason and headers as debugging text.

    The body is deliberately left unread, since reading it may consume the
    content.

    Args:
      http_response: An object with status and reason attributes whose
          headers can be obtained through get_headers.

    Returns:
      A str describing the response.
    """
    pieces = ['HttpResponse\n status: %s\n reason: %s\n headers:' % (
        http_response.status, http_response.reason)]
    headers = get_headers(http_response)
    # Headers arrive either as a dict or as a sequence of (name, value)
    # pairs; normalise to pairs before formatting.
    if isinstance(headers, dict):
        pairs = headers.items()
    else:
        pairs = headers
    for pair in pairs:
        pieces.append(' %s: %s\n' % (pair[0], pair[1]))
    return ''.join(pieces)
|
|
|
|
|
|
class HttpClient(object):
    """Performs HTTP requests using httplib."""
    # Any true value turns on httplib's debug output for each connection.
    debug = None

    def request(self, http_request):
        """Sends the given request object and returns the server's response.

        Args:
          http_request: An object providing method, uri, headers and
              _body_parts attributes (such as an HttpRequest).

        Returns:
          The response object returned by the connection's getresponse.
        """
        return self._http_request(http_request.method, http_request.uri,
                                  http_request.headers,
                                  http_request._body_parts)

    Request = request

    def _get_connection(self, uri, headers=None):
        """Creates the httplib connection object for an HTTP request.

        Args:
          uri: The full URL for the request as a Uri object.
          headers: A dict of string pairs containing the HTTP headers for the
              request. Unused here; subclasses may consult it.

        Returns:
          An httplib.HTTPSConnection when the scheme is 'https', otherwise an
          httplib.HTTPConnection. The port is passed only when the URI has
          one.
        """
        if uri.scheme == 'https':
            connection_class = httplib.HTTPSConnection
        else:
            connection_class = httplib.HTTPConnection
        if not uri.port:
            return connection_class(uri.host)
        return connection_class(uri.host, int(uri.port))

    def _http_request(self, method, uri, headers=None, body_parts=None):
        """Makes an HTTP request using httplib.

        Args:
          method: str example: 'GET', 'POST', 'PUT', 'DELETE', etc.
          uri: str or atom.http_core.Uri
          headers: dict of strings mapping to strings which will be sent as
              HTTP headers in the request. May be None or empty.
          body_parts: list of strings, objects with a read method, or objects
              which can be converted to strings using str. Each of these
              will be sent in order as the body of the HTTP request.

        Returns:
          The response object returned by the connection's getresponse.
        """
        if isinstance(uri, (str, unicode)):
            uri = Uri.parse_uri(uri)

        connection = self._get_connection(uri, headers=headers)

        if self.debug:
            connection.debuglevel = 1

        # A connection to a different host than the target (for example a
        # proxy) is sent the absolute URI; otherwise the relative path.
        if connection.host != uri.host:
            connection.putrequest(method, str(uri))
        else:
            connection.putrequest(method, uri._get_relative_path())

        # Overcome a bug in Python 2.4 and 2.5
        # httplib.HTTPConnection.putrequest adding
        # HTTP request header 'Host: www.google.com:443' instead of
        # 'Host: www.google.com', and thus resulting the error message
        # 'Token invalid - AuthSub token has wrong scope' in the HTTP
        # response.
        if (uri.scheme == 'https' and int(uri.port or 443) == 443 and
                hasattr(connection, '_buffer') and
                isinstance(connection._buffer, list)):
            header_line = 'Host: %s:443' % uri.host
            replacement_header_line = 'Host: %s' % uri.host
            try:
                connection._buffer[connection._buffer.index(header_line)] = (
                    replacement_header_line)
            except ValueError:  # header_line missing from connection._buffer
                pass

        # Send the HTTP headers. headers defaults to None, so guard before
        # iterating.
        if headers:
            for header_name, value in headers.items():
                connection.putheader(header_name, value)
        connection.endheaders()

        # If there is data, send it in the request.
        if body_parts:
            for part in body_parts:
                _send_data_part(part, connection)

        # Return the HTTP Response from the server.
        return connection.getresponse()
|
|
|
|
|
|
def _send_data_part(data, connection):
|
|
if isinstance(data, (str, unicode)):
|
|
# I might want to just allow str, not unicode.
|
|
connection.send(data)
|
|
return
|
|
# Check to see if data is a file-like object that has a read method.
|
|
elif hasattr(data, 'read'):
|
|
# Read the file and send it a chunk at a time.
|
|
while 1:
|
|
binarydata = data.read(100000)
|
|
if binarydata == '': break
|
|
connection.send(binarydata)
|
|
return
|
|
else:
|
|
# The data object was not a file.
|
|
# Try to convert to a string and send the data.
|
|
connection.send(str(data))
|
|
return
|
|
|
|
|
|
class ProxiedHttpClient(HttpClient):
    """HttpClient which routes requests through a proxy when one is set.

    The proxy URL is read from the https_proxy or http_proxy environment
    variable, chosen by the scheme of the request URI. Without a matching
    variable the connection is made directly.
    """

    def _get_connection(self, uri, headers=None):
        """Opens a connection to the target, via the proxy if configured.

        For https targets a CONNECT tunnel is negotiated with the proxy and
        the resulting socket is wrapped for SSL. For http targets a plain
        connection to the proxy is returned and, when proxy credentials are
        available, a Proxy-Authorization entry is added to headers.

        Args:
          uri: The full URL for the request as a Uri object.
          headers: A dict of string pairs containing the HTTP headers for the
              request. It may be modified in place for http targets.

        Returns:
          An httplib connection object.

        Raises:
          ProxyError: If the proxy closes the connection early, sends an
              unparseable reply, or answers CONNECT with a non-200 status.
        """
        # Check to see if there are proxy settings required for this request.
        proxy = None
        if uri.scheme == 'https':
            proxy = os.environ.get('https_proxy')
        elif uri.scheme == 'http':
            proxy = os.environ.get('http_proxy')
        if not proxy:
            return HttpClient._get_connection(self, uri, headers=headers)
        # Now we have the URL of the appropriate proxy server.
        # Get a username and password for the proxy if required.
        proxy_auth = _get_proxy_auth()
        if uri.scheme == 'https':
            import socket
            if proxy_auth:
                proxy_auth = 'Proxy-authorization: %s' % proxy_auth
            # Construct the proxy connect command.
            port = uri.port
            if not port:
                port = 443
            proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % (uri.host, port)
            # Set the user agent to send to the proxy
            user_agent = ''
            if headers and 'User-Agent' in headers:
                user_agent = 'User-Agent: %s\r\n' % (headers['User-Agent'])
            proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth,
                                           user_agent)
            # Find the proxy host and port.
            proxy_uri = Uri.parse_uri(proxy)
            if not proxy_uri.port:
                proxy_uri.port = '80'
            # Connect to the proxy server, very simple recv and error
            # checking
            p_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                p_sock.connect((proxy_uri.host, int(proxy_uri.port)))
                p_sock.sendall(proxy_pieces)
                response = ''
                # Wait for the full response.
                while response.find("\r\n\r\n") == -1:
                    chunk = p_sock.recv(8192)
                    if not chunk:
                        # recv returns an empty string once the peer has
                        # closed; without this check the loop never ends.
                        raise ProxyError(
                            'Proxy closed the connection before completing '
                            'its response.')
                    response += chunk
                status_fields = response.split()
                if len(status_fields) < 2:
                    raise ProxyError(
                        'Malformed proxy response: %r' % (response,))
                p_status = status_fields[1]
                if p_status != str(200):
                    raise ProxyError('Error status=%s' % str(p_status))
                # Trivial setup for ssl socket.
                sslobj = None
                if ssl is not None:
                    sslobj = ssl.wrap_socket(p_sock, None, None)
                else:
                    sock_ssl = socket.ssl(p_sock, None, None)
                    sslobj = httplib.FakeSocket(p_sock, sock_ssl)
            except:
                # Cleanup only: release the socket, then re-raise unchanged.
                p_sock.close()
                raise
            # Initalize httplib and replace with the proxy socket.
            connection = httplib.HTTPConnection(proxy_uri.host)
            connection.sock = sslobj
            return connection
        elif uri.scheme == 'http':
            proxy_uri = Uri.parse_uri(proxy)
            if not proxy_uri.port:
                proxy_uri.port = '80'
            # headers defaults to None; there is nothing to attach the
            # credentials to in that case.
            if proxy_auth and headers is not None:
                headers['Proxy-Authorization'] = proxy_auth.strip()
            return httplib.HTTPConnection(proxy_uri.host,
                                          int(proxy_uri.port))
        # Not expected to be reached: proxy is only set for the two schemes
        # handled above.
        return None
|
|
|
|
|
|
def _get_proxy_auth():
|
|
import base64
|
|
proxy_username = os.environ.get('proxy-username')
|
|
if not proxy_username:
|
|
proxy_username = os.environ.get('proxy_username')
|
|
proxy_password = os.environ.get('proxy-password')
|
|
if not proxy_password:
|
|
proxy_password = os.environ.get('proxy_password')
|
|
if proxy_username:
|
|
user_auth = base64.b64encode('%s:%s' % (proxy_username,
|
|
proxy_password))
|
|
return 'Basic %s\r\n' % (user_auth.strip())
|
|
else:
|
|
return ''
|