#!/usr/bin/python
#
# Copyright (C) 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""HttpClients in this module use httplib to make HTTP requests.

This module makes HTTP requests based on httplib, but there are environments
in which an httplib based approach will not work (if running in Google App
Engine for example). In those cases, higher level classes (like AtomService
and GDataService) can swap out the HttpClient to transparently use a
different mechanism for making HTTP requests.

  HttpClient: Contains a request method which performs an HTTP call to the
      server.

  ProxiedHttpClient: Contains a request method which connects to a proxy
      using settings stored in operating system environment variables then
      performs an HTTP call to the endpoint server.
"""


__author__ = 'api.jscudder (Jeff Scudder)'


import types
import os
import httplib
import atom.url
import atom.http_interface
import socket
import base64
import atom.http_core
# The ssl module is optional (it is missing on older interpreters); when it
# cannot be imported the proxy code falls back to socket.ssl.
ssl_imported = False
ssl = None
try:
    import ssl
    ssl_imported = True
except ImportError:
    pass


class ProxyError(atom.http_interface.Error):
    """Raised when the proxy does not successfully answer a CONNECT."""
    pass


class TestConfigurationError(Exception):
    """Not raised in this module; kept for code that imports it."""
    pass


DEFAULT_CONTENT_TYPE = 'application/atom+xml'


class HttpClient(atom.http_interface.GenericHttpClient):
    """HTTP client which talks to the server directly through httplib."""

    # Added to allow old v1 HttpClient objects to use the new
    # http_core.HttpClient. Used in unit tests to inject a mock client.
    v2_http_client = None

    def __init__(self, headers=None):
        """Creates a client.

        Args:
          headers: dict of strings (optional). Headers which are sent with
              every request made through this client.
        """
        self.debug = False
        self.headers = headers or {}

    def request(self, operation, url, data=None, headers=None):
        """Performs an HTTP call to the server, supports GET, POST, PUT, and
        DELETE.

        Usage example, perform an HTTP GET on http://www.google.com/:
          import atom.http
          client = atom.http.HttpClient()
          http_response = client.request('GET', 'http://www.google.com/')

        Args:
          operation: str The HTTP operation to be performed. This is usually
              one of 'GET', 'POST', 'PUT', or 'DELETE'
          data: filestream, list of parts, or other object which can be
              converted to a string. Should be set to None when performing a
              GET or DELETE. If data is a file-like object which can be read,
              this method will read a chunk of 100K bytes at a time and send
              them. If the data is a list of parts to be sent, each part will
              be evaluated and sent.
          url: The full URL to which the request should be sent. Can be a
              string or atom.url.Url.
          headers: dict of strings. HTTP headers which should be sent
              in the request.

        Returns:
          The response object from the httplib connection, or whatever the
          injected v2_http_client returns when one is set.

        Raises:
          atom.http_interface.ContentLengthRequired: data is not a string and
              no Content-Length header was supplied.
          atom.http_interface.UnparsableUrlObject: url is neither a string
              nor an atom.url.Url.
        """
        all_headers = self.headers.copy()
        if headers:
            all_headers.update(headers)

        # If the list of headers does not include a Content-Length, attempt
        # to calculate it based on the data object.
        if data and 'Content-Length' not in all_headers:
            if isinstance(data, types.StringTypes):
                all_headers['Content-Length'] = str(len(data))
            else:
                raise atom.http_interface.ContentLengthRequired(
                    'Unable to calculate '
                    'the length of the data parameter. Specify a value for '
                    'Content-Length')

        # Set the content type to the default value if none was set.
        if 'Content-Type' not in all_headers:
            all_headers['Content-Type'] = DEFAULT_CONTENT_TYPE

        # Delegate to the injected v2 client when one is configured.
        if self.v2_http_client is not None:
            http_request = atom.http_core.HttpRequest(method=operation)
            atom.http_core.Uri.parse_uri(str(url)).modify_request(
                http_request)
            http_request.headers = all_headers
            if data:
                http_request._body_parts.append(data)
            return self.v2_http_client.request(http_request=http_request)

        if not isinstance(url, atom.url.Url):
            if isinstance(url, types.StringTypes):
                url = atom.url.parse_url(url)
            else:
                raise atom.http_interface.UnparsableUrlObject(
                    'Unable to parse url '
                    'parameter because it was not a string or atom.url.Url')

        connection = self._prepare_connection(url, all_headers)

        if self.debug:
            connection.debuglevel = 1

        # The Host header is written by hand below, so ask httplib to skip
        # its own.
        connection.putrequest(operation, self._get_access_url(url),
                              skip_host=True)
        if url.port is not None:
            connection.putheader('Host', '%s:%s' % (url.host, url.port))
        else:
            connection.putheader('Host', url.host)

        # Overcome a bug in Python 2.4 and 2.5
        # httplib.HTTPConnection.putrequest adding
        # HTTP request header 'Host: www.google.com:443' instead of
        # 'Host: www.google.com', and thus resulting the error message
        # 'Token invalid - AuthSub token has wrong scope' in the HTTP
        # response.
        if (url.protocol == 'https' and int(url.port or 443) == 443 and
                hasattr(connection, '_buffer') and
                isinstance(connection._buffer, list)):
            header_line = 'Host: %s:443' % url.host
            replacement_header_line = 'Host: %s' % url.host
            try:
                connection._buffer[connection._buffer.index(header_line)] = (
                    replacement_header_line)
            except ValueError:  # header_line missing from connection._buffer
                pass

        # Send the HTTP headers.
        for header_name in all_headers:
            connection.putheader(header_name, all_headers[header_name])
        connection.endheaders()

        # If there is data, send it in the request.
        if data:
            if isinstance(data, list):
                for data_part in data:
                    _send_data_part(data_part, connection)
            else:
                _send_data_part(data, connection)

        # Return the HTTP Response from the server.
        return connection.getresponse()

    def _prepare_connection(self, url, headers):
        """Opens an httplib connection object for the URL's host and port.

        Args:
          url: str or atom.url.Url The destination of the request.
          headers: dict of strings. Unused here; subclasses may read or
              modify it.

        Returns:
          An httplib.HTTPSConnection when the URL's protocol is 'https',
          otherwise an httplib.HTTPConnection.

        Raises:
          atom.http_interface.UnparsableUrlObject: url is neither a string
              nor an atom.url.Url.
        """
        if not isinstance(url, atom.url.Url):
            if isinstance(url, types.StringTypes):
                url = atom.url.parse_url(url)
            else:
                raise atom.http_interface.UnparsableUrlObject(
                    'Unable to parse url '
                    'parameter because it was not a string or atom.url.Url')
        if url.protocol == 'https':
            if not url.port:
                return httplib.HTTPSConnection(url.host)
            return httplib.HTTPSConnection(url.host, int(url.port))
        else:
            if not url.port:
                return httplib.HTTPConnection(url.host)
            return httplib.HTTPConnection(url.host, int(url.port))

    def _get_access_url(self, url):
        """Returns the string placed on the request line for this URL."""
        return url.to_string()


class ProxiedHttpClient(HttpClient):
    """Performs an HTTP request through a proxy.

    The proxy settings are obtained from environment variables. The URL of
    the proxy server is assumed to be stored in the environment variables
    'https_proxy' and 'http_proxy' respectively. If the proxy server requires
    a Basic Auth authorization header, the username and password are expected
    to be in the 'proxy-username' or 'proxy_username' variable and the
    'proxy-password' or 'proxy_password' variable.

    After connecting to the proxy server, the request is completed as in
    HttpClient.request.
    """

    def _prepare_connection(self, url, headers):
        """Opens a connection to the destination by way of the proxy.

        For https destinations a CONNECT tunnel is opened through the proxy
        named by 'https_proxy' and wrapped in SSL. For other destinations a
        plain connection to the proxy named by 'http_proxy' is returned and
        any proxy credentials are added to headers. When the relevant
        environment variable is not set, a direct connection is made.

        Args:
          url: atom.url.Url The destination of the request.
          headers: dict of strings. Headers for the request; a
              'Proxy-Authorization' entry may be added to it.

        Returns:
          An httplib connection object.

        Raises:
          ProxyError: the proxy closed the tunnel early, sent an unreadable
              response, or answered CONNECT with a status other than 200.
        """
        proxy_auth = _get_proxy_auth()
        if url.protocol == 'https':
            # destination is https
            proxy = os.environ.get('https_proxy')
            if not proxy:
                # The request was HTTPS, but there was no https_proxy set.
                return HttpClient._prepare_connection(self, url, headers)

            # Set any proxy auth headers (proxy_auth already ends in CRLF).
            if proxy_auth:
                proxy_auth = 'Proxy-authorization: %s' % proxy_auth

            # Construct the proxy connect command.
            port = url.port
            if not port:
                port = '443'
            proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % (url.host, port)

            # Set the user agent to send to the proxy
            if headers and 'User-Agent' in headers:
                user_agent = 'User-Agent: %s\r\n' % (headers['User-Agent'])
            else:
                user_agent = ''

            proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth,
                                           user_agent)

            # Find the proxy host and port.
            proxy_url = atom.url.parse_url(proxy)
            if not proxy_url.port:
                proxy_url.port = '80'

            # Connect to the proxy server, very simple recv and error
            # checking.
            p_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                p_sock.connect((proxy_url.host, int(proxy_url.port)))
                p_sock.sendall(proxy_pieces)
                response = ''

                # Wait for the full response.
                while response.find("\r\n\r\n") == -1:
                    chunk = p_sock.recv(8192)
                    if not chunk:
                        # An empty read means the proxy closed the socket;
                        # without this check the loop would never end.
                        raise ProxyError(
                            'Proxy closed the connection before sending a '
                            'complete response')
                    response += chunk

                status_fields = response.split()
                if len(status_fields) < 2:
                    raise ProxyError('Malformed proxy response')
                p_status = status_fields[1]
                if p_status != str(200):
                    raise ProxyError('Error status=%s' % str(p_status))

                # Trivial setup for ssl socket.
                if ssl_imported:
                    sslobj = ssl.wrap_socket(p_sock, None, None)
                else:
                    sock_ssl = socket.ssl(p_sock, None, None)
                    sslobj = httplib.FakeSocket(p_sock, sock_ssl)
            except Exception:
                # Do not leak the raw proxy socket when setup fails.
                p_sock.close()
                raise

            # Initialize httplib and replace with the proxy socket.
            connection = httplib.HTTPConnection(proxy_url.host)
            connection.sock = sslobj
            return connection

        proxy = os.environ.get('http_proxy')
        if not proxy:
            # The request was HTTP, but there was no http_proxy set.
            return HttpClient._prepare_connection(self, url, headers)

        # Find the proxy host and port.
        proxy_url = atom.url.parse_url(proxy)
        if not proxy_url.port:
            proxy_url.port = '80'

        if proxy_auth:
            headers['Proxy-Authorization'] = proxy_auth.strip()

        return httplib.HTTPConnection(proxy_url.host, int(proxy_url.port))

    def _get_access_url(self, url):
        """Returns the string placed on the request line for this URL."""
        return url.to_string()


def _get_proxy_auth():
    """Builds the Basic auth credentials for the proxy from the environment.

    The username is read from 'proxy-username' (falling back to
    'proxy_username') and the password from 'proxy-password' (falling back
    to 'proxy_password').

    Returns:
      'Basic <base64 credentials>' followed by CRLF, or an empty string when
      no proxy username is set.
    """
    proxy_username = os.environ.get('proxy-username')
    if not proxy_username:
        proxy_username = os.environ.get('proxy_username')
    proxy_password = os.environ.get('proxy-password')
    if not proxy_password:
        proxy_password = os.environ.get('proxy_password')
    if not proxy_username:
        return ''
    # An unset password is sent as empty rather than as the text 'None'.
    credentials = '%s:%s' % (proxy_username, proxy_password or '')
    # b64encode emits a single line; encodestring wraps long output with
    # newlines, which would break the header.
    user_auth = base64.b64encode(credentials)
    return 'Basic %s\r\n' % (user_auth.strip())


def _send_data_part(data, connection):
    """Sends one part of a request body over the connection.

    Args:
      data: A string, a file-like object with a read method, or any other
          object, which is converted with str() before sending.
      connection: The connection object whose send method is called.
    """
    if isinstance(data, types.StringTypes):
        connection.send(data)
        return
    # Check to see if data is a file-like object that has a read method.
    elif hasattr(data, 'read'):
        # Read the file and send it a chunk at a time.
        while True:
            binarydata = data.read(100000)
            if not binarydata:
                break
            connection.send(binarydata)
        return
    else:
        # The data object was not a file.
        # Try to convert to a string and send the data.
        connection.send(str(data))
        return