#!/usr/bin/python # # Copyright (C) 2008 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. __author__ = 'api.jscudder (Jeff Scudder)' import urlparse import urllib DEFAULT_PROTOCOL = 'http' DEFAULT_PORT = 80 def parse_url(url_string): """Creates a Url object which corresponds to the URL string. This method can accept partial URLs, but it will leave missing members of the Url unset. """ parts = urlparse.urlparse(url_string) url = Url() if parts[0]: url.protocol = parts[0] if parts[1]: host_parts = parts[1].split(':') if host_parts[0]: url.host = host_parts[0] if len(host_parts) > 1: url.port = host_parts[1] if parts[2]: url.path = parts[2] if parts[4]: param_pairs = parts[4].split('&') for pair in param_pairs: pair_parts = pair.split('=') if len(pair_parts) > 1: url.params[urllib.unquote_plus(pair_parts[0])] = ( urllib.unquote_plus(pair_parts[1])) elif len(pair_parts) == 1: url.params[urllib.unquote_plus(pair_parts[0])] = None return url class Url(object): """Represents a URL and implements comparison logic. URL strings which are not identical can still be equivalent, so this object provides a better interface for comparing and manipulating URLs than strings. URL parameters are represented as a dictionary of strings, and defaults are used for the protocol (http) and port (80) if not provided. """ def __init__(self, protocol=None, host=None, port=None, path=None, params=None): self.protocol = protocol self.host = host self.port = port self.path = path self.params = params or {} def to_string(self): url_parts = ['', '', '', '', '', ''] if self.protocol: url_parts[0] = self.protocol if self.host: if self.port: url_parts[1] = ':'.join((self.host, str(self.port))) else: url_parts[1] = self.host if self.path: url_parts[2] = self.path if self.params: url_parts[4] = self.get_param_string() return urlparse.urlunparse(url_parts) def get_param_string(self): param_pairs = [] for key, value in self.params.iteritems(): param_pairs.append('='.join((urllib.quote_plus(key), urllib.quote_plus(str(value))))) return '&'.join(param_pairs) def get_request_uri(self): """Returns the path with the parameters escaped and appended.""" param_string = self.get_param_string() if param_string: return '?'.join([self.path, param_string]) else: return self.path def __cmp__(self, other): if not isinstance(other, Url): return cmp(self.to_string(), str(other)) difference = 0 # Compare the protocol if self.protocol and other.protocol: difference = cmp(self.protocol, other.protocol) elif self.protocol and not other.protocol: difference = cmp(self.protocol, DEFAULT_PROTOCOL) elif not self.protocol and other.protocol: difference = cmp(DEFAULT_PROTOCOL, other.protocol) if difference != 0: return difference # Compare the host difference = cmp(self.host, other.host) if difference != 0: return difference # Compare the port if self.port and other.port: difference = cmp(self.port, other.port) elif self.port and not other.port: difference = cmp(self.port, DEFAULT_PORT) elif not self.port and other.port: difference = cmp(DEFAULT_PORT, other.port) if difference != 0: return difference # Compare the path difference = cmp(self.path, other.path) if difference != 0: return difference # Compare the parameters return cmp(self.params, other.params) def __str__(self): return self.to_string()