blob: 247fed52bac9fd1f44bbf1532fba0c779d6c0523 [file] [log] [blame]
#!/usr/bin/python
#
# Copyright (C) 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HttpClients in this module use httplib to make HTTP requests.
This module make HTTP requests based on httplib, but there are environments
in which an httplib based approach will not work (if running in Google App
Engine for example). In those cases, higher level classes (like AtomService
and GDataService) can swap out the HttpClient to transparently use a
different mechanism for making HTTP requests.
HttpClient: Contains a request method which performs an HTTP call to the
server.
ProxiedHttpClient: Contains a request method which connects to a proxy using
settings stored in operating system environment variables then
performs an HTTP call to the endpoint server.
"""
__author__ = 'api.jscudder (Jeff Scudder)'
import types
import os
import httplib
import atom.url
import atom.http_interface
import socket
import base64
import atom.http_core
ssl_imported = False
ssl = None
try:
import ssl
ssl_imported = True
except ImportError:
pass
class ProxyError(atom.http_interface.Error):
pass
class TestConfigurationError(Exception):
pass
DEFAULT_CONTENT_TYPE = 'application/atom+xml'
class HttpClient(atom.http_interface.GenericHttpClient):
# Added to allow old v1 HttpClient objects to use the new
# http_code.HttpClient. Used in unit tests to inject a mock client.
v2_http_client = None
def __init__(self, headers=None):
self.debug = False
self.headers = headers or {}
def request(self, operation, url, data=None, headers=None):
"""Performs an HTTP call to the server, supports GET, POST, PUT, and
DELETE.
Usage example, perform and HTTP GET on http://www.google.com/:
import atom.http
client = atom.http.HttpClient()
http_response = client.request('GET', 'http://www.google.com/')
Args:
operation: str The HTTP operation to be performed. This is usually one
of 'GET', 'POST', 'PUT', or 'DELETE'
data: filestream, list of parts, or other object which can be converted
to a string. Should be set to None when performing a GET or DELETE.
If data is a file-like object which can be read, this method will
read a chunk of 100K bytes at a time and send them.
If the data is a list of parts to be sent, each part will be
evaluated and sent.
url: The full URL to which the request should be sent. Can be a string
or atom.url.Url.
headers: dict of strings. HTTP headers which should be sent
in the request.
"""
all_headers = self.headers.copy()
if headers:
all_headers.update(headers)
# If the list of headers does not include a Content-Length, attempt to
# calculate it based on the data object.
if data and 'Content-Length' not in all_headers:
if isinstance(data, types.StringTypes):
all_headers['Content-Length'] = str(len(data))
else:
raise atom.http_interface.ContentLengthRequired('Unable to calculate '
'the length of the data parameter. Specify a value for '
'Content-Length')
# Set the content type to the default value if none was set.
if 'Content-Type' not in all_headers:
all_headers['Content-Type'] = DEFAULT_CONTENT_TYPE
if self.v2_http_client is not None:
http_request = atom.http_core.HttpRequest(method=operation)
atom.http_core.Uri.parse_uri(str(url)).modify_request(http_request)
http_request.headers = all_headers
if data:
http_request._body_parts.append(data)
return self.v2_http_client.request(http_request=http_request)
if not isinstance(url, atom.url.Url):
if isinstance(url, types.StringTypes):
url = atom.url.parse_url(url)
else:
raise atom.http_interface.UnparsableUrlObject('Unable to parse url '
'parameter because it was not a string or atom.url.Url')
connection = self._prepare_connection(url, all_headers)
if self.debug:
connection.debuglevel = 1
connection.putrequest(operation, self._get_access_url(url),
skip_host=True)
if url.port is not None:
connection.putheader('Host', '%s:%s' % (url.host, url.port))
else:
connection.putheader('Host', url.host)
# Overcome a bug in Python 2.4 and 2.5
# httplib.HTTPConnection.putrequest adding
# HTTP request header 'Host: www.google.com:443' instead of
# 'Host: www.google.com', and thus resulting the error message
# 'Token invalid - AuthSub token has wrong scope' in the HTTP response.
if (url.protocol == 'https' and int(url.port or 443) == 443 and
hasattr(connection, '_buffer') and
isinstance(connection._buffer, list)):
header_line = 'Host: %s:443' % url.host
replacement_header_line = 'Host: %s' % url.host
try:
connection._buffer[connection._buffer.index(header_line)] = (
replacement_header_line)
except ValueError: # header_line missing from connection._buffer
pass
# Send the HTTP headers.
for header_name in all_headers:
connection.putheader(header_name, all_headers[header_name])
connection.endheaders()
# If there is data, send it in the request.
if data:
if isinstance(data, list):
for data_part in data:
_send_data_part(data_part, connection)
else:
_send_data_part(data, connection)
# Return the HTTP Response from the server.
return connection.getresponse()
def _prepare_connection(self, url, headers):
if not isinstance(url, atom.url.Url):
if isinstance(url, types.StringTypes):
url = atom.url.parse_url(url)
else:
raise atom.http_interface.UnparsableUrlObject('Unable to parse url '
'parameter because it was not a string or atom.url.Url')
if url.protocol == 'https':
if not url.port:
return httplib.HTTPSConnection(url.host)
return httplib.HTTPSConnection(url.host, int(url.port))
else:
if not url.port:
return httplib.HTTPConnection(url.host)
return httplib.HTTPConnection(url.host, int(url.port))
def _get_access_url(self, url):
return url.to_string()
class ProxiedHttpClient(HttpClient):
"""Performs an HTTP request through a proxy.
The proxy settings are obtained from enviroment variables. The URL of the
proxy server is assumed to be stored in the environment variables
'https_proxy' and 'http_proxy' respectively. If the proxy server requires
a Basic Auth authorization header, the username and password are expected to
be in the 'proxy-username' or 'proxy_username' variable and the
'proxy-password' or 'proxy_password' variable, or in 'http_proxy' or
'https_proxy' as "protocol://[username:password@]host:port".
After connecting to the proxy server, the request is completed as in
HttpClient.request.
"""
def _prepare_connection(self, url, headers):
proxy_settings = os.environ.get('%s_proxy' % url.protocol)
if not proxy_settings:
# The request was HTTP or HTTPS, but there was no appropriate proxy set.
return HttpClient._prepare_connection(self, url, headers)
else:
print '!!!!%s' % proxy_settings
proxy_auth = _get_proxy_auth(proxy_settings)
proxy_netloc = _get_proxy_net_location(proxy_settings)
print '!!!!%s' % proxy_auth
print '!!!!%s' % proxy_netloc
if url.protocol == 'https':
# Set any proxy auth headers
if proxy_auth:
proxy_auth = 'Proxy-authorization: %s' % proxy_auth
# Construct the proxy connect command.
port = url.port
if not port:
port = '443'
proxy_connect = 'CONNECT %s:%s HTTP/1.0\r\n' % (url.host, port)
# Set the user agent to send to the proxy
if headers and 'User-Agent' in headers:
user_agent = 'User-Agent: %s\r\n' % (headers['User-Agent'])
else:
user_agent = 'User-Agent: python\r\n'
proxy_pieces = '%s%s%s\r\n' % (proxy_connect, proxy_auth, user_agent)
# Find the proxy host and port.
proxy_url = atom.url.parse_url(proxy_netloc)
if not proxy_url.port:
proxy_url.port = '80'
# Connect to the proxy server, very simple recv and error checking
p_sock = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
p_sock.connect((proxy_url.host, int(proxy_url.port)))
p_sock.sendall(proxy_pieces)
response = ''
# Wait for the full response.
while response.find("\r\n\r\n") == -1:
response += p_sock.recv(8192)
p_status = response.split()[1]
if p_status != str(200):
raise ProxyError('Error status=%s' % str(p_status))
# Trivial setup for ssl socket.
sslobj = None
if ssl_imported:
sslobj = ssl.wrap_socket(p_sock, None, None)
else:
sock_ssl = socket.ssl(p_sock, None, None)
sslobj = httplib.FakeSocket(p_sock, sock_ssl)
# Initalize httplib and replace with the proxy socket.
connection = httplib.HTTPConnection(proxy_url.host)
connection.sock = sslobj
return connection
else:
# If protocol was not https.
# Find the proxy host and port.
proxy_url = atom.url.parse_url(proxy_netloc)
if not proxy_url.port:
proxy_url.port = '80'
if proxy_auth:
headers['Proxy-Authorization'] = proxy_auth.strip()
return httplib.HTTPConnection(proxy_url.host, int(proxy_url.port))
def _get_access_url(self, url):
return url.to_string()
def _get_proxy_auth(proxy_settings):
"""Returns proxy authentication string for header.
Will check environment variables for proxy authentication info, starting with
proxy(_/-)username and proxy(_/-)password before checking the given
proxy_settings for a [protocol://]username:password@host[:port] string.
Args:
proxy_settings: String from http_proxy or https_proxy environment variable.
Returns:
Authentication string for proxy, or empty string if no proxy username was
found.
"""
proxy_username = None
proxy_password = None
proxy_username = os.environ.get('proxy-username')
if not proxy_username:
proxy_username = os.environ.get('proxy_username')
proxy_password = os.environ.get('proxy-password')
if not proxy_password:
proxy_password = os.environ.get('proxy_password')
if not proxy_username:
if '@' in proxy_settings:
protocol_and_proxy_auth = proxy_settings.split('@')[0].split(':')
if len(protocol_and_proxy_auth) == 3:
# 3 elements means we have [<protocol>, //<user>, <password>]
proxy_username = protocol_and_proxy_auth[1].lstrip('/')
proxy_password = protocol_and_proxy_auth[2]
elif len(protocol_and_proxy_auth) == 2:
# 2 elements means we have [<user>, <password>]
proxy_username = protocol_and_proxy_auth[0]
proxy_password = protocol_and_proxy_auth[1]
if proxy_username:
user_auth = base64.encodestring('%s:%s' % (proxy_username,
proxy_password))
return 'Basic %s\r\n' % (user_auth.strip())
else:
return ''
def _get_proxy_net_location(proxy_settings):
"""Returns proxy host and port.
Args:
proxy_settings: String from http_proxy or https_proxy environment variable.
Must be in the form of protocol://[username:password@]host:port
Returns:
String in the form of protocol://host:port
"""
if '@' in proxy_settings:
protocol = proxy_settings.split(':')[0]
netloc = proxy_settings.split('@')[1]
return '%s://%s' % (protocol, netloc)
else:
return proxy_settings
def _send_data_part(data, connection):
if isinstance(data, types.StringTypes):
connection.send(data)
return
# Check to see if data is a file-like object that has a read method.
elif hasattr(data, 'read'):
# Read the file and send it a chunk at a time.
while 1:
binarydata = data.read(100000)
if binarydata == '': break
connection.send(binarydata)
return
else:
# The data object was not a file.
# Try to convert to a string and send the data.
connection.send(str(data))
return