Asynchronous HTTP Requests in Python

Home

About

Programming

Puzzles

I found myself in the position of being disappointed with the lack of ability to make asynchronous HTTP requests in Python twice without installing (and forcing users to install) some silly library, which means it's time to just roll my own.

This is one file. Just drop it into your codebase and import it as a module. No installations. It uses Python's native urllib and threading modules. It works in both Python 2 and Python 3.

Particularly this is useful for things like games or other UI apps where blocking a thread is unacceptable.

Sample Usage

import fetcher # or whatever you name the file

import time

request = fetcher.HttpAsyncRequest('http://www.blakeohare.com/blog/')

while not request.is_complete():
  print('Waiting...')
  time.sleep(.05)

print(request.get_response_code(), request.get_response_message())
print('-' * 10)
print(request.get_response_content('t'))

The Code

# This module makes asynchronous HTTP requests in Python.

# This runs in Python 2.x* and 3.x

# This requires no special library to be installed.

# This is public domain.

# This is provided as-is.

# Go nuts.


# * HTTP header names in the response are all lowercase in Python 2.x. This seems to be

#   a limitation in urllib. Nothing I can do about this other than fall back to TCP/IP and 

#   parse the response manually. Although if I'm incorrect in assuming this, please let 

#   me know.


_user_agent = "Blake's Magic Python Async HTTP Fetcher vee one point oh"

import threading as _threading
_is_old = 3 / 2 == 1 # Yeah, I'm sure there's a better way. Deal with it.

if _is_old:
    import urllib as _urllib
    import urllib2 as _urllib2
    import urlparse as _urlparse
else:
    import urllib as _urllib
    import urllib.parse as _urllib_parse
    import urllib.request as _urllib_request
    
def _parse_url(url):
    return _urlparse.urlparse(url)

def set_user_agent(value):
    global _user_agent
    _user_agent = value

def decode_url_value(value):
    if _is_old:
        return _urllib.unquote(value).decode('utf8')
    else:
        return _urllib_parse.unquote(value)

def encode_url_value(value):
    if _is_old:
        return _urllib2.quote(value.encode('utf8'))
    else:
        return _urllib_parse.quote(value)

def _send_impl(req_obj, method, url, headers, content):
    if _is_old:
        opener = _urllib2.build_opener(_urllib2.HTTPHandler)
        if content == None:
            request = _urllib2.Request(url)
        else:
            request = _urllib2.Request(url, data=content)
    else:
        opener = _urllib_request.build_opener(_urllib_request.HTTPHandler)
        if content == None:
            request = _urllib_request.Request(url)
        else:
            request = _urllib_request.Request(url, data=content)
    for header in headers:
        request.add_header(header[0], header[1])
    request.get_method = lambda:method
    output = opener.open(request)
    content = output.read()
    headers = {}
    for header_key in output.headers.keys():
        headers[header_key] = output.headers[header_key]
    response_message = output.msg
    response_code = output.code
    req_obj._set_result(response_code, response_message, content, headers)

class HttpAsyncRequest:
    def __init__(self, url):
        bad_format = False
        try:
            if _is_old:
                url_parts = _parse_url(url)
            else:
                url_parts = _urllib_parse.urlparse(url)
            if url_parts.scheme == '' or url_parts.netloc == '':
                bad_format = True
        except:
            bad_format = True
        if bad_format:
            raise Exception("Bad URL! Bad!")
            
        self.mutex = _threading.Lock()
        self.method = 'GET'
        self.scheme = url_parts.scheme
        self.host = url_parts.hostname
        self.port = url_parts.port
        self.path = url_parts.path
        self.fragment = url_parts.fragment
        self.params = url_parts.params
        self.original_query = url_parts.query # use this if query params are not modified

        self.query = None # if modified, clear original_query and populate this with a dictionary lookup

        self.header_formatting = {} # preserves the formatting of the header key

        self.header_values = {} # canonical key of header with list of values of that header

        self.content = None
        self.set_header('User-Agent', _user_agent)
        self.done = False
        self.response_code = -1
        self.response_message = None
        self.response_content = None
        self.response_headers_values = None
        self.response_headers_formatting = None
    
    def send(self):
        url = self.scheme + '://' + self.host
        
        if self.port != None:
            url += ':' + str(self.port)
        
        if self.path != None and self.path != '':
            if self.path[0] != '/':
                self.path = '/' + self.path
            url += self.path
        
        if self.params != None and self.params != '':
            url += ';' + self.params
        
        if self.query == None:
            if self.original_query != '':
                url += '?' + self.original_query
        else:
            queries = []
            keys = self.query.keys()[:]
            keys.sort() # deterministic requests

            for key in keys:
                e_key = encode_url_value(key)
                for value in self.query[key]:
                    e_value = encode_url_value(value)
                    queries.append(e_key + '=' + e_value)
            url += '?' + '&'.join(queries)
        
        if self.fragment != '':
            url += '#' + self.fragment
        
        headers = []
        keys = list(self.header_formatting.keys())
        keys.sort()
        for key in keys:
            f_key = self.header_formatting[key]
            for value in self.header_values[key]:
                headers.append((f_key, value))
        
        
        thread = _threading.Thread(target = _send_impl, args = (self, self.method, url, headers, self.content))
        thread.daemon = True
        thread.start()
        
    def _set_result(self, code, message, content, headers):
        self.mutex.acquire()
        try:
            self.response_code = code
            self.response_message = message
            self.response_content = content
            self.response_headers_values = {}
            self.response_headers_formatting = {}
            for key in headers.keys():
                ckey = key.lower()
                self.response_headers_values[ckey] = headers[key]
                self.response_headers_formatting[ckey] = key
        finally:
            self.mutex.release()
    
    def is_complete(self):
        self.mutex.acquire()
        try:
            return self.response_code != -1
        finally:
            self.mutex.release()
    
    def _ensure_request_complete(self):
        if not self.is_complete():
            raise Exception("Cannot access response until request is complete.")
    
    def get_response_code(self):
        self._ensure_request_complete()
        return self.response_code
    
    def get_response_message(self):
        self._ensure_request_complete()
        return self.response_message
    
    def get_response_header_names(self):
        self._ensure_request_complete()
        output = list(self.response_headers_formatting.values())
        output.sort()
        return output
    
    def get_response_header(self, name):
        self._ensure_request_complete()
        return self.response_headers_values.get(name.lower(), None)
        
    def get_response_content(self, mode='t'):
        self._ensure_request_complete()
        output = self.response_content
        if mode == 't':
            return output.decode('utf-8')
        else:
            return output
    
    
    def set_header(self, key, value):
        self.header_formatting[key.lower()] = key
        self.header_values[key.lower()] = [value]
    
    def add_header(self, key, value):
        canonical_key = key.lower()
        existing_headers = self.header_values.get(canonical_key, None)
        if existing_headers == None:
            self.set_header(key, value)
        else:
            existing_headers.append(value)
    
    def clear_header(self, key):
        canonical_key = key.lower()
        if self.header_values.get(canonical_key, None) != None:
            self.header_values.pop(canonical_key)
            self.header_formatting.pop(canonical_key)
    
    def set_method(self, method):
        self.method = method
    
    def set_content(self, content):
        self.content = content
    
    def _init_query(self):
        if self.query == None:
            query = [] if self.original_query != '' else self.original_query.split('&')
            lookup_values = {}
            for item in query:
                parts = item.split('=')
                if len(parts) >= 2:
                    item_key = decode_url_value(parts[0])
                    item_value = decode_url_value('='.join(parts[1:]))
                    existing_values = lookup_values.get(item_key, None)
                    if existing_values == None:
                        existing_values = []
                        lookup_values[item_key] = existing_values
                    existing_values.append(item_value)
            self.query = lookup_values
    
    def set_query(self, key, value):
        self._init_query()
        self.query[key] = [value]
    
    def add_query(self, key, value):
        self._init_query()
        values = self.query.get(key, None)
        if values != None:
            values.append(value)
        else:
            self.query[key] = [value]
    
    def clear_query(self, key):
        self._init_query()
        if self.query.get(key, None) != None:
            self.query.pop(key)
    
    def set_port(self, port):
        self.port = port
    
    def set_fragment(self, fragment):
        self.fragment = fragment
    
    def clear_fragment(self):
        self.fragment = None
    
    def set_scehem(self, scheme):
        self.scheme = scheme

Documentation

HttpAsyncRequest(url) - creates a new Aysnchronous HTTP request object, but does not send the request yet so you have a chance to fiddle with it before sending.
asyncRequest.send() - sends the HTTP request and returns immediately. (See asyncRequest.is_complete())
asyncRequest.set_method(method) - set the method type to GET, POST, PUT, DELETE, HAMSTERS, or whatever.
asyncRequest.set_header(name, value) - set an HTTP header
asyncRequest.add_header(name, value) - add an HTTP header (yes, HTTP requests can have multiple headers with the same name)
asyncRequest.clear_header(name) - clears all values of an HTTP header
asyncRequest.set_query(name, value) - sets a URL query value (overrides the constructor value)
asyncRequest.add_query(name, value) - adds a URL query value (overrides the constructor value, HTTP allows for multiple values for the same name).
asyncRequest.clear_query(name, vaue) - clears all query values for a particular name (overrides the constructor value).
asyncRequest.set_content(value) - set the HTTP request body.
asyncRequest.set_port(port) - set the port (overrides the constructor value).
asyncRequest.set_fragment(value) - sets the fragment i.e. the thing after a # (overrides the constructor value).
asyncRequest.set_scheme(value) - sets the scheme (overrides the constructor value).
asyncRequest.is_complete() - returns True if the request is finished.
asyncRequest.get_response_code() - returns the HTTP status code of the response.
asyncRequest.get_response_message() - returns the HTTP status message of the response e.g. OK, NOT FOUND, FORBIDDEN, BEEEEES, etc.
asyncRequest.get_response_headers() - returns the list of all the response header names.
asyncRequest.get_response_header(name) - returns the value of a response header.
asyncRequest.get_response_content(mode='t') - returns the content of the response. mode can be 't' for text or 'b' for binary.

There are some module functions:

set_user_agent(value) - Globally sets the User-Agent header for all subsequent requests.
decode_url_value(value) - Decodes URL values (all the '%40' and '+' silliness) using UTF8. This (and the encode method below) just wrap the relevant Python 2 or 3 urllib libraries so you don't have to worry about compatibility yourself.
encode_url_value(value) - Encodes URL values into escaped hex values using UTF8.

Get notified about new posts and snarky comments by following the twitter account.