#! /usr/local/bin/python
# -*- Mode: Python; tab-width: 4 -*-
#
#   Author: Sam Rushing <rushing@nightmare.com>
#   Copyright 1996, 1997 by Sam Rushing
#                        All Rights Reserved.
#
# This software is provided free for non-commercial use.  If you are
# interested in using this software in a commercial context, or in
# purchasing support, please contact the author.

# NOTE: this module is written in old-style Python (it relies on the
# 'regex' and 'string' modules).  The code below deliberately sticks
# to constructs that those interpreters understand.

RCS_ID = '$Id: http_server.py,v 1.1.1.1 1999/01/08 06:58:44 rushing Exp $'

import os
import socket
import tb
import asyncore
import asynchat
import stat
import string
import sys
import mime_type_table
import time
import http_date

import mstatus
sanitize = mstatus.sanitize

# third whitespace-separated field of the RCS id, i.e. the revision.
VERSION_STRING = string.split(RCS_ID)[2]

HOSTNAME = socket.gethostbyaddr (
    socket.gethostbyname (socket.gethostname())
    )[0]

import regex

# split a uri
# <path>;<params>?<query>#<fragment>
path_regex = regex.compile (
#        path        params        query       fragment
    '\\([^;?#]*\\)\\(;[^?#]*\\)?\\(\\?[^#]*\)?\(#.*\)?'
    )

def split_path (path):
    """Split a URI into [path, params, query, fragment].

    Each element is whatever the corresponding group of 'path_regex'
    matched.  Raises ValueError ("bad path") when the pattern does not
    consume the whole string.
    """
    # regex.match() returns the length of the match, so comparing it
    # with len(path) checks that the entire string was consumed.
    if path_regex.match (path) != len(path):
        raise ValueError ("bad path")
    else:
        return map (lambda i,r=path_regex: r.group(i), range(1,5))

# Request objects hold information specific to a request.  This class is
# necessary since 'Connection: Keepalive' and HTTP/1.1 have decoupled
# requests and connections.

# we need to identify:
#  ways that extensions and channels interact; and which of these will
#  need to be routed through the request object now.

# channel attributes referenced:
#
#  request, header, uri, command
#    these should all go into the request object
#
# channel methods referenced:
#  send_reply, push, response, push_with_producer,
#  log_request, got_request, close_when_done, date_time_string,
#  build_header

# which attributes & methods need to be moved _into_ the
# request object?

class request:
    """Plain record of one request, as already parsed by a channel."""

    bytes_out = 0

    # all these args could be passed as a tuple.  does it matter?
    def __init__ (self, channel, request, header, uri, command):
        self.channel = channel
        # this information has been cracked by the channel.
        self.request = request
        self.header = header
        self.uri = uri
        self.command = command

class http_channel (asynchat.async_chat):
    """One client connection.

    Accumulates the request header, hands the parsed request to the
    first matching server extension, and keeps byte counters that are
    reported to the server and the logger.
    """

    # since we're basically a file server, make
    # the block size large.
    ac_out_buffer_size = 8192

    # set this member when there's a possibility the other
    # side will close the connection on us.
    expect_reset = 0

    bytes_in = 0
    bytes_out = 0

    # this provides a default for those extensions that don't bother
    # to send it.
    sent_reply_code = 200

    handler = None

    request_number = 0

    # extension currently responsible for this channel, if any.
    current_handler = None

    def __init__ (self, server, conn, addr):
        asynchat.async_chat.__init__ (self, conn)
        self.server = server
        self.addr = addr
        self.got_request = 0
        self.done = 0
        self.request = ''
        self.command = ''
        # initially, we're looking for the end of the
        # request header.
        self.set_terminator ('\r\n\r\n')
        self.in_buffer = ''

    def readable (self):
        # keep reading until the header has arrived; a 'post' keeps
        # reading afterwards as well.  The parentheses only spell out
        # the precedence Python already applies.
        return (self.connected and not self.got_request) or self.command == 'post'

    # we override these to keep track of throughput.
    def send (self, data):
        result = asynchat.async_chat.send (self, data)
        self.bytes_out = self.bytes_out + result
        return result

    def recv (self, buffer_size):
        data = asynchat.async_chat.recv (self, buffer_size)
        self.bytes_in = self.bytes_in + len(data)
        return data

    def collect_incoming_data (self, data):
        if not self.got_request:
            # while receiving the request and header,
            # just accumulate the data.
            self.in_buffer = self.in_buffer + data
        elif self.current_handler:
            self.current_handler.collect_post_data (data)
        else:
            # drop it on the floor, we don't want it.
            pass

    def found_terminator (self):
        if self.got_request:
            # the header is already in; any later terminator belongs
            # to the handler.
            if self.current_handler:
                self.current_handler.found_post_terminator()
        else:
            self.got_request = 1
            request = self.in_buffer
            self.in_buffer = ''
            # Wrapping an exception handler around the whole request keeps
            # one request from bringing the server down.  Ideally, you'd
            # log the exception somewhere!
            try:
                self.process_request (request)
            except:
                et, ev, etb = sys.exc_info()
                sys.stderr.write ('%s:%s\n' % (et, ev))
                # temporarily rebind stdout to stderr
                temp_out = sys.stdout
                sys.stdout = sys.stderr
                try:
                    tb.printtb (etb)
                finally:
                    sys.stdout = temp_out
                    # don't keep the traceback (and its frames) alive.
                    etb = None
                try:
                    self.send_reply (500, close=1)
                except:
                    # best effort only: the connection may already be
                    # unusable, and the failure was reported above.
                    pass

    def handle_error (self, *info):
        # It's our job to close the connection, but sometimes
        # the client will do it for us.  Catch this and don't
        # flag it as an error.
        if self.expect_reset:
            self.close()
            return
        error = info[0]
        # For some reason, both netscape and mosaic are causing
        # asynchronous WSAEWOULDBLOCK errors when running under
        # windows. [may be related to the above...]
        if os.name == 'nt' and error == 10035:
            pass
        else:
            # find the origin of the exception by
            # crawling up the traceback.
            et,ev,etb = info
            while etb.tb_next:
                etb = etb.tb_next
            sys.stderr.write (
                'error: %s:%d %s:%s file:%s line:%d\n' % (
                    self.addr[0],
                    self.addr[1],
                    et,
                    ev,
                    etb.tb_frame.f_code.co_filename,
                    etb.tb_lineno
                    )
                )
            # NOTE(review): the source had lost its indentation; the
            # close() is placed in this branch so that the ignored
            # would-block case above really is ignored -- confirm.
            self.close()

    def log_request (self):
        self.server.update_throughput (self.bytes_in, self.bytes_out)
        self.log_line (self.request, self.sent_reply_code, self.bytes_out)
        # set these back to their defaults
        # [this is why we need a 'request' object]
        self.bytes_in = 0
        self.bytes_out = 0
        self.sent_reply_code = 200
        self.request = ''

    def close (self):
        self.log_request()
        self.server.total_requests = self.server.total_requests + self.request_number
        asynchat.async_chat.close (self)

    def find_handler (self, path_parts):
        """Return the first server extension whose match() accepts the
        first path component, or None when there is none."""
        for ext in self.server.extensions:
            if ext.match (path_parts[0]):
                return ext
        # no extensions, or no matching extension
        return None

    def process_request (self, header):
        """Parse the request header and dispatch it to an extension.

        Replies 400 when the request line cannot be parsed and 404
        when no extension claims the path.
        """
        lines = string.split (header, '\r\n')
        self.header = lines
        self.request = request = lines[0]
        cracked = crack_request (request)
        if cracked is None:
            # crack_request() yields None for a malformed request
            # line; answer that as a bad request instead of letting
            # the tuple unpacking below blow up into a 500.
            self.send_reply (400, request)
            return
        command, uri, version = cracked
        self.command = command = string.lower (command)
        self.uri = [path, params, query, fragment] = split_path (uri)
        self.version = version
        # find a handler (extension)
        handler = self.find_handler (string.split (path[1:], '/'))
        if handler:
            self.current_handler = handler
            handler.handle_request (self)
        else:
            # not found
            self.send_reply (404, request)

    def response (self, code, version='1.0'):
        """Return the status line for 'code', terminated by CRLF."""
        short = self.responses[code][0]
        return 'HTTP/%s %d %s\r\n' % (version, code, short)

    def date_time_string (self, when):
        return http_date.build_http_date (when)

    def log_date_string (self, when):
        # tz_for_log is the local UTC offset, so the clock time in
        # front of it has to be local time as well; pairing GMT with a
        # local offset would name the wrong instant.
        return time.strftime (
            '%d/%b/%Y:%H:%M:%S ',
            time.localtime(when)
            ) + tz_for_log

    def log_line (self, request, code, length):
        if self.server.logger:
            self.server.logger.log (
                '%s:%d - - [%s] "%s" %d %d\n' % (
                    self.addr[0],
                    self.addr[1],
                    self.log_date_string (time.time()),
                    request,
                    code,
                    length
                    )
                )

    # this is not the 'logger' log, but rather the asyncore method.
    def log (self, *ignore):
        pass

    def build_header (self):
        return [
            'Server: Medusa/%s' % VERSION_STRING,
            'Date: %s' % self.date_time_string (time.time()),
            ]

    def generated_content_header (self, path, content_type='text/html', extra_headers=None):
        """Return a complete header block (ending in a blank line) for
        generated content.  'path' is accepted but not used here."""
        lines = [
            'Server: Medusa/%s' % VERSION_STRING,
            'Date: %s' % self.date_time_string (time.time()),
            'Content-Type: %s' % content_type,
            ]
        if extra_headers:
            lines = lines + extra_headers
        return string.join (lines, '\r\n') + '\r\n\r\n'

    def send_reply (self, code, request='', close=1, message=None, version='1.0'):
        """Push a status line plus the default HTML error page.

        'message', when given, replaces the short text inside the page
        body (the status line always uses the table entry).  'request'
        is accepted but not used here.  Raises KeyError for a code
        that is missing from 'responses'.
        """
        short, explain = self.responses[code]
        if message is not None:
            short = message
        self.sent_reply_code = code
        resp = self.response (code, version)
        em = DEFAULT_ERROR_MESSAGE % {
            'code': code,
            'message': short,
            'explain': explain
            }
        self.push (resp+'\r\n'+em)
        if close:
            self.close_when_done()
            # NOTE(review): indentation was lost in the source; the
            # flag is set only when we are closing -- confirm.
            self.expect_reset = 1

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Partial information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),

        301: ('Moved', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('Method', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not found', 'Nothing matches the given URI'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not implemented',
              'Server does not support this operation'),
        502: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        503: ('Gateway timeout',
              'The gateway server did not receive a timely response'),
        }

class http_server (asyncore.dispatcher):
    """Listening socket: accepts connections, creates a channel for
    each one, and keeps server-wide counters."""

    cgi_bin_dirs = []

    channel_class = http_channel

    VERSION_STRING = VERSION_STRING
    SERVER_IDENT = 'HTTP Server (V%s)' % VERSION_STRING

    bytes_in = 0
    bytes_out = 0

    logger = None

    def __init__ (self, port=80, root=None):
        """Bind to 'port' (an int or a numeric string) and listen.

        'root' is optional.  When omitted, a module-level 'root' (as
        set by the script entry point below) is used if there is one.
        Raises TypeError when a root is known but is not a directory.
        """
        if root is None:
            # Earlier code read a module global unconditionally, which
            # raised NameError whenever this module was imported
            # rather than run as a script.
            root = globals().get ('root')
        if root is not None and not os.path.isdir (root):
            raise TypeError ('root argument must be a directory')
        if (type(port) == type('')):
            port = string.atoi (port)
        self.port = port
        asyncore.dispatcher.__init__ (self)
        self.total_hits = 0
        self.total_requests = 0
        self.cache_hits = 0
        self.files_delivered = 0
        self.cgi_bin_hits = 0
        self.children = []
        self.create_socket (socket.AF_INET, socket.SOCK_STREAM)
        # try to re-use a server port if possible
        try:
            self.socket.setsockopt (
                socket.SOL_SOCKET, socket.SO_REUSEADDR,
                self.socket.getsockopt (socket.SOL_SOCKET, socket.SO_REUSEADDR) | 1
                )
        except:
            # best effort: carry on without address re-use.
            pass
        self.bind (('', port))
        self.listen (5)
        # report the port the socket actually ended up on.
        host, port = self.socket.getsockname()
        self.server_name = HOSTNAME
        self.server_port = port
        sys.stdout.write (
            '%s started at %s'
            '\n\tHostname: %s'
            '\n\troot:%s'
            '\n\tPort:%d'
            '\n\tCGI directories:%s\n' % (
                self.SERVER_IDENT,
                time.ctime(time.time()),
                HOSTNAME,
                root,
                port,
                repr(self.cgi_bin_dirs)
                )
            )
        self.start_time = time.time()
        self.extensions = []

    def update_throughput (self, bytes_in, bytes_out):
        self.bytes_in = self.bytes_in + bytes_in
        self.bytes_out = self.bytes_out + bytes_out

    def handle_read (self):
        pass

    def readable (self):
        return self.accepting

    def handle_connect (self):
        pass

    def handle_accept (self):
        conn, addr = self.accept()
        self.total_hits = self.total_hits + 1
        self.channel_class (self, conn, addr)

    def add_extension (self, extension):
        # newest extension is consulted first.
        self.extensions.insert (0, extension)

    def remove_extension (self, extension):
        self.extensions.remove (extension)

    def status (self):
        """Return a producer of HTML lines describing the server."""
        def nice_bytes (n):
            return string.join (mstatus.english_bytes (n))

        requests = self.total_requests + 1
        # no connection may have been accepted yet; avoid dividing
        # by zero in that case.
        if self.total_hits:
            per_connection = float(requests) / float(self.total_hits)
        else:
            per_connection = 0.0

        return mstatus.lines_producer (
            ['<h2>%s</h2>'                        % self.SERVER_IDENT,
             '<br>Listening on port %d'           % self.port,
             '<br>Logging on: %s'                 % sanitize(repr(self.logger)),
             '<ul>',
             '<li><b>Total Connection Hits:</b> %d' % self.total_hits,
             '<li><b>Files Delivered:</b> %d'     % self.files_delivered,
             '<li><b>Total Bytes In:</b> %s'      % nice_bytes (self.bytes_in),
             '<li><b>Total Bytes Out:</b> %s'     % nice_bytes (self.bytes_out),
             '<li><b>Cache Hits:</b> %d'          % self.cache_hits,
             '<li><b>Open Connections:</b> %d'    % len(asyncore.socket_map),
             '<li><b>Requests/Connection:</b> %d/%d (%.2f)' % (
                 requests,
                 self.total_hits,
                 per_connection
                 ),
             '</ul>',
             '<p><b>Extension List</b>',
             '<ul>',
             ] + mstatus.html_reprs (self.extensions, '<li>') + ['</ul>']
            )

# Default error message
DEFAULT_ERROR_MESSAGE = string.joinfields (
    ['<head>',
     '<title>Error response</title>',
     '</head>',
     '<body>',
     '<h1>Error response</h1>',
     '<p>Error code %(code)d.',
     '<p>Message: %(message)s.',
     '<p>Error code explanation: %(code)s = %(explain)s.',
     '</body>'
     ],
    '\r\n'
    )

class file_producer:
    """Producer that hands out an open file in 8192-byte pieces."""

    def __init__ (self, channel, file):
        self.done = 0
        self.channel = channel
        self.file = file

    def ready (self):
        return not self.done

    def more (self):
        if self.done:
            return ''
        data = self.file.read(8192)
        if data:
            return data
        # end of file: release the file object and count the delivery
        # on the owning server.
        self.file.close()
        del self.file
        scs = self.channel.server
        scs.files_delivered = scs.files_delivered + 1
        self.done = 1
        return ''

def compute_timezone_for_log ():
    """Return the local UTC offset as a '+HHMM' / '-HHMM' string."""
    # time.daylight only says that a DST zone is defined; whether DST
    # is in effect right now is the is-DST flag (index 8) of the
    # current local time.
    if time.daylight and time.localtime(time.time())[8] > 0:
        tz = time.altzone
    else:
        tz = time.timezone
    # tz is in seconds *west* of UTC, so a positive value means a
    # negative offset.
    if tz > 0:
        neg = 1
    else:
        neg = 0
        tz = -tz
    h, rem = divmod (tz, 3600)
    m, rem = divmod (rem, 60)
    if neg:
        return '-%02d%02d' % (h, m)
    else:
        return '+%02d%02d' % (h, m)

# if you run this program over a TZ change boundary, this will be invalid.
tz_for_log = compute_timezone_for_log()

ACCEPT = regex.compile ('Accept: \(.*\)', regex.casefold)

# HTTP/1.0 doesn't say anything about the "; length=nnnn" addition
# to this header.  I suppose its purpose is to avoid the overhead
# of parsing dates...
IF_MODIFIED_SINCE = regex.compile (
    'If-Modified-Since: \([^;]+\)\(\(; length=\([0-9]+\)$\)\|$\)',
    regex.casefold
    )

USER_AGENT = regex.compile ('User-Agent: \(.*\)', regex.casefold)

CONTENT_TYPE = regex.compile ('Content-Type: \(.*\)', regex.casefold)

REQUEST = regex.compile ('\([^ ]+\) \([^ ]+\)\(\( HTTP/\([0-9.]+\)\)$\|$\)')

def get_header (head_reg, lines, group=1):
    """Return 'group' of the first line that 'head_reg' matches in
    full, or '' when no line matches."""
    for line in lines:
        if head_reg.match (line) == len(line):
            return head_reg.group(group)
    return ''

def crack_request (r):
    """Split a request line into (command, uri, version).

    'version' is None when the line carries no ' HTTP/x.y' suffix.
    Returns None when the line does not match REQUEST at all.
    """
    if REQUEST.match (r) != len(r):
        return None
    if REQUEST.group(3):
        version = REQUEST.group(5)
    else:
        version = None
    return REQUEST.group (1), REQUEST.group(2), version

if __name__ == '__main__':
    # NOTE(review): the code below has always referred to these two
    # modules without importing them; they are assumed to be sibling
    # modules of this file -- confirm.
    import default_extension
    import filesys

    root = sys.argv[1]
    port = string.atoi (sys.argv[2])
    hs = http_server (port, root)
    # plug in the 'default' extension.
    de = default_extension.default_with_post_extension (
        # use a filesystem object
        filesys.os_filesystem (root)
        )
    hs.add_extension (de)
    asyncore.loop()