#! /usr/local/bin/python
# -*- Mode: Python; tab-width: 4 -*-
#
#	Author: Sam Rushing <rushing@nightmare.com>
#	Copyright 1996, 1997 by Sam Rushing
#						 All Rights Reserved.
#
# This software is provided free for non-commercial use.  If you are
# interested in using this software in a commercial context, or in
# purchasing support, please contact the author.

RCS_ID = '$Id: http_server.py,v 1.1.1.1 1999/01/08 06:58:44 rushing Exp $'

import os
import socket
import tb
import asyncore
import asynchat
import stat
import string
import sys
import mime_type_table
import time
import http_date

import mstatus

sanitize = mstatus.sanitize

VERSION_STRING = string.split(RCS_ID)[2]

HOSTNAME = socket.gethostbyaddr (
	socket.gethostbyname (socket.gethostname())
	)[0]

import regex
# split a uri
# <path>;<params>?<query>#<fragment>
path_regex = regex.compile (
#        path        params        query       fragment
	'\\([^;?#]*\\)\\(;[^?#]*\\)?\\(\\?[^#]*\\)?\\(#.*\\)?'
	)

def split_path (path):
	if path_regex.match (path) != len(path):
		raise ValueError, "bad path"
	else:
		return map (lambda i,r=path_regex: r.group(i), range(1,5))
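
# A quick illustration of split_path (hypothetical uri); the params,
# query and fragment parts are optional:
#
#	split_path ('/cgi-bin/test;lang=en?x=1#top')
#	=> ['/cgi-bin/test', ';lang=en', '?x=1', '#top']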

# Request objects hold information specific to a request. This class is
# necessary since 'Connection: Keep-Alive' and HTTP/1.1 have decoupled
# requests from connections.

# We need to identify the ways that extensions and channels interact,
# and decide which of those interactions should now be routed through
# the request object.

# channel attributes referenced:
# 
# request, header, uri, command
#  these should all go into the request object
#
# channel methods referenced:
# send_reply, push, response, push_with_producer,
# log_request, got_request, close_when_done, date_time_string,
# build_header

# which attributes & methods need to be moved _into_ the
# request object?

class request:

	bytes_out = 0

	# all these args could be passed as a tuple.  does it matter?
	def __init__ (self, channel, request, header, uri, command):
		self.channel = channel
		# this information has been cracked by the channel.
		self.request = request
		self.header = header
		self.uri = uri
		self.command = command

class http_channel (asynchat.async_chat):
	# since we're basically a file server, make
	# the block size large.
	ac_out_buffer_size = 8192

	# set this member when there's a possibility the other
	# side will close the connection on us.
	expect_reset = 0

	bytes_in = 0
	bytes_out = 0

	# this provides a default for those extensions that don't bother
	# to send it.
	sent_reply_code = 200

	handler = None
	
	request_number = 0

	def __init__ (self, server, conn, addr):
		asynchat.async_chat.__init__ (self, conn)
		self.server = server
		self.addr = addr
		self.got_request = 0
		self.done = 0
		self.request = ''
		self.command = ''

		# initially, we're looking for the end of the
		# request header.

		self.set_terminator ('\r\n\r\n')
		self.in_buffer = ''
		
	def readable (self):
		# readable while we are still collecting the request header, or
		# while a POST body is arriving for the current handler.
		return self.connected and (not self.got_request or self.command == 'post')

	# we override these to keep track of throughput.
	def send (self, data):
		result = asynchat.async_chat.send (self, data)
		self.bytes_out = self.bytes_out + result
		return result

	def recv (self, buffer_size):
		data = asynchat.async_chat.recv (self, buffer_size)
		self.bytes_in = self.bytes_in + len(data)
		return data

	current_handler = None

	def collect_incoming_data (self, data):
		if not self.got_request:
			# while receiving the request and header,
			# just accumulate the data.
			self.in_buffer = self.in_buffer + data
		elif self.current_handler:
			self.current_handler.collect_post_data (data)
		else:
			# drop it on the floor, we don't want it.
			pass

	def found_terminator (self):
		if self.got_request:
			if self.current_handler:
				self.current_handler.found_post_terminator()
		else:
			self.got_request = 1

			request = self.in_buffer
			self.in_buffer = ''

			# Wrapping an exception handler around the whole request keeps
			# one request from bringing the server down.  Ideally, you'd
			# log the exception somewhere!

			try:
				self.process_request (request)
			except:
				sys.stderr.write ('%s:%s\n' % (sys.exc_type, sys.exc_value))
				# temporarily rebind stdout to stderr
				temp_out = sys.stdout
				sys.stdout = sys.stderr
				try:
					tb.printtb (sys.exc_traceback)
				finally:
					sys.stdout = temp_out

				try:
					self.send_reply (500, close=1)
				except:
					pass

	def handle_error (self, *info):
		# It's our job to close the connection, but sometimes
		# the client will do it for us.  Catch this and don't
		# flag it as an error.
		if self.expect_reset:
			self.close()
			return
		error = info[0]
		# For some reason, both Netscape and Mosaic cause asynchronous
		# WSAEWOULDBLOCK errors when running under Windows.
		# [may be related to the above...]
		if os.name == 'nt' and error == 10035:
			pass
		else:
			# find the origin of the exception by
			# crawling up the traceback.
			et,ev,etb = info
			while etb.tb_next:
				etb = etb.tb_next
			sys.stderr.write (
				'error: %s:%d %s:%s file:%s line:%d\n' % (
					self.addr[0], self.addr[1],
					et,
					ev,
					etb.tb_frame.f_code.co_filename,
					etb.tb_lineno
					)
				)
			self.close()

	def log_request (self):
		self.server.update_throughput (self.bytes_in, self.bytes_out)
		self.log_line (self.request, self.sent_reply_code, self.bytes_out)
		# set these back to their defaults
		# [this is why we need a 'request' object]
		self.bytes_in = 0
		self.bytes_out = 0
		self.sent_reply_code = 200
		self.request = ''

	def close (self):
		self.log_request()
		self.server.total_requests = self.server.total_requests + self.request_number
		asynchat.async_chat.close (self)

	def find_handler (self, path_parts):
		if self.server.extensions:
			root = path_parts[0]
			for ext in self.server.extensions:
				if ext.match (root):
					return ext
			# no matching extensions
			return None
		else:
			# no extensions
			return None

	def process_request (self, header):
		lines = string.split (header, '\r\n')
		self.header = lines
		self.request = request = lines[0]

		command, uri, version = crack_request (request)

		self.command = command = string.lower (command)
		self.uri = [path, params, query, fragment] = split_path (uri)
		self.version = version

		# find a handler (extension)
		handler = self.find_handler (string.split (path[1:], '/'))
		if handler:
			self.current_handler = handler
			handler.handle_request (self)
		else:
			# not found
			self.send_reply (404, request)

	def response (self, code, version='1.0'):
		short, long = self.responses[code]
		return 'HTTP/%s %d %s\r\n' % (version, code, short)
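
	# For example (hypothetical call), self.response (404) yields the
	# status line 'HTTP/1.0 404 Not found\r\n'; send_reply() below then
	# follows it with a blank line and the canned error body.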

	def date_time_string (self, when):
		return http_date.build_http_date (when)

	def log_date_string (self, when):
		return time.strftime (
			'%d/%b/%Y:%H:%M:%S ',
			time.gmtime(when)
			) + tz_for_log

	def log_line (self, request, code, length):
		if self.server.logger:
			self.server.logger.log (
				'%s:%d - - [%s] "%s" %d %d\n' % (
					self.addr[0],
					self.addr[1],
					self.log_date_string (time.time()),
					request,
					code,
					length
					)
				)
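
	# The resulting log line is in a common-log-like format, roughly
	# (hypothetical values):
	#
	#	192.168.1.5:1027 - - [08/Jan/1999:06:58:44 -0800] "GET / HTTP/1.0" 200 3072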

	# this is not the 'logger' log, but rather the asyncore method.
	def log (self, *ignore):
		pass

	def build_header (self):
		return [
			'Server: Medusa/%s'	% VERSION_STRING,
			'Date: %s'			% self.date_time_string (time.time()),
			]

	def generated_content_header (self,
								  path,
								  content_type='text/html',
								  extra_headers=None):
		lines = [
			'Server: Medusa/%s' % VERSION_STRING,
			'Date: %s'			% self.date_time_string (time.time()),
			'Content-Type: %s'	% content_type,
			]

		if extra_headers:
			lines = lines + extra_headers

		return string.join (lines, '\r\n') + '\r\n\r\n'
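
	# The returned header block looks roughly like this (hypothetical
	# timestamp), ready to be pushed ahead of the content itself:
	#
	#	Server: Medusa/1.1.1.1\r\n
	#	Date: Fri, 08 Jan 1999 06:58:44 GMT\r\n
	#	Content-Type: text/html\r\n
	#	\r\n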

	def send_reply (self, code, request='', close=1, message=None, version='1.0'):
		short, long = self.responses[code]
		if message is not None:
			short = message
		self.sent_reply_code = code
		resp = self.response (code, version)
		em = DEFAULT_ERROR_MESSAGE % {
			'code': code,
			'message': short,
			'explain': long
			}
		self.push (resp+'\r\n'+em)
		if close:
			self.close_when_done()
			self.expect_reset = 1
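
	# An extension that wants to refuse a request outright might call
	# this as, say (hypothetical handler code):
	#
	#	channel.send_reply (403, channel.request, close=1)
	#
	# which pushes the status line, a blank line and the canned HTML
	# error body, then arranges for the connection to close once the
	# reply has been sent.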

	# Table mapping response codes to messages; entries have the
	# form {code: (shortmessage, longmessage)}.
	# See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
	responses = {
		200: ('OK', 'Request fulfilled, document follows'),
		201: ('Created', 'Document created, URL follows'),
		202: ('Accepted',
		      'Request accepted, processing continues off-line'),
		203: ('Partial information', 'Request fulfilled from cache'),
		204: ('No response', 'Request fulfilled, nothing follows'),
		
		301: ('Moved', 'Object moved permanently -- see URI list'),
		302: ('Found', 'Object moved temporarily -- see URI list'),
		303: ('Method', 'Object moved -- see Method and URL list'),
		304: ('Not modified',
		      'Document has not changed since given time'),
		
		400: ('Bad request',
		      'Bad request syntax or unsupported method'),
		401: ('Unauthorized',
		      'No permission -- see authorization schemes'),
		402: ('Payment required',
		      'No payment -- see charging schemes'),
		403: ('Forbidden',
		      'Request forbidden -- authorization will not help'),
		404: ('Not found', 'Nothing matches the given URI'),

		500: ('Internal error', 'Server got itself in trouble'),
		501: ('Not implemented',
		      'Server does not support this operation'),
		502: ('Service temporarily overloaded',
		      'The server cannot process the request due to a high load'),
		503: ('Gateway timeout',
		      'The gateway server did not receive a timely response'),
		}
	
class http_server (asyncore.dispatcher):

	cgi_bin_dirs = []

	channel_class = http_channel

	VERSION_STRING = VERSION_STRING
	SERVER_IDENT = 'HTTP Server (V%s)' % VERSION_STRING

	bytes_in = 0
	bytes_out = 0

	def __init__ (self, root, port=80):
		if not os.path.isdir (root):
			raise TypeError, 'root argument must be a directory'
		if (type(port) == type('')):
			port = string.atoi (port)
		self.root = root
		self.port = port
		asyncore.dispatcher.__init__ (self)
		self.total_hits = 0
		self.total_requests = 0
		self.cache_hits = 0
		self.files_delivered = 0
		self.cgi_bin_hits = 0
		self.children = []
		self.create_socket (socket.AF_INET, socket.SOCK_STREAM)

		# try to re-use a server port if possible
		try:
			self.socket.setsockopt (
				socket.SOL_SOCKET, socket.SO_REUSEADDR,
				self.socket.getsockopt (socket.SOL_SOCKET, socket.SO_REUSEADDR) | 1
				)
		except:
			pass

		self.bind (('', port))
		self.listen (5)
		host, port = self.socket.getsockname()
		self.server_name = HOSTNAME
		self.server_port = port
		sys.stdout.write (
			'%s started at %s'
			'\n\tHostname: %s'
			'\n\tRoot: %s'
			'\n\tPort: %d'
			'\n\tCGI directories: %s\n' % (
				self.SERVER_IDENT,
				time.ctime(time.time()),
				HOSTNAME,
				root,
				port,
				repr(self.cgi_bin_dirs)
				)
			)
		self.start_time = time.time()
		self.extensions = []

	logger = None

	def update_throughput (self, bytes_in, bytes_out):
		self.bytes_in = self.bytes_in + bytes_in
		self.bytes_out = self.bytes_out + bytes_out

	def handle_read (self):
		pass

	def readable (self):
		return self.accepting

	def handle_connect (self):
		pass

	def handle_accept (self):
		conn, addr = self.accept()
		self.total_hits = self.total_hits + 1
		self.channel_class (self, conn, addr)

	def add_extension (self, extension):
		self.extensions.insert (0, extension)
		
	def remove_extension (self, extension):
		self.extensions.remove (extension)

	def status (self):
		def nice_bytes (n):
			return string.join (mstatus.english_bytes (n))

		return mstatus.lines_producer (
			['<h2>%s</h2>' % self.SERVER_IDENT,
			 '<br>Listening on port %d'				% self.port,
			 '<br>Logging on: %s'					% sanitize(repr(self.logger)),
			 '<ul>',
			 '<li><b>Total Connection Hits:</b> %d'	% self.total_hits,
			 '<li><b>Files Delivered:</b> %d'		% self.files_delivered,
			 '<li><b>Total Bytes In:</b> %s'		% nice_bytes (self.bytes_in),
			 '<li><b>Total Bytes Out:</b> %s'		% nice_bytes (self.bytes_out),
			 '<li><b>Cache Hits:</b> %d'			% self.cache_hits,
			 '<li><b>Open Connections:</b> %d'		% len(asyncore.socket_map),
			 '<li><b>Requests/Connection:</b> %d/%d (%.2f)' % (
				 self.total_requests + 1,
				 self.total_hits,
				 float(self.total_requests+1)/(float(self.total_hits))
				 ),
			 '</ul>',
			 '<p><b>Extension List</b>',
			 '<ul>',
			 ] + mstatus.html_reprs (self.extensions, '<li>') + ['</ul>']
			)

# Default error message
DEFAULT_ERROR_MESSAGE = string.joinfields (
	['<head>',
	 '<title>Error response</title>',
	 '</head>',
	 '<body>',
	 '<h1>Error response</h1>',
	 '<p>Error code %(code)d.',
	 '<p>Message: %(message)s.',
	 '<p>Error code explanation: %(code)s = %(explain)s.',
	 '</body>'
	 ],
	'\r\n'
	)

class file_producer:
	def __init__ (self, channel, file):
		self.done = 0
		self.channel = channel
		self.file = file

	def ready (self):
		return not self.done

	def more (self):
		if self.done:
			return ''
		else:
			data = self.file.read(8192)
			if not data:
				self.file.close()
				del self.file
				scs = self.channel.server
				scs.files_delivered = scs.files_delivered + 1
				self.done = 1
				return ''
			else:
				return data
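
# A handler would typically hand one of these to the channel along with
# a status line and header block; a rough sketch (hypothetical
# 'deliver_file' helper, not part of this module):
#
#	def deliver_file (channel, path):
#		file = open (path, 'rb')
#		channel.push (channel.response (200))
#		channel.push (channel.generated_content_header (path))
#		channel.push_with_producer (file_producer (channel, file))
#		channel.close_when_done()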

def compute_timezone_for_log ():
	if time.daylight:
		tz = time.altzone
	else:
		tz = time.timezone
	if tz > 0:
		neg = 1
	else:
		neg = 0
		tz = -tz
	h, rem = divmod (tz, 3600)
	m, rem = divmod (rem, 60)
	if neg:
		return '-%02d%02d' % (h, m)
	else:
		return '+%02d%02d' % (h, m)
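
# For example, US Eastern Standard Time has time.timezone == 18000
# (five hours west of UTC), which the code above formats as '-0500';
# a zone two hours east of UTC (time.timezone == -7200) comes out as
# '+0200'.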

# if this process runs across a timezone or DST change, the offset
# cached here will be wrong for subsequent log entries.
tz_for_log = compute_timezone_for_log()

ACCEPT = regex.compile ('Accept: \(.*\)', regex.casefold)
# HTTP/1.0 doesn't say anything about the "; length=nnnn" addition
# to this header.  I suppose its purpose is to avoid the overhead
# of parsing dates...
IF_MODIFIED_SINCE = regex.compile (
	'If-Modified-Since: \([^;]+\)\(\(; length=\([0-9]+\)$\)\|$\)',
	regex.casefold
	)
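# For a header like (hypothetical value)
#	'If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT; length=1024'
# group 1 is the date string and group 4 the optional length.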

USER_AGENT = regex.compile ('User-Agent: \(.*\)', regex.casefold)
CONTENT_TYPE = regex.compile ('Content-Type: \(.*\)', regex.casefold)

REQUEST = regex.compile ('\([^ ]+\) \([^ ]+\)\(\( HTTP/\([0-9.]+\)\)$\|$\)')

def get_header (head_reg, lines, group=1):
	for line in lines:
		if head_reg.match (line) == len(line):
			return head_reg.group(group)
	return ''
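
# e.g. (hypothetical header lines):
#
#	get_header (USER_AGENT, ['User-Agent: Mozilla/3.0 (X11; I; FreeBSD)'])
#	=> 'Mozilla/3.0 (X11; I; FreeBSD)'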

def crack_request (r):
	# Returns (command, uri, version); version is None for an old
	# HTTP/0.9-style request line with no version field.  If the line
	# doesn't parse at all we fall off the end and return None, which
	# the caller's exception handler turns into a 500 reply.
	if REQUEST.match (r) == len(r):
		if REQUEST.group(3):
			version = REQUEST.group(5)
		else:
			version = None
		return REQUEST.group (1), REQUEST.group(2), version
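
# e.g. (hypothetical request lines):
#
#	crack_request ('GET /index.html HTTP/1.0')
#	=> ('GET', '/index.html', '1.0')
#	crack_request ('GET /index.html')
#	=> ('GET', '/index.html', None)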

if __name__ == '__main__':
	# usage: python http_server.py <document root> <port>
	# these imports are the companion Medusa modules this demo expects.
	import default_extension
	import filesys
	root = sys.argv[1]
	port = string.atoi (sys.argv[2])
	hs = http_server (root, port)
	# plug in the 'default' extension, serving files from <root>
	# through a filesystem object.
	de = default_extension.default_with_post_extension (
		filesys.os_filesystem (root)
		)
	hs.add_extension (de)
	asyncore.loop()