#!/usr/local/bin/python
#
# adzapper
# Copyright 1999 - 2001 Adam Feuer
# Adam Feuer <adamf@pobox.com>
#
# This file is part of adzapper.
#
# adzapper is free software; you can redistribute it and/or modify
# it under the terms of the Python License, as published by the
# Python Software Foundation.
#
# adzapper is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# Python License for more details.
#
# You should have received a copy of the Python License along
# with adzapper; see the file COPYING.
#
#
# http_proxy.py is the filtering http proxy module for adzapper
#
# Revision identifier for this module, in RCS/CVS "$Id$" keyword format.
RCS_ID =  '$Id: http_proxy.py,v 1.7 2001/09/02 04:47:16 adamf Exp $'

# python standard library modules
import socket
import string
import time
import re
import sys
import os
import getopt

# ZServer/medusa module
import asyncore
import asynchat


# adzapper and zaplet modules
from debug import debug
from adzapper import blank_gif,platform_dependent,adzapper_engine
from zaplet import zaplet_engine

# other constants

# How many times a failed host lookup is retried before the
# "host not found" page is sent (see http_proxy_producer.resolver_callback).
MAX_RESOLVE_TRIES = 2

# HTML body sent when the requester fails access control.
ACCESSCONTROL_ERROR = """<html>
<head>
<title>Error response</title>
</head>
<body>
<h1>Access Control Error</h1>
<p>
You are not authorized to access this server.
</body>
</html>
"""

# HTML body sent when the target host could not be resolved.
HOSTNOTFOUND_ERROR = """<html>
<head>
<title>Error response</title>
</head>
<body>
<h1>Host Not Found Error</h1>
<p>
The IP address for this host could not be resolved,
or another error occurred.
</body>
</html>
"""

# Matches a host that is *entirely* a dotted-quad IP address.  The pattern
# is anchored so that a hostname which merely contains a dotted quad
# (e.g. '1.2.3.4.example.com') still goes through the resolver.
re_numeric_host = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$')

# Finds the Content-Length header in a block of request headers; group 1 is
# the decimal length.  Whitespace after the colon is optional.
re_content_length = re.compile(r'^Content-length:[ \t]*(\d+)', re.M | re.I)


class proxy_collector:
    """Relays request body data (e.g. POST data) to a proxy producer.

    Data that arrives before the producer has sent the request headers
    (producer.sent_headers is not 1) is held in self.buffer; afterwards
    it is handed straight to producer.push().

    Attributes:
      bytes_pushed -- bytes handed to the producer so far, including the
                      CRLFs that push() appends itself
      buffer       -- data received before the headers were sent
      producer     -- object providing sent_headers, request and push();
                      set to None by close()
    """

    def __init__(self, producer):
        self.bytes_pushed = 0
        self.buffer = ''
        self.producer = producer

    def __del__(self):
        self.close()

    def close(self):
        """Drop the producer reference (eliminates the circular reference)."""
        self.producer = None

    def collect_incoming_data(self, incoming_data):
        """Forward incoming_data to the producer, or buffer it until the
        producer has sent its headers."""
        debug.debug(4, "proxy_collector: collecting %d bytes" % len(incoming_data))
        if self.producer is None:
            # close() has already detached us; there is nobody to relay to.
            debug.debug(4, "proxy_collector: closed, dropping data.")
            return
        debug.debug(4, "sent_headers: %d" % self.producer.sent_headers)

        if self.producer.sent_headers == 1:
            # headers are out already, so just push the data through
            debug.debug(4, "collect_incoming_data- pushing data.")
            self.push(incoming_data)
            debug.debug(4, "collect_incoming_data- content_length: %d  self.bytes_pushed: %d" %
                        (self.producer.request.content_length, self.bytes_pushed))
        else:
            # save it; push_buffer() sends it later
            debug.debug(4, "proxy_collector: collecting into buffer.")
            self.buffer = self.buffer + incoming_data

    def push_buffer(self):
        """Push everything collected so far and empty the buffer."""
        debug.debug(4, "proxy_collector: pushing from buffer.")
        self.push(self.buffer)
        self.buffer = ''

    def found_terminator(self):
        """Relay (or buffer) a blank-line terminator, '\\r\\n\\r\\n'."""
        if self.producer is None:
            # close() has already detached us; there is nobody to relay to.
            debug.debug(4, "proxy_collector: closed, dropping terminator.")
            return
        if self.producer.sent_headers == 1:
            debug.debug(4, "proxy_collector: pushing terminator.")
            self.push('\r\n\r\n')
        else:
            debug.debug(4, "proxy_collector: adding terminator to buffer.")
            self.buffer = self.buffer + '\r\n\r\n'

    def push(self, data):
        """Hand data to the producer and account for it in bytes_pushed.

        Once bytes_pushed reaches request.content_length - 2 a CRLF is
        pushed after the data and counted as two more bytes.
        """
        self.bytes_pushed = self.bytes_pushed + len(data)
        self.producer.push(data)
        # check to see if we are at the end...
        #   need to add the final CRLF
        # NOTE(review): once the threshold is reached every later push()
        # appends another CRLF as well -- confirm this is intended.
        if self.bytes_pushed >= self.producer.request.content_length - 2:
            self.producer.push('\r\n')
            self.bytes_pushed = self.bytes_pushed + 2



class http_proxy_handler:
    """this takes requests from http_server and figures out what to do with them--
       making a new http_proxy_producer for each new request, and handing control
       over to it."""

    # groups:  1=host  2=port (including the leading ':', or None)  3=uri
    url_re = re.compile('http://([^/:]+)(:[0-9]+)?([^ ]*).*', re.I)

    def __init__(self, resolver=None, combined_engine=None):
        # both objects are simply passed on to every http_proxy_producer
        self.resolver = resolver
        self.combined_engine = combined_engine

    def match(self, request):
        """Return 1 if request.uri is an absolute http:// URL, else 0.

        The match object is kept in self.uri_match_obj for handle_request().
        The pattern is applied at the start of the URI only, so a local URI
        that merely contains a URL (for example in its query string) is not
        mistaken for a proxy request.
        """
        self.uri_match_obj = self.url_re.match(request.uri)
        if self.uri_match_obj is None:
            return 0
        # group 1 (the host) is mandatory in the pattern, so any match has it
        return 1

    def handle_request(self, request):
        """Start proxying request; match() must have accepted it just before."""
        # turn on the proxy version of the writable predicate
        request.channel.writable_for_proxy = 1

        # fix http_request object... :-)
        request.log_info = debug.log_info
        request.log = debug.log

        # our task: feed the request object a producer that will either
        # answer it correctly, or produce an error message.
        http_proxy_producer(
            self.uri_match_obj.group(1),  # host
            self.uri_match_obj.group(2),  # port
            self.uri_match_obj.group(3),  # uri
            request,
            self.resolver,
            self.combined_engine)


class http_proxy_producer (asynchat.async_chat):
    """http_proxy_handler makes a new http_proxy_producer for each new http_proxy
    request.
    
    this producer implements two interfaces: the async_chat interface,
    and the 'stallable producer' interface.  when this object is at the
    front of an http channel's producer fifo, it will affect the channel's
    response to the 'writable' predicate, depending on whether or not
    any proxy data has accumulated."""
    

    def __init__ (self, host, port, uri, request, resolver, combined_engine):
        """Start answering one proxy request.

        Arguments:
          host -- host name or dotted-quad address taken from the URL
          port -- None, a string such as ':8080' (leading colon included),
                  or an integer
          uri  -- the part of the URL that follows host and port
          request -- the request being answered; its content_length and
                     collector attributes are (re)set here
          resolver -- object providing resolve(host, callback)
          combined_engine -- object with .adzapper and .zaplet attributes
                     (either may be None), or None for no access control,
                     blocking, filtering or upstream proxy

        The constructor answers the request itself (and returns early) when
        access control rejects the requester or the URL is blocked;
        otherwise it hands over to resolver_callback, directly or through
        the resolver.
        """
        # default to no blocking, no filtering and no popup blocking
        self.block = 0
        self.filter_list = None
        self.block_popups = 0

        # buffer for filtering, if necessary
        self.incoming_buffer = ''
        self.outgoing_buffer = ''

        # initialised here so stalled() can be called before handle_close()
        self.done = 0
        self.sent_headers = 0
        self.request = request

        self.resolver = resolver
        self.resolve_tries = 0
        self.combined_engine = combined_engine

        # adzapper stays None whenever there is no engine to consult, so the
        # code below needs only one kind of check
        adzapper = None
        if combined_engine is None:
            debug.debug(2,"combined_engine == None, no access control")
        else:
            adzapper = combined_engine.adzapper
            if adzapper is None:
                debug.debug(2,"combined_engine.adzapper == None, no access control")
            else:
                # access control - check requester's IP against our list of valid IPs
                debug.debug(2,"combined_engine.adzapper ok, checking access control...")
                requesterIP = request.channel.socket.getpeername()[0]
                if adzapper.accesscontroller(requesterIP):
                    self.send_accesscontrol_error()
                    return

        # ok, passed access control-- now let's start getting the URL
        self.host = host
        self.uri = uri

        # set the content length to 0 (this is for POST data)
        self.request.content_length = 0

        # install a collector to collect post data, if there is any
        #   need to pass it our self object, so it can know about
        #   the self.request object and the self.push method
        #   warning! circular reference!!
        self.request.collector = proxy_collector(self)

        if re_numeric_host.search(host) is None:
            self.resolved = 0
            self.ip = None
        else:
            # the host already is an IP address; no lookup needed
            debug.debug(6,"http_proxy: resolved==1")
            self.resolved = 1
            self.ip = host

        if port is None:
            self.port = 80
            portstr = ''
        else:
            if isinstance(port, type('')):
                # skip the leading ':'
                self.port = int(port[1:])
            elif isinstance(port, type(1)):
                self.port = port
            else:
                # this shouldn't happen
                self.port = 80
                debug.debug(4,"port: unknown type.")
            portstr = str(self.port)

        if portstr == '':
            self.url = 'http://' + host + uri
        else:
            self.url = 'http://' + host + ':' + portstr + uri

        debug.debug(2, "adzapper: about to block url: %s" % self.url)

        if adzapper is not None:
            # save the last URL visited for the zaplet creation wizard
            adzapper.last_url = self.url

        if combined_engine is not None:
            zaplet = combined_engine.zaplet
            if zaplet is not None:
                # zaplet engine exists...
                [self.block, self.block_popups, self.filter_list] = zaplet.urlCheckBlockAndFilter(self.url)
                if self.block != 0:
                    replacement_gif = None
                    if adzapper is not None:
                        replacement_gif = adzapper.replacement_gif
                    self.send_replacement_gif(replacement_gif)
                    return
            else:
                # no zaplet_engine object-- block nothing.
                debug.debug(2,"zaplet == None, no urlblocker")
        else:
            # no combined_engine object-- block nothing.
            debug.debug(2,"combined_engine == None, no urlblocker")

        # using another proxy to access the internet?
        if adzapper is None or adzapper.http_proxy == 0:
            # no proxy
            if not self.resolved:
                self.resolver.resolve (self.host, self.resolver_callback)
            else:
                debug.debug(6,"http_proxy: already resolved, going straight to callback.")
                self.resolver_callback (self.host, 10, self.ip)
        else:
            # we are using a proxy
            self.ip = adzapper.http_proxy_ip
            self.port = adzapper.http_proxy_port
            self.resolver_callback (host, 10, self.ip) # host and 10 are just placeholders


    def resolver_callback (self, host, ttl, ip):
        """Continue once the address of the target is known (or not).

        host and ttl are accepted but not used; ip is the address to
        connect to, or None when the lookup failed.  A failed lookup is
        retried, after dropping any resolver cache entry for self.host,
        until MAX_RESOLVE_TRIES retries have been made; then the
        host-not-found page is sent.  With an address in hand the
        async_chat machinery is initialised and a connection to
        (ip, self.port) is started.
        """
        if ip is None:
            debug.debug(6,"http_proxy- resolver_callback: ip=None.")
            if self.resolve_tries < MAX_RESOLVE_TRIES:
                debug.debug(6,"http_proxy- resolving again")
                self.resolve_tries += 1
                cache = self.resolver.cache
                if cache.has_key(self.host):
                    # a stale entry would only hand back the same failure
                    debug.debug(6,"http_proxy- resolver cache entry: %s %s %s" % cache[self.host])
                    debug.debug(6,"http_proxy- removing resolver cache entry.")
                    del cache[self.host]
                self.resolver.resolve (self.host, self.resolver_callback)
            else:
                # out of retries: give up and tell the browser
                debug.debug(6,"http_proxy- sending hostnotfound error.")
                self.resolved = 1
                self.send_hostnotfound_error()
            return

        # we have an address: set up the outgoing connection
        asynchat.async_chat.__init__ (self)
        self.create_socket (socket.AF_INET, socket.SOCK_STREAM)
        self.resolved = 1
        self.ip = ip
        # no terminator: data is handed over as it arrives
        self.set_terminator(None)
        self.connect ((self.ip, self.port))


    def collect_incoming_data (self, data):
        """Take a chunk of response data from the remote server.

        If the browser-side channel has a 'shut' attribute the transfer is
        abandoned via handle_close().  Otherwise the data is pushed straight
        to the browser, or appended to self.incoming_buffer when a filter
        list or popup blocking is active.
        """
        browser_channel = self.request.channel

        # the channel to the browser has been closed (this happens when the
        # user hits the 'stop' button); stopping here avoids exceptions from
        # writing to a closed socket
        # (suggested by Alexey Marinichev <lyosha@lyosha.transas.com> )
        if hasattr(browser_channel,'shut'):
            debug.debug(5, 'no more data!')
            self.handle_close()
            return

        debug.debug(1,'http_proxy_producer: pushing %d bytes' % len(data))
        filtering = self.filter_list != None or self.block_popups != 0
        if filtering:
            # save the data until we have it all
            self.incoming_buffer = self.incoming_buffer + data
        else:
            browser_channel.push (data)
        

    def handle_connect (self):
        """Send the request line and headers once the outgoing socket is
        connected, then flush any request body collected in the meantime.

        The request line carries the full URL when an upstream proxy is
        configured (adzapper.http_proxy != 0) and the bare URI otherwise;
        an empty URI is sent as '/'.  The request is always sent as
        HTTP/1.0.
        """
        debug.debug(5,'handle_connect: connected')

        adzapper = None
        if self.combined_engine is not None:
            adzapper = self.combined_engine.adzapper

        if adzapper is not None and adzapper.http_proxy != 0:
            # we are using a proxy: it needs the absolute URL
            request_string = self.url
        else:
            # we're not using a proxy; 'http://host' with no path leaves
            # the URI empty, which would make the request line invalid
            request_string = self.uri or '/'

        # filter the headers- only do this if there is a filter active for this zaplet
        # and block_accept_encoding_gzip is activated
        new_headers = self.request.header
        if (self.filter_list is not None and adzapper is not None
                and adzapper.block_accept_encoding_gzip != 0):
            new_headers = self.munge_headers(self.request.header)

        # for now, just send all the headers, and use HTTP/1.0
        request_line = '%s %s HTTP/1.0' % (self.request.command.upper(),
                                           request_string)
        s = '\r\n'.join([request_line] + new_headers) + '\r\n\r\n'

        debug.debug(4,"")
        debug.debug(4, "Headers: ")
        debug.debug(4,"..................")
        debug.debug(4, new_headers)
        debug.debug(4, "..................")
        debug.debug(4,"")
        debug.debug(4, "handle_connect: %s " % self.request)

        self.push (s)

        # if there is a content_length header, set it--
        # so we will know when to stop reading data
        self.set_content_length(s)
        self.sent_headers = 1

        # if there is any data in the buffer, push it out now
        collector = self.request.collector
        if collector is not None and len(collector.buffer) > 0:
            collector.push_buffer()


    def handle_close (self):
        """Finish the request when the remote server closes the connection.

        Buffered response data is run through popup blocking and/or the
        filter list and pushed to the browser exactly once; then the
        collector, this socket and the browser channel are shut down and
        the references to the request are dropped.  A second call is a
        no-op.
        """
        debug.debug(5, "handle_close: %s " % self.request)

        if self.request is None:
            # already closed; everything below was done the first time
            return

        buffered = 0
        if self.block_popups:
            self.incoming_buffer = self.combined_engine.zaplet.blockPopups(self.incoming_buffer)
            buffered = 1

        if self.filter_list is not None:
            self.incoming_buffer = self.combined_engine.zaplet.doFilter(self.filter_list,self.incoming_buffer)
            buffered = 1

        if buffered:
            # push only after every transformation has run, so the browser
            # does not receive the body twice when both are active
            self.request.channel.push(self.incoming_buffer)
            self.incoming_buffer = ''

        self.collector_close()

        self.done = 1
        self.close()

        channel = self.request.channel
        channel.current_request = None
        channel.close_when_done()
        self.request.channel = None
        self.request = None

    def collector_close(self):
        if self.request:
            if self.request.collector:
                self.request.collector.close()
                self.request.collector = None
        

    
    # this method is called by http_channel.writable_for_proxy(),
    # to let it know whether or not any data is actually available.
    def stalled (self):
	return len(self.outgoing_buffer) == 0 and not self.done

    def more (self):
	# just return the whole buffer.
	r = self.outgoing_buffer
	self.outgoing_buffer = ''
	return r

    # parse the headers and set the content length
    def set_content_length(self,headers):
        """Look for a Content-length header in the string headers and, if
        one is found, store its value as an integer in
        self.request.content_length.  Without a match nothing is changed."""
        debug.debug(4,"set_content_length:")
        found = re_content_length.search(headers)
        if not found:
            return
        debug.debug(1,"set_content_length- m.group(0): %s" % found.group(0))
        digits = found.group(1)
        if not digits:
            return
        debug.debug(1,"set_content_length: m.group(1): %s" % digits)
        self.request.content_length = int(digits)

    def send_replacement_gif (self,gif):
        """Answer the request with a GIF instead of fetching the URL.

        gif must provide .length and .data; when it is None
        blank_gif.default_gif is sent.
        """
        self.collector_close()
        image = gif
        if image == None:
            image = blank_gif.default_gif
        reply = self.request
        reply['Content-Type'] = 'image/gif'
        reply['Content-Length'] = image.length
        reply.push (image.data)
        reply.done()

    def send_accesscontrol_error (self):
        """Answer the request with the ACCESSCONTROL_ERROR HTML page."""
        self.collector_close()
        page = ACCESSCONTROL_ERROR
        reply = self.request
        reply['Content-Type'] = 'text/html'
        reply['Content-Length'] = len(page)
        reply.push (page)
        reply.done()

    def send_hostnotfound_error (self):
        """Answer the request with the HOSTNOTFOUND_ERROR HTML page."""
        self.collector_close()
        page = HOSTNOTFOUND_ERROR
        reply = self.request
        reply['Content-Type'] = 'text/html'
        reply['Content-Length'] = len(page)
        reply.push (page)
        reply.done()

    def munge_headers(self,header_lines):
        """Return a new list of header lines without Accept-Encoding.

        header_lines is a sequence of 'Field: value' strings.  Each line is
        rewritten as 'Field: value' with the value stripped of surrounding
        whitespace.  The field name is compared case-insensitively, the
        original order is kept and repeated fields are all preserved.  A
        line without a colon is passed through unchanged.
        """
        debug.debug(7,"munge_headers- header_lines: %s" % header_lines)

        new_header_lines = []
        for line in header_lines:
            colon = line.find(':')
            if colon < 0:
                # not a 'Field: value' line; leave it alone
                new_header_lines.append(line)
                continue
            field = line[:colon]
            if field.strip().lower() == 'accept-encoding':
                continue
            new_header_lines.append(field + ': ' + line[colon + 1:].strip())

        debug.debug(7,"munge_headers- new_header_lines: %s" % new_header_lines)
        return new_header_lines
            
        
