#
# This file is part of Zaplet
# Copyright 1999 - 2001 Adam Feuer <adamf@pobox.com>
#
# Zaplet is free software; you can redistribute it and/or modify
# it under the terms of the Python License as published by the
# Python Software Foundation, or GNU General Public License as published
# by the Free Software Foundation (either version 2 of the License, or
# (at your option) any later version).
#
# Zaplet is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Zaplet; see the file COPYING-Zaplet. If not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#
# You should have received a copy of the Python License along
# with Zaplet; see the file COPYING.
#
#
# adzapper_engine.py
# reads the adzapper.conf file and sets the adzapper defaults
#
# for use with adzapper filtering proxy

# RCS/CVS "$Id$" keyword string identifying the checked-in revision of this file.
RCS_ID =  '$Id: adzapper_engine.py,v 1.7 2001/09/02 04:47:16 adamf Exp $'

# constants
# The two access-control evaluation orders.  set_acl_order() stores one of
# them in aclOrder and accesscontroller() branches on it.
ALLOWDENY = "allow,deny"
DENYALLOW = "deny,allow"

# Port used when the configuration does not supply one (see set_port()).
DEFAULT_PORT = 51966

import os
import re
import string
import sys
import urlparse
import socket

# adzapper modules
from debug import debug
from config_file.rw_config_file import read_conf, write_conf
from adzapper import blank_gif

# Dotted-quad host address such as "192.168.1.10".  The pattern is used with
# .match(), so only the start of the string is anchored.
numeric_host = re.compile(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+')
# Network prefix such as "10.", "10.0.", "192.168.1." or "192.168.1.*":
# one to three "digits." groups, optionally followed by digits or a single
# '*' wildcard.  (The previous final group, "([[0-9]+\*])?", was a malformed
# character class that never consumed the trailing '*' or digits.)
numeric_network = re.compile(r'[0-9]+\.([0-9]+\.)?([0-9]+\.)?([0-9]+|\*)?')

#########################################################################
#
# adzapper_engine
#

class adzapper_engine:
    """initialize the adzapper_engine object; set all defaults"""

    def __init__(self, adzapper_conf_file):
        """set every default, then read and apply the configuration file

        adzapper_conf_file -- path of the adzapper.conf file; a path that
                              does not start with '/' is taken relative to
                              the current working directory

        All defaults are assigned before read_config_file() is called so
        that a value supplied by the configuration is never overwritten by
        a default afterwards.
        """
        self.adzapperConfigFile = adzapper_conf_file

        # adzapper.conf version defaults to 0.1
        self.version = '0.1'
        self.majorversion = 0
        self.minorversion = 1

        # access control list - lists of IPs we should allow or deny
        # defaults to off
        self.acOn = 0
        # defaults to deny all, then allow specific hosts and networks
        self.aclOrder = DENYALLOW
        self.aclAllow = []
        self.aclDeny = []
        self.aclAllow_re = None
        self.aclDeny_re = None

        self.pid_file = None
        self.web_configuration = 1
        self.web_ui = 1

        # nameserver
        self.nameserver = None

        # port to accept connections on
        self.port = DEFAULT_PORT

        # replacement gif
        self.replacement_gif_name = ''
        self.replacement_gif = blank_gif.default_gif

        # HTTP proxy - default is 0
        self.http_proxy = 0

        # last URL visited-- for zaplet creation wizard
        self.last_url = ''

        # directory the program was started from; relative paths given to
        # set_replacement_graphic() and set_zaplet_dirs() are joined to it
        program = sys.argv[0]
        self.install_dir = os.path.join(os.getcwd(), os.path.split(program)[0])

        # zaplet directories start out empty; set_zaplet_dirs() and
        # set_site_zaplet_dir() fill them in from the configuration
        self.zaplet_dirs = []
        self.site_zaplet_dir = None

        # dev use only.  This default must be in place BEFORE the config
        # file is read: it used to be assigned afterwards, which discarded
        # whatever set_newformat_zaplet_dir() had just stored.
        self.newformat_zaplet_dir = None

        self.confxml = None
        self.confxmlfile = None
        self.read_config_file(self.adzapperConfigFile)
        

    ###############################################
    #
    # read_config_file
    #
    def read_config_file(self,adzapperConfigFile):
        """load adzapper.conf and apply it to this engine

        adzapperConfigFile -- path of the file holding the global adzapper
                              settings (access control, etc.)

        The resolved path is stored in self.confxmlfile and the dictionary
        returned by read_conf() in self.confxml; that dictionary is then
        handed to activate_configuration().
        """

        debug.debug(1,"Initializing adzapper_engine...")

        # a leading '/' marks an absolute path; anything else is resolved
        # against the current working directory
        conf_path = adzapperConfigFile
        if conf_path[0] != '/':
            conf_path = os.path.join(os.getcwd(), conf_path)
        self.confxmlfile = conf_path

        # the file holds the configuration options for adzapper as a whole
        debug.debug(5,"reading adzapper.conf (xml format); filename: %s" % self.confxmlfile)
        parsed = read_conf(self.confxmlfile)
        self.confxml = parsed
        self.activate_configuration(parsed)
        return



    ###############################################
    #
    # activate_configuration
    #
    def activate_configuration(self,confxml):
        """apply a configuration dictionary to this engine

        confxml -- dictionary as read from the config file; every value
                   must have a .contents attribute holding either a single
                   value or a list of objects that each have .contents

        For each key the method named set_<key> is looked up on this object
        and, when it exists and is callable, called with the entry's
        contents (a list entry is passed as the list of its items'
        contents).  Keys without a matching setter are skipped.  Afterwards
        the patterns collected in self.aclAllow / self.aclDeny are compiled
        into self.aclAllow_re / self.aclDeny_re.
        """

        debug.debug(6,"confxml dict:")
        debug.debug(6,"%s" % confxml)

        # for each key, call the set_function for that key
        for key in confxml.keys():
            function_name = "set_%s" % key
            if not hasattr(self, function_name):
                continue

            # getattr() rather than eval(): the key comes straight from the
            # config file and must never be evaluated as Python source
            function = getattr(self, function_name)
            if not callable(function):
                debug.debug(5,"read_config_file: %s is not callable." % function_name)
                continue

            contents = confxml[key].contents
            if type(contents) == type([]):
                # it's a list of itemObj -- hand over the bare contents
                contents = [itemObj.contents for itemObj in contents]
            debug.debug(5,"calling %s(%s)" % (function_name, contents))
            function(contents)

        # initialize the access control: each list becomes one alternation
        if len(self.aclAllow) > 0:
            debug.debug(4,"init acl: %s" % self.aclAllow)
            reStr = '|'.join(self.aclAllow)
            debug.debug(4, "adzapper acl allow: %s" % reStr)
            self.aclAllow_re = re.compile(reStr)

        if len(self.aclDeny) > 0:
            debug.debug(4,"init acl: %s" % self.aclDeny)
            reStr = '|'.join(self.aclDeny)
            debug.debug(4, "adzapper acl deny: %s" % reStr)
            self.aclDeny_re = re.compile(reStr)



    ###############################################
    #
    #
    #
    def set_version(self, version):
        """set the major and minor version numbers of the config file

        version -- string of the form 'MAJOR.MINOR' (a bare 'MAJOR' is read
                   as minor version 0), or None to just query

        The string is parsed before any attribute is changed, so a
        malformed version raises ValueError and leaves the previous version
        in place.

        returns the version string now in effect
        """

        if version is None:
            return self.version

        vlist = version.split('.')
        major = int(vlist[0])
        # tolerate a version without a minor part instead of IndexError
        if len(vlist) > 1:
            minor = int(vlist[1])
        else:
            minor = 0

        self.version = version
        self.majorversion = major
        self.minorversion = minor
        debug.debug(1,'adzapper.conf version: %i.%i' %
                    (self.majorversion, self.minorversion))

        return self.version

    ###############################################
    #
    # 
    #
    def set_block_popups_regex(self, bp_regex):
        """set the regex that is used to remove popups

        bp_regex -- regular expression source text, or None to just query

        The text is stored in self.block_popups_regex and the compiled
        pattern in self.block_popups_re.  Compilation happens before
        anything is stored, so an invalid expression raises re.error and
        leaves the previous setting untouched.

        returns the regex text now in effect (None if none was ever set)
        """

        if bp_regex is None:
            # __init__ does not create this attribute, so a query made
            # before any regex was configured must not raise AttributeError
            return getattr(self, 'block_popups_regex', None)

        compiled = re.compile(bp_regex)
        self.block_popups_regex = bp_regex
        self.block_popups_re = compiled

        debug.debug(3,"block_popups_regex: %s" % self.block_popups_regex)

        return self.block_popups_regex

    ###############################################
    #
    # suggested by Rene Dudfield.
    #
    def set_nameserver(self, nameserver):
        """override the automatically configured nameserver
        (a nameserver given on the commandline overrides this one)

        nameserver -- IP number of the nameserver, or None to just query

        returns the nameserver now in effect; returns None without changing
        anything when an empty value is passed
        """

        if nameserver is None:
            return self.nameserver

        if not len(nameserver):
            debug.debug(1,"nameserver needs an IP number as an argument.")
            return None

        self.nameserver = nameserver
        debug.debug(3,"nameserver: %s" % nameserver)
        return nameserver

    ###############################################
    #
    #
    #
    def set_replacement_graphic(self,file):
        """read in the user-supplied replacement graphic-- currently must be a gif

        file -- filename of the graphic; a name that does not start with
                '/' is taken relative to self.install_dir.  None is ignored.

        The file is read in binary mode and stored in
        self.replacement_gif.data, its size in self.replacement_gif.length
        and its path in self.replacement_gif_name.  Nothing is stored unless
        the whole read succeeds; os.stat()/open() errors propagate.

        returns the resolved filename, or None when no file was given
        """

        if file is None:
            return

        if len(file) < 1:
            debug.debug(1,"replacement_gif needs a filename as an argument.")
            return

        gif_path = file
        if gif_path[0] != '/':
            # not absolute path to file-- add install_dir to it
            gif_path = os.path.join(self.install_dir, gif_path)

        debug.debug(4,"replacement gif filename: %s " % gif_path)

        gif_size = os.stat(gif_path)[6]    # index 6 is st_size
        # 'rb': a gif is binary data, text mode could alter it on read
        gif_file = open(gif_path, 'rb')
        try:
            gif_data = gif_file.read(gif_size)
        finally:
            gif_file.close()

        self.replacement_gif_name = gif_path
        self.replacement_gif.length = len(gif_data)
        self.replacement_gif.data = gif_data

        return self.replacement_gif_name


    ###############################################
    #
    #
    #
    def set_http_proxy(self,http_proxy_url):
        """set the HTTP proxy url
        (this is for setting adzapper to use a proxy server)

        http_proxy_url -- URL of the proxy; None or '' switches the proxy
                          off (self.http_proxy = 0)

        The host named in the URL is resolved with socket.gethostbyname()
        and stored in self.http_proxy_ip; the port, 8080 when the URL names
        none, is stored in self.http_proxy_port.  A "user:password@" prefix
        in the host part is ignored.  Resolution and port-parsing errors
        propagate, and in that case no proxy attribute is changed.

        returns http_proxy_url unchanged
        """

        if http_proxy_url != None and http_proxy_url != '':
            netloc = urlparse.urlparse(http_proxy_url)[1]

            # drop any userinfo so its ':' is not mistaken for the port
            # separator
            at_pos = netloc.rfind('@')
            if at_pos >= 0:
                netloc = netloc[at_pos + 1:]

            # split on the LAST colon; unpacking a plain split(':') raised
            # ValueError as soon as the netloc held more than one colon
            colon_pos = netloc.rfind(':')
            if colon_pos >= 0:
                host = netloc[:colon_pos]
                port = netloc[colon_pos + 1:]
            else:
                host = netloc
                port = ''

            if len(port) == 0:
                port = '8080'

            # do everything that can fail before touching any attribute
            proxy_ip = socket.gethostbyname(host)
            proxy_port = int(port)

            self.http_proxy = 1
            self.http_proxy_url = http_proxy_url
            self.http_proxy_ip = proxy_ip
            self.http_proxy_port = proxy_port

            debug.debug(3,"http proxy server: %s" % self.http_proxy_url)
            debug.debug(3,"http proxy ip, port: %s" % self.http_proxy_ip)
            debug.debug(3,"http proxy ip, port: %d" % self.http_proxy_port)

        else:
            self.http_proxy = 0

        debug.debug(3,"http proxy: %d" % self.http_proxy)
        return http_proxy_url


    ###############################################
    #
    #
    #
    def set_pid_file(self,file):
        """set the pid file; write the pid to this file

        file -- filename to write the process id to; None or '' leaves the
                current setting alone and writes nothing

        Writing is best effort: when the file cannot be opened or written
        the failure is reported through debug and otherwise ignored.

        returns the pid filename now in effect
        """

        if file is not None and file != '':
            self.pid_file = file

            self.pid = "%d" % os.getpid()
            debug.debug(4,"pid: %s " % self.pid)
            try:
                # a separate name for the handle keeps 'file' a string, so
                # the error message below can still be built after a
                # failed write
                pid_handle = open(file, 'w')
                try:
                    pid_handle.write(self.pid)
                finally:
                    pid_handle.close()
            except (IOError, OSError):
                debug.debug(1, "Cannot write adzapper pid file '"+ file + "'\n")

        debug.debug(4,"pid filename: %s " % self.pid_file)
        return self.pid_file


    ###############################################
    #
    #
    #
    def set_zaplet_dirs(self, dirlist):
        """replace the list of zaplet directories

        dirlist -- directory names, kept in the order given; None and empty
                   entries are skipped

        A name starting with '~' is expanded to the user's home directory,
        a name starting with '/' is used as it is, and any other name is
        joined to self.install_dir.  The resolved names are stored in
        self.zaplet_dirs, the names as given in self.zaplet_dirs_unexpanded.

        returns the list of names as given (self.zaplet_dirs_unexpanded)
        """

        resolved_dirs = []
        given_dirs = []
        self.zaplet_dirs = resolved_dirs
        self.zaplet_dirs_unexpanded = given_dirs

        for entry in dirlist:
            if entry == None or len(entry) == 0:
                continue

            lead = entry[0]
            if lead == '~':
                # expand to user's home directory
                resolved = os.path.expanduser(entry)
            elif lead == '/':
                resolved = entry
            else:
                # not absolute path-- add it to installdir
                resolved = os.path.join(self.install_dir, entry)

            resolved_dirs.append(resolved)
            given_dirs.append(entry)

        debug.debug(3,"zaplet_dirs: %s" % self.zaplet_dirs)

        return self.zaplet_dirs_unexpanded

    ###############################################
    #
    #
    #
    def set_site_zaplet_dir(self, zapletdir):
        """set the site zaplet directory- the dir where locally modified zaplets are kept

        zapletdir -- directory name; '~' names are expanded to the user's
                     home directory, other names not starting with '/' are
                     made absolute with os.path.abspath().  None leaves the
                     setting alone; an empty name is reported and ignored.

        returns the site zaplet directory now in effect
        """

        if zapletdir is not None:
            if len(zapletdir) == 0:
                debug.debug(1,"site_zaplet_dir needs a directory as an argument.")
            else:
                lead = zapletdir[0]
                if lead == '~':
                    # expand to user's home directory
                    self.site_zaplet_dir = os.path.expanduser(zapletdir)
                elif lead == '/':
                    self.site_zaplet_dir = zapletdir
                else:
                    # not absolute path-- expand it.
                    self.site_zaplet_dir = os.path.abspath(zapletdir)

        debug.debug(3,"site_zaplet_dir: %s" % self.site_zaplet_dir)
        return self.site_zaplet_dir

    ###############################################
    #
    #
    #
    def set_newformat_zaplet_dir(self, zapletdir):
        """set the directory where new format zaplets are kept
        (development use only)

        zapletdir -- directory name, resolved like this: a leading '~' is
                     expanded to the user's home directory, a leading '/'
                     is kept, anything else goes through os.path.abspath().
                     None leaves the setting alone; an empty name is
                     reported and ignored.

        returns the new format zaplet directory now in effect
        """

        if zapletdir is None:
            pass
        elif len(zapletdir) < 1:
            debug.debug(1,"newformat_zaplet_dir needs a directory as an argument.")
        else:
            if zapletdir[0] == '~':
                # expand to user's home directory
                resolved = os.path.expanduser(zapletdir)
            elif zapletdir[0] != '/':
                # not absolute path-- expand it.
                resolved = os.path.abspath(zapletdir)
            else:
                resolved = zapletdir
            self.newformat_zaplet_dir = resolved

        debug.debug(3,"newformat_zaplet_dir: %s" % self.newformat_zaplet_dir)
        return self.newformat_zaplet_dir

    ###############################################
    #
    #
    #
    def set_port(self, port):
        """set the port adzapper will accept connections on

        port -- an integer, a string holding a decimal integer, or None to
                go back to DEFAULT_PORT; any other type is reported and the
                current port is kept

        returns the port now in effect
        """

        if port is None:
            # default
            self.port = DEFAULT_PORT
        elif type(port) is type(123):
            self.port = port
        elif type(port) is type(''):
            self.port = int(port, 10)
        else:
            debug.debug(1,"adzapper_port needs an integer as an argument")

        debug.debug(3,"port: %s" % self.port)
        return self.port



    ###############################################
    #
    #
    #
    def set_block_accept_encoding_gzip(self, arg):
        """block Accept-encoding: gzip HTTP headers-
           allows filtering pages that would otherwise be compressed

        arg -- 'on'/'1' or 'off'/'0' (any letter case); None changes
               nothing, any other word is reported as a syntax error

        The result is stored in self.block_accept_encoding_gzip as 1 or 0.

        returns arg, lower-cased when it was not None
        """

        if arg != None:
            arg = arg.lower()
            if arg == 'on' or arg == '1':
                self.block_accept_encoding_gzip=1
            elif arg == 'off' or arg == '0':
                self.block_accept_encoding_gzip=0
            else:
                debug.debug(1,"syntax error: block_accept_encoding_gzip needs 'on' or 'off' as keywords.")

        # __init__ does not create this attribute, so it may still be
        # missing here when arg was None or not a valid keyword
        debug.debug(3,"block_accept_encoding_gzip: %s"
                    % getattr(self, 'block_accept_encoding_gzip', None))
        return arg




    ###############################################
    #
    #
    #
    def set_web_configuration(self, arg):
        """switch the web-based configuration on or off

        arg -- 'on'/'1' or 'off'/'0' (any letter case); None changes
               nothing, any other word is reported as a syntax error

        returns arg, lower-cased when it was not None
        """

        if arg is not None:
            arg = arg.lower()
            if arg in ('on', '1'):
                self.web_configuration = 1
            elif arg in ('off', '0'):
                self.web_configuration = 0
            else:
                debug.debug(1,"syntax error: web configuration needs either 'on' or 'off' as keywords.")

        debug.debug(3,"web configuration: %s" % self.web_configuration)
        return arg


    ###############################################
    #
    #
    #
    def set_web_ui(self, arg):
        """switch the web user interface on or off

        arg -- 'on'/'1' or 'off'/'0' (any letter case); None changes
               nothing, any other word is reported as a syntax error

        returns arg, lower-cased when it was not None
        """

        if arg is not None:
            arg = arg.lower()
            if arg in ('on', '1'):
                self.web_ui = 1
            elif arg in ('off', '0'):
                self.web_ui = 0
            else:
                debug.debug(1,"syntax error: web interface needs either 'on' or 'off' as keywords.")

        debug.debug(3,"web user interface: %s" % self.web_ui)
        return arg


    ###############################################
    #
    #
    #
    def set_accesscontrol(self, arg):
        """switch the access control on or off

        arg -- 'on'/'1' or 'off'/'0' (any letter case); any other word is
               reported as a syntax error and self.acOn is left alone

        returns the lower-cased arg, or None when arg was None
        """

        if arg is None:
            return None

        arg = arg.lower()
        if arg in ('on', '1'):
            self.acOn = 1
        elif arg in ('off', '0'):
            self.acOn = 0
        else:
            debug.debug(1,"syntax error: accesscontrol needs either 'on' or 'off' as keywords.")

        debug.debug(3,"accesscontrol: %s" % self.acOn)
        return arg


    ###############################################
    #
    #
    #
    def set_acl_order(self, arg=None):
        """choose the order in which the allow and deny lists are consulted

        arg -- "allow,deny" or "deny,allow" (any letter case); any other
               string is reported as a syntax error.  None and non-string
               values change nothing.

        returns the order now in effect (ALLOWDENY or DENYALLOW)
        """

        if arg is not None and type(arg) == type(''):
            wanted = arg.lower()
            if wanted == 'allow,deny':
                self.aclOrder = ALLOWDENY
            elif wanted == 'deny,allow':
                self.aclOrder = DENYALLOW
            else:
                debug.debug(1,"syntax error: order must be followed by \"allow,deny\" or \"deny,allow\"")

        debug.debug(3,"access control order: %s" % self.aclOrder)
        return self.aclOrder


    ###############################################
    #
    #
    #
    def set_acl_allow_from(self,args):
        """add access control patterns to the Allow acl

        args -- list of allow strings; None does nothing.  Each entry is one of
                  a numeric host, e.g. '192.168.1.10'
                  a numeric network, e.g. '192.168.1.' or '192.168.1.*'
                  'all'  - allow every address
                  'from' - ignored, allows apache-like syntax
                Any other entry is only reported through debug.

        The entries are turned into regex source strings and appended to
        self.aclAllow; activate_configuration() compiles them later.

        returns args unchanged
        """

        if args == None:
            return

        for arg in args:
            debug.debug(1,"addacl- arg: %s" % arg)
            if arg == None:
                continue

            if numeric_host.match(arg):
                debug.debug(1,"addacl- numeric host: %s" % arg)
                # it's a literal string, so we need to escape it.  The
                # compiled acl is applied with .match(), i.e. as a prefix
                # test; the lookahead stops '10.0.0.1' from also matching
                # '10.0.0.10' or '10.0.0.123'.
                arg = re.escape(arg) + '(?![0-9])'
                self.aclAllow.append(arg)
            elif numeric_network.match(arg):
                debug.debug(1,"addacl- numeric network: %s" % arg)
                # if there is a '*' at the end, get rid of it
                if arg[-1] == '*':
                    arg = arg[:-1]
                    debug.debug(1,"addacl- numeric network (chopped): %s" % arg)
                # it's a literal string, so we need to escape it.
                arg = re.escape(arg)
                self.aclAllow.append(arg)
            else:
                debug.debug(1,"addacl- no match: %s" % arg)
                if arg == 'all':
                    self.aclAllow.append(arg)
                elif arg == 'from':
                    pass # allow apache-like syntax
                debug.debug(1,"addacl- literal: %s" % arg)

        if 'all' in self.aclAllow:
            debug.debug(4,"addacl: found all")
            self.aclAllow = ['.*'] # match any IP address

        return args


    ###############################################
    #
    #
    #
    def set_acl_deny_from(self,args):
        """add access control patterns to the Deny acl

        args -- list of deny strings; None does nothing.  Each entry is one of
                  a numeric host, e.g. '192.168.1.10'
                  a numeric network, e.g. '192.168.1.' or '192.168.1.*'
                  'all'  - deny every address
                  'from' - ignored, allows apache-like syntax
                Any other entry is only reported through debug.

        The entries are turned into regex source strings and appended to
        self.aclDeny; activate_configuration() compiles them later.

        returns args unchanged
        """

        if args == None:
            return

        for arg in args:
            debug.debug(1, "addacl- arg: %s" % arg)
            if arg == None:
                continue

            if numeric_host.match(arg):
                debug.debug(1,"addacl- numeric host: %s" % arg)
                # it's a literal string, so we need to escape it (an
                # unescaped '.' matches any character).  The compiled acl
                # is applied with .match(), i.e. as a prefix test; the
                # lookahead stops '10.0.0.1' from also matching
                # '10.0.0.10' or '10.0.0.123'.
                arg = re.escape(arg) + '(?![0-9])'
                self.aclDeny.append(arg)
            elif numeric_network.match(arg):
                debug.debug(1,"addacl- numeric network: %s" % arg)
                # if there is a '*' at the end, get rid of it
                if arg[-1] == '*':
                    arg = arg[:-1]
                debug.debug(1,"addacl- numeric network (chopped): %s" % arg)
                # it's a literal string, so we need to escape it.
                arg = re.escape(arg)
                self.aclDeny.append(arg)
            else:
                if arg == 'all':
                    self.aclDeny.append(arg)
                elif arg == 'from':
                    pass # allow apache-like syntax
                debug.debug(1,"addacl- literal: %s" % arg)

        if 'all' in self.aclDeny:
            debug.debug(4, "addacl: found all")
            self.aclDeny = ['.*'] # match any IP address

        return args

    ###############################################
    #
    #
    #
    def check_allow(self,requesterIP):
        """tell whether requesterIP is on the allow list

        returns 1 when self.aclAllow_re matches the start of requesterIP,
        0 otherwise; with no allow pattern compiled nothing matches, so
        nothing is allowed
        """

        allow_re = self.aclAllow_re
        if allow_re is None:
            return 0
        if allow_re.match(requesterIP):
            return 1
        return 0

    ###############################################
    #
    #
    #
    def check_deny(self,requesterIP):
        """tell whether requesterIP is on the deny list

        returns 1 when self.aclDeny_re matches the start of requesterIP,
        0 when it does not; with no deny pattern compiled everything
        counts as matching, so everything is denied
        """

        deny_re = self.aclDeny_re
        if deny_re is None:
            return 1
        if deny_re.match(requesterIP):
            return 1
        return 0


    ###############################################
    #
    #  access controller - external interface!
    #
    def accesscontroller(self,requesterIP):
        """decide whether an IP address may use the adzapper
        (this is the external interface of the access control)

        returns 1 if requesterIP should be blocked, 0 otherwise; with
        access control switched off (self.acOn == 0) nothing is blocked
        """

        debug.debug(4, "requesterIP: [%s]" % requesterIP)
        if self.acOn == 0:
            return 0

        if self.aclOrder == ALLOWDENY:
            # check allow first, then deny
            if not self.check_allow(requesterIP):
                return 1 # not allowed == denied
            if self.check_deny(requesterIP):
                return 1 # denied
            return 0 # not denied == allowed

        # self.aclOrder == DENYALLOW
        # check deny first, then allow
        if not self.check_deny(requesterIP):
            return 0 # not denied == allowed
        if self.check_allow(requesterIP):
            return 0 # allowed
        return 1 # not allowed == denied
