################################################################
# HtmlTestCase.py -- a library for testing websites
# Copyright (C) 2003 Chris Curvey
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
################################################################

import urllib2
import unittest
import tempfile
import webbrowser
import HTMLParser
import random
import re

import ClientCookie
import HttpTestCase
import ClientForm

class TagNotFoundError(Exception):
    """Raised by HtmlTestCase.followAnchor when no anchor tag qualifies."""
class MultipleTagsError(Exception):
    """
    Raised by HtmlTestCase.followAnchor when several anchors qualify and
    they do not all carry the same href.
    """
class InvalidFieldError(Exception):
    """
    Error type for invalid form fields.

    NOTE(review): nothing in this module raises it; presumably it is kept
    for code that imports it from here -- confirm before removing.
    """

##############################################################
class HtmlTestCase(HttpTestCase.HttpTestCase):
    """
    A test case built on HttpTestCase that knows how to parse
    HTML data into a structure that is useful for testing
    and provides convenience methods for testing.

    Attributes -- these are not private, but should really
    not be modified by anyone else

    self.testId:  random number identifying the test
    self.tags:    empty list created by the constructor (not otherwise
                  used by this class)
    self.parser:  the HtmlTestParser that was used to parse the
                  response from the web server

    The methods below also rely on self.data and self.response.
    submitForm sets both; get() and post() rely on the base class call
    to have populated self.data before it is parsed.
    """

    #############################################################
    def __init__(self, *args):
        """
        constructor.  Passes *args through to HttpTestCase, assigns the
        test id, sets up the tags[] attribute and creates the parser.
        """
        HttpTestCase.HttpTestCase.__init__(self, *args)

        # create a random number to use as the test identifier
        self.testId = random.randint(1, 10000000)
        self.tags = []

        # set up the test parser
        self.parser = HtmlTestParser()

    #############################################################
    def _parseCurrentData(self):
        """
        throws away whatever the parser collected for the previous page
        and parses self.data in its place
        """
        self.parser.clearTags()
        self.parser.feed(self.data)

    #############################################################
    def get(self, url, params=None):
        """
        gets the given URL (with the optional params) from the web server
        and parses the response
        """
        HttpTestCase.HttpTestCase.get(self, url, params)
        self._parseCurrentData()

    def post(self, url, params):
        """
        posts to the given URL with the given params and parses the
        response from the web server
        """
        HttpTestCase.HttpTestCase.post(self, url, params)
        self._parseCurrentData()

    def postMultipart(self, url, params):
        """
        not implemented.  Will be part of a later release of ClientCookie.
        Always raises NotImplementedError.
        """
        raise NotImplementedError

    # getTag() was deprecated in favour of getTags() and is no longer
    # provided.

    ##############################################################
    def getTags(self, type, attrs=None, content=None, debug=0, displays=None):
        """
        gets a (possibly empty) list of tags with the specified
        attributes and content.  All arguments are handed unchanged to
        the parser's getTags().
        """
        return self.parser.getTags(type, attrs, content, debug, displays)

    #############################################################
    def followAnchor(self, attrs=None, content=None, displays=None):
        """
        Utility function to follow an anchor: finds the qualifying <a>
        tag and does a get() on its href.

        Raises TagNotFoundError if it cannot find a qualifying tag, and
        MultipleTagsError if it finds more than one tag that qualifies
        (unless all qualifying tags go to the same URL).

        Note that you lose the response that you had
        when you follow the anchor.  (Unless you've stashed it away
        somewhere.)
        """
        anchors = self.getTags("a", attrs, content, debug=0, displays=displays)

        if not anchors:
            raise TagNotFoundError(
                "no anchor matches attrs=%r content=%r displays=%r"
                % (attrs, content, displays))

        # several matches are acceptable only when they all lead to the
        # same place
        href = anchors[0].getAttr("href")
        for anchor in anchors[1:]:
            otherHref = anchor.getAttr("href")
            if otherHref != href:
                raise MultipleTagsError(
                    "matching anchors point to different URLs: %r and %r"
                    % (href, otherHref))

        self.get(href)

    def responseContainsTag(self, type=None, attrs=None, content=None):
        """
        states whether a given tag exists in the response
        """
        return self.parser.responseContainsTag(type, attrs, content)

    def findForms(self):
        """
        Returns the result of ClientForm.ParseResponse on the current
        response (a list of ClientForm objects, one per form on the page).
        The response is rewound first so it can be read again.
        """
        self.response.seek(0)
        return ClientForm.ParseResponse(self.response)

    def submitForm(self, request):
        """
        submits the results of ClientForm.onClick and parses the results.

        On success self.status is 200 and self.response, self.data,
        self.info and the parser describe the new page.  On an HTTP error
        only self.status is updated (with the error code); the other
        attributes and the parser still describe the previous page.
        """
        try:
            self.response = ClientCookie.urlopen(request)
        except urllib2.HTTPError as e:
            self.status = e.code
        else:
            self.status = 200
            self.data = self.response.read()
            self.info = self.response.info()
            self._parseCurrentData()

    def showInBrowser(self):
        """
        writes out the current data to a temporary file and then sets
        the local web browser loose on it.  The file is left on disk so
        the browser can read it.
        """
        # imported here because the module does not import os at file level
        import os

        # mkstemp creates the file atomically, avoiding the race that
        # tempfile.mktemp() is open to
        handle, tmpFileName = tempfile.mkstemp(suffix=".html")
        tmpFile = os.fdopen(handle, "w")
        try:
            tmpFile.write(self.data)
        finally:
            # close even when the write fails so the descriptor is not leaked
            tmpFile.close()

        webbrowser.open(tmpFileName)

####################################################################
class HtmlTestTag:
    """
    An object representation of a test tag.

    Attributes:
    self.type:  the tag itself (stored exactly as given to the constructor)
    self.attrs:  a list of (name, value) tuples for each attribute.
                 Names are lower-cased; values are lower-cased too, except
                 for "href" values which keep their case.
                 For attributes that do not have values (e.g. "selected"),
                 the value is None
    self.content:  The HTML that is contained within the tag (assuming it
                   is a tag that can contain HTML, like <body> or <a>)
    self.plainText:  the same as self.content, but with all tags removed
    """

    def __init__(self, tag, attrs):
        """
        constructor.  tag is the tag name, attrs is a sequence of
        (name, value) pairs or None.
        """
        self.type = tag

        self.attrs = []
        if attrs is not None:
            for (attr, value) in attrs:
                attr = attr.lower()
                # href values are case sensitive, everything else is not
                if value is not None and attr != "href":
                    value = value.lower()
                self.attrs.append((attr, value))

        self.content = ""
        self.plainText = ""

    def addContent(self, content):
        """
        appends content (HTML) to the tag
        """
        self.content += content

    def addPlainText(self, text):
        """
        appends plain text to the tag.  Note that it is assumed that someone
        else has stripped out the tags
        """
        self.plainText += text

    def genEquivContent(self):
        """
        creates the html equivalent of the starting tag, e.g.
        <a href="foo.html" selected>.  Attribute values are written out
        as stored, without any escaping.
        """
        pieces = ["<" + self.type]
        for (attr, value) in self.attrs:
            pieces.append(" " + attr)
            # valueless attributes are written as the bare name
            if value is not None:
                pieces.append("=\"" + value + "\"")
        pieces.append(">")
        return "".join(pieces)

    def __str__(self):
        return self.genEquivContent()

    ###############################################################
    def contains(self, text):
        """
        determines if the given text appears in the *content* of the tag.
        text is used as a regular expression and matched case-insensitively.
        Returns 1 or 0.
        """
        if re.compile(text, re.IGNORECASE).search(self.content) is None:
            return 0
        return 1

    ##############################################################
    def displays(self, text):
        """
        determines if the given text appears in the plaintext of the tag.
        This is useful for things like finding "Hello, stupid" when the
        HTML contains "Hello, <span name="firstname">stupid</span>.
        text is used as a regular expression and matched case-insensitively.
        Returns 1 or 0.
        """
        if re.compile(text, re.IGNORECASE).search(self.plainText) is None:
            return 0
        return 1

    # a really good question would be "Why did you do it this way
    # and not just make the attributes of the html tag normal
    # attributes of the object?"
    #
    # the answer is that the hasAttr method only returns true
    # if the tag has *both* the attribute and the supplied value.
    # Besides, mixing up HTML attributes with Python attributes
    # seems wrong.
    def hasAttr(self, testAttr):
        """
        determines whether a tag has at least one attribute that matches
        a (name, value) tuple.  This does *not* behave the same way as
        the builtin hasattr().  Both the attribute and the value must
        match to return true.  The comparison follows the same case rules
        the constructor applies (everything but href values is
        lower-cased), and a value of None matches a valueless attribute.
        Returns 1 or 0.
        """
        (wantedName, wantedValue) = testAttr
        wantedName = wantedName.lower()

        # None stands for "attribute without a value" and has no lower()
        if wantedValue is not None and wantedName != "href":
            wantedValue = wantedValue.lower()

        for (attr, value) in self.attrs:
            if attr == wantedName and value == wantedValue:
                return 1
        return 0

    def getAttr(self, testAttr):
        """
        returns the value of a given attribute of a tag, or None when the
        tag has no such attribute (or the attribute has no value).  The
        name is matched case-insensitively because names are stored
        lower-cased.
        """
        wantedName = testAttr.lower()
        for (attr, value) in self.attrs:
            if attr == wantedName:
                return value
        return None

    def getTags(self, type, attrs=None, content=None):
        """
        search for a tag within yourself.  Same idea as the getTags()
        method in HtmlTestCase, but the search covers only the HTML held
        in self.content, which is re-parsed with a fresh HtmlTestParser.
        Returns a (possibly empty) list of HtmlTestTag objects.
        """
        # grab another parser and let it apply the usual matching rules
        parser = HtmlTestParser()
        parser.feed(self.content)
        return parser.getTags(type, attrs, content)

##############################################################
# XXX I really should put some data in the payload of this
# XXX I should also override the close() method so that I can do
# the assertion that the liveTags stack is empty
class ParserConfusedError(Exception):
    """
    Error type reserved for the HTML parser.

    NOTE(review): nothing in this module raises it -- confirm whether it
    is still needed.
    """
class MalformedHtmlError(Exception):
    """
    Raised by HtmlTestParser in strict mode when a start tag shows up
    inside a container it may not appear in, or when an end tag has no
    matching open tag.
    """

class HtmlTestParser(HTMLParser.HTMLParser):
    """
    A parser that takes in an HTML page and creates representations
    that are useful for testing.

    Attributes:
    self.allTags:   every HtmlTestTag seen since the last clearTags(), in
                    document order
    self.liveTags:  stack of the tags that are currently open; data and
                    nested markup are appended to each of them
    self.debug:     when true, progress is printed while parsing
    self.strict:    when true, malformed nesting raises MalformedHtmlError
                    instead of being repaired or ignored
    """

    def __init__(self, debug=0, strict=0, *args):
        """
        constructor.  Any extra positional arguments are handed to
        HTMLParser.HTMLParser.
        """
        HTMLParser.HTMLParser.__init__(self, *args)
        self.allTags = []
        self.liveTags = []
        # NOTE(review): this class never reads entitydefs itself; it may
        # be consulted by the base class -- confirm before changing it.
        self.entitydefs = {"quot": "\"",
                           "amp": "&",
                           "nbsp": " "}
        self.debug = debug
        self.strict = strict

        # The attributes of tags were taken from
        # http://www.w3c.org/TR/html4/index/elements.html

        # a list of empty tags which should never be added to
        # the liveTags stack.  (They stay off the stack simply because
        # they appear in neither of the two lists below.)
        self.emptytags = ["area", "base", "basefont", "br", "col", "frame",
                          "hr", "img", "input", "isindex", "link", "meta",
                          "param"]

        # a list of the tags that require a closing tag.  If you
        # find a close tag for these, keep popping the stack until you find
        # the opening tag.  A close tag without an opening tag, as in
        # "<html></A>", is ignored unless strict is set, in which case
        # MalformedHtmlError is raised.
        self.endtagRequired = ["a", "abbr", "acronym", "address", "applet",
                               "b", "bdo", "big", "blockquote", "button",
                               "caption", "center", "cite", "code", "del",
                               "dfn", "dir", "div", "dl", "em", "fieldset",
                               "font", "form", "frameset", "h1", "h2", "h3",
                               "h4", "h5", "h6", "i", "iframe", "ins", "kbd",
                               "label", "legend", "map", "menu", "noframes",
                               "noscript", "object", "ol", "optgroup",
                               "pre", "q", "s", "samp", "script", "select",
                               "small", "span", "strike", "strong", "style",
                               "sub", "sup", "table", "td", "textarea",
                               "tfoot", "th", "thead", "title", "tt",
                               "u", "ul", "var"]

        # a list of tags that have an optional closing tag.  If we hit
        # a closing tag for one of these, pop the stack until you find
        # the matching opening tag
        self.endtagOptional = ["body", "colgroup", "dd", "dt", "head", "html",
                               "li", "option", "p", "tbody", "td", "tr"]

        # Here's the beginning of the "special rules" section.
        # It appears that the browsers are being smart about picking up
        # dropped </a> tags.  This is a mapping of tags and the tags
        # that they *cannot* be contained in.  So if we find something
        # like <a href="foo">foo<a href="bar">bar, then we have to
        # close out the foo tag when we see the bar tag.

        # this list is under development
        self.cannotContain = {"a": ["a"]}

    def clearTags(self):
        """
        forgets everything about the page parsed so far, so that the
        parser can be fed a new page
        """
        self.allTags = []
        self.liveTags = []
        # also drop whatever the base class still has buffered (e.g. an
        # unfinished tag at the end of the last page); otherwise it would
        # be glued onto the front of the next page
        self.reset()

    def _popLiveTag(self):
        """
        pops the top of the liveTags stack, reporting it in debug mode
        """
        popTag = self.liveTags.pop()
        if self.debug:
            print("just popped " + popTag.type)
        return popTag

    #####################################################
    def handle_starttag(self, tag, attrs):
        """
        records the new tag, repairs the stack of live tags where the
        new tag implies that earlier tags have ended, adds the start tag
        to the content of the tags that are still live and finally makes
        the new tag live (unless it is a tag that cannot have content).
        Raises MalformedHtmlError in strict mode when the tag appears
        inside a container listed for it in cannotContain.
        """
        testTag = HtmlTestTag(tag, attrs)

        if self.debug:
            print("starting " + testTag.genEquivContent())

        self.allTags.append(testTag)

        # if the tag on top of the stack has an optional endtag and is
        # of the same type as the new tag, the new tag closes it.  That
        # should take care of things like <option>foo<option>bar
        if self.liveTags:
            stackTopTag = self.liveTags[-1]
            if stackTopTag.type in self.endtagOptional and stackTopTag.type == tag:
                self._popLiveTag()

        # make sure that we're not setting up an invalid combination.
        # search the stack (bottom up) for a "prohibited container" of
        # this tag.  If there is one, either raise MalformedHtmlError or
        # pop everything back to and including the offending tag.
        if tag in self.cannotContain:
            prohibited = self.cannotContain[tag]
            for depth in range(len(self.liveTags)):
                if self.liveTags[depth].type in prohibited:
                    if self.strict:
                        raise MalformedHtmlError(
                            "<%s> may not appear inside <%s>"
                            % (tag, self.liveTags[depth].type))
                    while len(self.liveTags) > depth:
                        self._popLiveTag()
                    # the stack has been cut below this point, so there
                    # is nothing left to look at
                    break

        # for any tags that are currently in the active list,
        # add equivalent content
        equiv = testTag.genEquivContent()
        for liveTag in self.liveTags:
            liveTag.addContent(equiv)

        if tag in self.endtagOptional or tag in self.endtagRequired:
            self.liveTags.append(testTag)

        if self.debug:
            self.dumpLiveTags()

    #######################################################
    def handle_endtag(self, tag):
        """
        adds the end tag to the content of the enclosing live tags (so
        that tag.content can be parsed again), then pops the stack down
        to and including the matching start tag.

        An end tag that matches no live tag is a stray: it is ignored,
        or raises MalformedHtmlError in strict mode.  End tags that
        arrive while no tag is live are silently dropped.
        """
        if self.debug:
            print("ending " + tag)

        # if we have an end tag but there's nothing in the stack,
        # just return and hope
        if not self.liveTags:
            return

        closable = tag in self.endtagOptional or tag in self.endtagRequired

        # position of the start tag this end tag belongs to: the
        # innermost live tag of the same type, or -1 for a stray
        mateIndex = -1
        if closable:
            for index in range(len(self.liveTags) - 1, -1, -1):
                if self.liveTags[index].type == tag:
                    mateIndex = index
                    break

        # the end tag belongs to the content of the tags that enclose
        # the element being closed, never to the element itself or to
        # the tags that get closed along with it.  For an end tag that
        # closes nothing, everything except the current live tag gets it.
        if mateIndex >= 0:
            enclosing = self.liveTags[:mateIndex]
        else:
            enclosing = self.liveTags[:-1]
        endText = "</" + tag + ">"
        for liveTag in enclosing:
            liveTag.addContent(endText)

        if mateIndex >= 0:
            while len(self.liveTags) > mateIndex:
                self._popLiveTag()
        elif closable and self.strict:
            raise MalformedHtmlError("</%s> has no matching start tag" % tag)

        if self.debug:
            self.dumpLiveTags()

    def handle_data(self, data):
        """
        adds text to the content and the plain text of every live tag
        """
        for liveTag in self.liveTags:
            liveTag.addContent(data)
            liveTag.addPlainText(data)

    ##############################################################
    def getTags(self, type, attrs=None, content=None, debug=0, displays=None):
        """
        Returns a (possibly empty) list of tags with the given values.

        type:      tag name the tag must have
        attrs:     optional list of (name, value) tuples; the tag must
                   have all of them
        content:   optional text that must be found in the tag's content
        debug:     when true, prints every tag whose content is searched
        displays:  optional text that must be found in the tag's plain text
        """
        resultTags = []
        for tag in self.allTags:
            if tag.type != type:
                continue

            # check for the attributes
            attrFound = 1
            if attrs is not None:
                for attr in attrs:
                    # remember hasAttr checks both the name and the value
                    if not tag.hasAttr(attr):
                        attrFound = 0
                        break

            # check for the content
            contentFound = 1
            if content is not None:
                if debug:
                    print("searching in " + tag.genEquivContent())
                    print(tag.content)
                contentFound = tag.contains(content)

            # check for the displayed text
            displaysFound = 1
            if displays is not None:
                displaysFound = tag.displays(displays)

            if attrFound and contentFound and displaysFound:
                resultTags.append(tag)

        # return what we got
        return resultTags

    ##############################################################
    def responseContainsTag(self, type=None, attrs=None, content=None):
        """
        function to determine if the described tag exists in the parsed
        page.  Returns 1 when getTags() finds at least one tag, else 0.
        """
        if self.getTags(type, attrs, content):
            return 1
        return 0

    ###############################################################
    def dumpTags(self):
        """
        dumps a list of all the tags in the parser so far.  Useful for debugging
        """
        print("---- beginning tag dump ---")
        for tag in self.allTags:
            print(tag.genEquivContent())

    def dumpLiveTags(self):
        """
        dumps the stack of live tags as a single text line
        """
        # each entry is followed by a single space, then the line ends
        print("".join(["<" + testTag.type + "> " for testTag in self.liveTags]))
