=begin

= HTML Compact Library

htmlcompact.rb

Version 1.0.2

Copyright (C) 2000 MoonWolf Development

MoonWolf <moonwolf-ruby@moonwolf.com>

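  * Makes HTML documents compact.
    * Removes meaningless whitespace and line breaks.
    * Replaces color attribute values with a shorter form. (#ff0000 => red)
    * Omits the quotation marks around attribute values.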

== Usage

  obj = HTMLSplit.new(html)
  obj.compact

=end

require "htmlsplit"

# Attributes whose value is a color.
# Each row is [attribute name, element names on which that attribute is
# treated as a color]. Rows are looked up with Array#assoc by attribute name.
ColorAttr = [
	# foreground / background
	[ 'color',            %w(hr font basefont) ],
	[ 'bgcolor',          %w(body table tr th td marquee layer ilayer) ],
	# document-wide text and link colors
	[ 'text',             %w(body) ],
	[ 'link',             %w(body) ],
	[ 'vlink',            %w(body) ],
	[ 'alink',            %w(body) ],
	# border colors
	[ 'bordercolor',      %w(table tr th td frameset frame iframe) ],
	[ 'bordercolordark',  %w(table tr th td) ],
	[ 'bordercolorlight', %w(table tr th td) ]
]
# Color name (lower case) => "#rrggbb" value.
# A few misspelled names (cornisik, midium*) are present next to the correctly
# spelled ones; they are kept because the table is used to recognize input.
Colorname_RGB = {
	'aliceblue'            => '#f0f8ff',
	'antiquewhite'         => '#faebd7',
	'aqua'                 => '#00ffff',
	'aquamarine'           => '#7fffd4',
	'azure'                => '#f0ffff',
	'beige'                => '#f5f5dc',
	'bisque'               => '#ffe4c4',
	'black'                => '#000000',
	'blanchedalmond'       => '#ffebcd',
	'blue'                 => '#0000ff',
	'blueviolet'           => '#8a2be2',
	'brown'                => '#a52a2a',
	'burlywood'            => '#deb887',
	'cadetblue'            => '#5f9ea0',
	'chartreuse'           => '#7fff00',
	'chocolate'            => '#d2691e',
	'coral'                => '#ff7f50',
	'cornflowerblue'       => '#6495ed',
	'cornisik'             => '#fff8dc',
	'cornsilk'             => '#fff8dc',
	'crimson'              => '#dc143c',
	'cyan'                 => '#00ffff',
	'darkblue'             => '#00008b',
	'darkcyan'             => '#008b8b',
	'darkgoldenrod'        => '#b8860b',
	'darkgray'             => '#a9a9a9',
	'darkgreen'            => '#006400',
	'darkkhaki'            => '#bdb76b',
	'darkmagenta'          => '#8b008b',
	'darkolivegreen'       => '#556b2f',
	'darkorange'           => '#ff8c00',
	'darkorchid'           => '#9932cc',
	'darkred'              => '#8b0000',
	'darksalmon'           => '#e9967a',
	'darkseagreen'         => '#8fbc8f',
	'darkslateblue'        => '#483d8b',
	'darkslategray'        => '#2f4f4f',
	'darkturquoise'        => '#00ced1',
	'darkviolet'           => '#9400d3',
	'deeppink'             => '#ff1493',
	'deepskyblue'          => '#00bfff',
	'dimgray'              => '#696969',
	'dodgerblue'           => '#1e90ff',
	'firebrick'            => '#b22222',
	'floralwhite'          => '#fffaf0',
	'forestgreen'          => '#228b22',
	'fuchsia'              => '#ff00ff',
	'gainsboro'            => '#dcdcdc',
	'ghostwhite'           => '#f8f8ff',
	'gold'                 => '#ffd700',
	'goldenrod'            => '#daa520',
	'gray'                 => '#808080',
	'green'                => '#008000',
	'greenyellow'          => '#adff2f',
	'honeydew'             => '#f0fff0',
	'hotpink'              => '#ff69b4',
	'indianred'            => '#cd5c5c',
	'indigo'               => '#4b0082',
	'ivory'                => '#fffff0',
	'khaki'                => '#f0e68c',
	'lavender'             => '#e6e6fa',
	'lavenderblush'        => '#fff0f5',
	'lawngreen'            => '#7cfc00',
	'lemonchiffon'         => '#fffacd',
	'lightblue'            => '#add8e6',
	'lightcoral'           => '#f08080',
	'lightcyan'            => '#e0ffff',
	'lightgoldenrodyellow' => '#fafad2',
	'lightgreen'           => '#90ee90',
	'lightgrey'            => '#d3d3d3',
	'lightpink'            => '#ffb6c1',
	'lightsalmon'          => '#ffa07a',
	'lightseagreen'        => '#20b2aa',
	'lightskyblue'         => '#87cefa',
	'lightslategray'       => '#778899',
	'lightsteelblue'       => '#b0c4de',
	'lightyellow'          => '#ffffe0',
	'lime'                 => '#00ff00',
	'limegreen'            => '#32cd32',
	'linen'                => '#faf0e6',
	'magenta'              => '#ff00ff',
	'maroon'               => '#800000',
	'mediumaquamarine'     => '#66cdaa',
	'mediumblue'           => '#0000cd',
	'mediumorchid'         => '#ba55d3',
	'mediumpurple'         => '#9370db',
	'mediumseagreen'       => '#3cb371',
	'mediumslateblue'      => '#7b68ee',
	'mediumspringgreen'    => '#00fa9a',
	'mediumturquoise'      => '#48d1cc',
	'mediumvioletred'      => '#c71585',
	'midiumaquamarine'     => '#66cdaa',
	'midiumseagreen'       => '#3cb371',
	'midiumvioletred'      => '#c71585',
	'midnightblue'         => '#191970',
	'mintcream'            => '#f5fffa',
	'mistyrose'            => '#ffe4e1',
	'moccasin'             => '#ffe4b5',
	'navajowhite'          => '#ffdead',
	'navy'                 => '#000080',
	'oldlace'              => '#fdf5e6',
	'olive'                => '#808000',
	'olivedrab'            => '#6b8e23',
	'orange'               => '#ffa500',
	'orangered'            => '#ff4500',
	'orchid'               => '#da70d6',
	'palegoldenrod'        => '#eee8aa',
	'palegreen'            => '#98fb98',
	'paleturquoise'        => '#afeeee',
	'palevioletred'        => '#db7093',
	'papayawhip'           => '#ffefd5',
	'peachpuff'            => '#ffdab9',
	'peru'                 => '#cd853f',
	'pink'                 => '#ffc0cb',
	'plum'                 => '#dda0dd',
	'powderblue'           => '#b0e0e6',
	'purple'               => '#800080',
	'red'                  => '#ff0000',
	'rosybrown'            => '#bc8f8f',
	'royalblue'            => '#4169e1',
	'saddlebrown'          => '#8b4513',
	'salmon'               => '#fa8072',
	'sandybrown'           => '#f4a460',
	'seagreen'             => '#2e8b57',
	'seashell'             => '#fff5ee',
	'sienna'               => '#a0522d',
	'silver'               => '#c0c0c0',
	'skyblue'              => '#87ceeb',
	'slateblue'            => '#6a5acd',
	'slategray'            => '#708090',
	'snow'                 => '#fffafa',
	'springgreen'          => '#00ff7f',
	'steelblue'            => '#4682b4',
	'tan'                  => '#d2b48c',
	'teal'                 => '#008080',
	'thistle'              => '#d8bfd8',
	'tomato'               => '#ff6347',
	'turquoise'            => '#40e0d0',
	'violet'               => '#ee82ee',
	'wheat'                => '#f5deb3',
	'white'                => '#ffffff',
	'whitesmoke'           => '#f5f5f5',
	'yellow'               => '#ffff00',
	'yellowgreen'          => '#9acd32'
}

# Reverse lookup: "#rrggbb" => color name. Built with Hash#invert, so when
# several names share one value only a single name is kept for that value.
RGB_Colorname = Colorname_RGB.invert

# Shorten a color attribute value.
#
# The value is lower-cased and every character other than 0-9, a-z and '#'
# is removed. A "#rrggbb" value is looked up in RGB_Colorname; any other value
# must be a key of Colorname_RGB.
#
# Returns the color name when one is known and it is shorter than 7
# characters (i.e. shorter than "#rrggbb"), otherwise the "#rrggbb" form.
# Raises RuntimeError ("color error:<normalized value>") when the value is
# neither a 6-digit hex color nor a known color name.
def shortcolor(color)
	key = color.downcase.gsub(/[^0-9a-z#]/,'')
	case key
	when /^#[0-9a-f]{6}$/
		# Already in hex form: see whether a name exists for it.
		rgb = key
		name = RGB_Colorname[rgb]
	else
		# Must be a known name; otherwise the value is rejected.
		rgb = Colorname_RGB[key]
		raise "color error:"+key unless rgb
		name = key
	end
	# Prefer the name only when it is actually shorter than the hex form.
	(name && name.length<7) ? name : rgb
end

# Compact serialization for empty-element tags: attribute values are written
# without quotation marks whenever that is possible.
class EmptyElementTag
	# Returns the tag as an HTML string, "<name attr=value ...>".
	#
	# * Attributes are written in sorted name order.
	# * An attribute whose value is +true+ is written as a bare name.
	# * If ColorAttr lists the attribute for this element, the value is first
	#   shortened with shortcolor (which raises RuntimeError for a value it
	#   does not recognize).
	# * A value made only of letters, digits and . _ : % + - # is written
	#   unquoted; any other value is double-quoted and HTML-escaped.
	def to_s
		return "<#{@name}>" unless @attr
		attrs = @attr.keys.sort.collect{|n|
			v = @attr[n]
			if v==true
				# Valueless attribute: there is nothing to shorten or quote.
				' ' + n
			else
				a = ColorAttr.assoc(n)
				v = shortcolor(v) if a && a[1].include?(@name)
				# \A and \z anchor the whole value; ^ and $ would accept a
				# multi-line value as soon as one of its lines looked safe.
				if v =~ /\A[a-zA-Z0-9._:%+\-#]+\z/
					' ' + n + '=' + v
				else
					' ' + n + '="' + CGI::escapeHTML(v) + '"'
				end
			end
		}
		# join, not to_s: Array#to_s is the inspect form on Ruby 1.9 and later.
		"<" + @name + attrs.join + ">"
	end
end
# Compact serialization for start tags: attribute values are written without
# quotation marks whenever that is possible.
class StartTag
	# Returns the tag as an HTML string, "<name attr=value ...>".
	#
	# * Attributes are written in sorted name order.
	# * An attribute whose value is +true+ is written as a bare name.
	# * If ColorAttr lists the attribute for this element, the value is first
	#   shortened with shortcolor (which raises RuntimeError for a value it
	#   does not recognize).
	# * A value made only of letters, digits and . _ : % + - # is written
	#   unquoted; any other value is double-quoted and HTML-escaped.
	def to_s
		return "<#{@name}>" unless @attr
		attrs = @attr.keys.sort.collect{|n|
			v = @attr[n]
			if v==true
				# Valueless attribute: there is nothing to shorten or quote.
				' ' + n
			else
				a = ColorAttr.assoc(n)
				v = shortcolor(v) if a && a[1].include?(@name)
				# \A and \z anchor the whole value; ^ and $ would accept a
				# multi-line value as soon as one of its lines looked safe.
				if v =~ /\A[a-zA-Z0-9._:%+\-#]+\z/
					' ' + n + '=' + v
				else
					' ' + n + '="' + CGI::escapeHTML(v) + '"'
				end
			end
		}
		# join, not to_s: Array#to_s is the inspect form on Ruby 1.9 and later.
		"<" + @name + attrs.join + ">"
	end
end

class HTMLSplit
	# Whitespace-only text directly following one of these tags is dropped.
	COMPACTTAG = %w(html head body table tbody thead tfoot tr td th p hr br ul ol li dl dt dd)
	# Elements whose text is left untouched because whitespace inside them is
	# significant.
	PRESERVETAG = %w(pre script style xmp textarea listing)

	# Removes redundant whitespace, line breaks and comments from @document,
	# modifying it (and the text of its CharacterData nodes) in place.
	#
	# * Text inside any PRESERVETAG element is left as it is.
	# * Elsewhere, runs of spaces/tabs collapse to one space; whitespace-only
	#   text is deleted after a COMPACTTAG tag or a Declaration and otherwise
	#   reduced to a single "\n" or " "; in other text every line break
	#   (with the spaces/tabs around it) becomes a single space.
	# * Comments are deleted unless they are inside script or style.
	#
	# Returns the result of @document.delete_if.
	def compact
		open_tags = []	# names of the start tags seen and not yet closed
		last = nil		# previous node, including nodes that were deleted
		@document.delete_if {|e|
			delete = case e
			when StartTag
				open_tags.push e.name
				false
			when EndTag
				# Close the innermost matching element and everything
				# opened inside it; an unmatched end tag is ignored.
				i = open_tags.rindex(e.name)
				open_tags = open_tags[0, i] if i
				false
			when CharacterData
				if (open_tags & PRESERVETAG).empty?
					compact_character_data(e, last)
				else
					false
				end
			when Comment
				!(open_tags.include?('script') || open_tags.include?('style'))
			else
				false
			end
			last = e
			delete
		}
	end

	private

	# Normalizes the whitespace of one CharacterData node in place.
	# Returns true when the node should be deleted from the document.
	def compact_character_data(e, last)
		e.text.gsub!(/[ \t]+/,' ')		# collapse runs of spaces/tabs
		if e.text=~/\A[ \r\n\t]+\z/
			# Nothing but whitespace is left: decide by the preceding node.
			case last
			when EmptyElementTag,StartTag,EndTag
				return true if COMPACTTAG.include?(last.name)
			when Declaration
				return true
			end
			e.text = (e.text =~ /\n/) ? "\n" : ' '
		else
			e.text.gsub!(/[ \t]*\n/,"\n")
			e.text.gsub!(/\n[ \t]*/,' ')
		end
		false
	end
end

