#! /usr/bin/perl
#
#  Copyright (c) 1999, 2000
# 	Konstantin Chuguev.  All rights reserved.
# 
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#  3. All advertising materials mentioning features or use of this software
#     must display the following acknowledgement:
# 	This product includes software developed by Konstantin Chuguev
# 	and its contributors.
# 
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
#  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
#  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
#  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
#  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
#  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#  SUCH DAMAGE.
# 
# 	iconv (Charset Conversion Library) v2.0
#

require 'getopts.pl';
use integer;

# Format up to four packed bytes as a C macro invocation,
# e.g. pack_hex('1l', pack("N", 8)) yields "_1l(0x00, 0x00, 0x00, 0x08)".
#   arg 0: macro suffix appended to the leading underscore
#   arg 1: packed string; at most its first four bytes are rendered
# NOTE(review): the _1l/_2s macros are presumably provided by the "endian.h"
# header that the generated C source includes -- confirm.
sub pack_hex {
	my($macro, $packed) = @_;
	my @literals;
	foreach my $octet (unpack('C4', $packed)) {
		push @literals, sprintf("0x%02X", $octet);
	}
	return "_" . $macro . "(" . join(", ", @literals) . ")";
}

# Serialise the first $size elements of @$array_ref.
#   $size       number of elements to emit
#   $format     pack template letter: 'N' (32-bit big-endian) or anything
#               else, which is treated as 16-bit big-endian in C mode
#   $array_ref  reference to the values
# Without -C the result is the packed binary string.  With -C the result is
# C initialiser text, one tab-indented line per four bytes pair (two 32-bit
# values, or four 16-bit values, per line), and the global $array_size is
# advanced by the number of bytes the text represents.
sub pack_array {
	my($size, $format, $array_ref) = @_;

	unless ($opt_C) {
		return pack("$format$size", @$array_ref);
	}

	my $text;
	my $pos = 0;
	if ($format eq 'N') {
		# two 32-bit values per output line
		while ($pos < $size) {
			my $first  = &pack_hex('1l', pack("N", $$array_ref[$pos]));
			my $second = &pack_hex('1l', pack("N", $$array_ref[$pos+1]));
			$text .= "\t" . $first . ", " . $second . ",\n";
			$pos += 2;
		}
		$array_size += $size * 4;
	} else {
		# four 16-bit values per output line, grouped in pairs
		while ($pos < $size) {
			my $first  = &pack_hex('2s',
			    pack("n2", $$array_ref[$pos], $$array_ref[$pos+1]));
			my $second = &pack_hex('2s',
			    pack("n2", $$array_ref[$pos+2], $$array_ref[$pos+3]));
			$text .= "\t" . $first . ", " . $second . ",\n";
			$pos += 4;
		}
		$array_size += $size * 2;
	}
	return $text;
}

# Build an array of short/long values in network byte order.
#   $size       number of slots in the table
#   $format     pack template letter handed on to pack_array
#   $default    value stored in every slot that is still undefined
#   $array_ref  reference to the table; it is filled in place
# Returns whatever pack_array produces for the completed table.
sub build_array {
	my($size, $format, $default, $array_ref) = @_;
	foreach my $slot (0 .. $size-1) {
		next if defined($array_ref->[$slot]);
		$array_ref->[$slot] = $default;
	}
	return &pack_array($size, $format, $array_ref);
}

# Build a one-level table of $size 16-bit entries from @$array_ref.
# Slots with no mapping are filled with 0xFFFE.
sub build_table1 {
	my($size, $array_ref) = @_;
	my $format = "n";	# 16-bit big-endian entries
	my $unmapped = 0xFFFE;	# filler for undefined slots
	return &build_array($size, $format, $unmapped, $array_ref);
}

# Build a two-level table from an array of array references.
# Layout of the result:
#   - an index of $size 32-bit entries; entry i holds the byte offset of
#     subtable i (measured from the start of the index), or 0 if row i has
#     no data;
#   - one subtable of $size 16-bit entries for every populated row, in
#     ascending row order.
# The number of subtables is reported on STDERR.
sub build_table2 {
	my($size, $array_ref) = @_;

	# rows that actually carry mappings
	my @rows = grep { defined($$array_ref[$_]) } 0 .. $size-1;

	# the first subtable starts right after the index ($size * 4 bytes);
	# every subtable occupies $size * 2 bytes
	my @offs;
	my $offset = $size * 4;
	foreach my $row (@rows) {
		$offs[$row] = $offset;
		$offset += $size * 2;
	}

	my $data = &build_array($size, "N", 0, \@offs);
	foreach my $row (@rows) {
		$data .= &build_table1($size, $$array_ref[$row]);
	}

	printf STDERR "%d subtables.\n", scalar(@rows);
	return $data;
}

# Charset property flags raised by set_val: whether the charset maps any
# code in 0x00-0x1F, any code in 0x80-0x9F, and the code 0x7F.
($control0, $control1, $delete) = (0, 0, 0);

# Two-level conversion tables, indexed [high byte][low byte]:
# charset code -> Unicode and Unicode -> charset code.
@to_ucs = ();
@from_ucs = ();

# Record one mapping in both conversion tables and update the charset
# property flags.
#   $cs   local charset code
#   $ucs  Unicode code point
# With -a, codes above 0x7F are ignored entirely.
#
sub set_val {
	my($cs, $ucs) = @_;
	return if $opt_a && $cs > 0x7F;

	$to_ucs[$cs >> 8][$cs & 0xFF] = $ucs;
	$from_ucs[$ucs >> 8][$ucs & 0xFF] = $cs;

	# bits 5 and 6 both clear: low byte lies in 0x00-0x1F or 0x80-0x9F
	unless ($cs & 0x60) {
		my $flag = ($cs & 0x80) ? \$control1 : \$control0;
		$$flag = 1;
	}
	$delete = 1 if $cs == 0x7F;

	# remember which code widths have been seen
	if ($cs >= 0x100) {
		if ($cs & 0x8080) {
			$_16bit = 1;
		} else {
			$_14bit = 1;
		}
	} elsif ($cs >= 0x80) {
		$_8bit = 1;
	} else {
		$_7bit = 1;
	}
}

# Map every code in the argument list to the Unicode code point of the
# same value, in both conversion tables.
#
sub set_range {
	foreach my $code (@_) {
		&set_val($code, $code);
	}
}

# Defaults for the options that take a value; Getopts overwrites them when
# the corresponding switch is present on the command line.
$opt_c = 0;
$opt_p = '0x';
$opt_u = 1;

&Getopts('aCc:Mm:o:p:u:');
#         ||| || | | +- u N:	field number for Unicode character codes
#         ||| || | +--- p str:	prefix
#         ||| || +----- o file:	output file name
#         ||| |+------- m file:	character mnemonic table from RFC1345
#         ||| +-------- M:	Macintosh newline (<CR> only)
#         ||+---------- c N:	field number for charset character codes
#         |+----------- C:	make C source file
#         +------------ a:	ignore 8 bit (for ASCII)

if ($opt_o) {
	# Three-argument open so that the file name is never interpreted as a
	# mode or a pipe; fail loudly instead of silently discarding all output.
	open(STDOUT, '>', $opt_o)
		or die "$0: cannot open $opt_o for writing: $!\n";
	# Strip a literal ".c" suffix only.  The dot has to be escaped:
	# an unescaped one also removes e.g. the "uc" from "-o euc".
	$opt_o =~ s/\.c$//;
}

# mnemonic => hexadecimal code point, filled from the -m file
%map;

if ($opt_M) {
	# input records are terminated by a bare carriage return
	$/ = "\cM";
}

# Read the charset definition and feed every mapping to set_val.
#
# Fields are split into a named array throughout: split in scalar or void
# context no longer stores its result in @_ (changed in Perl 5.12), so the
# old "next if 2 > split" idiom would skip every input line.
if ($opt_m) {
	# RFC1345 mode: first load the mnemonic table (lines that start with
	# exactly one space: mnemonic, hexadecimal code, ...).
	open(MAP, '<', $opt_m)
		or die "$0: cannot open mnemonic table $opt_m: $!\n";
	while (<MAP>) {
		# chomp only removes the record terminator, so an unterminated
		# last line does not lose its final character
		chomp;
		next unless /^ [^ ]/;
		my @fields = split;
		next if @fields < 2;
		$map{$fields[0]} = $fields[1];
	}
	close(MAP);

	# Then read the charset itself: "&code N" directives set the current
	# code, any other line is a list of mnemonics for consecutive codes.
	my $code = 0;
	while (<>) {
		chomp;
		s/^ *//;
		if (/^&[a-z]/) {
			my @fields = split(' ', substr($_, 1));
			if ($fields[0] eq 'code') {
				$code = $fields[1];
			}
		} else {
			foreach my $mnemonic (split) {
				if ($mnemonic ne '??') {
					if (defined $map{$mnemonic}) {
						&set_val($code, hex "0x$map{$mnemonic}");
					} else {
						# An unknown mnemonic used to be mapped to
						# U+0000, overwriting the NUL reverse mapping.
						print STDERR "unknown mnemonic '$mnemonic'"
						    . " at code $code skipped.\n";
					}
				}
				$code ++;
			}
		}
	}
} else {
	# Plain table mode: whitespace-separated fields, '#' starts a comment.
	while (<>) {
		s/[#\n].*//;
		my @fields = split;
		next if @fields < 2;	# too few fields
		next if ($fields[$opt_c] =~ s/^$opt_p/0x/o) != 1;
				# local charset code prefix is invalid
		&set_val(hex $fields[$opt_c], hex $fields[$opt_u]);
	}
}

# Classify the charset from the code widths seen by set_val, add the
# identity mappings it lacks, and build the charset -> Unicode table.
# Sets $nbits (7, 8, 14 or 16) and $type (0, 1, 2 or 3) for the file header.
if ($_16bit) {
	print STDERR "16bit charset";
	if (!($_7bit || $_8bit)) {
		&set_range(0 .. 0x7F);
		print STDERR "; ASCII subset added";
	} elsif (!$control0) {
		&set_range(0 .. 0x1F);
		print STDERR "; control0 chars added";
	}
	print STDERR ".\n";
	$to = &build_table2(256, \@to_ucs);
	($nbits, $type) = (16, 3);
} elsif ($_14bit) {
	print STDERR "14bit charset.\n";
	$to = &build_table2(128, \@to_ucs);
	($nbits, $type) = (14, 2);
} else {
	# single-byte charset: one flat table of 128 or 256 entries
	my $entries;
	if ($_8bit) {
		print STDERR "8bit charset";
		# each entry: flag to test, message, codes to add when flag is unset
		foreach my $fill (
			[\$control0, "; control0 chars added", 0 .. 0x1F],
			[\$control1, "; control1 chars added", 0x80 .. 0x9F],
			[\$delete,   "; delete char added",    0x7F],
		) {
			my($present, $note, @codes) = @$fill;
			next if $$present;
			&set_range(@codes);
			print STDERR $note;
		}
		($nbits, $type, $entries) = (8, 1, 256);
	} else {
		print STDERR "7bit charset";
		($nbits, $type, $entries) = (7, 0, 128);
	}
	print STDERR ".\n";
	$to = &build_table1($entries, $to_ucs[0]);
}

# Size in bytes of the charset -> Unicode table.  In C mode $to is source
# text, so the byte count accumulated by pack_array is used instead.
if ($opt_C) {
	$to_size = $array_size;
} else {
	$to_size = length($to);
}

# The Unicode -> charset table is always a 256 x 256 two-level table.
$from = &build_table2(256, \@from_ucs);

# Write the result to STDOUT: an 8-byte header, two 32-bit offsets (to the
# charset -> Unicode table and to the Unicode -> charset table), then the
# two tables.  With -C everything is emitted as a C array definition.
if ($opt_C) {
	die "-o option is mandatory with -C" unless $opt_o;
	# derive a C identifier from the output file name
	$opt_o =~ s/\.c$//;
	$opt_o =~ tr/-/_/;
	print qq/#include "endian.h"\n\n/;
	print "const unsigned char iconv_ccs_table_$opt_o" . "[] = {\n";
	print "\t3, 'C', 'S', 'C', 'T', 1, $nbits, $type,\n";
	print &pack_array(2, 'N', [8, 8 + $to_size]);
} else {
	# The table is raw binary data: disable newline translation so that
	# bytes equal to "\n" are not rewritten on platforms that do so.
	binmode(STDOUT);
	print pack("A5CCCNN", "\003CSCT", 0, $nbits, $type, 8, 8 + $to_size);
}
print $to;
print $from;
print "};\n" if $opt_C;

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so close it explicitly and report failure.
close(STDOUT) or die "$0: error writing output: $!\n";
