#!/usr/bin/perl
#=============================================================================
#    drpl
#        Version 1.04.07
#        Copyright (C)1998 Tomoto SHIMIZU
#-----------------------------------------------------------------------------
#  : ˽äʸִԤ
#-----------------------------------------------------------------------------
#  ˡ: 
#
#    (1) drpl -c [-D <̾>] [<񥽡> ...]
#
#      񥽡(ñбҤ줿ƥ)ɤ߹ǡ롣
#      񥽡̾ά줿硢<̾>.dic Ѥ롣
#
#    (2) drpl [-u] [-D <̾>] [<ϥե> ...]
#
#      ꤵ줿ѤơƥȤִ롣
#      ϥե뤬ά줿硢ɸϤɤ߹ࡣ
#
#      -u ꤵƤ硢<̾>.dic Ȥ̾μ񥽡õ
#      줬⿷ʤмưŪ˹롣
#
#    (3) drpl -h
#      إפɽƽλ롣
#
#    ̾ΥǥեȤ DIC
#    Ķѿ DRPLDIC ˥ѥꤷƤȡ򤽤Υѥõ
#-----------------------------------------------------------------------------
#  :
#    ϡ<̾>.{try|pag|dir} Ȥ3ĤΥե뤫롣
#    ɤϥץȤƱȤߤʤ롣sjis ޤ euc б
#-----------------------------------------------------------------------------
#  This program is free software; ABSOLUTELY NO WARRANTY.
#  you can redistribute it and/or modify it under the terms of the GNU
#  General Public License, which may be found in the drpl 1.04 package.
#=============================================================================

#=============================================================================
# 
#=============================================================================

$DEFAULT_DIC_NAME = "DIC";
$KANJI_CODE_SAMPLE = "";
$CHAR_SEP = $; ;

my($KANJI_RE) = {
	'sjis' => [ '[\201-\237\340-\374]', '[\100-\176\200-\374]' ],
	'euc'  => [ '[\241-\376]', '[\241-\376]' ]
};

#=============================================================================
# 
#=============================================================================

# ⥸塼
use Getopt::Std;
use File::Basename;

&getopts("D:chvu");

#=============================================================================
# ̾
#=============================================================================

my($dic_name);

local($_) = $opt_D || $DEFAULT_DIC_NAME;
s/\\/\//g;	# ѥڤUNIXѤѴ
if (/^\// || /^[a-z]:\//i || /^\.\.?\//) {
	# Хѥ뤤 .  .. Ϥޤ硢DRPLDIC ϻѤʤ
} else {
	my($drpldic)  = $ENV{'DRPLDIC'};
	if ($drpldic) {
		$drpldic =~ s/\\/\//g;	# ѥڤUNIXѤѴ
		$_ = $drpldic . "/" . $_;
	}
}
s/\.dic$//i;	# ĥҤդ뤪ޤ̤եƤ
$dic_name = $_;

#=============================================================================
# ᥤ
#=============================================================================

&initkanjicode($KANJI_CODE_SAMPLE);

&HelpAndExit() if ($opt_h);

if ($opt_c) {
	if (@ARGV > 0) {
		&CreateDic($dic_name, ARGV);
	} else {
		&CreateDic($dic_name);
	}
} else {
	if ($opt_u) {
		if (-e "$dic_name.dic" &&
			(!(-e "$dic_name.try") ||
			 (stat "$dic_name.try")[9] < (stat "$dic_name.dic")[9])) {
			warn "Creating dictionary ...\n" if ($opt_v);
			eval '&CreateDic($dic_name)' ;
			warn $@, "Creating dictionary was failed\n" if ($@ && $opt_v);
		}
	}
	&ReplaceTextStream($dic_name, ARGV);
}


#=============================================================================
# ܥ桼ƥƥ
#=============================================================================

my($kanji_re);
my($kanji_code);

# ɤȽꤷ롣
sub initkanjicode {
	my($sample) = @_;
	foreach (keys %$KANJI_RE) {
		my($k_re) = $KANJI_RE->{$_};
		if ($KANJI_CODE_SAMPLE =~ /$k_re->[0]$k_re->[1]/) {
			$kanji_code = $_;
			$kanji_re = "($k_re->[0])$CHAR_SEP($k_re->[1])";
		}
	}
}


# ʸʸʬ
sub splitchar {
	local($_) = @_;
	$_ = join($CHAR_SEP, split(//, $_));
	s/$kanji_re/$1$2/g;
	split(/$;/, $_);
}


#=============================================================================
# ƥȤִ
#=============================================================================

sub ReplaceTextStream {
	my($dic_name, $stream) = @_;
	my($dic_try) = [];
	
	open(TRY_FILE, "$dic_name.try") || die "Cannot open '$dic_name.try'\n";
	dbmopen(%index, $dic_name, 0644) || die "Cannot open DBM '$dic_name'\n";
	my($index) = { %index };
	
	while (<$stream>) {
		print &ReplaceText($dic_try, $index, \$_);
	}
	dbmclose %index;
	close(TRY_FILE);
}

sub ReplaceText {
	my($try, $index, $text) = @_;
	my(@char) = &splitchar($$text);
	my($c);
	my($result);
	
	while (@char) {
		$c = $char[0];
		if ($index->{$c}) {
			if (! exists($try->[0]->{$c})) {
				&TryRead($try, TRY_FILE, split(/,/, $index->{$c}, 2));
			}
			
			($len, $value) = &TryMatch($try, \@char);
			if ($value) {
				$result .= $value;
				splice(@char, 0, $len);
				next;	# Υ롼פ
			}
		}
		$result .= shift(@char);
	}
	
	$result;
}


#=============================================================================
# ƥȤȥ饤
#=============================================================================

sub CreateDic {
	my($dic_name, $stream) = @_;
	my($dic_try);
	my($need_to_close_stream);

	if (! $stream) {			# ȥ꡼बά줿
		$stream = 'DIC';
		open($stream, "$dic_name.dic") || die "Cannot open '$dic_name.dic'\n";
		$need_to_close_stream = 1;
	}
	$dic_try = &ReadDicTable($stream);	# ƥȷμ񤫤ȥ饤
	close $stream if ($need_to_close_stream);

	my($hash) = $dic_try->[0];		# 롼ȥΡɤΥϥåɽ
	
	open(TRY_FILE, ">$dic_name.try") || die "Cannot create '$dic_name.try'\n";
		# ȥ饤եκ
	dbmopen(%index, $dic_name, 0644)  || die "Cannot create DBM '$dic_name'\n";
		# ǥDBMեκ
	eval( '$index{""} = 1' ) || die "Cannot store into DBM '$dic_name'\n";
		# DBMեν񤭹ߵĤå

	%index = ();					# ǥ򥯥ꥢ
	my($old_stdout) = select TRY_FILE;
	
	foreach (sort keys %$hash) {	# ƱʸǤ뤿᥽Ȥ
		my($ptr) = tell(TRY_FILE);
		my($num) = 0;
		&TryPrint($hash->{$_}, $_, 1, \$num);
		$index{$_} = "$ptr,$num";
	}
	
	select $old_stdout;
	dbmclose(%index);
	close(TRY_FILE);
}


sub ReadDicTable {
	my($stream) = @_;
	my($dic_try)  = [];
	my($key, $value);
	while (<$stream>) {
		chomp;
		s/^\s+|\s+$//g;				# Ƭȹζ
		if (/^$/ || /^#/) { next; }	# ԤȥȹԤФ
		($key, $value) = split(/\s+/, $_, 2);
		&TryRegister($dic_try, $key, $value);
	}
	
	$dic_try;	# ȥ饤֤
}



#=============================================================================
# ȥ饤
#=============================================================================

# ñϿ
sub TryRegister {
	my($try, $key, $value) = @_;
	my(@key) = &splitchar($key);
#	my(@key) = split(//, $key);
	my($node) = $try;
	my($n);
	foreach (@key) {
		if ($n = $node->[0]->{$_}) {
			$node = $n;
		} else {
			$node = ($node->[0]->{$_} = []);
		}
	}
	$node->[1] = $value;
}

# ȥ饤ξȹ
# Ĺ׸Ԥ
# (Ĺ, ȥ饤դ)ΥꥹȤ֤
sub TryMatch {
	my($try, $element) = @_;
	my($i) = 0;
	my(@result);
	my($n, $v);
	foreach (@$element) {
		if ($n = $try->[0]->{$_}) {
			$try = $n;
			$i++;
			if ($v = $try->[1]) {
				@result = ($i, $v);
			}
		} else {
			last;
		}
	}
	@result;
}

# ȥ饤ν
sub TryPrint {
	my($try, $label, $depth, $num) = @_;
	my($child, $value) = @$try;
	
	print $depth, " " x $depth, $label;
	print " ", $value if (defined($value));
	print "\n";
	$$num++;
	
	foreach (keys %$child) {
		&TryPrint($child->{$_}, $_, $depth+1, $num);
	}
}


# ȥ饤ɤ߹
sub TryRead {
	my($try, $file, $ptr, $len) = @_;
	
	my($depth, $key, $value);
	my(@stack) = ($try);
	seek($file, $ptr, 0);
	while ($_ = <$file> and $len-- > 0) {
		chomp;
		($depth, $key, $value) = split;
		if ($depth > 0) {
			$stack[$depth] = $stack[$depth - 1]->[0]->{$key} = [];
			$stack[$depth]->[1] = $value if ($value);
		}
	}
}


#=============================================================================
# إ
#=============================================================================

sub Help {
	open(SCRIPT, $0) || die "Cannot open '$0'\n";
	my($state);
	while (<SCRIPT>) {
		$state++ if (/=======================/);
		print if ($state >= 1);
		last  if ($state == 2);
	}
	close(SCRIPT);
	
	print "#  ߤ:\n";
	print "#     = $kanji_code\n";
	print "#    񸡺ѥ = ", $ENV{'DRPLDIC'} || "(none)", "\n";
	print;
}

sub HelpAndExit {
	&Help();
	exit;
}

