#!/usr/bin/perl -w

#
#  The XML Translation Merge Tool
#
#  Copyright (C) 2000 Free Software Foundation.
#  Copyright (C) 2000, 2001 Eazel, Inc
#
#  This library is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of the
#  License, or (at your option) any later version.
#
#  This script is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this library; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  Authors:  Maciej Stachowiak <mjs@eazel.com>
#            Kenneth Christiansen <kenneth@gnu.org>
#            Darin Adler <darin@eazel.com>
#


## Release information
my $PROGRAM      = "xml-i18n-merge";
my $PACKAGE      = "xml-i18n-tools";
my $VERSION      = "0.9";

## Script options - Enable by setting value to 1
my $ENABLE_XML   = "1";

## Loaded modules
use strict; 
use File::Basename;
use Getopt::Long;

## Scalars used by the option stuff
my $HELP_ARG 	= "0";
my $VERSION_ARG = "0";
my $OAF_STYLE_ARG = "0";
my $XML_STYLE_ARG = "0";
my $KEYS_STYLE_ARG = "0";
my $DESKTOP_STYLE_ARG = "0";
my $QUIET_ARG = "0";


## Handle options
GetOptions (
	    "help|h|?" => \$HELP_ARG,
	    "version|v" => \$VERSION_ARG,
            "quiet|q" => \$QUIET_ARG,
	    "oaf-style|o" => \$OAF_STYLE_ARG,
	    "xml-style|x" => \$XML_STYLE_ARG,
	    "keys-style|k" => \$KEYS_STYLE_ARG,
	    "desktop-style|d" => \$DESKTOP_STYLE_ARG
	    ) or &error;


my $PO_DIR;
my $FILE;
my $OUTFILE;

my @languages;
my %po_files_by_lang = ();
my %translations = ();

&split_on_argument;


## Check for options. 
## This section will check for the different options.

sub split_on_argument {

    if ($VERSION_ARG) {
	&version;

    } elsif ($HELP_ARG) {
	&help;
    } elsif ($OAF_STYLE_ARG && @ARGV > 2) {
	&place_normal;
	&message;
	&preparation;
	&oaf_merge_translations;
    } elsif ($XML_STYLE_ARG && @ARGV > 2) {
	&place_normal;
	&message;
	&preparation;
	&xml_merge_translations;
    } elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
        &place_normal;
        &message;
        &preparation;
        &keys_merge_translations;
    } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
        &place_normal;
        &message;
        &preparation;
        &desktop_merge_translations;
    } else {
	&help;
    }  
}    


sub place_normal {
    $PO_DIR = $ARGV[0];
    $FILE = $ARGV[1];
    $OUTFILE = $ARGV[2];
}   


## Sub for printing release information
sub version{
    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
    print "Written by Maciej Stachowiak and Kenneth Christiansen, 2000.\n\n";
    print "Copyright (C) 2000 Free Software Foundation, Inc.\n";
    print "Copyright (C) 2000, 2001 Eazel, Inc.\n";
    print "This is free software; see the source for copying conditions.  There is NO\n";
    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
    exit;
}

## Sub for printing usage information
sub help{
    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
    print "Generates an xml file that includes translated versions of some attributes,\n";
    print "from an untranslated source and a po directory that includes translations.\n";
    print "  -v, --version                shows the version\n";
    print "  -h, --help                   shows this help page\n";
    print "  -q, --quiet                  quiet mode\n";
    print "  -o, --oaf-style              includes translations in the oaf style\n";
    print "  -x, --xml-style              includes translations in the xml style\n";
    print "  -k, --keys-style		  includes translations in the keys style\n";
    print "  -d, --desktop-style          includes translations in the desktop style\n";
    print "\nReport bugs to <mjs\@eazel.com>.\n";
    exit;
}


## Sub for printing error messages
sub error{
#   print "xml-i18n-merge: invalid option @ARGV\n";
    print "Try `${PROGRAM} --help' for more information.\n";
    exit;
}


sub message {
    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
}



sub preparation {
   &gather_po_files;
   &create_translation_database;   
}



# General-purpose code for looking up translations in .po files

sub gather_po_files
{
    my @po_files = glob("${PO_DIR}/*.po");

    @languages = map (&po_file2lang, @po_files);

    foreach my $lang (@languages) {
	$po_files_by_lang{$lang} = shift (@po_files);
    }
}

sub po_file2lang 
{ 
    my $tmp = $_; 
    $tmp =~ s/^.*\/(.*)\.po$/$1/; 
    return $tmp; 
}


sub create_translation_database
{
    foreach my $lang (@languages) {

    	my $po_file = $po_files_by_lang{$lang};

    	open PO_FILE, "<$po_file";	

        while (<PO_FILE>) {
            if (/^#,.*fuzzy/) {
                $_ = <PO_FILE>; next;
            }
            if (/^msgid "(.*)"/ ) {
		my $msgid = $1;
                $_ = <PO_FILE>;
		
		if (/^msgstr "(.+)"/) {
		    my $msgstr = $1;
		    $translations{$lang . "|" . $msgid} = $msgstr; 
		    # print "[$lang]$msgstr\n";
		}
	    }            
        }
    }
}

sub lookup_translations 
{
    my ($value) = @_;
 
    my %transmap = ();

    foreach my $lang (@languages) {
        my $translation = lookup_translation ($value, $lang);
            
        if ($translation) {
            $transmap{$lang} = $translation;
        }
    }

    return %transmap;
}


sub lookup_translation
{
    my ($string, $lang) = @_;
    $string =~ s/\+/\\+/g;
  
    my $salt = "$lang|$string";
      
    if ($translations{$salt}) {
        return $translations{$salt};
    }
  
    return "";
}


sub entity_encode_translations
{
    my %transmap = @_;

    foreach my $key (keys %transmap) {
	$transmap{$key} = entity_encode ($transmap{$key});
    }

    return %transmap;
}


sub entity_encode
{
    my ($pre_encoded) = @_;

    $pre_encoded =~ s/\\(.)/$1/g;
    my @list_of_chars = unpack ('C*', $pre_encoded);

    return join ('', map (&entity_encode_int, @list_of_chars));
}

sub entity_encode_int
{
    if ($_ > 127 || $_ == 34 || $_ == 38) {
	return "&#" . $_ . ";";
    } else {
	return chr $_;
    }
}


## XML/OAF-specific merge code
 
sub oaf_merge_translations
{
    my $xml_source; {
       local (*INPUT);
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $xml_source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE";

    while ($xml_source =~ /[ \t]*<[^<]*\s_\w+="[^"]*"[^<]*>/m) {
        print OUTPUT $`;
        my $orig_node = $&;
	$xml_source = $';

        my $non_translated_line = $orig_node;
        $non_translated_line =~ s/_(\w+)="/$1="/;
            
        my $new_node = $non_translated_line;
            
        my $value_str = $orig_node;
        $value_str =~ s/.*_\w+="([^"]*)".*/$1/s;

        if ($value_str) {
            my %value_translation_map = entity_encode_translations
                (lookup_translations ($value_str));

            foreach my $key (sort keys %value_translation_map) {
                my $translation = $value_translation_map{$key};
                    
                my $translated_line = $orig_node;
                $translated_line =~ s/name="([^"]*)"/name="$1-$key"/;
                $translated_line =~ s/(\s*)_(\w+)="[^"]*"/$1$2="$translation"/;

                $new_node .= "\n$translated_line";
            }
        }

	$xml_source = $new_node . $xml_source;
    }

    print OUTPUT $xml_source;

    close OUTPUT;
}


## XML (non-OAF) merge code
 
sub xml_merge_translations
{
    my $xml_source; {
       local (*INPUT);
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $xml_source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE";

    # FIXME: support attribute translations

    # First just unmark for translation all empty nodes
    # for example <_foo/> is just replaced by <foo/>
    $xml_source =~ s/<_(\w+)\/>/<$1\/>/mg;

    # Support for XML <_foo>blah</_foo> style translations
    while ($xml_source =~ /([ \t]*)<_(\w+)>([^<]+)<\/_\2>/m) {
        print OUTPUT $`;
	$xml_source = $';

        my $spaces = $1;
        my $tag_name = $2;
        my $value_str = $3;

        my $non_translated_line = "$spaces<$tag_name>$value_str</$tag_name>";
            
        my $new_node = $non_translated_line;

        if ($value_str) {
            my %value_translation_map = entity_encode_translations
                (lookup_translations ($value_str));

            foreach my $key (sort keys %value_translation_map) {
                my $translation = $value_translation_map{$key};

                $new_node .= "\n$spaces<$tag_name xml:lang=\"$key\">$translation</$tag_name>";
            }
        }

	$xml_source = $new_node . $xml_source;
    }

    print OUTPUT $xml_source;

    close OUTPUT;
}

sub keys_merge_translations
{       
    open INPUT, "<${FILE}";

    open OUTPUT, ">${OUTFILE}";

    while (<INPUT>) {
        chomp;
        if (/^\s*_\w+=.*/)  {
            my $orig_line = $_;
    
            my $non_translated_line = $orig_line;
            $non_translated_line =~ s/_([^="]*)=/$1=/;
            
            print OUTPUT "${non_translated_line}\n";
            
            my $value_str = $orig_line;
            $value_str =~ s/.*_\w+=(.*)/$1/;
            
            if ($value_str) {
                my %value_translation_map = lookup_translations ($value_str);
            
                foreach my $key (sort keys %value_translation_map) {
                    my $translation = $value_translation_map{$key};

                    my $translated_line = $orig_line;  
                    $translated_line =~ s/_([^="]*)=([^\n]*)/\[$key]$1=$translation/;
                    print OUTPUT "$translated_line\n";
                }
            }
        } else {
            print OUTPUT "$_\n";
        }
    }
                 
    close OUTPUT;
    close INPUT;
}

sub desktop_merge_translations
{
    open INPUT, "<${FILE}";

    open OUTPUT, ">${OUTFILE}";

    while (<INPUT>) {
        chomp;
        if (/^\s*_\w+=.*/)  {
            my $orig_line = $_;

            my $non_translated_line = $orig_line;
            $non_translated_line =~ s/_([^="]*)=/$1=/;

            print OUTPUT "${non_translated_line}\n";

            my $value_str = $orig_line;
            $value_str =~ s/.*_\w+=(.*)/$1/;

            if ($value_str) {
                my %value_translation_map = lookup_translations ($value_str);

                foreach my $key (sort keys %value_translation_map) {
                    my $translation = $value_translation_map{$key};

                    my $translated_line = $orig_line;
                    $translated_line =~ s/^_([^="]*)=([^\n]*)/$1\[$key]=$translation/;
                    print OUTPUT "$translated_line\n";
                }
            }
        } else {
            print OUTPUT "$_\n";
        }
    }

    close OUTPUT;
    close INPUT;

}
