#!/usr/bin/perl -w ########################################################################### # # inittranslation.pl # # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### BEGIN { die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); } use util; use GDBM_File; my %updatedata = (); sub main { # Get the name of the source (base) language my $sourcelang = shift(@_); # Get the name of the target language my $targetlang = shift(@_); my $usecvs = shift(@_); # Check that both arguments were supplied if (!$sourcelang) { die "Error: You didn't supply the name of the source language!\n"; } if (!$targetlang) { die "Error: You didn't supply the name of the target language!\n"; } # Casefold both language names $sourcelang =~ tr/A-Z/a-z/; $targetlang =~ tr/A-Z/a-z/; my $gsdldir = "$ENV{'GSDLHOME'}"; my $macrosdir = util::filename_cat($gsdldir, "macros"); my $langdir = util::filename_cat($gsdldir, "tmp", "lang"); # Make sure this is a new translation process my $translationdir = util::filename_cat($langdir, "$sourcelang-$targetlang"); if (-e $translationdir) { print STDERR "Note: Translation has already been initialised (exiting).\n"; return; } # If the source language is not English, it may not be up to date if ($sourcelang !~ m/english/) { print STDERR "Warning: Source language is not English. You should check that" . " the source language is up to date before beginning.\n"; } # Check that the source language macro files exist, and parse them my $sourcedmname1 = &get_macrofile_name($sourcelang, ""); my $sourcehash1 = &parse_macrofile($macrosdir, $sourcedmname1); my $sourcedmname2 = &get_macrofile_name($sourcelang, "2"); my $sourcehash2 = &parse_macrofile($macrosdir, $sourcedmname2); # Make sure some macros exist to be translated if (!$sourcehash1 && !$sourcehash2) { die "Error: No source macro information exists.\n"; } # Combine the two source hashes my $sourcehash = &combine_hashes($sourcehash1, $sourcehash2); # Create a directory to store the data for the new translation, make world-writeable my $currentmask = umask; umask(0000); if (! mkdir($translationdir, 0777)) { umask($currentmask); die "Error: Couldn't create directory $translationdir.\n"; } # Start a new database for the translation information my $translationdb = util::filename_cat($translationdir, "translation.db"); my %translationdata = (); tie(%translationdata, "GDBM_File", $translationdb, GDBM_WRCREAT, 0666); # Work out what the target language code should be, and store it in the database my $maincfgfile = util::filename_cat($gsdldir, "etc", "main.cfg"); my $languagecodes = &get_language_codes(); my $targetcode = &get_language_code($targetlang, $maincfgfile, $languagecodes); $translationdata{"*target_lang_code*"} = $targetcode; # Start a new update database my $updatedb = util::filename_cat($translationdir, "update.db"); tie(%updatedata, "GDBM_File", $updatedb, GDBM_WRCREAT, 0666); umask($currentmask); # Create the target language macro files if they don't exist my $targetdmname1 = &get_macrofile_name($targetlang, ""); my $targetdmfile1 = util::filename_cat($macrosdir, $targetdmname1); if (! -e $targetdmfile1) { my $sourcedmfile1 = util::filename_cat($macrosdir, $sourcedmname1); &create_empty_macrofile($sourcedmfile1, $targetdmfile1); } my $targetdmname2 = &get_macrofile_name($targetlang, "2"); my $targetdmfile2 = util::filename_cat($macrosdir, $targetdmname2); if (! -e $targetdmfile2) { my $sourcedmfile2 = util::filename_cat($macrosdir, $sourcedmname2); &create_empty_macrofile($sourcedmfile2, $targetdmfile2); } # Parse the target language macro files my $targethash1 = &parse_macrofile($macrosdir, $targetdmname1); my $targethash2 = &parse_macrofile($macrosdir, $targetdmname2); # Combine the two target hashes my $targethash = &combine_hashes($targethash1, $targethash2); # Initialise the translation database with the current contents of the macrofiles foreach $sourcekey (sort keys(%$sourcehash)) { $translationdata{$sourcekey} = $sourcehash->{$sourcekey}; } foreach $targetkey (sort keys(%$targethash)) { $translationdata{$targetkey} = $targethash->{$targetkey}; } # Use CVS to initialise the update database, if desired if ($usecvs && $usecvs eq "true") { &initialise_update_db($sourcelang, $targetlang, $targethash, $macrosdir); } # Clean up untie %translationdata; untie %updatedata; } sub get_macrofile_name { my ($language, $suffix) = @_; my $macrofilename = $language; # Handles cases where the macro file name is different to the language name $macrofilename = "port" if ($language =~ m/portuguese/); $macrofilename = "indo" if ($language =~ m/indonesian/); # Add suffix (if any) and file extension, and return $macrofilename = $macrofilename . $suffix . ".dm"; return $macrofilename; } sub get_language_code { my ($language, $maincfgfile, $languagecodes) = @_; # Check if the language is in the main.cfg file open(MAIN_CFG_IN, "<$maincfgfile") or die "Error: Could not open $maincfgfile.\n"; my $lastlangline; while () { $line = $_; chomp($line); # print "Line: $line\n"; if ($line =~ m/^Language\s+/i) { my @args = split(/ +/,$line); my ($lang_abbr) = ($args[1] =~ m/^shortname=(.*)$/); $lang_abbr =~ tr/A-Z/a-z/; my ($lang_long) = ($args[2] =~ m/^longname=(.*)$/); $lang_long =~ tr/A-Z/a-z/; # Is this the language we are translating into? if ($lang_long eq $language) { return $lang_abbr; } $lastlangline = $line; } } close MAIN_CFG_IN; # Try to find it using the ISO 639 language codes my $langcode; if ($languagecodes->{$language}) { $langcode = $languagecodes->{$language}; } # Otherwise we just have to make something up else { $langcode = &make_up_language_code($language, $languagecodes); } # Add the new language code into the main.cfg file, and make sure it is world writeable my $currentmask = umask; umask(0000); if (! open(MAIN_CFG_OUT,">$maincfgfile.new")) { umask($currentmask); die "Error: Could not create $maincfgfile.new: $!\n"; } umask($currentmask); open MAIN_CFG_IN, "<$maincfgfile" or die "Error: Could not open $maincfgfile.\n"; my $inmacros = "false"; while () { $line = $_; chomp($line); print MAIN_CFG_OUT $line; if ($line =~ m/^macrofiles /) { $inmacros = "true"; } if ($inmacros eq "true" && $line !~ m/\\$/) { print MAIN_CFG_OUT " \\\n"; print MAIN_CFG_OUT " $language" . ".dm $language" . "2.dm"; $inmacros = "false"; } print MAIN_CFG_OUT "\n"; if ($line eq $lastlangline) { # Change language into title case my $langtitlecase = $language; substr($langtitlecase, 0, 1) =~ tr/a-z/A-Z/; print MAIN_CFG_OUT ("Language shortname=$langcode longname=$langtitlecase ", "default_encoding=utf-8\n"); } } close MAIN_CFG_IN; close MAIN_CFG_OUT; # Delete the old main.cfg and replace it with the new one unlink($maincfgfile); rename("$maincfgfile.new", $maincfgfile); return $langcode; } sub make_up_language_code { my ($language, $languagecodes) = @_; # !! TO FINISH !! print STDERR "Making up language code...\n"; $langcode = $& if ($language =~ m/\w\w/); $language =~ s/\A.//; return $langcode; } sub create_empty_macrofile { my ($sourcedmfile, $targetdmfile) = @_; open(SOURCE_DM_FILE_IN, "<$sourcedmfile") or die "Error: Could not open file.\n"; open(TARGET_DM_FILE_OUT, ">$targetdmfile") or die "Error: Could not write file.\n"; # Reads in contents of macro file, line by line while () { # Check if a new package is being defined if (s/^package //) { $packagename = $_; chomp($packagename); # It is, so write an empty package section to the target macro file print TARGET_DM_FILE_OUT "package $packagename\n"; print TARGET_DM_FILE_OUT "# text macros\n"; print TARGET_DM_FILE_OUT "# icons\n"; } } close SOURCE_DM_FILE_IN; close TARGET_DM_FILE_OUT; } sub parse_macrofile { my ($macrosdir, $macrofile) = @_; # Opens macro file or returns my $macropath = util::filename_cat($macrosdir, $macrofile); open(IN, "<$macropath") or return; # Initialises some local variables my $currpackage; my %macros = (); # Reads in contents of macro file, line by line while () { # Check if a new package is being defined if (s/^package //) { $currpackage = $_; chomp($currpackage); } # Line contains a macro name elsif (/^(_\w+_)/) { # gets the name of the macro ($1 contains text matched by corresponding # set of parentheses in the last matched pattern within the dynamic scope # which here matches (_\w+_) ) my $macroname = $1; my $macrotext; # get the first line of the macro $_ =~ s/^_\w+_ *//; # while there is still text of the macro to go... while ($_ !~ /.*\}/) { # ... adds it to the macrotext variable $macrotext .= $_; $_ = ; } $macrotext .= $_; # The key consists of macro file, package name, and macro name my $key = $macrofile . "::" . $currpackage . "::" . $macroname; # Store the macro text in the database $macros{$key} = $macrotext; } # Icon: line in format ## "sometext" ## macro ## macroname ## elsif (/^\#\# .*/) { my $macroname = $_; my $macrotext; #if the macro text contains a new line will run over two lines unless ($macroname =~ m/^\#\# .*\#\#/) { $macroname = $_; chomp($macroname); $_ = ; $macroname .= $_; } #split the image macro header on ## my @names = split(/\s*\#\#\s*/, $macroname); #save current contents of macroname into macrotext $macrotext .= $macroname; $_ = ; # overwrite macroname with macroname from ## ... ## ... ## HERE ### $macroname = $names[(scalar @names) - 1]; # read in the rest of the text associated with the image macro while ($_ !~ /^\s+/) { $macrotext .= $_; $_ = ; } # key to the hash and the database my $key = $macrofile . "::" . $currpackage . "::" . $macroname; # print "Icon, key: $key\n text: $macrotext\n"; # hashes macroname and macrotext $macros{$key} = $macrotext; } } close IN; return (\%macros); } # Combines two existing hashes of the same format together sub combine_hashes { my ($hash1, $hash2) = @_; my %combined = (); foreach $key (sort (keys %$hash1)) { $combined{$key} = $hash1->{$key}; } foreach $key (sort (keys %$hash2)) { $combined{$key} = $hash2->{$key}; } return (\%combined); } sub initialise_update_db { my ($sourcelang, $targetlang, $targethash, $macrosdir) = @_; # Use CVS to annotate each line of the source files with the date it was last edited my $sourcedmfile1 = &get_macrofile_name($sourcelang, ""); my $annotatedsourcefile1 = qx/cd $macrosdir; cvs annotate $sourcedmfile1/; my $annotatedsourcehash1 = &parse_annotated_macrofile($annotatedsourcefile1); my $sourcedmfile2 = &get_macrofile_name($sourcelang, "2"); my $annotatedsourcefile2 = qx/cd $macrosdir; cvs annotate $sourcedmfile2/; my $annotatedsourcehash2 = &parse_annotated_macrofile($annotatedsourcefile2); # Use CVS to annotate each line of the target files with the date it was last edited my $targetdmfile1 = &get_macrofile_name($targetlang, ""); my $annotatedtargetfile1 = qx/cd $macrosdir; cvs annotate $targetdmfile1/; my $annotatedtargethash1 = &parse_annotated_macrofile($annotatedtargetfile1); my $targetdmfile2 = &get_macrofile_name($targetlang, "2"); my $annotatedtargetfile2 = qx/cd $macrosdir; cvs annotate $targetdmfile2/; my $annotatedtargethash2 = &parse_annotated_macrofile($annotatedtargetfile2); # Macros needing updating are those in the target file that have been more recently # edited in the source file foreach $targetkey (sort (keys(%$annotatedtargethash1))) { my $targetdate = $annotatedtargethash1->{$targetkey}; my $sourcedate = $annotatedsourcehash1->{$targetkey}; if (&is_date_before($targetdate, $sourcedate) eq "true") { # print "Macro needs updating!!\n"; $targetkey = &get_macrofile_name($targetlang, "") . "::" . $targetkey; # print "Target key: $targetkey"; # print " Original target text: $targethash->{$targetkey}\n"; $updatedata{$targetkey} = $targethash->{$targetkey}; } } foreach $targetkey (sort (keys(%$annotatedtargethash2))) { my $targetdate = $annotatedtargethash2->{$targetkey}; my $sourcedate = $annotatedsourcehash2->{$targetkey}; if (&is_date_before($targetdate, $sourcedate) eq "true") { # print "Macro needs updating!!\n"; $targetkey = &get_macrofile_name($targetlang, "2") . "::" . $targetkey; # print "Target key: $targetkey"; # print " Original target text: $targethash->{$targetkey}\n"; $updatedata{$targetkey} = $targethash->{$targetkey}; } } } sub parse_annotated_macrofile { my ($annotatedfile) = @_; # Initialises some local variables my $currpackage; my %macros = (); my @annotations = split(/\n/, $annotatedfile); for ($i = 0; $i < scalar(@annotations); $i++) { $annotatedline = $annotations[$i]; chomp($annotatedline); # print "Line: $annotatedline\n"; ($line = $annotatedline) =~ s/.*\(.*\):\s//; # Check if a new package is being defined if ($line =~ m/^package (.*)/) { $currpackage = $1; chomp($currpackage); } # Line contains a macro name elsif ($line =~ m/^(_\w+_)/) { # Gets the name of the macro ($1 contains text matched by corresponding set # of parentheses in the last matched pattern within the dynamic scope) my $macroname = $1; my $macrodate = &extract_cvs_date($annotatedline); # while there is still text of the macro to go... while ($line !~ /.*\}/) { $annotatedline = $annotations[++$i]; chomp($annotatedline); my $linedate = &extract_cvs_date($annotatedline); if (&is_date_before($macrodate, $linedate) eq "true") { # This part of the macro has been updated more recently $macrodate = $linedate; } ($line = $annotatedline) =~ s/.*\(.*\):\s//; } # The key consists of package name and macro name my $key = $currpackage . "::" . $macroname; # The key maps to the macrodate $macros{$key} = $macrodate; # print "Macro: $key Date: $macrodate\n\n"; } # Icon: line in format ## "sometext" ## macro ## macroname ## elsif ($line =~ m/^\#\# (.*)/) { my $macroname = $1; my $macrodate = &extract_cvs_date($annotatedline); #if the macro text contains a new line will run over two lines unless ($line =~ m/^\#\# .*\#\#/) { $annotatedline = $annotations[++$i]; chomp($annotatedline); ($line = $annotatedline) =~ s/.*\(.*\):\s//; $macroname .= $line; } # Split the image macro header on ## my @names = split(/\s*\#\#\s*/, $macroname); # Overwrite macroname with macroname from ## ... ## ... ## HERE ### $macroname = $names[(scalar @names) - 1]; # Read the rest of the text associated with the image macro while ($line !~ /^\s*$/) { $annotatedline = $annotations[++$i]; chomp($annotatedline); my $linedate = &extract_cvs_date($annotatedline); if (&is_date_before($macrodate, $linedate) eq "true") { # This part of the macro has been updated more recently $macrodate = $linedate; } ($line = $annotatedline) =~ s/.*\(.*\):\s//; } # The key consists of package name and macro name my $key = $currpackage . "::" . $macroname; # The key maps to the macrodate $macros{$key} = $macrodate; # print "Macro: $key Date: $macrodate\n\n"; } } return (\%macros); } sub extract_cvs_date { my ($line) = @_; # Cut the CVS annotation off the front and extract the date $line =~ s/.*\(.*\):\s//; $annotation = $&; ($macrodate = $annotation) =~ s/.*\(.*(\s)+(.*)\): /$2/; return $macrodate; } # Returns true if $date1 is before $date2, false otherwise sub is_date_before { my ($date1, $date2) = @_; my %months = ("Jan", 1, "Feb", 2, "Mar", 3, "Apr", 4, "May", 5, "Jun", 6, "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12); @date1parts = split(/-/,$date1); @date2parts = split(/-/,$date2); # Compare year - nasty because we have rolled over into a new century $year1 = $date1parts[2]; if ($year1 < 80) { $year1 += 100; } $year2 = $date2parts[2]; if ($year2 < 80) { $year2 += 100; } # Compare year if ($year1 < $year2) { return "true"; } elsif ($year1 == $year2) { # Year is the same, so compare month if ($months{$date1parts[1]} < $months{$date2parts[1]}) { return "true"; } elsif ($months{$date1parts[1]} == $months{$date2parts[1]}) { # Month is the same, so compare day if ($date1parts[0] < $date2parts[0]) { return "true"; } } } return "false"; } sub display_hash { my ($hash) = @_; foreach $key (sort (keys %$hash)) { print $key . "\n"; print $hash->{$key} . "\n"; } } &main(@ARGV); sub get_language_codes { %languagecodes = ( 'abkhazian', 'ab', 'afar', 'aa', 'afrikaans', 'af', 'albanian', 'sq', 'amharic', 'am', 'arabic', 'ar', 'armenian', 'hy', 'assamese', 'as', 'aymara', 'ay', 'azerbaijani', 'az', 'bashkir', 'ba', 'basque', 'eu', 'bengali', 'bn', 'bangla', 'bn', 'bhutani', 'dz', 'bihari', 'bh', 'bislama', 'bi', 'breton', 'br', 'bulgarian', 'bg', 'burmese', 'my', 'byelorussian', 'be', 'belarusian', 'be', 'cambodian', 'km', 'catalan', 'ca', 'chinese', 'zh', 'corsican', 'co', 'croatian', 'hr', 'czech', 'cs', 'danish', 'da', 'dutch', 'nl', 'english', 'en', 'esperanto', 'eo', 'estonian', 'et', 'faeroese', 'fo', 'farsi', 'fa', 'fiji', 'fj', 'finnish', 'fi', 'french', 'fr', 'frisian', 'fy', 'galician', 'gl', 'gaelic (scottish)', 'gd', 'gaelic (manx)', 'gv', 'georgian', 'ka', 'german', 'de', 'greek', 'el', 'greenlandic', 'kl', 'guarani', 'gn', 'gujarati', 'gu', 'hausa', 'ha', 'hebrew', 'iw', 'hindi', 'hi', 'hungarian', 'hu', 'icelandic', 'is', 'indonesian', 'id', # Should be 'in' for backward compatibility 'interlingua', 'ia', 'interlingue', 'ie', 'inuktitut', 'iu', 'inupiak', 'ik', 'irish', 'ga', 'italian', 'it', 'japanese', 'ja', 'javanese', 'jv', 'kannada', 'kn', 'kashmiri', 'ks', 'kazakh', 'kk', 'kinyarwanda', 'rw', 'ruanda', 'rw', 'kirghiz', 'ky', 'kirundi', 'rn', 'rundi', 'rn', 'korean', 'ko', 'kurdish', 'ku', 'laothian', 'lo', 'latin', 'la', 'latvian', 'lv', 'lettish', 'lv', 'limburgish', 'li', 'limburger', 'li', 'lingala', 'ln', 'lithuanian', 'lt', 'macedonian', 'mk', 'malagasy', 'mg', 'malay', 'ms', 'malayalam', 'ml', 'maltese', 'mt', 'maori', 'mi', 'marathi', 'mr', 'moldavian', 'mo', 'mongolian', 'mn', 'nauru', 'na', 'nepali', 'ne', 'norwegian', 'no', 'occitan', 'oc', 'oriya', 'or', 'oromo', 'om', 'afan', 'om', 'galla', 'om', 'pashto', 'ps', 'pushto', 'ps', 'polish', 'pl', 'portuguese', 'pt', 'punjabi', 'pa', 'quechua', 'qu', 'rhaeto-romance', 'rm', 'romanian', 'ro', 'russian', 'ru', 'samoan', 'sm', 'sangro', 'sg', 'sanskrit', 'sa', 'serbian', 'sr', 'serbo-croatian', 'sh', 'sesotho', 'st', 'setswana', 'tn', 'shona', 'sn', 'sindhi', 'sd', 'sinhalese', 'si', 'siswati', 'ss', 'slovak', 'sk', 'slovenian', 'sl', 'somali', 'so', 'spanish', 'es', 'sundanese', 'su', 'swahili', 'sw', 'kiswahili', 'sw', 'swedish', 'sv', 'tagalog', 'tl', 'tajik', 'tg', 'tamil', 'ta', 'tatar', 'tt', 'telugu', 'te', 'thai', 'th', 'tibetan', 'bo', 'tigrinya', 'ti', 'tonga', 'to', 'tsonga', 'ts', 'turkish', 'tr', 'turkmen', 'tk', 'twi', 'tw', 'uighur', 'ug', 'ukrainian', 'uk', 'urdu', 'ur', 'uzbek', 'uz', 'vietnamese', 'vi', 'volapük', 'vo', 'welsh', 'cy', 'wolof', 'wo', 'xhosa', 'xh', 'yiddish', 'ji', 'yoruba', 'yo', 'zulu', 'zu' ); return (\%languagecodes); }