root/main/trunk/greenstone2/bin/script/gti.pl @ 32096

Revision 32096, 96.0 KB (checked in by ak19, 21 months ago)

Marking all the uses of sysread() with a comment saying they're a candidate to use FileUtils::readUTF8File() in future, if thinking about each case beforehand has confirmed that the contents will indeed be UTF8.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
RevLine 
[10019]1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# gti.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2005 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30BEGIN {
31    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
33}
34
[24627]35
[10019]36use iso639;
37use strict;
38use util;
[29415]39use FileUtils;
[10019]40
41my $gsdl_root_directory = "$ENV{'GSDLHOME'}";
42my $gti_log_file = &util::filename_cat($gsdl_root_directory, "etc", "gti.log");
[28976]43my $source_language_code = "en";  # This is non-negotiable
[10019]44
45my $gti_translation_files =
[24627]46[ # Greenstone macrofiles
47{ 'key' => "coredm",
[10019]48    'file_type' => "macrofile",
49    'source_file' => "macros/english.dm",
[11224]50    'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
[24627]51
52{ 'key' => "auxdm",
[10019]53    'file_type' => "macrofile",
54    'source_file' => "macros/english2.dm",
[11224]55    'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" },
[10019]56
[29411]57#{ 'key' => "paperspastdm",
58#   'file_type' => "macrofile",
59#   'source_file' => "macros/paperspast-english.dm",
60#   'target_file' => "macros/paperspast-{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
[25285]61
[24627]62# GLI dictionary
63{ 'key' => "glidict",
[13946]64    'file_type' => "resource_bundle",
65    'source_file' => "gli/classes/dictionary.properties",
66    'target_file' => "gli/classes/dictionary_{target_language_code}.properties" },
[10114]67
[24627]68# GLI help
69{ 'key' => "glihelp",
[13946]70    'file_type' => "greenstone_xml",
71    'source_file' => "gli/help/en/help.xml",
72    'target_file' => "gli/help/{target_language_code}/help.xml" },
[11602]73
[24627]74# Greenstone Perl modules
75{ 'key' => "perlmodules",
[10019]76    'file_type' => "resource_bundle",
[11634]77    'source_file' => "perllib/strings.properties",
78    'target_file' => "perllib/strings_{target_language_code}.properties" },
[10019]79
[28337]80# Greenstone Installer interface
81{ 'key' => "gsinstaller",
82    'file_type' => "resource_bundle",
83    'source_file' => "gsinstaller/LanguagePack.properties",
84    'target_file' => "gsinstaller/LanguagePack_{target_language_code}.properties" },
85
[24627]86# Greenstone tutorial exercises
87# { 'key' => "tutorials",
88# 'file_type' => "greenstone_xml",
89# 'source_file' => "gsdl-documentation/tutorials/xml-source/tutorial_en.xml",
90# 'target_file' => "gsdl-documentation/tutorials/xml-source/tutorial_{target_language_code}.xml" },
[11366]91
[24627]92# new Greenstone.org
93{ 'key' => "greenorg",
94    'file_type' => "resource_bundle",
95    'source_file' => "greenstoneorg/website/classes/Gsc.properties",
96    'target_file' => "greenstoneorg/website/classes/Gsc_{target_language_code}.properties"
[29411]97},
98
[29412]99# greenstone 3 interface files, from http://svn.greenstone.org/main/trunk/greenstone3/web/WEB-INF/classes
[29413]100# check it out as greenstone3
[29411]101{ 'key' => "gs3interface",
[29415]102        'file_type' => "resource_bundle",
103        'source_file' => "greenstone3",
104        'target_file' => "greenstone3"
[30581]105},
106
107# collection config display items of GS3 demo collections. Checked out as gs3-collection-configs
108# from http://svn.greenstone.org/main/trunk/gs3-collection-configs
109{ 'key' => "gs3colcfg",
110    'file_type' => "resource_bundle",
111    'source_file' => "gs3-collection-configs",
112    'target_file' => "gs3-collection-configs"
[24627]113}
114];
[10019]115
[30844]116my @gs3_col_cfg_files = ("lucene-jdbm-demo", "solr-jdbm-demo", "localsite");
[30581]117
[30490]118my @gs3_interface_files = ("interface_default", "ServiceRack", "metadata_names");
119#"AbstractBrowse", "AbstractGS2FieldSearch", "AbstractSearch", "AbstractTextSearch", "Authentication", "CrossCollectionSearch", "GS2LuceneSearch", "LuceneSearch", "MapRetrieve", "MapSearch", "PhindPhraseBrowse", "SharedSoleneGS2FieldSearch");
[11026]120
[26547]121# Auxilliary GS3 interface files. This list is not used at present
122# Combine with above list if generating translation spreadsheet for all interface files
[30490]123my @gs3_aux_interface_files = ("GATEServices","QBRWebServicesHelp", "Visualizer", "IViaSearch", "GS2Construct");
[18460]124
[30490]125my @gs3_other_interface_files = ("interface_default2", "interface_basic", "interface_basic2", "interface_nzdl", "interface_gs2");
126
[26547]127# Not: i18n, log4j
128
[10019]129sub main
130{
131    # Get the command to process, and any arguments
132    my $gti_command = shift(@_);
133    my @gti_command_arguments = @_;
[18460]134    my $module = $_[1];
[26544]135
136    # for GS3, set gsdl_root_dir to GSDL3HOME
[29411]137    #if($module && $module eq "gs3interface"){ # module is empty when the gti-command is create-glihelp-zip-file
138    #if($ENV{'GSDL3SRCHOME'}) {
139     #   $gsdl_root_directory = (defined $ENV{'GSDL3HOME'}) ? $ENV{'GSDL3HOME'} : &util::filename_cat($ENV{'GSDL3SRCHOME'}, "web");
140     #   $gti_log_file = &util::filename_cat($gsdl_root_directory, "logs", "gti.log");
141    #}
142    #}
[24627]143   
[10019]144    # Open the GTI log file for appending, or write to STDERR if that fails
145    if (!open(GTI_LOG, ">>$gti_log_file")) {
[24627]146        open(GTI_LOG, ">&STDERR");
[10019]147    }
[24627]148   
[10019]149    # Log the command that launched this script
150    &log_message("Command: $0 @ARGV");
[24627]151   
[10019]152    # Check that a command was supplied
153    if (!$gti_command) {
[24627]154        &throw_fatal_error("Missing command.");
[18460]155    }     
[24627]156   
[10019]157    # Process the command
[13948]158    if ($gti_command =~ /^get-all-chunks$/i) {
[24627]159        # Check that GS3 interface is the target
[30582]160        if ($module =~ m/^gs3/) { # gs3interface, gs3colcfg
[24627]161            print &get_all_chunks_for_gs3(@gti_command_arguments);
162        } else {
163            print &get_all_chunks(@gti_command_arguments);
164        }
[13948]165    }
[24627]166    elsif ($gti_command =~ /^get-first-n-chunks-requiring-work$/i) {
[30582]167        if ($module =~ m/^gs3/) {     
[24627]168            print &get_first_n_chunks_requiring_work_for_gs3(@gti_command_arguments);
169        } else {
170            print &get_first_n_chunks_requiring_work(@gti_command_arguments);
171        }
[10019]172    }
[25249]173    elsif ($gti_command =~ /^get-uptodate-chunks$/i) {
[30582]174        if ($module =~ m/^gs3/) {     
[25249]175            print &get_uptodate_chunks_for_gs3(@gti_command_arguments);
176        } else {
177            print &get_uptodate_chunks(@gti_command_arguments);
178        }
179    }
[24627]180    elsif ($gti_command =~ /^get-language-status$/i) {
181        print &get_language_status(@gti_command_arguments);       
[10019]182    }
[24627]183    elsif ($gti_command =~ /^search-chunks$/i) {
184        print &search_chunks(@gti_command_arguments);
[10019]185    }
[24627]186    elsif ($gti_command =~ /^submit-translations$/i) {
187        # This command cannot produce any output since it reads input
188        &submit_translations(@gti_command_arguments);
[10019]189    }
[24627]190    elsif ($gti_command =~ /^create-glihelp-zip-file$/i) {
191        # This command cannot produce any output since it reads input
192        &create_glihelp_zip_file(@gti_command_arguments);
[18460]193    }
[24627]194    else {
195        # The command was not recognized
196        &throw_fatal_error("Unknown command \"$gti_command\".");
197    }
[10019]198}
199
200
201sub throw_fatal_error
202{
203    my $error_message = shift(@_);
[24627]204   
[10019]205    # Write an XML error response
206    print "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
207    print "<GTIResponse>\n";
208    print "  <GTIError time=\"" . time() . "\">" . $error_message . "</GTIError>\n";
209    print "</GTIResponse>\n";
[24627]210   
[10019]211    # Log the error message, then die
212    &log_message("Error: $error_message");
213    die "\n";
214}
215
216
217sub log_message
218{
219    my $log_message = shift(@_);
220    print GTI_LOG time() . " -- " . $log_message . "\n";
221}
222
223
[13948]224sub get_all_chunks
225{
226    # The code of the target language (ensure it is lowercase)
227    my $target_language_code = lc(shift(@_));
228    # The key of the file to translate (ensure it is lowercase)
229    my $translation_file_key = lc(shift(@_));
[24627]230   
[13948]231    # Check that the necessary arguments were supplied
232    if (!$target_language_code || !$translation_file_key) {
[24627]233        &throw_fatal_error("Missing command argument.");
[13948]234    }
[24627]235   
[13948]236    # Get (and check) the translation configuration
237    my ($source_file, $target_file, $translation_file_type)
238    = &get_translation_configuration($target_language_code, $translation_file_key);
[24627]239   
[13948]240    # Parse the source language and target language files
241    my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
242    my @source_file_lines = &read_file_lines($source_file_path);
243    my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
244   
245    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
246    my @target_file_lines = &read_file_lines($target_file_path);
247    my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
[24627]248   
[13948]249    # Filter out any automatically translated chunks
250    foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
[24627]251        if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
252            delete $source_file_key_to_line_mapping{$chunk_key};
253            delete $target_file_key_to_line_mapping{$chunk_key};
254        }
[13948]255    }
[24627]256   
[13948]257    my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
258    my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
259    &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
260    &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
[24627]261   
[13948]262    my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
263    my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
[24627]264   
[18460]265    my $xml_response = &create_xml_response_for_all_chunks($translation_file_key, $target_file, \%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping, \%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);   
[13948]266   
267    return $xml_response;
268}
269
270
[25249]271sub get_uptodate_chunks
272{
273    # The code of the target language (ensure it is lowercase)
274    my $target_language_code = lc(shift(@_));
275    # The key of the file to translate (ensure it is lowercase)
276    my $translation_file_key = lc(shift(@_));
277   
278    # Check that the necessary arguments were supplied
279    if (!$target_language_code || !$translation_file_key) {
280        &throw_fatal_error("Missing command argument.");
281    }
282   
283    # Get (and check) the translation configuration
284    my ($source_file, $target_file, $translation_file_type)
285    = &get_translation_configuration($target_language_code, $translation_file_key);
286   
287    # Parse the source language and target language files
288    my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
289    my @source_file_lines = &read_file_lines($source_file_path);
290    my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
291   
292    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
293    my @target_file_lines = &read_file_lines($target_file_path);
294    my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
295   
296    # Filter out any automatically translated chunks
297    foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
298        if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
299            delete $source_file_key_to_line_mapping{$chunk_key};
300            delete $target_file_key_to_line_mapping{$chunk_key};
301        }
302    }
303   
304    my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
305    my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
306    &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
307    &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
308   
309    my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
310    my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
311 
312
313    # Chunks needing updating are those in the target file that have been more recently edited in the source file
314    # All others are uptodate (which implies that they have certainly been translated at some point and would not be empty)
315    my @uptodate_target_file_keys = ();
316    foreach my $chunk_key (keys(%source_file_key_to_last_update_date_mapping)) {
317        my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
318        my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
319       
320        # print "key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n";       
321       
322        if (defined($target_chunk_last_update_date) && !&is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
323            # &log_message("Chunk with key $chunk_key needs updating.");
324            push(@uptodate_target_file_keys, $chunk_key);
325        }
326    }
327
328    my $xml_response = &create_xml_response_for_uptodate_chunks($translation_file_key, $target_file, \@uptodate_target_file_keys, \%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping, \%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);   
329
330    return $xml_response;
331}
332
333
[10019]334sub get_first_n_chunks_requiring_work
335{
336    # The code of the target language (ensure it is lowercase)
337    my $target_language_code = lc(shift(@_));
338    # The key of the file to translate (ensure it is lowercase)
339    my $translation_file_key = lc(shift(@_));
340    # The number of chunks to return (defaults to one if not specified)
341    my $num_chunks_to_return = shift(@_) || "1";
[24627]342   
[10019]343    # Check that the necessary arguments were supplied
344    if (!$target_language_code || !$translation_file_key) {
[24627]345        &throw_fatal_error("Missing command argument.");
[10019]346    }
[24627]347   
[10019]348    # Get (and check) the translation configuration
349    my ($source_file, $target_file, $translation_file_type)
350    = &get_translation_configuration($target_language_code, $translation_file_key);
[28755]351
[10019]352    # Parse the source language and target language files
353    my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
354    my @source_file_lines = &read_file_lines($source_file_path);
355    my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
[14258]356   
[10019]357    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
358    my @target_file_lines = &read_file_lines($target_file_path);
359    my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
[24627]360   
[10019]361    # Filter out any automatically translated chunks
362    foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
[24627]363        if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
364            delete $source_file_key_to_line_mapping{$chunk_key};
365            delete $target_file_key_to_line_mapping{$chunk_key};
366        }
[10019]367    }
[24627]368   
[10019]369    my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
370    my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
371    &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
372    &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
[24627]373   
[10019]374    # Determine the target file chunks requiring translation
375    my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
376    &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
[24627]377   
[10019]378    # Determine the target file chunks requiring updating
379    my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
380    my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
381    my @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
382    &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
[24627]383   
[18460]384    my $xml_response = &create_xml_response_for_chunks_requiring_work($translation_file_key, $target_file, scalar(keys(%source_file_key_to_text_mapping)), \@target_file_keys_requiring_translation, \@target_file_keys_requiring_updating, $num_chunks_to_return, \%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping, \%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);   
385   
[10040]386    return $xml_response;
[10019]387}
388
389
390sub get_language_status
391{
392    # The code of the target language (ensure it is lowercase)
393    my $target_language_code = lc(shift(@_));
[24627]394   
[10019]395    # Check that the necessary arguments were supplied
396    if (!$target_language_code) {
[24627]397        &throw_fatal_error("Missing command argument.");
[10019]398    }
[24627]399   
[10040]400    # Form an XML response to the command
401    my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
402    $xml_response .= "<GTIResponse>\n";
403    $xml_response .= "  <LanguageStatus code=\"$target_language_code\">\n";
[18460]404   
[24627]405    foreach my $translation_file (@$gti_translation_files) {   
406        my ($num_source_chunks, $num_target_chunks, $num_chunks_requiring_translation, $num_chunks_requiring_updating) = 0;
407        my $target_file_name = "";
408       
[30582]409        if ($translation_file->{'key'} =~ m/^gs3/) { # gs3interface, gs3colcfg
[24627]410            my (%source_file_key_to_text_mapping, %target_file_key_to_text_mapping, %source_file_key_to_last_update_date_mapping, %target_file_key_to_last_update_date_mapping ) = ();
[30581]411            &build_gs3_configuration($translation_file->{'key'}, $target_language_code, \%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping, \%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping );   
[24627]412           
413            my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);     
414            my @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
415           
416            $num_source_chunks = scalar(keys(%source_file_key_to_text_mapping));
417            $num_target_chunks = scalar(keys(%target_file_key_to_text_mapping));
418            $num_chunks_requiring_translation = scalar(@target_file_keys_requiring_translation);
419            $num_chunks_requiring_updating = scalar(@target_file_keys_requiring_updating);
[18460]420        }
[24627]421        else {
422            # Get (and check) the translation configuration
423            my ($source_file, $target_file, $translation_file_type) = &get_translation_configuration($target_language_code, $translation_file->{'key'});
424            $target_file_name = $target_file;
425           
426            # Parse the source language and target language files
427            my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
428            my @source_file_lines = &read_file_lines($source_file_path);
429            my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
430           
431            my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
432            my @target_file_lines = &read_file_lines($target_file_path);
433            my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
434           
435            # Filter out any automatically translated chunks
436            foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
437                if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
438                    delete $source_file_key_to_line_mapping{$chunk_key};
439                    delete $target_file_key_to_line_mapping{$chunk_key};
440                }
441            }
442           
443            my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
444            my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
445           
446            # Determine the target file chunks requiring translation
447            my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);     
448           
449            # Determine the target file chunks requiring updating
450            my @target_file_keys_requiring_updating = ();
451            if (-e $target_file_path) {
452                my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
453                my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
454                @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);     
455            }
456           
457            $num_source_chunks = scalar(keys(%source_file_key_to_text_mapping));
458            $num_target_chunks = scalar(keys(%target_file_key_to_text_mapping));
459            $num_chunks_requiring_translation = scalar(@target_file_keys_requiring_translation);
460            $num_chunks_requiring_updating = scalar(@target_file_keys_requiring_updating);
461        }
462       
463        &log_message("Status of " . $translation_file->{'key'});
464        &log_message("Number of source chunks: " . $num_source_chunks);
465        &log_message("Number of target chunks: " . $num_target_chunks);
466        &log_message("Number of target chunks requiring translation: " . $num_chunks_requiring_translation);
467        &log_message("Number of target chunks requiring updating: " . $num_chunks_requiring_updating);
468       
469        $xml_response .= "    <TranslationFile"
[10019]470        . " key=\"" . $translation_file->{'key'} . "\""
[18460]471        . " target_file_path=\"" . $target_file_name . "\""
472        . " num_chunks_translated=\"" . ($num_source_chunks - $num_chunks_requiring_translation) . "\""
473        . " num_chunks_requiring_translation=\"" . $num_chunks_requiring_translation . "\""
474        . " num_chunks_requiring_updating=\"" . $num_chunks_requiring_updating . "\"\/>\n";
[10019]475    }
[24627]476   
[10040]477    $xml_response .= "  </LanguageStatus>\n";
[24627]478   
[10040]479    $xml_response .= "</GTIResponse>\n";
480    return $xml_response;
[10019]481}
482
483
484sub search_chunks
485{
486    # The code of the target language (ensure it is lowercase)
487    my $target_language_code = lc(shift(@_));
488    # The key of the file to translate (ensure it is lowercase)
489    my $translation_file_key = lc(shift(@_));
490    # The query string
[10020]491    my $query_string = join(' ', @_);
[24627]492   
[10019]493    # Check that the necessary arguments were supplied
494    if (!$target_language_code || !$translation_file_key || !$query_string) {
[24627]495        &throw_fatal_error("Missing command argument.");
[10019]496    }
[24627]497   
[18460]498    my ($source_file, $target_file, $translation_file_type) = ();
499    my %source_file_key_to_text_mapping = ();
500    my %target_file_key_to_text_mapping = ();
501   
502   
[30582]503    if ($translation_file_key !~ m/^gs3/) {
[24627]504        # Get (and check) the translation configuration
505        ($source_file, $target_file, $translation_file_type) = &get_translation_configuration($target_language_code, $translation_file_key);
506       
507        # Parse the source language and target language files
508        my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
509        my @source_file_lines = &read_file_lines($source_file_path);
510        my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
511       
512        my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
513        my @target_file_lines = &read_file_lines($target_file_path);
514        my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
515       
516        # Filter out any automatically translated chunks
517        foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
518            if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
519                delete $source_file_key_to_line_mapping{$chunk_key};
520                delete $target_file_key_to_line_mapping{$chunk_key};
521            }
522        }
523       
524        %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
525        %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
[10019]526    }
[18460]527    else {
[24627]528        # Not needed in this case
529        my (%source_file_key_to_gti_command_mapping, %target_file_key_to_gti_command_mapping) = ();
[30581]530        &build_gs3_configuration($translation_file_key, $target_language_code, \%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping,
[30548]531        \%source_file_key_to_gti_command_mapping, \%target_file_key_to_gti_command_mapping, 1);
[18460]532    }
[24627]533   
[10019]534    &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
535    &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
[24627]536   
[10019]537    # Determine the target file chunks matching the query
538    my @target_file_keys_matching_query = ();
539    foreach my $chunk_key (keys(%target_file_key_to_text_mapping)) {
[24627]540        my $target_file_text = $target_file_key_to_text_mapping{$chunk_key};
541        if ($target_file_text =~ /$query_string/i) {
542            # &log_message("Chunk with key $chunk_key matches query.");
543            push(@target_file_keys_matching_query, $chunk_key);
544        }
[10019]545    }
[24627]546   
[10040]547    # Form an XML response to the command
548    my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
549    $xml_response .= "<GTIResponse>\n";
[24627]550   
[10040]551    $xml_response .= "  <ChunksMatchingQuery size=\"" . scalar(@target_file_keys_matching_query) . "\">\n";
[10019]552    foreach my $chunk_key (@target_file_keys_matching_query) {
[24627]553        my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
554       
555        $xml_response .= "    <Chunk key=\"$chunk_key\">\n";
556        $xml_response .= "      <TargetFileText>$target_file_chunk_text</TargetFileText>\n";
557        $xml_response .= "    </Chunk>\n";
[10019]558    }
[10040]559    $xml_response .= "  </ChunksMatchingQuery>\n";
[24627]560   
[10040]561    $xml_response .= "</GTIResponse>\n";
562    return $xml_response;
[10019]563}
564
565
[10050]566sub submit_translations
[10019]567{
568    # The code of the target language (ensure it is lowercase)
569    my $target_language_code = lc(shift(@_));
570    # The key of the file to translate (ensure it is lowercase)
571    my $translation_file_key = lc(shift(@_));
[12484]572    # The username of the translation submitter
573    my $submitter_username = shift(@_);
[10027]574    # Whether to submit a target chunk even if it hasn't changed
575    my $force_submission_flag = shift(@_);
[24627]576   
[10019]577    # Check that the necessary arguments were supplied
[12484]578    if (!$target_language_code || !$translation_file_key || !$submitter_username) {
[24627]579        &log_message("Fatal error (but cannot be thrown): Missing command argument.");
580        die "\n";
[10019]581    }
[18460]582   
583    my %source_file_key_to_text_mapping = ();
584    my %source_file_key_to_gti_comment_mapping = ();
585    my %target_file_key_to_text_mapping = ();
586    my %target_file_key_to_gti_comment_mapping = ();
[24627]587   
[18460]588    my (@source_file_lines, @target_file_lines) = ();
589    my ($source_file, $target_file, $translation_file_type);
[24627]590   
[18460]591   
[30582]592    if ($translation_file_key !~ m/^gs3/) {
[24627]593        # Get (and check) the translation configuration
594        ($source_file, $target_file, $translation_file_type)
[18460]595        = &get_translation_configuration($target_language_code, $translation_file_key);
[24627]596       
597        # Parse the source language and target language files
598        @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
599        my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
600        %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
601        %source_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);   
602       
603        @target_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $target_file));
604        my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
605        %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
606        %target_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);   
[18460]607    }
608    else {
[30581]609        &build_gs3_configuration($translation_file_key, $target_language_code, \%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping,
[30548]610        \%source_file_key_to_gti_comment_mapping, \%target_file_key_to_gti_comment_mapping, 1);
[18460]611    }
[10019]612    &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
613    &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
[24627]614   
[10019]615    # Submission date
616    my $day = (localtime)[3];
617    my $month = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[(localtime)[4]];
618    my $year = (localtime)[5] + 1900;
619    my $submission_date = "$day-$month-$year";
[24627]620   
[10019]621    open(SUBMISSION, "-");
622    my @submission_lines = <SUBMISSION>;
623    close(SUBMISSION);
[24627]624   
[10019]625    # Remove any nasty carriage returns
[18460]626    # &log_message("Submission:");
[10019]627    foreach my $submission_line (@submission_lines) {
[24627]628        $submission_line =~ s/\r$//;
629        #&log_message("  $submission_line");
[10019]630    }
[24627]631   
[10019]632    my %source_file_key_to_submission_mapping = ();
633    my %target_file_key_to_submission_mapping = ();
634    for (my $i = 0; $i < scalar(@submission_lines); $i++) {
[24627]635        # Read source file part of submission
636        if ($submission_lines[$i] =~ /^\<SourceFileText key=\"(.+)\"\>/) {
637            my $chunk_key = $1;
638           
639            # Read the source file text
640            my $source_file_chunk_text = "";
641            $i++;
642            while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/SourceFileText\>/) {
643                $source_file_chunk_text .= $submission_lines[$i];
644                $i++;
645            }
646            $source_file_chunk_text =~ s/\n$//;  # Strip the extra newline character added
647            $source_file_chunk_text = &unmake_text_xml_safe($source_file_chunk_text);
648           
649            #&log_message("Source file key: $chunk_key");
650            #&log_message("Source file text: $source_file_chunk_text");
651            $source_file_key_to_submission_mapping{$chunk_key} = $source_file_chunk_text;
652        }
653       
654        # Read target file part of submission
655        if ($submission_lines[$i] =~ /^\<TargetFileText key=\"(.+)\"\>/) {
656            my $chunk_key = $1;
657           
658            # Read the target file text
659            my $target_file_chunk_text = "";
660            $i++;
661            while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/TargetFileText\>/) {
662                $target_file_chunk_text .= $submission_lines[$i];
663                $i++;
664            }
665            $target_file_chunk_text =~ s/\n$//;  # Strip the extra newline character added
666            $target_file_chunk_text = &unmake_text_xml_safe($target_file_chunk_text);
667           
668            #&log_message("Target file key: $chunk_key");
669            #&log_message("Target file text: $target_file_chunk_text");
670            $target_file_key_to_submission_mapping{$chunk_key} = $target_file_chunk_text;
671        }
[10019]672    }
[24627]673   
[10019]674    # -----------------------------------------
675    #   Validate the translation submissions
676    # -----------------------------------------
[24627]677   
[10019]678    # Check that the translations are valid
679    foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) {
[30687]680
[30719]681    # Kathy introduced escaped colons ("\:") into chunk keys in properties files (greenstone3/metadata_names), 
682    # but they're not escaped in the submitted XML versions, nor are they escaped in memory (in the $chunk_key)
[30687]683
[24627]684        # Make sure the submitted chunk still exists in the source file
[30719]685        if (!defined($source_file_key_to_text_mapping{$chunk_key})) {
[24627]686            &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission).");
687            delete $source_file_key_to_submission_mapping{$chunk_key};
688            delete $target_file_key_to_submission_mapping{$chunk_key};
689            next;
690        }
691       
692        # Make sure the submitted source chunk matches the source file chunk
[30719]693        if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) {
[28518]694        #if (&unmake_text_xml_safe($source_file_key_to_submission_mapping{$chunk_key}) ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) {
[28503]695                    #print STDERR "**** $source_file_key_to_submission_mapping{$chunk_key}\n";
696                #print STDERR "**** " . &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key}) ."\n";
697
[30719]698            &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission).");
[30687]699            &log_message("Submission source: |$source_file_key_to_submission_mapping{$chunk_key}|");
[30719]700            &log_message("      Source text: |$source_file_key_to_text_mapping{$chunk_key}|");
[24627]701            delete $source_file_key_to_submission_mapping{$chunk_key};
702            delete $target_file_key_to_submission_mapping{$chunk_key};
703            next;
704        }
[10027]705    }
[24627]706   
[10027]707    # Apply the submitted translations
708    foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) {
[24627]709        # Only apply the submission if it is a change, unless -force_submission has been specified
[30719]710        if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) {
711            $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
712            $target_file_key_to_gti_comment_mapping{$chunk_key} = "Updated $submission_date by $submitter_username";
[24627]713        }
[10019]714    }
[18460]715   
[30582]716    if ($translation_file_key !~ m/^gs3/) {
[24627]717        eval "&write_translated_${translation_file_type}(\$source_file, \\\@source_file_lines, \\\%source_file_key_to_text_mapping, \$target_file, \\\@target_file_lines, \\\%target_file_key_to_text_mapping, \\\%target_file_key_to_gti_comment_mapping, \$target_language_code)";
[18460]718    } else {
[30581]719        eval "&write_translated_gs3interface(\$translation_file_key, \\\%source_file_key_to_text_mapping, \\\%target_file_key_to_text_mapping, \\\%target_file_key_to_gti_comment_mapping, \$target_language_code)";
[18460]720    }
721}
[10019]722
[18460]723
724sub create_glihelp_zip_file
725{
726    my $target_language_code = shift(@_);
727    my $translation_file_key = "glihelp";
728   
729    &log_message("Creating GLI Help zip file for $target_language_code");
[24627]730   
[18460]731    my ($source_file, $target_file, $translation_file_type) = &get_translation_data_for($target_language_code, $translation_file_key);   
732   
733    my $classpath = &util::filename_cat($gsdl_root_directory, "gti-lib");
[28977]734    my $oldclasspath = $classpath;
[18460]735    if ( ! -e $classpath) {
[28977]736    $classpath = &util::filename_cat($gsdl_root_directory, "gli", "shared");
737    }
738    if ( ! -e $classpath) {
739        &throw_fatal_error("$classpath doesn't exist! (Neither does $oldclasspath.) Need the files in this directory (ApplyXLST and its related files) to create the zip file for GLI Help");
740    }
741
[28991]742   
743    my $perllib_path = &util::filename_cat($gsdl_root_directory, "perllib"); # strings.properties
744    my $gliclasses_path = &util::filename_cat($gsdl_root_directory, "gli", "classes"); # dictionary.properties
745    my $os = $^O;
746    my $path_separator = ($^O =~ m/mswin/i) ? ";" : ":";
747    my $xalan_path = &util::filename_cat($classpath, "xalan.jar");
748    $classpath = "$perllib_path$path_separator$gliclasses_path$path_separator$classpath$path_separator$xalan_path";
749
[18460]750    my $gli_help_directory = &util::filename_cat($gsdl_root_directory, "gli");
751    $gli_help_directory = &util::filename_cat($gli_help_directory, "help");
752   
753    my $gen_many_html_xsl_filepath = &util::filename_cat($gli_help_directory, "gen-many-html.xsl");
754    if ( ! -e $gen_many_html_xsl_filepath) {
[24627]755        &throw_fatal_error("$gen_many_html_xsl_filepath doesn't exist! Need this file to create the zip file for GLI Help");
[18460]756    }
[24627]757   
[18460]758    my $gen_index_xml_xsl_filepath = &util::filename_cat($gli_help_directory, "gen-index-xml.xsl");   
759    my $split_script_filepath = &util::filename_cat($gli_help_directory, "splithelpdocument.pl");   
760   
761    my $target_file_directory = &util::filename_cat($gli_help_directory, $target_language_code);
762    $target_file_directory = $target_file_directory."/";
[24627]763   
[18460]764    my $target_filepath = &util::filename_cat($gsdl_root_directory, $target_file);
[28991]765
766    # if gli/help/nl doesn't exist, create it by copying over gli/help/en/help.xml, then process the copied file
767    my ($tailname, $glihelp_lang_dir, $suffix) =  &File::Basename::fileparse($target_filepath, "\\.[^\\.]+\$");   
768    if(!&FileUtils::directoryExists($glihelp_lang_dir)) {
769
770    # copy across the gli/help/en/help.xml into a new folder for the new language gli/help/<newlang>
771    my $en_glihelp_dir = &util::filename_cat($gli_help_directory, "en");
[28992]772    my $en_helpxml_file = &util::filename_cat($en_glihelp_dir, "$tailname$suffix"); #$tailname$suffix="help.xml"
[28991]773    &FileUtils::copyFilesRecursiveNoSVN($en_helpxml_file, $glihelp_lang_dir);
774
[32096]775    # The following file reading section is a candidate to use FileUtils::readUTF8File()
776    # in place of calling sysread() directly. But only if we can reason we'd be working with UTF8
[28991]777    # In gli/help/<newlang>/help.xml, replace all occurrences of
778    # <Text id="1">This text in en will be removed for new langcode</Text>
779    # with <!-- Missing translation: 1 -->
780    open(FIN,"<$target_filepath") or &throw_fatal_error("Could not open $target_filepath for READING after creating it");
781    my $help_xml_contents;
782    # Read in the entire contents of the file in one hit
783    sysread(FIN, $help_xml_contents, -s FIN);
784    close(FIN);
[24627]785   
[28991]786    $help_xml_contents =~ s@<Text id="([^"]+?)">(.*?)</Text>@<!-- Missing translation: $1 -->@sg;
787
788    open(FOUT, ">$target_filepath") or &throw_fatal_error("Could not open $target_filepath for WRITING after creating it");
789    print FOUT $help_xml_contents;
790    close(FOUT);
791    }
792
[24627]793    my $perl_exec = &util::get_perl_exec();
794    my $java_exec = "java";
795    if(defined($ENV{'JAVA_HOME'}) && $ENV{'JAVA_HOME'} ne ""){
796        $java_exec = &util::filename_cat($ENV{'JAVA_HOME'}, "bin", "java");
[31751]797    } elsif(defined($ENV{'JRE_HOME'}) && $ENV{'JRE_HOME'} ne ""){
798        $java_exec = &util::filename_cat($ENV{'JRE_HOME'}, "bin", "java");
[24627]799    }
[28991]800
801    #my $cmd = "$java_exec -cp $classpath:$classpath/xalan.jar ApplyXSLT $target_language_code $gen_many_html_xsl_filepath $target_filepath | \"$perl_exec\" -S $split_script_filepath $target_file_directory";
802    my $cmd = "$java_exec -DGSDLHOME=$gsdl_root_directory -cp $classpath ApplyXSLT $target_language_code $gen_many_html_xsl_filepath $target_filepath | \"$perl_exec\" -S $split_script_filepath $target_file_directory";
803    #&throw_fatal_error("RAN gti command: $cmd");
[18460]804    my $response = `$cmd`;
[28991]805
806    #$cmd = "$java_exec -cp $classpath:$classpath/xalan.jar ApplyXSLT $target_language_code $gen_index_xml_xsl_filepath $target_filepath > " . $target_file_directory . "help_index.xml"; # 2>/dev/null";
807    $cmd = "$java_exec -cp $classpath -DGSDLHOME=$gsdl_root_directory ApplyXSLT $target_language_code $gen_index_xml_xsl_filepath $target_filepath > " . $target_file_directory . "help_index.xml"; # 2>/dev/null";
[18460]808    $response = `$cmd`;
[28991]809
810    # create a gti/tmp folder, if one doesn't already exist, and store the downloadable zip file in there
811    my $tmpdir = &util::filename_cat($gsdl_root_directory, "tmp");
812    if(!&FileUtils::directoryExists($tmpdir)) {
813    &FileUtils::makeDirectory($tmpdir);
814    }
[28977]815    #my $zip_file_path = "/greenstone/custom/gti/" . $target_language_code . "_GLIHelp.zip";   
[28991]816    my $zip_file_path = &util::filename_cat($tmpdir, $target_language_code . "_GLIHelp.zip");
[18460]817    $cmd = "zip -rj $zip_file_path $target_file_directory -i \*.htm \*.xml";
[28991]818
[18460]819    $response = `$cmd`;
[10019]820}
821
822
823sub get_translation_configuration
824{
825    # Get the code of the target language
826    my $target_language_code = shift(@_);
827    # Get the key of the file to translate
828    my $translation_file_key = shift(@_);
[24627]829   
[10019]830    # Read the translation data from the gti.cfg file
831    my ($source_file, $target_file, $translation_file_type) =
832    &get_translation_data_for($target_language_code, $translation_file_key);
[24627]833   
[10019]834    # Check that the file to translate is defined in the gti.cfg file
835    if (!$source_file || !$target_file || !$translation_file_type) {
[24627]836        &throw_fatal_error("Missing or incomplete specification for translation file \"$translation_file_key\" in gti.pl.");
[10019]837    }
[24627]838   
[10019]839    # Check that the source file exists
840    my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
841    if (!-e $source_file_path) {
[24627]842        &throw_fatal_error("Source file $source_file_path does not exist.");
[10019]843    }
[24627]844   
[10019]845    # Check that the source file is up to date
[11104]846    # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
[10019]847    # unless ($translation_file_is_not_in_cvs) {
[14258]848    #my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`;
[24627]849    my $source_file_cvs_status = `cd $gsdl_root_directory; svn status $source_file 2>/dev/null`;
[10019]850    if ($source_file_cvs_status =~ /^C /) {
851        &throw_fatal_error("Source file $source_file_path conflicts with the repository.");
852    }
853    if ($source_file_cvs_status =~ /^M /) {
854        &throw_fatal_error("Source file $source_file_path contains uncommitted changes.");
855    }
856    # }
[24627]857   
[10019]858    return ($source_file, $target_file, $translation_file_type);
859}
860
861
862sub get_translation_data_for
863{
864    my ($target_language_code, $translation_file_key) = @_;
[24627]865   
[10019]866    foreach my $translation_file (@$gti_translation_files) {
[24627]867        # If this isn't the correct translation file, move onto the next one
868        next if ($translation_file_key ne $translation_file->{'key'});
869       
870        # Resolve the target language file
871        my $target_language_file = $translation_file->{'target_file'};
872        if ($target_language_file =~ /(\{.+\;.+\})/) {
873            my $unresolved_target_language_file_part = $1;
874           
875            # Check for a special case for the target language code
876            if ($unresolved_target_language_file_part =~ /(\{|\;)$target_language_code:([^\;]+)(\;|\})/) {
877                my $resolved_target_language_file_part = $2;
878                $target_language_file =~ s/$unresolved_target_language_file_part/$resolved_target_language_file_part/;
879            }
880            # Otherwise use the last part as the default value
881            else {
882                my ($default_target_language_file_part) = $unresolved_target_language_file_part =~ /([^\;]+)\}/;
883            $target_language_file =~ s/$unresolved_target_language_file_part/\{$default_target_language_file_part\}/;           
[10019]884        }
[10041]885    }
[24627]886   
[10041]887    # Resolve instances of {iso_639_1_target_language_name}
888    my $iso_639_1_target_language_name = $iso639::fromiso639{$target_language_code};
889    $iso_639_1_target_language_name =~ tr/A-Z/a-z/ if $iso_639_1_target_language_name;
890    $target_language_file =~ s/\{iso_639_1_target_language_name\}/$iso_639_1_target_language_name/g;
[24627]891   
[10041]892    # Resolve instances of {target_language_code}
893    $target_language_file =~ s/\{target_language_code\}/$target_language_code/g;
[24627]894   
[10041]895    return ($translation_file->{'source_file'}, $target_language_file, $translation_file->{'file_type'});
[24627]896}
[10019]897
[24627]898return ();
[10019]899}
900
901
902sub read_file_lines
903{
904    my ($file_path) = @_;
[24627]905   
[10019]906    if (!open(FILE_IN, "<$file_path")) {
[24627]907        &log_message("Note: Could not open file $file_path.");
908        return ();
[10019]909    }
910    my @file_lines = <FILE_IN>;
911    close(FILE_IN);
[24627]912   
[10019]913    return @file_lines;
914}
915
916
917sub build_key_to_line_mapping
918{
919    my ($file_lines, $translation_file_type) = @_;
920    eval "return &build_key_to_line_mapping_for_${translation_file_type}(\@\$file_lines)";
921}
922
923
924sub build_key_to_text_mapping
925{
926    my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
[24627]927   
[10019]928    my %key_to_text_mapping = ();
929    foreach my $chunk_key (keys(%$key_to_line_mapping)) {
[24627]930        my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
931        my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
932       
933        my $chunk_text = @$file_lines[$chunk_starting_line];
934        for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
935            $chunk_text .= @$file_lines[$l];
936        }
937       
938        # Map from chunk key to text
939        eval "\$key_to_text_mapping{\${chunk_key}} = &import_chunk_from_${translation_file_type}(\$chunk_text)";
[30681]940
941        #if($chunk_key =~ m/document\\/) {
942            #&log_message("Submission source: $source_file_key_to_submission_mapping{$chunk_key}");
943            #&log_message("@@@ chunk key: $chunk_key");
944        #}
945
[10019]946    }
[24627]947   
[10019]948    return %key_to_text_mapping;
949}
950
951
952sub build_key_to_last_update_date_mapping
953{
954    my ($file, $file_lines, $key_to_line_mapping, $translation_file_type) = @_;
[24627]955   
[10019]956    # If the files aren't in CVS then we can't tell anything about what needs updating
957    # return () if ($translation_file_is_not_in_cvs);
[24627]958   
[10019]959    # Build a mapping from key to CVS date
960    # Need to be careful with this mapping because the chunk keys won't necessarily all be valid
961    my %key_to_cvs_date_mapping = &build_key_to_cvs_date_mapping($file, $translation_file_type);
[24627]962   
[10019]963    # Build a mapping from key to comment date
[12483]964    my %key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping($file_lines, $key_to_line_mapping, $translation_file_type);
[24627]965   
[10019]966    # Build a mapping from key to last update date (the latter of the CVS date and comment date)
967    my %key_to_last_update_date_mapping = ();
968    foreach my $chunk_key (keys(%$key_to_line_mapping)) {
[24627]969        # Use the CVS date as a starting point
970        my $chunk_cvs_date = $key_to_cvs_date_mapping{$chunk_key};
971        $key_to_last_update_date_mapping{$chunk_key} = $chunk_cvs_date;
[18460]972       
[24627]973        # If a comment date exists and it is after the CVS date, use that instead
974        # need to convert the comment date format to SVN format
975        my $chunk_gti_comment = $key_to_gti_comment_mapping{$chunk_key};
976        if (defined($chunk_gti_comment) && $chunk_gti_comment =~ /(\d?\d-\D\D\D-\d\d\d\d)/) {
977            my $chunk_comment_date = $1;           
978            if ((!defined($chunk_cvs_date) || &is_date_after($chunk_comment_date, $chunk_cvs_date))) {
979                $key_to_last_update_date_mapping{$chunk_key} = $chunk_comment_date;         
980            }
981        }
[10019]982    }
[24627]983   
[10019]984    return %key_to_last_update_date_mapping;
985}
986
987
988sub build_key_to_cvs_date_mapping
989{
990    my ($filename, $translation_file_type) = @_;
[24627]991   
992    # Use SVN to annotate each line of the file with the date it was last edited
[10019]993    # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
[24627]994    my $cvs_annotated_file = `cd $gsdl_root_directory; svn annotate -v --force $filename 2>/dev/null`;
[14258]995   
[10019]996    my @cvs_annotated_file_lines = split(/\n/, $cvs_annotated_file);
[24627]997   
[10019]998    my @cvs_annotated_file_lines_date = ();
999    foreach my $cvs_annotated_file_line (@cvs_annotated_file_lines) {
[24627]1000        # Extract the date from the SVN annotation at the front
1001        # svn format : 2007-07-16
[14258]1002        $cvs_annotated_file_line =~ s/^\s+\S+\s+\S+\s(\S+)//;
1003       
1004        push(@cvs_annotated_file_lines_date, $1);
1005       
1006        # trim extra date information in svn annotation format
1007        # 15:42:49 +1200 (Wed, 21 Jun 2006)
1008        $cvs_annotated_file_line =~ s/^\s+\S+\s\S+\s\((.+?)\)\s//;
1009    }   
1010   
[10019]1011    # Build a key to line mapping for the CVS annotated file, for matching the chunk key to the CVS date
1012    my %key_to_line_mapping = &build_key_to_line_mapping(\@cvs_annotated_file_lines, $translation_file_type);
[24627]1013   
[10019]1014    my %key_to_cvs_date_mapping = ();
1015    foreach my $chunk_key (keys(%key_to_line_mapping)) {
[24627]1016        my $chunk_starting_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[0];
1017        my $chunk_finishing_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[1];
1018       
1019        # Find the date this chunk was last edited, from the CVS annotation
1020        my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line];       
1021        for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
1022            if (&is_date_after($cvs_annotated_file_lines_date[$l], $chunk_date)) {
1023                # This part of the chunk has been updated more recently
1024                $chunk_date = $cvs_annotated_file_lines_date[$l];
1025               
1026            }
1027        }
1028       
1029        # Map from chunk key to CVS date
1030        $key_to_cvs_date_mapping{$chunk_key} = $chunk_date;
[10019]1031    }
[24627]1032   
[10019]1033    return %key_to_cvs_date_mapping;
1034}
1035
1036
[12483]1037sub build_key_to_gti_comment_mapping
[10019]1038{
1039    my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
[24627]1040   
[12483]1041    my %key_to_gti_comment_mapping = ();
[10019]1042    foreach my $chunk_key (keys(%$key_to_line_mapping)) {
[24627]1043        my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
1044        my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
1045       
1046        my $chunk_text = @$file_lines[$chunk_starting_line];
1047        for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
1048            $chunk_text .= @$file_lines[$l];
1049        }
1050       
1051        # Map from chunk key to GTI comment
1052        my $chunk_gti_comment;
1053        eval "\$chunk_gti_comment = &get_${translation_file_type}_chunk_gti_comment(\$chunk_text)";
1054        $key_to_gti_comment_mapping{$chunk_key} = $chunk_gti_comment if (defined($chunk_gti_comment));
[10019]1055    }
[24627]1056   
[12483]1057    return %key_to_gti_comment_mapping;
[10019]1058}
1059
1060
1061sub determine_chunks_requiring_translation
1062{
1063    my $source_file_key_to_text_mapping = shift(@_);
1064    my $target_file_key_to_text_mapping = shift(@_);
[24627]1065   
[10019]1066    # Chunks needing translation are those in the source file with no translation in the target file
1067    my @target_file_keys_requiring_translation = ();
1068    foreach my $chunk_key (keys(%$source_file_key_to_text_mapping)) {
[24627]1069        if ($source_file_key_to_text_mapping->{$chunk_key} && !$target_file_key_to_text_mapping->{$chunk_key}) {
1070            # &log_message("Chunk with key $chunk_key needs translating.");
1071            push(@target_file_keys_requiring_translation, $chunk_key);
1072        }
[10019]1073    }
[24627]1074   
[10019]1075    return @target_file_keys_requiring_translation;
1076}
1077
1078
1079sub determine_chunks_requiring_updating
1080{
1081    my $source_file_key_to_last_update_date_mapping = shift(@_);
1082    my $target_file_key_to_last_update_date_mapping = shift(@_);
[24627]1083   
[10019]1084    # Chunks needing updating are those in the target file that have been more recently edited in the source file
1085    my @target_file_keys_requiring_updating = ();
1086    foreach my $chunk_key (keys(%$source_file_key_to_last_update_date_mapping)) {
[24627]1087        my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping->{$chunk_key};
1088        my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping->{$chunk_key};
[14258]1089       
[30548]1090        # print "key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n";
1091
[14258]1092        if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
[24627]1093            # &log_message("Chunk with key $chunk_key needs updating.");
[30548]1094                # &log_message("key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n");
[24627]1095            push(@target_file_keys_requiring_updating, $chunk_key);
1096        }
[10019]1097    }
[24627]1098   
[10019]1099    return @target_file_keys_requiring_updating;
1100}
1101
1102
1103sub is_chunk_automatically_translated
1104{
1105    my ($chunk_key, $translation_file_type) = @_;
1106    eval "return &is_${translation_file_type}_chunk_automatically_translated(\$chunk_key)";
1107}
1108
1109
1110sub make_text_xml_safe
1111{
1112    my $text = shift(@_);
[11452]1113    $text =~ s/\&/\&amp\;/g;
1114    $text =~ s/\&amp\;lt\;/\&amp\;amp\;lt\;/g;
1115    $text =~ s/\&amp\;gt\;/\&amp\;amp\;gt\;/g;
[11498]1116    $text =~ s/\&amp\;rarr\;/\&amp\;amp\;rarr\;/g;
1117    $text =~ s/\&amp\;mdash\;/\&amp\;amp\;mdash\;/g;
[10019]1118    $text =~ s/</\&lt\;/g;
1119    $text =~ s/>/\&gt\;/g;
1120    return $text;
1121}
1122
1123
[11448]1124sub unmake_text_xml_safe
1125{
1126    my $text = shift(@_);
1127    $text =~ s/\&lt\;/</g;
1128    $text =~ s/\&gt\;/>/g;
1129    $text =~ s/\&amp\;/\&/g;
1130    return $text;
1131}
1132
1133
[10019]1134# Returns 1 if $date1 is after $date2, 0 otherwise
[14258]1135sub is_date_after_cvs
[10019]1136{
1137    my ($date1, $date2) = @_;
1138    my %months = ("Jan", 1, "Feb", 2, "Mar", 3, "Apr",  4, "May",  5, "Jun",  6,
[24627]1139    "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12);
1140   
[14258]1141    if(!defined $date1) {
1142        return 1;
1143    }
[24627]1144   
[10019]1145    my @date1parts = split(/-/, $date1);
1146    my @date2parts = split(/-/, $date2);
[24627]1147   
[10019]1148    # Compare year - nasty because we have rolled over into a new century
1149    my $year1 = $date1parts[2];
1150    if ($year1 < 80) {
[14258]1151        $year1 += 2000;
[10019]1152    }
1153    my $year2 = $date2parts[2];
1154    if ($year2 < 80) {
[14258]1155        $year2 += 2000;
[10019]1156    }
[24627]1157   
[10019]1158    # Compare year
1159    if ($year1 > $year2) {
[24627]1160        return 1;
[10019]1161    }
1162    elsif ($year1 == $year2) {
[24627]1163        # Year is the same, so compare month
1164        if ($months{$date1parts[1]} > $months{$date2parts[1]}) {
1165            return 1;
1166        }
1167        elsif ($months{$date1parts[1]} == $months{$date2parts[1]}) {
1168            # Month is the same, so compare day
1169            if ($date1parts[0] > $date2parts[0]) {
1170                return 1;
1171            }
1172        }
[10019]1173    }
[24627]1174   
[10019]1175    return 0;
1176}
1177
[14258]1178sub is_date_after
1179{
1180    my ($date1, $date2) = @_;
1181   
1182    if(!defined $date1) {
[24627]1183        return 1;
[14258]1184    }
1185    if(!defined $date2) {
[24627]1186        return 0;
[14258]1187    }
1188   
1189    # 16-Aug-2006
1190    if($date1=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){
[24627]1191        my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr",  "04", "May",  "05", "Jun",  "06",
1192        "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12");
1193        $date1=$3 . "-" . $months{$2} . "-" . $1;
1194        # print "** converted date1: $date1\n";
[14258]1195    }
1196    if($date2=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){
[24627]1197        my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr",  "04", "May",  "05", "Jun",  "06",
1198        "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12");
1199        $date2=$3 . "-" . $months{$2} . "-" . $1;
1200        # print "** converted date2: $date2\n";
[14258]1201    }
1202   
1203   
1204    # 2006-08-16
1205    my @date1parts = split(/-/, $date1);
1206    my @date2parts = split(/-/, $date2);
1207   
1208    # Compare year
1209    if ($date1parts[0] > $date2parts[0]) {
[24627]1210        return 1;
[14258]1211    }
1212    elsif ($date1parts[0] == $date2parts[0]) {
[24627]1213        # Year is the same, so compare month
1214        if ($date1parts[1] > $date2parts[1]) {
1215            return 1;
1216        }
1217        elsif ($date1parts[1] == $date2parts[1]) {
1218            # Month is the same, so compare day
1219            if ($date1parts[2] > $date2parts[2]) {
1220                return 1;
1221            }
1222        }
[14258]1223    }   
1224   
1225    return 0;
1226}
[10019]1227
[14258]1228
[18460]1229sub create_xml_response_for_chunks_requiring_work
1230{
1231    my ($translation_file_key, $target_file, $total_num_chunks, $target_files_keys_requiring_translation, $target_files_keys_requiring_updating, $num_chunks_to_return, $source_files_key_to_text_mapping, $target_files_key_to_text_mapping, $source_files_key_to_last_update_date_mapping, $target_files_key_to_last_update_date_mapping) = @_;
[24627]1232   
[18460]1233    # Form an XML response to the command
1234    my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
1235    $xml_response .= "<GTIResponse>\n";
1236    $xml_response .= "  <TranslationFile"
1237    . " key=\"" . $translation_file_key . "\""
1238    . " target_file_path=\"" . $target_file . "\""
1239    . " num_chunks_translated=\"" . ($total_num_chunks - scalar(@$target_files_keys_requiring_translation)) . "\""
1240    . " num_chunks_requiring_translation=\"" . scalar(@$target_files_keys_requiring_translation) . "\""
1241    . " num_chunks_requiring_updating=\"" . scalar(@$target_files_keys_requiring_updating) . "\"\/>\n";
[24627]1242   
[18460]1243    # Do chunks requiring translation first
1244    if ($num_chunks_to_return > scalar(@$target_files_keys_requiring_translation)) {
[24627]1245        $xml_response .= "  <ChunksRequiringTranslation size=\"" . scalar(@$target_files_keys_requiring_translation) . "\">\n";
[18460]1246    }
1247    else {
[24627]1248        $xml_response .= "  <ChunksRequiringTranslation size=\"" . $num_chunks_to_return . "\">\n";
[18460]1249    }
[24627]1250   
[18460]1251    my @sorted_chunk_keys = sort (@$target_files_keys_requiring_translation);
1252    foreach my $chunk_key (@sorted_chunk_keys) {
[24627]1253        last if ($num_chunks_to_return == 0);
1254       
1255        my $source_file_chunk_date = $source_files_key_to_last_update_date_mapping->{$chunk_key} || "";
1256        my $source_file_chunk_text = &make_text_xml_safe($source_files_key_to_text_mapping->{$chunk_key}); 
1257       
1258        $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
1259        $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";   
1260        $xml_response .= "      <TargetFileText></TargetFileText>\n";
1261        $xml_response .= "    </Chunk>\n";
1262       
1263        $num_chunks_to_return--;
[18460]1264    }
[24627]1265   
[18460]1266    $xml_response .= "  </ChunksRequiringTranslation>\n";
[24627]1267   
[18460]1268    # Then do chunks requiring updating
1269    if ($num_chunks_to_return > scalar(@$target_files_keys_requiring_updating)) {
[24627]1270        $xml_response .= "  <ChunksRequiringUpdating size=\"" . scalar(@$target_files_keys_requiring_updating) . "\">\n";
[18460]1271    }
1272    else {
[24627]1273        $xml_response .= "  <ChunksRequiringUpdating size=\"" . $num_chunks_to_return . "\">\n";
[18460]1274    }
[24627]1275   
[18460]1276    # foreach my $chunk_key (@target_file_keys_requiring_updating) {
1277    @sorted_chunk_keys = sort (@$target_files_keys_requiring_updating);
1278    foreach my $chunk_key (@sorted_chunk_keys) {
[24627]1279        last if ($num_chunks_to_return == 0);
1280       
1281        my $source_file_chunk_date = $source_files_key_to_last_update_date_mapping->{$chunk_key} || "";
1282        my $source_file_chunk_text = &make_text_xml_safe($source_files_key_to_text_mapping->{$chunk_key});
1283        my $target_file_chunk_date = $target_files_key_to_last_update_date_mapping->{$chunk_key} || "";
1284        my $target_file_chunk_text = &make_text_xml_safe($target_files_key_to_text_mapping->{$chunk_key});
1285       
1286        $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";   
1287        $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
1288        $xml_response .= "      <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
1289        $xml_response .= "    </Chunk>\n";
1290       
1291        $num_chunks_to_return--;
[18460]1292    }
[24627]1293   
[18460]1294    $xml_response .= "  </ChunksRequiringUpdating>\n";
[24627]1295   
[18460]1296    $xml_response .= "</GTIResponse>\n";
[24627]1297   
[18460]1298    return $xml_response;
1299}
1300
[25249]1301sub create_xml_response_for_uptodate_chunks
1302{
1303    my ($translation_file_key, $target_file, $uptodate_target_files_keys, $source_files_key_to_text_mapping, $target_files_key_to_text_mapping, $source_files_key_to_last_update_date_mapping, $target_files_key_to_last_update_date_mapping) = @_;
1304   
1305    # Form an XML response to the command
1306    my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
1307    $xml_response .= "<GTIResponse>\n";
1308    $xml_response .= "  <TranslationFile"
1309    . " key=\"" . $translation_file_key . "\""
1310    . " target_file_path=\"" . $target_file . "\""
1311    . " num_chunks_uptodate=\"" . scalar(@$uptodate_target_files_keys) . "\"\/>\n";
1312   
1313   
1314    # Then do chunks requiring updating
1315    $xml_response .= "  <UptodateChunks size=\"" . scalar(@$uptodate_target_files_keys) . "\">\n";
1316   
1317   
1318    # foreach my $chunk_key (@uptodate_target_file_keys) {
1319    my @sorted_chunk_keys = sort (@$uptodate_target_files_keys);
1320    foreach my $chunk_key (@sorted_chunk_keys) {
1321       
1322        my $source_file_chunk_date = $source_files_key_to_last_update_date_mapping->{$chunk_key} || "";
1323        my $source_file_chunk_text = &make_text_xml_safe($source_files_key_to_text_mapping->{$chunk_key});
1324        my $target_file_chunk_date = $target_files_key_to_last_update_date_mapping->{$chunk_key} || "";
1325        my $target_file_chunk_text = &make_text_xml_safe($target_files_key_to_text_mapping->{$chunk_key});
1326       
1327        $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";   
1328        $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
1329        $xml_response .= "      <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
1330        $xml_response .= "    </Chunk>\n";
[18460]1331
[25249]1332    }
1333   
1334    $xml_response .= "  </UptodateChunks>\n";
1335   
1336    $xml_response .= "</GTIResponse>\n";
1337   
1338    return $xml_response;
1339}
1340
[18460]1341sub create_xml_response_for_all_chunks
1342{
1343    my ($translation_file_key, $target_file, $source_file_key_to_text_mapping, $target_file_key_to_text_mapping, $source_file_key_to_last_update_date_mapping, $target_file_key_to_last_update_date_mapping) = @_;
[24627]1344   
[18460]1345    # Form an XML response to the command
1346    my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
1347    $xml_response .= "<GTIResponse>\n";
1348    $xml_response .= "  <TranslationFile"
1349    . " key=\"" . $translation_file_key . "\""
1350    . " target_file_path=\"" . $target_file . "\"\/>\n";
1351   
1352    # Do all the chunks
1353    $xml_response .= "  <Chunks size=\"" . scalar(keys(%$source_file_key_to_text_mapping)) . "\">\n";
[24627]1354   
[18460]1355    my @sorted_chunk_keys = sort (keys(%$source_file_key_to_text_mapping));
1356    foreach my $chunk_key (@sorted_chunk_keys) {
[24627]1357        my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping->{$chunk_key} || "";
1358        my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping->{$chunk_key});
1359       
1360        $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
1361        $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
1362        if (defined($target_file_key_to_text_mapping->{$chunk_key})) {
1363            my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping->{$chunk_key} || "";
1364            my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping->{$chunk_key});
1365            $xml_response .= "      <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
1366        }
1367        else {
1368            $xml_response .= "      <TargetFileText></TargetFileText>\n";
1369        }
1370       
1371        $xml_response .= "    </Chunk>\n";
[18460]1372    }
1373    $xml_response .= "  </Chunks>\n";
1374   
1375    $xml_response .= "</GTIResponse>\n";
1376    return $xml_response;
1377}
1378
1379
1380
[10019]1381# ==========================================================================================
1382#   MACROFILE FUNCTIONS
1383
1384sub build_key_to_line_mapping_for_macrofile
1385{
1386    my (@file_lines) = @_;
[24627]1387   
[10019]1388    my $macro_package;
1389    my %chunk_key_to_line_mapping = ();
1390    # Process the contents of the file, line by line
1391    for (my $i = 0; $i < scalar(@file_lines); $i++) {
[24627]1392        my $line = $file_lines[$i];
1393        $line =~ s/(\s*)$//;  # Remove any nasty whitespace, carriage returns etc.
1394       
1395        # Check if a new package is being defined
1396        if ($line =~ m/^package\s+(.+)/) {
1397            $macro_package = $1;
[10041]1398        }
[24627]1399       
1400        # Line contains a macro name
1401        elsif ($line =~ m/^(_\w+_)/) {
1402            my $macro_key = $1;
1403            $line =~ s/\s*([^\\]\#[^\}]+)?$//;  # Remove any comments and nasty whitespace
1404           
1405            # While there is still text of the macro to go...
1406            my $startline = $i;
1407            while ($line !~ /\}$/) {
1408                $i++;
1409                if ($i == scalar(@file_lines)) {
1410                    &throw_fatal_error("Could not find end of macro $macro_key.");
1411                }
1412                $line = $file_lines[$i];
1413                $line =~ s/\s*([^\\]\#[^\}]+)?$//;  # Remove any comments and nasty whitespace
1414            }
1415       
[10019]1416        # The chunk key consists of the package name and the macro key
1417        my $chunk_key = $macro_package . "." . $macro_key;
1418        # Map from chunk key to line
1419        $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1420    }
[24627]1421   
[10019]1422    # Icon: line in format ## "image text" ## image_type ## macro_name ##
1423    elsif ($line =~ m/^\#\# .* \#\# .* \#\# (.*) \#\#/) {
[24627]1424    # The chunk key consists of package name and macro key
1425    my $chunk_key = $macro_package . "." . $1;
1426    # Map from chunk key to line
1427    $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1428}
1429}
[10019]1430
[24627]1431return %chunk_key_to_line_mapping;
[10019]1432}
1433
1434
1435sub import_chunk_from_macrofile
1436{
1437    my ($chunk_text) = @_;
[24627]1438   
[10019]1439    # Is this an icon macro??
1440    if ($chunk_text =~ /^\#\# (.*)/) {
[24627]1441        # Extract image macro text
1442        $chunk_text =~ /^\#\#\s+([^\#]+)\s+\#\#/;
1443        $chunk_text = $1;
1444   
[10019]1445    # Remove enclosing quotes
1446    $chunk_text =~ s/^\"//;
1447    $chunk_text =~ s/\"$//;
[30562]1448    }
[10019]1449
[30562]1450    # No, so it must be a text macro
1451    else {
[10019]1452    # Remove macro key
1453    $chunk_text =~ s/^_([^_]+)_(\s*)//;
[24627]1454   
[10019]1455    # Remove language specifier
[30562]1456    $chunk_text =~ s/^\[l=[^\]]*\](\s*)//; # only remove until first closing square bracket, ]
[24627]1457   
[10019]1458    # Remove braces enclosing text
1459    $chunk_text =~ s/^{(\s*)((.|\n)*)}(\s*)(\#.+\s*)?/$2/;
[30562]1460    }
[10019]1461
[30562]1462    return $chunk_text;
[10019]1463}
1464
1465
[12483]1466sub get_macrofile_chunk_gti_comment
[10019]1467{
1468    my ($chunk_text) = @_;
[24627]1469   
[10019]1470    # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
[12484]1471    if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
[24627]1472        return $1;
1473}
[10019]1474
[24627]1475return undef;
[10019]1476}
1477
1478
1479sub is_macrofile_chunk_automatically_translated
1480{
1481    my ($chunk_key) = @_;
[24627]1482   
[10019]1483    # The _httpiconX_, _widthX_ and _heightX_ image macros are automatically translated
1484    if ($chunk_key =~ /\._(httpicon|width|height)/) {
[24627]1485        return 1;
[10019]1486    }
[24627]1487   
[10019]1488    return 0;
1489}
1490
1491
1492# Use the source file to generate a target file that is formatted the same
1493sub write_translated_macrofile
1494{
1495    my $source_file = shift(@_);  # Not used
[11321]1496    my @source_file_lines = @{shift(@_)};
[10019]1497    my $source_file_key_to_text_mapping = shift(@_);
1498    my $target_file = shift(@_);
[11321]1499    my @target_file_lines = @{shift(@_)};
[10019]1500    my $target_file_key_to_text_mapping = shift(@_);
[12483]1501    my $target_file_key_to_gti_comment_mapping = shift(@_);
[10019]1502    my $target_language_code = shift(@_);
[24627]1503   
[10126]1504    # Build a mapping from source file line to chunk key
[11321]1505    my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@source_file_lines);
[10019]1506    my %source_file_line_to_key_mapping = ();
1507    foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
[24627]1508        $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
[10019]1509    }
[10126]1510    my @source_file_line_keys = (sort sort_by_line (keys(%source_file_line_to_key_mapping)));
1511    my $source_file_line_number = 0;
[24627]1512   
[10126]1513    # Build a mapping from target file line to chunk key
[11321]1514    my %target_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@target_file_lines);
[10126]1515    my %target_file_line_to_key_mapping = ();
1516    foreach my $chunk_key (keys(%target_file_key_to_line_mapping)) {
[24627]1517        $target_file_line_to_key_mapping{$target_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
[10126]1518    }
1519    my @target_file_line_keys = (sort sort_by_line (keys(%target_file_line_to_key_mapping)));
[24627]1520   
[10019]1521    # Write the new target file
1522    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1523    if (!open(TARGET_FILE, ">$target_file_path")) {
[24627]1524        &throw_fatal_error("Could not write target file $target_file_path.");
[10019]1525    }
[24627]1526   
[10126]1527    # Use the header from the target file, to keep language and author information
1528    if (scalar(@target_file_line_keys) > 0) {
[24627]1529        my $target_file_line_number = 0;
1530        my $target_file_chunk_starting_line_number = (split(/-/, $target_file_line_keys[0]))[0];
1531        while ($target_file_line_number < $target_file_chunk_starting_line_number) {
1532            my $target_file_line = $target_file_lines[$target_file_line_number];
1533            last if ($target_file_line =~ /^\# -- Missing translation: /);  # We don't want to get into the macros
1534                print TARGET_FILE $target_file_line;
1535            $target_file_line_number++;
1536        }
1537       
1538        $source_file_line_number = (split(/-/, $source_file_line_keys[0]))[0];
[10126]1539    }
[24627]1540   
[10019]1541    # Model the new target file on the source file, with the target file translations
[10126]1542    foreach my $line_key (@source_file_line_keys) {
[24627]1543        # Fill in the gaps before this chunk starts
1544        my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1545        my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1546        while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1547            print TARGET_FILE $source_file_lines[$source_file_line_number];
1548            $source_file_line_number++;
1549        }
1550        $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1551       
1552        my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1553        my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1554        my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1555       
1556        my $macrofile_key = $chunk_key;
1557        $macrofile_key =~ s/^(.+?)\.//;
1558       
1559        # If no translation exists for this chunk, show this, and move on
1560        if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1561            print TARGET_FILE "# -- Missing translation: $macrofile_key\n";
1562            next;
1563        }
1564       
1565        # Grab the source chunk text
1566        my $source_file_chunk = $source_file_lines[$source_file_chunk_starting_line_number];
1567        for (my $l = ($source_file_chunk_starting_line_number + 1); $l <= $source_file_chunk_finishing_line_number; $l++) {
1568            $source_file_chunk .= $source_file_lines[$l];
1569        }
1570       
1571        # Is this an icon macro??
1572        if ($source_file_chunk =~ /^\#\# (.*)/) {
1573            # Escape any newline and question mark characters so the source text is replaced correctly
1574            $source_file_chunk_text =~ s/\\/\\\\/g;
[10019]1575        $source_file_chunk_text =~ s/\?/\\\?/g;
[24627]1576       
[10019]1577        # Build the new target chunk from the source chunk
1578        my $target_file_chunk = $source_file_chunk;
1579        $target_file_chunk =~ s/$source_file_chunk_text/$target_file_chunk_text/;
[10050]1580        $target_file_chunk =~ s/(\s)*$//;
[10019]1581        print TARGET_FILE "$target_file_chunk";
1582    }
[24627]1583   
[10019]1584    # No, it is just a normal text macro
1585    else {
1586        print TARGET_FILE "$macrofile_key [l=$target_language_code] {$target_file_chunk_text}";
1587    }
[24627]1588   
[12483]1589    # Add the "updated" comment, if one exists
1590    if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1591        print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
[10050]1592    }
1593    print TARGET_FILE "\n";
[24627]1594}
[10019]1595
[24627]1596close(TARGET_FILE);
[10019]1597}
1598
1599
1600sub sort_by_line
1601{
1602    return ((split(/-/, $a))[0] <=> (split(/-/, $b))[0]);
1603}
1604
1605
1606# ==========================================================================================
1607#   RESOURCE BUNDLE FUNCTIONS
1608
[29456]1609# need to handle multi-line properties. A multiline ends on \ if it continues over the next line
[10019]1610sub build_key_to_line_mapping_for_resource_bundle
1611{
1612    my (@file_lines) = @_;
[24627]1613   
[11321]1614    my %chunk_key_to_line_mapping = ();
[29456]1615
1616    my $chunk_key;
1617    my $startindex = -1;
1618
[10019]1619    for (my $i = 0; $i < scalar(@file_lines); $i++) {
[24627]1620        my $line = $file_lines[$i];
1621        $line =~ s/(\s*)$//;  # Remove any nasty whitespace, carriage returns etc.
1622       
[30719]1623        # a property line has a colon/equals sign as separator that is NOT escaped with a backslash (both keys and values
1624        # can use the colon or = sign. But in the key, such a char is always escaped. Unfortunately, they've not always been
1625        # escaped in the values. So we get the left most occurrence by not doing a greedy match (use ? to not be greedy).
1626        # So find the first :/= char not preceded by \. That will be the true separator of a chunk_key and its value chunk_text
[30687]1627
[30735]1628        if ($line =~ m/^(\S*?[^\\])[:|=](.*)$/) {
[30719]1629            # Line contains a dictionary string
[30687]1630
[30719]1631            # Unused but useful: http://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl
1632            # http://perldoc.perl.org/perlvar.html
[30687]1633           
[30719]1634            $chunk_key = $1;
1635            # remove the escaping of any :/= property separator from the chunk_key in memory,
1636            # to make comparison with its unescaped version during submissions easier. Will write out with escaping.
1637            $chunk_key =~ s/\\([:=])/$1/g;       
1638           
[29456]1639            $startindex = $i;
1640        }       
1641        if ($startindex != -1) {
1642            if($line !~ m/\\$/) { # line finished
1643            # $i keeps track of the line at which this property (chunk_key) finishes
1644
[24627]1645            # Map from chunk key to line
[29456]1646            $chunk_key_to_line_mapping{$chunk_key} = $startindex . "-" . $i;
1647            $startindex = -1;
1648            $chunk_key = "";
1649            }
[30687]1650        }       
[10019]1651    }
[24627]1652   
[11321]1653    return %chunk_key_to_line_mapping;
[10019]1654}
1655
1656
1657sub import_chunk_from_resource_bundle
1658{
1659    my ($chunk_text) = @_;
[24627]1660   
[30719]1661    # Simple: just remove string key.
1662    # But key can contain an escaped separator (\: or \=).
1663    # So just as in the previous subroutine, find the first (leftmost) : or = char not preceded by \.
1664    # That will be the true separator of a chunk_key and its value chunk_text
[30735]1665    $chunk_text =~ s/^(\S*?[^\\])[:|=](\s*)//s;
[30687]1666
[29456]1667    $chunk_text =~ s/(\s*)$//s;  # Remove any nasty whitespace, carriage returns etc.
1668    $chunk_text =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d.*)\s*$//is;
[24627]1669   
[10019]1670    return $chunk_text;
1671}
1672
1673
[12483]1674sub get_resource_bundle_chunk_gti_comment
[10019]1675{
1676    my ($chunk_text) = @_;
[24627]1677   
[10019]1678    # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
[12484]1679    if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
[24627]1680        return $1;
[30719]1681    }
[10019]1682
[30719]1683    return undef;
[10019]1684}
1685
1686
1687sub is_resource_bundle_chunk_automatically_translated
1688{
1689    # No resource bundle chunks are automatically translated
1690    return 0;
1691}
1692
1693
1694sub write_translated_resource_bundle
1695{
1696    my $source_file = shift(@_);  # Not used
[11321]1697    my @source_file_lines = @{shift(@_)};
[10019]1698    my $source_file_key_to_text_mapping = shift(@_);
1699    my $target_file = shift(@_);
[11321]1700    my @target_file_lines = @{shift(@_)};  # Not used
[10019]1701    my $target_file_key_to_text_mapping = shift(@_);
[12483]1702    my $target_file_key_to_gti_comment_mapping = shift(@_);
[10019]1703    my $target_language_code = shift(@_);  # Not used
[24627]1704   
[10019]1705    # Build a mapping from chunk key to source file line, and from source file line to chunk key
[11321]1706    my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_resource_bundle(@source_file_lines);
[10019]1707    my %source_file_line_to_key_mapping = ();
1708    foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
[24627]1709        $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
[10019]1710    }
[24627]1711   
[10019]1712    # Write the new target file
1713    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1714    if (!open(TARGET_FILE, ">$target_file_path")) {
[24627]1715        &throw_fatal_error("Could not write target file $target_file_path.");
[10019]1716    }
[24627]1717   
[10019]1718    # Model the new target file on the source file, with the target file translations
[10091]1719    my $source_file_line_number = 0;
[10019]1720    foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
[24627]1721        # Fill in the gaps before this chunk starts
1722        my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1723        my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1724        while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1725            print TARGET_FILE $source_file_lines[$source_file_line_number];
1726            $source_file_line_number++;
1727        }
1728        $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1729       
1730        my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1731        my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1732        my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1733       
[30719]1734        # make sure any : or = sign in the chunk key is escaped again (with \) when written out
1735        # since the key-value separator in a property resource bundle file is : or =
1736        my $escaped_chunk_key = $chunk_key;
1737        $escaped_chunk_key =~ s/(:|=)/\\$1/g; #$escaped_chunk_key =~ s/([^\\])(:|=)/\\$1$2/g;
1738       
[24627]1739        # If no translation exists for this chunk, show this, and move on
1740        if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
[30719]1741            print TARGET_FILE "# -- Missing translation: $escaped_chunk_key\n";
[24627]1742            next;
1743        }
[30719]1744
1745        print TARGET_FILE "$escaped_chunk_key:$target_file_chunk_text";
[24627]1746        if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1747            print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
1748        }
1749        print TARGET_FILE "\n";
[10019]1750    }
[24627]1751   
[10019]1752    close(TARGET_FILE);
1753}
1754
1755
[11321]1756# ==========================================================================================
1757#   GREENSTONE XML FUNCTIONS
1758
1759sub build_key_to_line_mapping_for_greenstone_xml
1760{
1761    my (@file_lines) = @_;
[24627]1762   
[11321]1763    my %chunk_key_to_line_mapping = ();
1764    for (my $i = 0; $i < scalar(@file_lines); $i++) {
[24627]1765        my $line = $file_lines[$i];
1766        $line =~ s/(\s*)$//;  # Remove any nasty whitespace, carriage returns etc.
1767       
1768        # Line contains a string to translate
1769        if ($line =~ /^\s*<Text id=\"(.*?)\">/) {
1770            my $chunk_key = $1;
1771            $line =~ s/\s*$//;  # Remove any nasty whitespace
1772            $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1773           
1774            # While there is still text of the string to go...
1775            my $startline = $i;
1776            while ($line !~ /<\/Text>$/) {
1777                $i++;
1778                if ($i == scalar(@file_lines)) {
1779                    &throw_fatal_error("Could not find end of string $chunk_key.");
1780                }
1781                $line = $file_lines[$i];
1782                $line =~ s/\s*$//;  # Remove any nasty whitespace
1783                $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1784            }
1785           
1786            # Map from chunk key to line
1787            if (!defined($chunk_key_to_line_mapping{$chunk_key})) {
1788                $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1789            }
1790            else {
1791                &throw_fatal_error("Duplicate key $chunk_key.");
1792            }
[11321]1793        }
1794    }
[24627]1795   
[11321]1796    return %chunk_key_to_line_mapping;
1797}
1798
1799
1800sub import_chunk_from_greenstone_xml
1801{
1802    my ($chunk_text) = @_;
[24627]1803   
[11321]1804    # Simple: just remove the Text tags
[11447]1805    $chunk_text =~ s/^\s*<Text id=\"(.*?)\">(\s*)//;
[12484]1806    $chunk_text =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
[11321]1807    $chunk_text =~ s/<\/Text>$//;
[24627]1808   
[11321]1809    return $chunk_text;
1810}
1811
1812
[12483]1813sub get_greenstone_xml_chunk_gti_comment
[11321]1814{
1815    my ($chunk_text) = @_;
[24627]1816   
[11321]1817    # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
[12484]1818    if ($chunk_text =~ /<Updated date=\"(\d?\d-\D\D\D-\d\d\d\d.*)\"\/>$/i) {
[24627]1819        return $1;
[11487]1820    }
[24627]1821   
[11321]1822    return undef;
1823}
1824
1825
1826sub is_greenstone_xml_chunk_automatically_translated
1827{
1828    # No greenstone XML chunks are automatically translated
1829    return 0;
1830}
1831
1832
1833sub write_translated_greenstone_xml
1834{
1835    my $source_file = shift(@_);  # Not used
1836    my @source_file_lines = @{shift(@_)};
1837    my $source_file_key_to_text_mapping = shift(@_);
1838    my $target_file = shift(@_);
1839    my @target_file_lines = @{shift(@_)};  # Not used
1840    my $target_file_key_to_text_mapping = shift(@_);
[12483]1841    my $target_file_key_to_gti_comment_mapping = shift(@_);
[11321]1842    my $target_language_code = shift(@_);  # Not used
[24627]1843   
[11321]1844    # Build a mapping from chunk key to source file line, and from source file line to chunk key
1845    my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_greenstone_xml(@source_file_lines);
1846    my %source_file_line_to_key_mapping = ();
1847    foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
[24627]1848        $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
[11321]1849    }
[24627]1850   
[11321]1851    # Write the new target file
1852    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1853    if (!open(TARGET_FILE, ">$target_file_path")) {
[24627]1854        &throw_fatal_error("Could not write target file $target_file_path.");
[11321]1855    }
[24627]1856   
[11321]1857    # Model the new target file on the source file, with the target file translations
1858    my $source_file_line_number = 0;
1859    foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
[24627]1860        # Fill in the gaps before this chunk starts
1861        my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1862        my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1863        while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1864            print TARGET_FILE $source_file_lines[$source_file_line_number];
1865            $source_file_line_number++;
1866        }
1867        $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1868       
1869        my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1870        my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1871        my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1872        $target_file_chunk_text =~ s/(\n)*$//g;
1873       
1874        # If no translation exists for this chunk, show this, and move on
1875        if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1876            print TARGET_FILE "<!-- Missing translation: $chunk_key -->\n";
1877            next;
1878        }
1879       
1880        print TARGET_FILE "<Text id=\"$chunk_key\">$target_file_chunk_text</Text>";
1881        if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1882            my $chunk_gti_comment = $target_file_key_to_gti_comment_mapping->{$chunk_key};
1883            $chunk_gti_comment =~ s/^Updated //;
1884            print TARGET_FILE "<Updated date=\"" . $chunk_gti_comment . "\"\/>";
1885        }
1886        print TARGET_FILE "\n";
[11321]1887    }
[24627]1888   
[11321]1889    # Fill in the end of the file
1890    while ($source_file_line_number < scalar(@source_file_lines)) {
[24627]1891        print TARGET_FILE $source_file_lines[$source_file_line_number];
1892        $source_file_line_number++;
[11321]1893    }
[24627]1894   
[11321]1895    close(TARGET_FILE);
1896}
1897
1898
[18460]1899# ==========================================================================================
1900#   GREENSTONE3 FUNCTIONS
1901
1902sub get_all_chunks_for_gs3
1903{
1904    # The code of the target language (ensure it is lowercase)
1905    my $target_language_code = lc(shift(@_));
1906    my $translation_file_key = lc(shift(@_));
[24627]1907   
[18460]1908    # Check that the necessary arguments were supplied
1909    if (!$target_language_code) {
[24627]1910        &throw_fatal_error("Missing command argument.");
[18460]1911    }
[24627]1912   
[18460]1913    # Get (and check) the translation configuration
1914    # my ($source_file_dir, $target_file, $translation_file_type) = &get_translation_configuration($target_language_code, $translation_file_key);
1915   
1916    my %source_files_key_to_text_mapping = ();
1917    my %target_files_key_to_text_mapping = ();
1918    my %source_files_key_to_last_update_date_mapping = ();
1919    my %target_files_key_to_last_update_date_mapping = ();
[24627]1920   
[30581]1921    &build_gs3_configuration($translation_file_key, $target_language_code, \%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping, \%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);
[24627]1922   
[18460]1923    &log_message("Total number of source chunks: " . scalar(keys(%source_files_key_to_text_mapping)));
1924    &log_message("Total number of target chunks: " . scalar(keys(%target_files_key_to_text_mapping)));
[24627]1925   
[18460]1926    my $xml_response = &create_xml_response_for_all_chunks($translation_file_key, "", \%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping, \%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);   
1927    return $xml_response;
1928}
1929
1930
1931sub get_first_n_chunks_requiring_work_for_gs3
1932{
1933    # The code of the target language (ensure it is lowercase)
1934    my $target_language_code = lc(shift(@_));
1935    # The key of the file to translate (ensure it is lowercase)
1936    my $translation_file_key = lc(shift(@_));
1937    # The number of chunks to return (defaults to one if not specified)
1938    my $num_chunks_to_return = shift(@_) || "1";
1939   
1940    # Check that the necessary arguments were supplied
1941    if (!$target_language_code || !$translation_file_key) {
[24627]1942        &throw_fatal_error("Missing command argument.");
[18460]1943    }
[28755]1944
[18460]1945    my %source_files_key_to_text_mapping = ();
1946    my %target_files_key_to_text_mapping = ();
1947    my %source_files_key_to_last_update_date_mapping = ();
1948    my %target_files_key_to_last_update_date_mapping = ();
[24627]1949   
[30581]1950    &build_gs3_configuration($translation_file_key, $target_language_code, \%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping,
[24627]1951    \%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);
[18460]1952   
1953    # Determine the target file chunks requiring translation
1954    my @target_files_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping);   
1955    # Determine the target file chunks requiring updating
1956    my @target_files_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);
1957    &log_message("Total number of target chunks requiring translation: " . scalar(@target_files_keys_requiring_translation));
1958    &log_message("Total number of target chunks requiring updating: " . scalar(@target_files_keys_requiring_updating));
[29415]1959
1960    my $download_target_filepath = "";
1961
1962
1963    # ****** DOWNLOADING LANGUAGE FILES WAS NOT YET IMPLEMENTED FOR GS3. RUDIMENTARY VERSION ****** #
1964
1965    # if there is no copy of the language files for download, there's also no link to the spreadsheet
1966    # for translating offline. So GS3's download option, we will zip up all the relevant greenstone 3
1967    # interface *.properties files,and link to that zip as the file for offline translation.
1968    # Selecting only properties files for English and the language they're working on (if the last exists)
1969
1970    # tar -cvzf gs3interface.tar.gz greenstone3/AbstractBrowse.properties greenstone3/AbstractBrowse_nl.properties
1971    # will generate a tar file containing a folder called "greenstone3" with the specified *.properties files
1972
1973    my $zip = &FileUtils::filenameConcatenate("tmp", "gs3interface_".$target_language_code.".tar.gz");
1974    my $tar_cmd = "tar -cvzf $zip";
1975
1976
1977    # store cur dir and cd to gsdlhome to generate the correct path in the zip file
1978    my $curdir = `pwd`;
1979    chdir $gsdl_root_directory;
1980
[30581]1981    $tar_cmd .= " " . &get_gs3_zip_file_listing($target_language_code, "greenstone3", \@gs3_interface_files);
1982    $tar_cmd .= " " . &get_gs3_zip_file_listing($target_language_code, "gs3-collection-configs", \@gs3_col_cfg_files);
[29415]1983
1984    # tar command will overwrite the previous version, but want to check we've created it
1985    if(&FileUtils::fileExists($zip)) {
1986    &FileUtils::removeFiles($zip);
1987    }
1988
1989    #my $tar_result = system($tar_cmd); # works but then interface breaks
1990    `$tar_cmd`;
1991    my $tar_result = $?;
1992
1993    if(&FileUtils::fileExists($zip)) { ## if($tar_result == 0) {, # breaks the interface
1994    $download_target_filepath = $zip;
1995    } else {
1996    &log_message("Unable to generate zip containing gs3interface files " . $download_target_filepath . "$!");
1997    }
1998
1999    # change back to original working directory (cgi-bin/linux probably)
2000    chdir $curdir;
2001
2002    # ************** END RUDIMENTARY VERSION OF DOWNLOADING LANGUAGE FILES FOR GS3 ************* #
2003
2004
2005    my $xml_response = &create_xml_response_for_chunks_requiring_work($translation_file_key, $download_target_filepath, scalar(keys(%source_files_key_to_text_mapping)),
[24627]2006    \@target_files_keys_requiring_translation, \@target_files_keys_requiring_updating,
2007    $num_chunks_to_return, \%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping,
2008    \%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);
2009   
[18460]2010    return $xml_response;
2011}
2012
[30581]2013# helper function
2014# gets the listing of gs3 files for a gs3 interface module (gs3interface, gs3colcfg)
2015# formatted correctly to go into a zip file
2016sub get_gs3_zip_file_listing
2017{
2018   my $target_language_code = shift(@_);
2019   my $sourcedir = shift(@_);
2020   my $files_array = shift(@_); # reference to an array of the interfaces files for the gs3 module
2021
2022   my $filelisting = "";
2023   foreach my $interface_file (@$files_array) {
2024
2025    my $source_filepath = &FileUtils::filenameConcatenate($sourcedir, $interface_file.".properties");
2026    my $target_filepath = &FileUtils::filenameConcatenate($sourcedir, $interface_file."_".$target_language_code.".properties");
2027   
2028    $filelisting = "$filelisting $source_filepath";
2029    if(&FileUtils::fileExists($target_filepath)) {
2030        $filelisting = "$filelisting $target_filepath";
2031    }
2032    }
2033
2034   return $filelisting;
2035}
2036
[25249]2037sub get_uptodate_chunks_for_gs3
2038{
2039    # The code of the target language (ensure it is lowercase)
2040    my $target_language_code = lc(shift(@_));
2041    # The key of the file to translate (ensure it is lowercase)
2042    my $translation_file_key = lc(shift(@_));
2043    # The number of chunks to return (defaults to one if not specified)
2044    my $num_chunks_to_return = shift(@_) || "1";
2045   
2046    # Check that the necessary arguments were supplied
2047    if (!$target_language_code || !$translation_file_key) {
2048        &throw_fatal_error("Missing command argument.");
2049    }
2050   
2051    my %source_files_key_to_text_mapping = ();
2052    my %target_files_key_to_text_mapping = ();
2053    my %source_files_key_to_last_update_date_mapping = ();
2054    my %target_files_key_to_last_update_date_mapping = ();
2055   
[30581]2056    &build_gs3_configuration($translation_file_key, $target_language_code, \%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping,
[25249]2057    \%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);
2058   
[18460]2059
[25249]2060    # Chunks needing updating are those in the target file that have been more recently edited in the source file
2061    # All others are uptodate (which implies that they have certainly been translated at some point and would not be empty)
2062    my @uptodate_target_file_keys = ();
2063    foreach my $chunk_key (keys(%source_files_key_to_last_update_date_mapping)) {
2064        my $source_chunk_last_update_date = $source_files_key_to_last_update_date_mapping{$chunk_key};
2065        my $target_chunk_last_update_date = $target_files_key_to_last_update_date_mapping{$chunk_key};
2066       
2067        # print "key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n";       
2068       
2069        if (defined($target_chunk_last_update_date) && !&is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
2070            # &log_message("Chunk with key $chunk_key needs updating.");
2071            push(@uptodate_target_file_keys, $chunk_key);
2072        }
2073    }
[18460]2074
[25249]2075    my $xml_response = &create_xml_response_for_uptodate_chunks($translation_file_key, "", \@uptodate_target_file_keys, \%source_files_key_to_text_mapping, \%target_files_key_to_text_mapping, \%source_files_key_to_last_update_date_mapping, \%target_files_key_to_last_update_date_mapping);
2076   
2077    return $xml_response;
2078}
2079
2080
[18460]2081sub build_gs3_configuration
2082{
[30581]2083    my ($translation_file_key, $target_language_code, $source_files_key_to_text_mapping, $target_files_key_to_text_mapping,
[30548]2084    $source_files_key_to_gti_comment_or_last_updated_mapping, $target_files_key_to_gti_comment_or_last_updated_mapping, $get_gti_comments_not_last_updated) = @_;
[18460]2085   
[28755]2086    my $source_file_directory = "greenstone3";  # my $source_file_directory = &util::filename_cat("WEB-INF","classes");
[30581]2087    my $files_array = \@gs3_interface_files;
2088
2089    if($translation_file_key eq "gs3colcfg") {
2090    $source_file_directory = "gs3-collection-configs";
2091    $files_array = \@gs3_col_cfg_files;
2092    }
[18460]2093    my $translation_file_type = "resource_bundle";
2094   
[30581]2095    foreach my $interface_file_key (@$files_array) {
[24627]2096       
2097        &log_message("Greenstone 3 interface file: " . $interface_file_key);
2098       
2099        # Parse the source language and target language files
2100        my $source_file = &util::filename_cat($source_file_directory, $interface_file_key.".properties");
2101        my @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
2102        my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
2103        my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
[30548]2104        #my %source_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);   
[24627]2105       
[30548]2106        my %source_file_key_to_gti_comment_or_last_updated_mapping;
2107        if($get_gti_comments_not_last_updated) {
2108            %source_file_key_to_gti_comment_or_last_updated_mapping = &build_key_to_gti_comment_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);   
2109        } else {
2110            %source_file_key_to_gti_comment_or_last_updated_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
2111        }
2112
[24627]2113        my $target_file = &util::filename_cat($source_file_directory, $interface_file_key."_".$target_language_code.".properties");
2114        my @target_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $target_file));
2115        my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
2116        my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
[30548]2117        #my %target_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
[24627]2118       
[30548]2119        my %target_file_key_to_gti_comment_or_last_updated_mapping;
2120        if($get_gti_comments_not_last_updated) {
2121            %target_file_key_to_gti_comment_or_last_updated_mapping = &build_key_to_gti_comment_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
2122        } else {
2123            %target_file_key_to_gti_comment_or_last_updated_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
2124        }
[24627]2125       
[30548]2126       
[24627]2127        # Filter out any automatically translated chunks
2128        foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
2129            if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
2130                delete $source_file_key_to_line_mapping{$chunk_key};
2131                delete $target_file_key_to_line_mapping{$chunk_key};
2132            }
2133        }
2134       
2135        &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
2136        &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
2137       
2138        foreach my $chunk_key (keys(%source_file_key_to_text_mapping)) {
2139            my $global_chunk_key = "$interface_file_key.$chunk_key";
2140            $source_files_key_to_text_mapping->{$global_chunk_key} = $source_file_key_to_text_mapping{$chunk_key};
[30548]2141            $source_files_key_to_gti_comment_or_last_updated_mapping->{$global_chunk_key} = $source_file_key_to_gti_comment_or_last_updated_mapping{$chunk_key};
[24627]2142           
2143            if (defined $target_file_key_to_text_mapping{$chunk_key}) {
2144                $target_files_key_to_text_mapping->{$global_chunk_key} = $target_file_key_to_text_mapping{$chunk_key};
[30548]2145                $target_files_key_to_gti_comment_or_last_updated_mapping->{$global_chunk_key} = $target_file_key_to_gti_comment_or_last_updated_mapping{$chunk_key};
[24627]2146            }
[29456]2147        }
[18460]2148    }
2149}
2150
2151
2152sub write_translated_gs3interface
2153{
[30581]2154    my $translation_file_key = shift(@_);
[18460]2155    my $source_file_key_to_text_mapping = shift(@_);
2156    my $target_file_key_to_text_mapping = shift(@_);
2157    my $target_file_key_to_gti_comment_mapping = shift(@_);
2158    my $target_language_code = shift(@_);
2159   
2160    my @sorted_chunk_keys = sort (keys(%$source_file_key_to_text_mapping));
[24627]2161   
[18460]2162    my %translated_interface_file_keys = ();
2163    foreach my $chunk_key (keys(%$target_file_key_to_text_mapping)) {
[24627]2164        $chunk_key =~ /^([^\.]+)?\.(.*)$/;
2165        if (!defined $translated_interface_file_keys{$1}) {
2166            &log_message("Updated interface file: " . $1); 
2167            $translated_interface_file_keys{$1}="";
2168        }
[18460]2169    }
2170    &log_message("Updated interface files: " . scalar(keys(%translated_interface_file_keys)));
2171   
2172    my $source_file_directory = "greenstone3";   
[30581]2173    $source_file_directory = "gs3-collection-configs" if $translation_file_key eq "gs3colcfg";
2174
[18460]2175    foreach my $interface_file_key (keys(%translated_interface_file_keys)) {
[24627]2176       
2177        # Build a mapping from chunk key to source file line, and from source file line to chunk key
2178        my $source_file = &util::filename_cat($source_file_directory, "$interface_file_key.properties");
2179        my @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
2180        my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_resource_bundle(@source_file_lines);
2181        my %source_file_line_to_key_mapping = ();
2182        foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
2183            $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
[18460]2184        }
[24627]2185       
2186        # Write the new target file
2187        my $target_file = &util::filename_cat($source_file_directory, $interface_file_key . "_" . $target_language_code . ".properties");
2188        my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
2189        if (!open(TARGET_FILE, ">$target_file_path")) {
2190            &throw_fatal_error("Could not write target file $target_file_path.");
2191        }
2192       
2193        # Model the new target file on the source file, with the target file translations
2194        my $source_file_line_number = 0;
2195        foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
2196            # Fill in the gaps before this chunk starts
2197            my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
2198            my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
2199            while ($source_file_line_number < $source_file_chunk_starting_line_number) {
2200                print TARGET_FILE $source_file_lines[$source_file_line_number];
2201                $source_file_line_number++;
2202            }
2203            $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
2204           
2205            my $chunk_key = $source_file_line_to_key_mapping{$line_key};
2206            my $global_chunk_key = "$interface_file_key.$chunk_key";
2207            my $source_file_chunk_text = $source_file_key_to_text_mapping->{$global_chunk_key};
2208            my $target_file_chunk_text = $target_file_key_to_text_mapping->{$global_chunk_key} || "";
2209           
[30719]2210            # make sure any : or = sign in the chunk key is escaped again (with \) when written out
2211            # since the key-value separator in a property resource bundle file is : or =
2212            my $escaped_chunk_key = $chunk_key;
2213            $escaped_chunk_key =~ s/(:|=)/\\$1/g; #$escaped_chunk_key =~ s/([^\\])(:|=)/\\$1$2/g;
2214
[24627]2215            # If no translation exists for this chunk, show this, and move on
2216            if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
[30719]2217                print TARGET_FILE "# -- Missing translation: $escaped_chunk_key\n";
[24627]2218                next;
2219            }
2220           
[30719]2221            print TARGET_FILE "$escaped_chunk_key:$target_file_chunk_text";
[24627]2222            if ($target_file_key_to_gti_comment_mapping->{$global_chunk_key}) {
2223                print TARGET_FILE "  # " . $target_file_key_to_gti_comment_mapping->{$global_chunk_key};
2224            }
2225            print TARGET_FILE "\n";
2226        }
2227       
2228        close(TARGET_FILE);
[18460]2229    }           
2230}
2231
[10019]2232&main(@ARGV);
Note: See TracBrowser for help on using the browser.