source: trunk/gsdl/bin/script/gti.pl@ 11634

Last change on this file since 11634 was 11634, checked in by kjdon, 18 years ago

strings.rb renamed to strings.properties

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 51.3 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# gti.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2005 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
33}
34
35
36use iso639;
37use strict;
38use util;
39
40
41my $anonymous_cvs_root = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src";
42my $gsdl_root_directory = "$ENV{'GSDLHOME'}";
43my $gti_log_file = &util::filename_cat($gsdl_root_directory, "etc", "gti.log");
44my $source_language_code = "en"; # This is non-negiotable
45
46my $gti_translation_files =
47 [ # Greenstone macrofiles
48 { 'key' => "coredm",
49 'file_type' => "macrofile",
50 'source_file' => "macros/english.dm",
51 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
52 { 'key' => "auxdm",
53 'file_type' => "macrofile",
54 'source_file' => "macros/english2.dm",
55 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" },
56
57 # GLI dictionary
58 { 'key' => "glidict",
59 'file_type' => "resource_bundle",
60 'source_file' => "gli/classes/dictionary.properties",
61 'target_file' => "gli/classes/dictionary_{target_language_code}.properties" },
62
63 # GLI help
64 { 'key' => "glihelp",
65 'file_type' => "greenstone_xml",
66 'source_file' => "gli/help/en/help.xml",
67 'target_file' => "gli/help/{target_language_code}/help.xml" },
68
69 # Greenstone Perl modules
70 { 'key' => "perlmodules",
71 'file_type' => "resource_bundle",
72 'source_file' => "perllib/strings.properties",
73 'target_file' => "perllib/strings_{target_language_code}.properties" },
74
75 # Greenstone tutorial exercises
76 { 'key' => "tutorials",
77 'file_type' => "greenstone_xml",
78 'source_file' => "gsdl-tutorials/tutorial_en.xml",
79 'target_file' => "gsdl-tutorials/tutorial_{target_language_code}.xml" },
80
81 # Greenstone.org
82 # { 'key' => "greenorg",
83 # 'file_type' => "macrofile",
84 # 'source_file' => "greenorg/macros/english.dm",
85 # 'target_file' => "greenorg/macros/{iso_639_1_target_language_name}.dm" }
86 ];
87
88
89sub main
90{
91 # Get the command to process, and any arguments
92 my $gti_command = shift(@_);
93 my @gti_command_arguments = @_;
94
95 # Open the GTI log file for appending, or write to STDERR if that fails
96 if (!open(GTI_LOG, ">>$gti_log_file")) {
97 open(GTI_LOG, ">&STDERR");
98 }
99
100 # Log the command that launched this script
101 &log_message("Command: $0 @ARGV");
102
103 # Check that a command was supplied
104 if (!$gti_command) {
105 &throw_fatal_error("Missing command.");
106 }
107
108 # Process the command
109 if ($gti_command =~ /^get-first-n-chunks-requiring-work$/i) {
110 print &get_first_n_chunks_requiring_work(@gti_command_arguments);
111 }
112 if ($gti_command =~ /^get-language-status$/i) {
113 print &get_language_status(@gti_command_arguments);
114 }
115 if ($gti_command =~ /^search-chunks$/i) {
116 print &search_chunks(@gti_command_arguments);
117 }
118 if ($gti_command =~ /^submit-translations$/i) {
119 # This command cannot produce any output since it reads input
120 &submit_translations(@gti_command_arguments);
121 }
122
123 # The command was not recognized
124 # &throw_fatal_error("Unknown command \"$gti_command\".");
125}
126
127
128sub throw_fatal_error
129{
130 my $error_message = shift(@_);
131
132 # Write an XML error response
133 print "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
134 print "<GTIResponse>\n";
135 print " <GTIError time=\"" . time() . "\">" . $error_message . "</GTIError>\n";
136 print "</GTIResponse>\n";
137
138 # Log the error message, then die
139 &log_message("Error: $error_message");
140 die "\n";
141}
142
143
144sub log_message
145{
146 my $log_message = shift(@_);
147 print GTI_LOG time() . " -- " . $log_message . "\n";
148}
149
150
151sub get_first_n_chunks_requiring_work
152{
153 # The code of the target language (ensure it is lowercase)
154 my $target_language_code = lc(shift(@_));
155 # The key of the file to translate (ensure it is lowercase)
156 my $translation_file_key = lc(shift(@_));
157 # The number of chunks to return (defaults to one if not specified)
158 my $num_chunks_to_return = shift(@_) || "1";
159
160 # Check that the necessary arguments were supplied
161 if (!$target_language_code || !$translation_file_key) {
162 &throw_fatal_error("Missing command argument.");
163 }
164
165 # Get (and check) the translation configuration
166 my ($source_file, $target_file, $translation_file_type)
167 = &get_translation_configuration($target_language_code, $translation_file_key);
168
169 # Parse the source language and target language files
170 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
171 my @source_file_lines = &read_file_lines($source_file_path);
172 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
173
174 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
175 my @target_file_lines = &read_file_lines($target_file_path);
176 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
177
178 # Filter out any automatically translated chunks
179 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
180 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
181 delete $source_file_key_to_line_mapping{$chunk_key};
182 delete $target_file_key_to_line_mapping{$chunk_key};
183 }
184 }
185
186 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
187 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
188 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
189 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
190
191 # Determine the target file chunks requiring translation
192 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
193 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
194
195 # Determine the target file chunks requiring updating
196 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
197 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
198 my @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
199 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
200
201 # Form an XML response to the command
202 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
203 $xml_response .= "<GTIResponse>\n";
204 $xml_response .= " <TranslationFile"
205 . " key=\"" . $translation_file_key . "\""
206 . " target_file_path=\"" . $target_file . "\""
207 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
208 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
209 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
210
211 # Do chunks requiring translation first
212 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_translation)) {
213 $xml_response .= " <ChunksRequiringTranslation size=\"" . scalar(@target_file_keys_requiring_translation) . "\">\n";
214 }
215 else {
216 $xml_response .= " <ChunksRequiringTranslation size=\"" . $num_chunks_to_return . "\">\n";
217 }
218
219 foreach my $chunk_key (@target_file_keys_requiring_translation) {
220 last if ($num_chunks_to_return == 0);
221
222 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
223 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
224
225 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
226 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
227 $xml_response .= " <TargetFileText></TargetFileText>\n";
228 $xml_response .= " </Chunk>\n";
229
230 $num_chunks_to_return--;
231 }
232
233 $xml_response .= " </ChunksRequiringTranslation>\n";
234
235 # Then do chunks requiring updating
236 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_updating)) {
237 $xml_response .= " <ChunksRequiringUpdating size=\"" . scalar(@target_file_keys_requiring_updating) . "\">\n";
238 }
239 else {
240 $xml_response .= " <ChunksRequiringUpdating size=\"" . $num_chunks_to_return . "\">\n";
241 }
242
243 foreach my $chunk_key (@target_file_keys_requiring_updating) {
244 last if ($num_chunks_to_return == 0);
245
246 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
247 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
248 my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
249 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
250
251 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
252 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
253 $xml_response .= " <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
254 $xml_response .= " </Chunk>\n";
255
256 $num_chunks_to_return--;
257 }
258
259 $xml_response .= " </ChunksRequiringUpdating>\n";
260
261 $xml_response .= "</GTIResponse>\n";
262 return $xml_response;
263}
264
265
266sub get_language_status
267{
268 # The code of the target language (ensure it is lowercase)
269 my $target_language_code = lc(shift(@_));
270
271 # Check that the necessary arguments were supplied
272 if (!$target_language_code) {
273 &throw_fatal_error("Missing command argument.");
274 }
275
276 # Form an XML response to the command
277 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
278 $xml_response .= "<GTIResponse>\n";
279 $xml_response .= " <LanguageStatus code=\"$target_language_code\">\n";
280
281 foreach my $translation_file (@$gti_translation_files) {
282 # Get (and check) the translation configuration
283 my ($source_file, $target_file, $translation_file_type)
284 = &get_translation_configuration($target_language_code, $translation_file->{'key'});
285
286 # Parse the source language and target language files
287 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
288 my @source_file_lines = &read_file_lines($source_file_path);
289 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
290
291 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
292 my @target_file_lines = &read_file_lines($target_file_path);
293 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
294
295 # Filter out any automatically translated chunks
296 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
297 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
298 delete $source_file_key_to_line_mapping{$chunk_key};
299 delete $target_file_key_to_line_mapping{$chunk_key};
300 }
301 }
302
303 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
304 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
305 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
306 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
307
308 # Determine the target file chunks requiring translation
309 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
310 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
311
312 # Determine the target file chunks requiring updating
313 my @target_file_keys_requiring_updating = ();
314 if (-e $target_file_path) {
315 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
316 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
317 @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
318 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
319 }
320
321 $xml_response .= " <TranslationFile"
322 . " key=\"" . $translation_file->{'key'} . "\""
323 . " target_file_path=\"" . $target_file . "\""
324 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
325 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
326 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
327 }
328
329 $xml_response .= " </LanguageStatus>\n";
330
331 $xml_response .= "</GTIResponse>\n";
332 return $xml_response;
333}
334
335
336sub search_chunks
337{
338 # The code of the target language (ensure it is lowercase)
339 my $target_language_code = lc(shift(@_));
340 # The key of the file to translate (ensure it is lowercase)
341 my $translation_file_key = lc(shift(@_));
342 # The query string
343 my $query_string = join(' ', @_);
344
345 # Check that the necessary arguments were supplied
346 if (!$target_language_code || !$translation_file_key || !$query_string) {
347 &throw_fatal_error("Missing command argument.");
348 }
349
350 # Get (and check) the translation configuration
351 my ($source_file, $target_file, $translation_file_type)
352 = &get_translation_configuration($target_language_code, $translation_file_key);
353
354 # Parse the source language and target language files
355 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
356 my @source_file_lines = &read_file_lines($source_file_path);
357 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
358
359 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
360 my @target_file_lines = &read_file_lines($target_file_path);
361 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
362
363 # Filter out any automatically translated chunks
364 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
365 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
366 delete $source_file_key_to_line_mapping{$chunk_key};
367 delete $target_file_key_to_line_mapping{$chunk_key};
368 }
369 }
370
371 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
372 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
373 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
374 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
375
376 # Determine the target file chunks matching the query
377 my @target_file_keys_matching_query = ();
378 foreach my $chunk_key (keys(%target_file_key_to_text_mapping)) {
379 my $target_file_text = $target_file_key_to_text_mapping{$chunk_key};
380 if ($target_file_text =~ /$query_string/i) {
381 # &log_message("Chunk with key $chunk_key matches query.");
382 push(@target_file_keys_matching_query, $chunk_key);
383 }
384 }
385
386 # Form an XML response to the command
387 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
388 $xml_response .= "<GTIResponse>\n";
389
390 $xml_response .= " <ChunksMatchingQuery size=\"" . scalar(@target_file_keys_matching_query) . "\">\n";
391 foreach my $chunk_key (@target_file_keys_matching_query) {
392 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
393
394 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
395 $xml_response .= " <TargetFileText>$target_file_chunk_text</TargetFileText>\n";
396 $xml_response .= " </Chunk>\n";
397 }
398 $xml_response .= " </ChunksMatchingQuery>\n";
399
400 $xml_response .= "</GTIResponse>\n";
401 return $xml_response;
402}
403
404
405sub submit_translations
406{
407 # The code of the target language (ensure it is lowercase)
408 my $target_language_code = lc(shift(@_));
409 # The key of the file to translate (ensure it is lowercase)
410 my $translation_file_key = lc(shift(@_));
411 # Whether to submit a target chunk even if it hasn't changed
412 my $force_submission_flag = shift(@_);
413
414 # Check that the necessary arguments were supplied
415 if (!$target_language_code || !$translation_file_key) {
416 &log_message("Fatal error (but cannot be thrown): Missing command argument.");
417 die "\n";
418 }
419
420 # Get (and check) the translation configuration
421 my ($source_file, $target_file, $translation_file_type)
422 = &get_translation_configuration($target_language_code, $translation_file_key);
423
424 # Parse the source language and target language files
425 my @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
426 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
427 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
428 my %source_file_key_to_comment_date_mapping = &build_key_to_comment_date_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
429 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
430
431 my @target_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $target_file));
432 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
433 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
434 my %target_file_key_to_comment_date_mapping = &build_key_to_comment_date_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
435 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
436
437 # Submission date
438 my $day = (localtime)[3];
439 my $month = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[(localtime)[4]];
440 my $year = (localtime)[5] + 1900;
441 my $submission_date = "$day-$month-$year";
442
443 open(SUBMISSION, "-");
444 my @submission_lines = <SUBMISSION>;
445 close(SUBMISSION);
446
447 # Remove any nasty carriage returns
448 &log_message("Submission:");
449 foreach my $submission_line (@submission_lines) {
450 $submission_line =~ s/\r$//;
451 &log_message(" $submission_line");
452 }
453
454 my %source_file_key_to_submission_mapping = ();
455 my %target_file_key_to_submission_mapping = ();
456 for (my $i = 0; $i < scalar(@submission_lines); $i++) {
457 # Read source file part of submission
458 if ($submission_lines[$i] =~ /^\<SourceFileText key=\"(.+)\"\>/) {
459 my $chunk_key = $1;
460
461 # Read the source file text
462 my $source_file_chunk_text = "";
463 $i++;
464 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/SourceFileText\>/) {
465 $source_file_chunk_text .= $submission_lines[$i];
466 $i++;
467 }
468 $source_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
469 $source_file_chunk_text = &unmake_text_xml_safe($source_file_chunk_text);
470
471 &log_message("Source file key: $chunk_key");
472 &log_message("Source file text: $source_file_chunk_text");
473 $source_file_key_to_submission_mapping{$chunk_key} = $source_file_chunk_text;
474 }
475
476 # Read target file part of submission
477 if ($submission_lines[$i] =~ /^\<TargetFileText key=\"(.+)\"\>/) {
478 my $chunk_key = $1;
479
480 # Read the target file text
481 my $target_file_chunk_text = "";
482 $i++;
483 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/TargetFileText\>/) {
484 $target_file_chunk_text .= $submission_lines[$i];
485 $i++;
486 }
487 $target_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
488 $target_file_chunk_text = &unmake_text_xml_safe($target_file_chunk_text);
489
490 &log_message("Target file key: $chunk_key");
491 &log_message("Target file text: $target_file_chunk_text");
492 $target_file_key_to_submission_mapping{$chunk_key} = $target_file_chunk_text;
493 }
494 }
495
496 # -----------------------------------------
497 # Validate the translation submissions
498 # -----------------------------------------
499
500 # Check that the translations are valid
501 foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) {
502 # Make sure the submitted chunk still exists in the source file
503 if (!defined($source_file_key_to_text_mapping{$chunk_key})) {
504 &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission).");
505 delete $source_file_key_to_submission_mapping{$chunk_key};
506 delete $target_file_key_to_submission_mapping{$chunk_key};
507 next;
508 }
509
510 # Make sure the submitted source chunk matches the source file chunk
511 if ($source_file_key_to_submission_mapping{$chunk_key} ne $source_file_key_to_text_mapping{$chunk_key}) {
512 &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission).");
513 &log_message("Submission source: $source_file_key_to_submission_mapping{$chunk_key}");
514 &log_message(" Source text: $source_file_key_to_text_mapping{$chunk_key}");
515 delete $source_file_key_to_submission_mapping{$chunk_key};
516 delete $target_file_key_to_submission_mapping{$chunk_key};
517 next;
518 }
519 }
520
521 # Apply the submitted translations
522 foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) {
523 # Only apply the submission if it is a change, unless -force_submission has been specified
524 if ($force_submission_flag || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) {
525 $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
526 $target_file_key_to_comment_date_mapping{$chunk_key} = $submission_date;
527 }
528 }
529
530 eval "&write_translated_${translation_file_type}(\$source_file, \\\@source_file_lines, \\\%source_file_key_to_text_mapping, \$target_file, \\\@target_file_lines, \\\%target_file_key_to_text_mapping, \\\%target_file_key_to_comment_date_mapping, \$target_language_code)";
531}
532
533
534sub get_translation_configuration
535{
536 # Get the code of the target language
537 my $target_language_code = shift(@_);
538 # Get the key of the file to translate
539 my $translation_file_key = shift(@_);
540
541 # Read the translation data from the gti.cfg file
542 my ($source_file, $target_file, $translation_file_type) =
543 &get_translation_data_for($target_language_code, $translation_file_key);
544
545 # Check that the file to translate is defined in the gti.cfg file
546 if (!$source_file || !$target_file || !$translation_file_type) {
547 &throw_fatal_error("Missing or incomplete specification for translation file \"$translation_file_key\" in gti.pl.");
548 }
549
550 # Check that the source file exists
551 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
552 if (!-e $source_file_path) {
553 &throw_fatal_error("Source file $source_file_path does not exist.");
554 }
555
556 # Check that the source file is up to date
557 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
558 # unless ($translation_file_is_not_in_cvs) {
559 my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`;
560 if ($source_file_cvs_status =~ /^C /) {
561 &throw_fatal_error("Source file $source_file_path conflicts with the repository.");
562 }
563 if ($source_file_cvs_status =~ /^M /) {
564 &throw_fatal_error("Source file $source_file_path contains uncommitted changes.");
565 }
566 # }
567
568 return ($source_file, $target_file, $translation_file_type);
569}
570
571
572sub get_translation_data_for
573{
574 my ($target_language_code, $translation_file_key) = @_;
575
576 foreach my $translation_file (@$gti_translation_files) {
577 # If this isn't the correct translation file, move onto the next one
578 next if ($translation_file_key ne $translation_file->{'key'});
579
580 # Resolve the target language file
581 my $target_language_file = $translation_file->{'target_file'};
582 if ($target_language_file =~ /(\{.+\;.+\})/) {
583 my $unresolved_target_language_file_part = $1;
584
585 # Check for a special case for the target language code
586 if ($unresolved_target_language_file_part =~ /(\{|\;)$target_language_code:([^\;]+)(\;|\})/) {
587 my $resolved_target_language_file_part = $2;
588 $target_language_file =~ s/$unresolved_target_language_file_part/$resolved_target_language_file_part/;
589 }
590 # Otherwise use the last part as the default value
591 else {
592 my ($default_target_language_file_part) = $unresolved_target_language_file_part =~ /([^\;]+)\}/;
593 $target_language_file =~ s/$unresolved_target_language_file_part/\{$default_target_language_file_part\}/;
594 }
595 }
596
597 # Resolve instances of {iso_639_1_target_language_name}
598 my $iso_639_1_target_language_name = $iso639::fromiso639{$target_language_code};
599 $iso_639_1_target_language_name =~ tr/A-Z/a-z/ if $iso_639_1_target_language_name;
600 $target_language_file =~ s/\{iso_639_1_target_language_name\}/$iso_639_1_target_language_name/g;
601
602 # Resolve instances of {target_language_code}
603 $target_language_file =~ s/\{target_language_code\}/$target_language_code/g;
604
605 return ($translation_file->{'source_file'}, $target_language_file, $translation_file->{'file_type'});
606 }
607
608 return ();
609}
610
611
612sub read_file_lines
613{
614 my ($file_path) = @_;
615
616 if (!open(FILE_IN, "<$file_path")) {
617 &log_message("Note: Could not open file $file_path.");
618 return ();
619 }
620 my @file_lines = <FILE_IN>;
621 close(FILE_IN);
622
623 return @file_lines;
624}
625
626
627sub build_key_to_line_mapping
628{
629 my ($file_lines, $translation_file_type) = @_;
630 eval "return &build_key_to_line_mapping_for_${translation_file_type}(\@\$file_lines)";
631}
632
633
634sub build_key_to_text_mapping
635{
636 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
637
638 my %key_to_text_mapping = ();
639 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
640 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
641 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
642
643 my $chunk_text = @$file_lines[$chunk_starting_line];
644 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
645 $chunk_text .= @$file_lines[$l];
646 }
647
648 # Map from chunk key to text
649 eval "\$key_to_text_mapping{\${chunk_key}} = &import_chunk_from_${translation_file_type}(\$chunk_text)";
650 }
651
652 return %key_to_text_mapping;
653}
654
655
656sub build_key_to_last_update_date_mapping
657{
658 my ($file, $file_lines, $key_to_line_mapping, $translation_file_type) = @_;
659
660 # If the files aren't in CVS then we can't tell anything about what needs updating
661 # return () if ($translation_file_is_not_in_cvs);
662
663 # Build a mapping from key to CVS date
664 # Need to be careful with this mapping because the chunk keys won't necessarily all be valid
665 my %key_to_cvs_date_mapping = &build_key_to_cvs_date_mapping($file, $translation_file_type);
666
667 # Build a mapping from key to comment date
668 my %key_to_comment_date_mapping = &build_key_to_comment_date_mapping($file_lines, $key_to_line_mapping, $translation_file_type);
669
670 # Build a mapping from key to last update date (the latter of the CVS date and comment date)
671 my %key_to_last_update_date_mapping = ();
672 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
673 # Use the CVS date as a starting point
674 my $chunk_cvs_date = $key_to_cvs_date_mapping{$chunk_key};
675 $key_to_last_update_date_mapping{$chunk_key} = $chunk_cvs_date;
676
677 # If a comment date exists and it is after the CVS date, use that instead
678 my $chunk_comment_date = $key_to_comment_date_mapping{$chunk_key};
679 if (defined($chunk_comment_date) && (!defined($chunk_cvs_date) || &is_date_after($chunk_comment_date, $chunk_cvs_date))) {
680 $key_to_last_update_date_mapping{$chunk_key} = $chunk_comment_date;
681 }
682 }
683
684 return %key_to_last_update_date_mapping;
685}
686
687
688sub build_key_to_cvs_date_mapping
689{
690 my ($filename, $translation_file_type) = @_;
691
692 # Use CVS to annotate each line of the file with the date it was last edited
693 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
694 my $cvs_annotated_file = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root annotate -F $filename 2>/dev/null`;
695 my @cvs_annotated_file_lines = split(/\n/, $cvs_annotated_file);
696
697 my @cvs_annotated_file_lines_date = ();
698 foreach my $cvs_annotated_file_line (@cvs_annotated_file_lines) {
699 # Extract the date from the CVS annotation at the front
700 $cvs_annotated_file_line =~ s/^\S+\s+\(\S+\s+(\S+)\):\s//;
701 push(@cvs_annotated_file_lines_date, $1);
702 }
703
704 # Build a key to line mapping for the CVS annotated file, for matching the chunk key to the CVS date
705 my %key_to_line_mapping = &build_key_to_line_mapping(\@cvs_annotated_file_lines, $translation_file_type);
706
707 my %key_to_cvs_date_mapping = ();
708 foreach my $chunk_key (keys(%key_to_line_mapping)) {
709 my $chunk_starting_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[0];
710 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[1];
711
712 # Find the date this chunk was last edited, from the CVS annotation
713 my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line];
714 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
715 if (&is_date_after($cvs_annotated_file_lines_date[$l], $chunk_date)) {
716 # This part of the chunk has been updated more recently
717 $chunk_date = $cvs_annotated_file_lines_date[$l];
718 }
719 }
720
721 # Map from chunk key to CVS date
722 $key_to_cvs_date_mapping{$chunk_key} = $chunk_date;
723 }
724
725 return %key_to_cvs_date_mapping;
726}
727
728
729sub build_key_to_comment_date_mapping
730{
731 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
732
733 my %key_to_comment_date_mapping = ();
734 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
735 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
736 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
737
738 my $chunk_text = @$file_lines[$chunk_starting_line];
739 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
740 $chunk_text .= @$file_lines[$l];
741 }
742
743 # Map from chunk key to comment date
744 my $chunk_comment_date;
745 eval "\$chunk_comment_date = &get_${translation_file_type}_chunk_comment_date(\$chunk_text)";
746 $key_to_comment_date_mapping{$chunk_key} = $chunk_comment_date if (defined($chunk_comment_date));
747 }
748
749 return %key_to_comment_date_mapping;
750}
751
752
753sub determine_chunks_requiring_translation
754{
755 my $source_file_key_to_text_mapping = shift(@_);
756 my $target_file_key_to_text_mapping = shift(@_);
757
758 # Chunks needing translation are those in the source file with no translation in the target file
759 my @target_file_keys_requiring_translation = ();
760 foreach my $chunk_key (keys(%$source_file_key_to_text_mapping)) {
761 if ($source_file_key_to_text_mapping->{$chunk_key} && !$target_file_key_to_text_mapping->{$chunk_key}) {
762 # &log_message("Chunk with key $chunk_key needs translating.");
763 push(@target_file_keys_requiring_translation, $chunk_key);
764 }
765 }
766
767 return @target_file_keys_requiring_translation;
768}
769
770
771sub determine_chunks_requiring_updating
772{
773 my $source_file_key_to_last_update_date_mapping = shift(@_);
774 my $target_file_key_to_last_update_date_mapping = shift(@_);
775
776 # Chunks needing updating are those in the target file that have been more recently edited in the source file
777 my @target_file_keys_requiring_updating = ();
778 foreach my $chunk_key (keys(%$source_file_key_to_last_update_date_mapping)) {
779 my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping->{$chunk_key};
780 my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping->{$chunk_key};
781 if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
782 # &log_message("Chunk with key $chunk_key needs updating.");
783 push(@target_file_keys_requiring_updating, $chunk_key);
784 }
785 }
786
787 return @target_file_keys_requiring_updating;
788}
789
790
791sub is_chunk_automatically_translated
792{
793 my ($chunk_key, $translation_file_type) = @_;
794 eval "return &is_${translation_file_type}_chunk_automatically_translated(\$chunk_key)";
795}
796
797
798sub make_text_xml_safe
799{
800 my $text = shift(@_);
801 $text =~ s/\&/\&amp\;/g;
802 $text =~ s/\&amp\;lt\;/\&amp\;amp\;lt\;/g;
803 $text =~ s/\&amp\;gt\;/\&amp\;amp\;gt\;/g;
804 $text =~ s/\&amp\;rarr\;/\&amp\;amp\;rarr\;/g;
805 $text =~ s/\&amp\;mdash\;/\&amp\;amp\;mdash\;/g;
806 $text =~ s/</\&lt\;/g;
807 $text =~ s/>/\&gt\;/g;
808 return $text;
809}
810
811
812sub unmake_text_xml_safe
813{
814 my $text = shift(@_);
815 $text =~ s/\&lt\;/</g;
816 $text =~ s/\&gt\;/>/g;
817 $text =~ s/\&amp\;/\&/g;
818 return $text;
819}
820
821
822# Returns 1 if $date1 is after $date2, 0 otherwise
823sub is_date_after
824{
825 my ($date1, $date2) = @_;
826 my %months = ("Jan", 1, "Feb", 2, "Mar", 3, "Apr", 4, "May", 5, "Jun", 6,
827 "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12);
828
829 my @date1parts = split(/-/, $date1);
830 my @date2parts = split(/-/, $date2);
831
832 # Compare year - nasty because we have rolled over into a new century
833 my $year1 = $date1parts[2];
834 if ($year1 < 80) {
835 $year1 += 2000;
836 }
837 my $year2 = $date2parts[2];
838 if ($year2 < 80) {
839 $year2 += 2000;
840 }
841
842 # Compare year
843 if ($year1 > $year2) {
844 return 1;
845 }
846 elsif ($year1 == $year2) {
847 # Year is the same, so compare month
848 if ($months{$date1parts[1]} > $months{$date2parts[1]}) {
849 return 1;
850 }
851 elsif ($months{$date1parts[1]} == $months{$date2parts[1]}) {
852 # Month is the same, so compare day
853 if ($date1parts[0] > $date2parts[0]) {
854 return 1;
855 }
856 }
857 }
858
859 return 0;
860}
861
862
863# ==========================================================================================
864# MACROFILE FUNCTIONS
865
866sub build_key_to_line_mapping_for_macrofile
867{
868 my (@file_lines) = @_;
869
870 my $macro_package;
871 my %chunk_key_to_line_mapping = ();
872 # Process the contents of the file, line by line
873 for (my $i = 0; $i < scalar(@file_lines); $i++) {
874 my $line = $file_lines[$i];
875 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
876
877 # Check if a new package is being defined
878 if ($line =~ m/^package\s+(.+)/) {
879 $macro_package = $1;
880 }
881
882 # Line contains a macro name
883 elsif ($line =~ m/^(_\w+_)/) {
884 my $macro_key = $1;
885 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
886
887 # While there is still text of the macro to go...
888 my $startline = $i;
889 while ($line !~ /\}$/) {
890 $i++;
891 if ($i == scalar(@file_lines)) {
892 &throw_fatal_error("Could not find end of macro $macro_key.");
893 }
894 $line = $file_lines[$i];
895 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
896 }
897
898 # The chunk key consists of the package name and the macro key
899 my $chunk_key = $macro_package . "." . $macro_key;
900 # Map from chunk key to line
901 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
902 }
903
904 # Icon: line in format ## "image text" ## image_type ## macro_name ##
905 elsif ($line =~ m/^\#\# .* \#\# .* \#\# (.*) \#\#/) {
906 # The chunk key consists of package name and macro key
907 my $chunk_key = $macro_package . "." . $1;
908 # Map from chunk key to line
909 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
910 }
911 }
912
913 return %chunk_key_to_line_mapping;
914}
915
916
917sub import_chunk_from_macrofile
918{
919 my ($chunk_text) = @_;
920
921 # Is this an icon macro??
922 if ($chunk_text =~ /^\#\# (.*)/) {
923 # Extract image macro text
924 $chunk_text =~ /^\#\#\s+([^\#]+)\s+\#\#/;
925 $chunk_text = $1;
926
927 # Remove enclosing quotes
928 $chunk_text =~ s/^\"//;
929 $chunk_text =~ s/\"$//;
930 }
931
932 # No, so it must be a text macro
933 else {
934 # Remove macro key
935 $chunk_text =~ s/^_([^_]+)_(\s*)//;
936
937 # Remove language specifier
938 $chunk_text =~ s/^\[l=.*\](\s*)//;
939
940 # Remove braces enclosing text
941 $chunk_text =~ s/^{(\s*)((.|\n)*)}(\s*)(\#.+\s*)?/$2/;
942 }
943
944 return $chunk_text;
945}
946
947
948sub get_macrofile_chunk_comment_date
949{
950 my ($chunk_text) = @_;
951
952 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
953 if ($chunk_text =~ /\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$/i) {
954 return $1;
955 }
956
957 return undef;
958}
959
960
961sub is_macrofile_chunk_automatically_translated
962{
963 my ($chunk_key) = @_;
964
965 # The _httpiconX_, _widthX_ and _heightX_ image macros are automatically translated
966 if ($chunk_key =~ /\._(httpicon|width|height)/) {
967 return 1;
968 }
969
970 return 0;
971}
972
973
974# Use the source file to generate a target file that is formatted the same
975sub write_translated_macrofile
976{
977 my $source_file = shift(@_); # Not used
978 my @source_file_lines = @{shift(@_)};
979 my $source_file_key_to_text_mapping = shift(@_);
980 my $target_file = shift(@_);
981 my @target_file_lines = @{shift(@_)};
982 my $target_file_key_to_text_mapping = shift(@_);
983 my $target_file_key_to_comment_date_mapping = shift(@_);
984 my $target_language_code = shift(@_);
985
986 # Build a mapping from source file line to chunk key
987 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@source_file_lines);
988 my %source_file_line_to_key_mapping = ();
989 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
990 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
991 }
992 my @source_file_line_keys = (sort sort_by_line (keys(%source_file_line_to_key_mapping)));
993 my $source_file_line_number = 0;
994
995 # Build a mapping from target file line to chunk key
996 my %target_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@target_file_lines);
997 my %target_file_line_to_key_mapping = ();
998 foreach my $chunk_key (keys(%target_file_key_to_line_mapping)) {
999 $target_file_line_to_key_mapping{$target_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1000 }
1001 my @target_file_line_keys = (sort sort_by_line (keys(%target_file_line_to_key_mapping)));
1002
1003 # Write the new target file
1004 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1005 if (!open(TARGET_FILE, ">$target_file_path")) {
1006 &throw_fatal_error("Could not write target file $target_file_path.");
1007 }
1008
1009 # Use the header from the target file, to keep language and author information
1010 if (scalar(@target_file_line_keys) > 0) {
1011 my $target_file_line_number = 0;
1012 my $target_file_chunk_starting_line_number = (split(/-/, $target_file_line_keys[0]))[0];
1013 while ($target_file_line_number < $target_file_chunk_starting_line_number) {
1014 my $target_file_line = $target_file_lines[$target_file_line_number];
1015 last if ($target_file_line =~ /^\# -- Missing translation: /); # We don't want to get into the macros
1016 print TARGET_FILE $target_file_line;
1017 $target_file_line_number++;
1018 }
1019
1020 $source_file_line_number = (split(/-/, $source_file_line_keys[0]))[0];
1021 }
1022
1023 # Model the new target file on the source file, with the target file translations
1024 foreach my $line_key (@source_file_line_keys) {
1025 # Fill in the gaps before this chunk starts
1026 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1027 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1028 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1029 print TARGET_FILE $source_file_lines[$source_file_line_number];
1030 $source_file_line_number++;
1031 }
1032 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1033
1034 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1035 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1036 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1037
1038 my $macrofile_key = $chunk_key;
1039 $macrofile_key =~ s/^(.+?)\.//;
1040
1041 # If no translation exists for this chunk, show this, and move on
1042 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1043 print TARGET_FILE "# -- Missing translation: $macrofile_key\n";
1044 next;
1045 }
1046
1047 # Grab the source chunk text
1048 my $source_file_chunk = $source_file_lines[$source_file_chunk_starting_line_number];
1049 for (my $l = ($source_file_chunk_starting_line_number + 1); $l <= $source_file_chunk_finishing_line_number; $l++) {
1050 $source_file_chunk .= $source_file_lines[$l];
1051 }
1052
1053 # Is this an icon macro??
1054 if ($source_file_chunk =~ /^\#\# (.*)/) {
1055 # Escape any newline and question mark characters so the source text is replaced correctly
1056 $source_file_chunk_text =~ s/\\/\\\\/g;
1057 $source_file_chunk_text =~ s/\?/\\\?/g;
1058
1059 # Build the new target chunk from the source chunk
1060 my $target_file_chunk = $source_file_chunk;
1061 $target_file_chunk =~ s/$source_file_chunk_text/$target_file_chunk_text/;
1062 $target_file_chunk =~ s/(\s)*$//;
1063 print TARGET_FILE "$target_file_chunk";
1064 }
1065
1066 # No, it is just a normal text macro
1067 else {
1068 print TARGET_FILE "$macrofile_key [l=$target_language_code] {$target_file_chunk_text}";
1069 }
1070
1071 # Add the update date, if one exists
1072 if ($target_file_key_to_comment_date_mapping->{$chunk_key}) {
1073 print TARGET_FILE " # Updated " . $target_file_key_to_comment_date_mapping->{$chunk_key};
1074 }
1075 print TARGET_FILE "\n";
1076 }
1077
1078 close(TARGET_FILE);
1079}
1080
1081
1082sub sort_by_line
1083{
1084 return ((split(/-/, $a))[0] <=> (split(/-/, $b))[0]);
1085}
1086
1087
1088# ==========================================================================================
1089# RESOURCE BUNDLE FUNCTIONS
1090
1091sub build_key_to_line_mapping_for_resource_bundle
1092{
1093 my (@file_lines) = @_;
1094
1095 my %chunk_key_to_line_mapping = ();
1096 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1097 my $line = $file_lines[$i];
1098 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1099
1100 # Line contains a dictionary string
1101 if ($line =~ /^(\S+?):(.*)$/) {
1102 my $chunk_key = $1;
1103
1104 # Map from chunk key to line
1105 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1106 }
1107 }
1108
1109 return %chunk_key_to_line_mapping;
1110}
1111
1112
1113sub import_chunk_from_resource_bundle
1114{
1115 my ($chunk_text) = @_;
1116
1117 # Simple: just remove string key
1118 $chunk_text =~ s/^(\S+?):(\s*)//;
1119 $chunk_text =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1120 $chunk_text =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$//i;
1121
1122 return $chunk_text;
1123}
1124
1125
1126sub get_resource_bundle_chunk_comment_date
1127{
1128 my ($chunk_text) = @_;
1129
1130 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1131 if ($chunk_text =~ /\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$/i) {
1132 return $1;
1133 }
1134
1135 return undef;
1136}
1137
1138
1139sub is_resource_bundle_chunk_automatically_translated
1140{
1141 # No resource bundle chunks are automatically translated
1142 return 0;
1143}
1144
1145
1146sub write_translated_resource_bundle
1147{
1148 my $source_file = shift(@_); # Not used
1149 my @source_file_lines = @{shift(@_)};
1150 my $source_file_key_to_text_mapping = shift(@_);
1151 my $target_file = shift(@_);
1152 my @target_file_lines = @{shift(@_)}; # Not used
1153 my $target_file_key_to_text_mapping = shift(@_);
1154 my $target_file_key_to_comment_date_mapping = shift(@_);
1155 my $target_language_code = shift(@_); # Not used
1156
1157 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1158 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_resource_bundle(@source_file_lines);
1159 my %source_file_line_to_key_mapping = ();
1160 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1161 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1162 }
1163
1164 # Write the new target file
1165 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1166 if (!open(TARGET_FILE, ">$target_file_path")) {
1167 &throw_fatal_error("Could not write target file $target_file_path.");
1168 }
1169
1170 # Model the new target file on the source file, with the target file translations
1171 my $source_file_line_number = 0;
1172 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1173 # Fill in the gaps before this chunk starts
1174 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1175 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1176 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1177 print TARGET_FILE $source_file_lines[$source_file_line_number];
1178 $source_file_line_number++;
1179 }
1180 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1181
1182 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1183 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1184 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1185
1186 # If no translation exists for this chunk, show this, and move on
1187 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1188 print TARGET_FILE "# -- Missing translation: $chunk_key\n";
1189 next;
1190 }
1191
1192 print TARGET_FILE "$chunk_key:$target_file_chunk_text";
1193 if ($target_file_key_to_comment_date_mapping->{$chunk_key}) {
1194 print TARGET_FILE " # Updated " . $target_file_key_to_comment_date_mapping->{$chunk_key};
1195 }
1196 print TARGET_FILE "\n";
1197 }
1198
1199 close(TARGET_FILE);
1200}
1201
1202
1203# ==========================================================================================
1204# GREENSTONE XML FUNCTIONS
1205
1206sub build_key_to_line_mapping_for_greenstone_xml
1207{
1208 my (@file_lines) = @_;
1209
1210 my %chunk_key_to_line_mapping = ();
1211 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1212 my $line = $file_lines[$i];
1213 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1214
1215 # Line contains a string to translate
1216 if ($line =~ /^\s*<Text id=\"(.*?)\">/) {
1217 my $chunk_key = $1;
1218 $line =~ s/\s*$//; # Remove any nasty whitespace
1219 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d\"\/>$//;
1220
1221 # While there is still text of the string to go...
1222 my $startline = $i;
1223 while ($line !~ /<\/Text>$/) {
1224 $i++;
1225 if ($i == scalar(@file_lines)) {
1226 &throw_fatal_error("Could not find end of string $chunk_key.");
1227 }
1228 $line = $file_lines[$i];
1229 $line =~ s/\s*$//; # Remove any nasty whitespace
1230 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d\"\/>$//;
1231 }
1232
1233 # Map from chunk key to line
1234 if (!defined($chunk_key_to_line_mapping{$chunk_key})) {
1235 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1236 }
1237 else {
1238 &throw_fatal_error("Duplicate key $chunk_key.");
1239 }
1240 }
1241 }
1242
1243 return %chunk_key_to_line_mapping;
1244}
1245
1246
1247sub import_chunk_from_greenstone_xml
1248{
1249 my ($chunk_text) = @_;
1250
1251 # Simple: just remove the Text tags
1252 $chunk_text =~ s/^\s*<Text id=\"(.*?)\">(\s*)//;
1253 $chunk_text =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d\"\/>$//;
1254 $chunk_text =~ s/<\/Text>$//;
1255
1256 return $chunk_text;
1257}
1258
1259
1260sub get_greenstone_xml_chunk_comment_date
1261{
1262 my ($chunk_text) = @_;
1263
1264 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1265 if ($chunk_text =~ /<Updated date=\"(\d?\d-\D\D\D-\d\d\d\d)\"\/>$/i) {
1266 return $1;
1267 }
1268
1269 return undef;
1270}
1271
1272
1273sub is_greenstone_xml_chunk_automatically_translated
1274{
1275 # No greenstone XML chunks are automatically translated
1276 return 0;
1277}
1278
1279
1280sub write_translated_greenstone_xml
1281{
1282 my $source_file = shift(@_); # Not used
1283 my @source_file_lines = @{shift(@_)};
1284 my $source_file_key_to_text_mapping = shift(@_);
1285 my $target_file = shift(@_);
1286 my @target_file_lines = @{shift(@_)}; # Not used
1287 my $target_file_key_to_text_mapping = shift(@_);
1288 my $target_file_key_to_comment_date_mapping = shift(@_);
1289 my $target_language_code = shift(@_); # Not used
1290
1291 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1292 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_greenstone_xml(@source_file_lines);
1293 my %source_file_line_to_key_mapping = ();
1294 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1295 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1296 }
1297
1298 # Write the new target file
1299 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1300 if (!open(TARGET_FILE, ">$target_file_path")) {
1301 &throw_fatal_error("Could not write target file $target_file_path.");
1302 }
1303
1304 # Model the new target file on the source file, with the target file translations
1305 my $source_file_line_number = 0;
1306 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1307 # Fill in the gaps before this chunk starts
1308 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1309 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1310 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1311 print TARGET_FILE $source_file_lines[$source_file_line_number];
1312 $source_file_line_number++;
1313 }
1314 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1315
1316 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1317 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1318 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1319 $target_file_chunk_text =~ s/(\n)*$//g;
1320
1321 # If no translation exists for this chunk, show this, and move on
1322 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1323 print TARGET_FILE "<!-- Missing translation: $chunk_key -->\n";
1324 next;
1325 }
1326
1327 print TARGET_FILE "<Text id=\"$chunk_key\">$target_file_chunk_text</Text>";
1328 if ($target_file_key_to_comment_date_mapping->{$chunk_key}) {
1329 print TARGET_FILE "<Updated date=\"" . $target_file_key_to_comment_date_mapping->{$chunk_key} . "\"\/>";
1330 }
1331 print TARGET_FILE "\n";
1332 }
1333
1334 # Fill in the end of the file
1335 while ($source_file_line_number < scalar(@source_file_lines)) {
1336 print TARGET_FILE $source_file_lines[$source_file_line_number];
1337 $source_file_line_number++;
1338 }
1339
1340 close(TARGET_FILE);
1341}
1342
1343
1344&main(@ARGV);
Note: See TracBrowser for help on using the repository browser.