source: trunk/gsdl/bin/script/gti.pl@ 11487

Last change on this file since 11487 was 11487, checked in by mdewsnip, 18 years ago

Added <Updated date="..."> comments to the greenstone XML files, to support updating.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 50.8 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# gti.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2005 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
33}
34
35
36use iso639;
37use strict;
38use util;
39
40
41my $anonymous_cvs_root = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src";
42my $gsdl_root_directory = "$ENV{'GSDLHOME'}";
43my $gti_log_file = &util::filename_cat($gsdl_root_directory, "etc", "gti.log");
44my $source_language_code = "en"; # This is non-negiotable
45
46my $gti_translation_files =
47 [ # Greenstone macrofiles
48 { 'key' => "coredm",
49 'file_type' => "macrofile",
50 'source_file' => "macros/english.dm",
51 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
52 { 'key' => "auxdm",
53 'file_type' => "macrofile",
54 'source_file' => "macros/english2.dm",
55 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" },
56
57 # GLI dictionary
58 { 'key' => "glidict",
59 'file_type' => "resource_bundle",
60 'source_file' => "gli/classes/dictionary.properties",
61 'target_file' => "gli/classes/dictionary_{target_language_code}.properties" },
62
63 # Greenstone Perl modules
64 { 'key' => "perlmodules",
65 'file_type' => "resource_bundle",
66 'source_file' => "perllib/strings.rb",
67 'target_file' => "perllib/strings_{target_language_code}.rb" },
68
69 # Greenstone tutorial exercises
70 # { 'key' => "tutorials",
71 # 'file_type' => "greenstone_xml",
72 # 'source_file' => "gsdl-tutorials/tutorial_en.xml",
73 # 'target_file' => "gsdl-tutorials/tutorial_{target_language_code}.xml" },
74
75 # Greenstone.org
76 # { 'key' => "greenorg",
77 # 'file_type' => "macrofile",
78 # 'source_file' => "greenorg/macros/english.dm",
79 # 'target_file' => "greenorg/macros/{iso_639_1_target_language_name}.dm" }
80 ];
81
82
83sub main
84{
85 # Get the command to process, and any arguments
86 my $gti_command = shift(@_);
87 my @gti_command_arguments = @_;
88
89 # Open the GTI log file for appending, or write to STDERR if that fails
90 if (!open(GTI_LOG, ">>$gti_log_file")) {
91 open(GTI_LOG, ">&STDERR");
92 }
93
94 # Log the command that launched this script
95 &log_message("Command: $0 @ARGV");
96
97 # Check that a command was supplied
98 if (!$gti_command) {
99 &throw_fatal_error("Missing command.");
100 }
101
102 # Process the command
103 if ($gti_command =~ /^get-first-n-chunks-requiring-work$/i) {
104 print &get_first_n_chunks_requiring_work(@gti_command_arguments);
105 }
106 if ($gti_command =~ /^get-language-status$/i) {
107 print &get_language_status(@gti_command_arguments);
108 }
109 if ($gti_command =~ /^search-chunks$/i) {
110 print &search_chunks(@gti_command_arguments);
111 }
112 if ($gti_command =~ /^submit-translations$/i) {
113 # This command cannot produce any output since it reads input
114 &submit_translations(@gti_command_arguments);
115 }
116
117 # The command was not recognized
118 # &throw_fatal_error("Unknown command \"$gti_command\".");
119}
120
121
122sub throw_fatal_error
123{
124 my $error_message = shift(@_);
125
126 # Write an XML error response
127 print "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
128 print "<GTIResponse>\n";
129 print " <GTIError time=\"" . time() . "\">" . $error_message . "</GTIError>\n";
130 print "</GTIResponse>\n";
131
132 # Log the error message, then die
133 &log_message("Error: $error_message");
134 die "\n";
135}
136
137
138sub log_message
139{
140 my $log_message = shift(@_);
141 print GTI_LOG time() . " -- " . $log_message . "\n";
142}
143
144
145sub get_first_n_chunks_requiring_work
146{
147 # The code of the target language (ensure it is lowercase)
148 my $target_language_code = lc(shift(@_));
149 # The key of the file to translate (ensure it is lowercase)
150 my $translation_file_key = lc(shift(@_));
151 # The number of chunks to return (defaults to one if not specified)
152 my $num_chunks_to_return = shift(@_) || "1";
153
154 # Check that the necessary arguments were supplied
155 if (!$target_language_code || !$translation_file_key) {
156 &throw_fatal_error("Missing command argument.");
157 }
158
159 # Get (and check) the translation configuration
160 my ($source_file, $target_file, $translation_file_type)
161 = &get_translation_configuration($target_language_code, $translation_file_key);
162
163 # Parse the source language and target language files
164 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
165 my @source_file_lines = &read_file_lines($source_file_path);
166 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
167
168 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
169 my @target_file_lines = &read_file_lines($target_file_path);
170 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
171
172 # Filter out any automatically translated chunks
173 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
174 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
175 delete $source_file_key_to_line_mapping{$chunk_key};
176 delete $target_file_key_to_line_mapping{$chunk_key};
177 }
178 }
179
180 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
181 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
182 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
183 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
184
185 # Determine the target file chunks requiring translation
186 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
187 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
188
189 # Determine the target file chunks requiring updating
190 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
191 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
192 my @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
193 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
194
195 # Form an XML response to the command
196 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
197 $xml_response .= "<GTIResponse>\n";
198 $xml_response .= " <TranslationFile"
199 . " key=\"" . $translation_file_key . "\""
200 . " target_file_path=\"" . $target_file . "\""
201 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
202 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
203 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
204
205 # Do chunks requiring translation first
206 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_translation)) {
207 $xml_response .= " <ChunksRequiringTranslation size=\"" . scalar(@target_file_keys_requiring_translation) . "\">\n";
208 }
209 else {
210 $xml_response .= " <ChunksRequiringTranslation size=\"" . $num_chunks_to_return . "\">\n";
211 }
212
213 foreach my $chunk_key (@target_file_keys_requiring_translation) {
214 last if ($num_chunks_to_return == 0);
215
216 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
217 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
218
219 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
220 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
221 $xml_response .= " <TargetFileText></TargetFileText>\n";
222 $xml_response .= " </Chunk>\n";
223
224 $num_chunks_to_return--;
225 }
226
227 $xml_response .= " </ChunksRequiringTranslation>\n";
228
229 # Then do chunks requiring updating
230 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_updating)) {
231 $xml_response .= " <ChunksRequiringUpdating size=\"" . scalar(@target_file_keys_requiring_updating) . "\">\n";
232 }
233 else {
234 $xml_response .= " <ChunksRequiringUpdating size=\"" . $num_chunks_to_return . "\">\n";
235 }
236
237 foreach my $chunk_key (@target_file_keys_requiring_updating) {
238 last if ($num_chunks_to_return == 0);
239
240 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
241 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
242 my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
243 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
244
245 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
246 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
247 $xml_response .= " <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
248 $xml_response .= " </Chunk>\n";
249
250 $num_chunks_to_return--;
251 }
252
253 $xml_response .= " </ChunksRequiringUpdating>\n";
254
255 $xml_response .= "</GTIResponse>\n";
256 return $xml_response;
257}
258
259
260sub get_language_status
261{
262 # The code of the target language (ensure it is lowercase)
263 my $target_language_code = lc(shift(@_));
264
265 # Check that the necessary arguments were supplied
266 if (!$target_language_code) {
267 &throw_fatal_error("Missing command argument.");
268 }
269
270 # Form an XML response to the command
271 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
272 $xml_response .= "<GTIResponse>\n";
273 $xml_response .= " <LanguageStatus code=\"$target_language_code\">\n";
274
275 foreach my $translation_file (@$gti_translation_files) {
276 # Get (and check) the translation configuration
277 my ($source_file, $target_file, $translation_file_type)
278 = &get_translation_configuration($target_language_code, $translation_file->{'key'});
279
280 # Parse the source language and target language files
281 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
282 my @source_file_lines = &read_file_lines($source_file_path);
283 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
284
285 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
286 my @target_file_lines = &read_file_lines($target_file_path);
287 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
288
289 # Filter out any automatically translated chunks
290 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
291 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
292 delete $source_file_key_to_line_mapping{$chunk_key};
293 delete $target_file_key_to_line_mapping{$chunk_key};
294 }
295 }
296
297 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
298 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
299 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
300 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
301
302 # Determine the target file chunks requiring translation
303 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
304 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
305
306 # Determine the target file chunks requiring updating
307 my @target_file_keys_requiring_updating = ();
308 if (-e $target_file_path) {
309 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
310 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
311 @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
312 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
313 }
314
315 $xml_response .= " <TranslationFile"
316 . " key=\"" . $translation_file->{'key'} . "\""
317 . " target_file_path=\"" . $target_file . "\""
318 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
319 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
320 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
321 }
322
323 $xml_response .= " </LanguageStatus>\n";
324
325 $xml_response .= "</GTIResponse>\n";
326 return $xml_response;
327}
328
329
330sub search_chunks
331{
332 # The code of the target language (ensure it is lowercase)
333 my $target_language_code = lc(shift(@_));
334 # The key of the file to translate (ensure it is lowercase)
335 my $translation_file_key = lc(shift(@_));
336 # The query string
337 my $query_string = join(' ', @_);
338
339 # Check that the necessary arguments were supplied
340 if (!$target_language_code || !$translation_file_key || !$query_string) {
341 &throw_fatal_error("Missing command argument.");
342 }
343
344 # Get (and check) the translation configuration
345 my ($source_file, $target_file, $translation_file_type)
346 = &get_translation_configuration($target_language_code, $translation_file_key);
347
348 # Parse the source language and target language files
349 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
350 my @source_file_lines = &read_file_lines($source_file_path);
351 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
352
353 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
354 my @target_file_lines = &read_file_lines($target_file_path);
355 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
356
357 # Filter out any automatically translated chunks
358 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
359 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
360 delete $source_file_key_to_line_mapping{$chunk_key};
361 delete $target_file_key_to_line_mapping{$chunk_key};
362 }
363 }
364
365 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
366 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
367 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
368 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
369
370 # Determine the target file chunks matching the query
371 my @target_file_keys_matching_query = ();
372 foreach my $chunk_key (keys(%target_file_key_to_text_mapping)) {
373 my $target_file_text = $target_file_key_to_text_mapping{$chunk_key};
374 if ($target_file_text =~ /$query_string/i) {
375 # &log_message("Chunk with key $chunk_key matches query.");
376 push(@target_file_keys_matching_query, $chunk_key);
377 }
378 }
379
380 # Form an XML response to the command
381 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
382 $xml_response .= "<GTIResponse>\n";
383
384 $xml_response .= " <ChunksMatchingQuery size=\"" . scalar(@target_file_keys_matching_query) . "\">\n";
385 foreach my $chunk_key (@target_file_keys_matching_query) {
386 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
387
388 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
389 $xml_response .= " <TargetFileText>$target_file_chunk_text</TargetFileText>\n";
390 $xml_response .= " </Chunk>\n";
391 }
392 $xml_response .= " </ChunksMatchingQuery>\n";
393
394 $xml_response .= "</GTIResponse>\n";
395 return $xml_response;
396}
397
398
399sub submit_translations
400{
401 # The code of the target language (ensure it is lowercase)
402 my $target_language_code = lc(shift(@_));
403 # The key of the file to translate (ensure it is lowercase)
404 my $translation_file_key = lc(shift(@_));
405 # Whether to submit a target chunk even if it hasn't changed
406 my $force_submission_flag = shift(@_);
407
408 # Check that the necessary arguments were supplied
409 if (!$target_language_code || !$translation_file_key) {
410 &log_message("Fatal error (but cannot be thrown): Missing command argument.");
411 die "\n";
412 }
413
414 # Get (and check) the translation configuration
415 my ($source_file, $target_file, $translation_file_type)
416 = &get_translation_configuration($target_language_code, $translation_file_key);
417
418 # Parse the source language and target language files
419 my @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
420 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
421 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
422 my %source_file_key_to_comment_date_mapping = &build_key_to_comment_date_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
423 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
424
425 my @target_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $target_file));
426 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
427 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
428 my %target_file_key_to_comment_date_mapping = &build_key_to_comment_date_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
429 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
430
431 # Submission date
432 my $day = (localtime)[3];
433 my $month = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[(localtime)[4]];
434 my $year = (localtime)[5] + 1900;
435 my $submission_date = "$day-$month-$year";
436
437 open(SUBMISSION, "-");
438 my @submission_lines = <SUBMISSION>;
439 close(SUBMISSION);
440
441 # Remove any nasty carriage returns
442 &log_message("Submission:");
443 foreach my $submission_line (@submission_lines) {
444 $submission_line =~ s/\r$//;
445 &log_message(" $submission_line");
446 }
447
448 my %source_file_key_to_submission_mapping = ();
449 my %target_file_key_to_submission_mapping = ();
450 for (my $i = 0; $i < scalar(@submission_lines); $i++) {
451 # Read source file part of submission
452 if ($submission_lines[$i] =~ /^\<SourceFileText key=\"(.+)\"\>/) {
453 my $chunk_key = $1;
454
455 # Read the source file text
456 my $source_file_chunk_text = "";
457 $i++;
458 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/SourceFileText\>/) {
459 $source_file_chunk_text .= $submission_lines[$i];
460 $i++;
461 }
462 $source_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
463 $source_file_chunk_text = &unmake_text_xml_safe($source_file_chunk_text);
464
465 &log_message("Source file key: $chunk_key");
466 &log_message("Source file text: $source_file_chunk_text");
467 $source_file_key_to_submission_mapping{$chunk_key} = $source_file_chunk_text;
468 }
469
470 # Read target file part of submission
471 if ($submission_lines[$i] =~ /^\<TargetFileText key=\"(.+)\"\>/) {
472 my $chunk_key = $1;
473
474 # Read the target file text
475 my $target_file_chunk_text = "";
476 $i++;
477 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/TargetFileText\>/) {
478 $target_file_chunk_text .= $submission_lines[$i];
479 $i++;
480 }
481 $target_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
482 $target_file_chunk_text = &unmake_text_xml_safe($target_file_chunk_text);
483
484 &log_message("Target file key: $chunk_key");
485 &log_message("Target file text: $target_file_chunk_text");
486 $target_file_key_to_submission_mapping{$chunk_key} = $target_file_chunk_text;
487 }
488 }
489
490 # -----------------------------------------
491 # Validate the translation submissions
492 # -----------------------------------------
493
494 # Check that the translations are valid
495 foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) {
496 # Make sure the submitted chunk still exists in the source file
497 if (!defined($source_file_key_to_text_mapping{$chunk_key})) {
498 &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission).");
499 delete $source_file_key_to_submission_mapping{$chunk_key};
500 delete $target_file_key_to_submission_mapping{$chunk_key};
501 next;
502 }
503
504 # Make sure the submitted source chunk matches the source file chunk
505 if ($source_file_key_to_submission_mapping{$chunk_key} ne $source_file_key_to_text_mapping{$chunk_key}) {
506 &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission).");
507 &log_message("Submission source: $source_file_key_to_submission_mapping{$chunk_key}");
508 &log_message(" Source text: $source_file_key_to_text_mapping{$chunk_key}");
509 delete $source_file_key_to_submission_mapping{$chunk_key};
510 delete $target_file_key_to_submission_mapping{$chunk_key};
511 next;
512 }
513 }
514
515 # Apply the submitted translations
516 foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) {
517 # Only apply the submission if it is a change, unless -force_submission has been specified
518 if ($force_submission_flag || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) {
519 $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
520 $target_file_key_to_comment_date_mapping{$chunk_key} = $submission_date;
521 }
522 }
523
524 eval "&write_translated_${translation_file_type}(\$source_file, \\\@source_file_lines, \\\%source_file_key_to_text_mapping, \$target_file, \\\@target_file_lines, \\\%target_file_key_to_text_mapping, \\\%target_file_key_to_comment_date_mapping, \$target_language_code)";
525}
526
527
528sub get_translation_configuration
529{
530 # Get the code of the target language
531 my $target_language_code = shift(@_);
532 # Get the key of the file to translate
533 my $translation_file_key = shift(@_);
534
535 # Read the translation data from the gti.cfg file
536 my ($source_file, $target_file, $translation_file_type) =
537 &get_translation_data_for($target_language_code, $translation_file_key);
538
539 # Check that the file to translate is defined in the gti.cfg file
540 if (!$source_file || !$target_file || !$translation_file_type) {
541 &throw_fatal_error("Missing or incomplete specification for translation file \"$translation_file_key\" in gti.pl.");
542 }
543
544 # Check that the source file exists
545 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
546 if (!-e $source_file_path) {
547 &throw_fatal_error("Source file $source_file_path does not exist.");
548 }
549
550 # Check that the source file is up to date
551 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
552 # unless ($translation_file_is_not_in_cvs) {
553 my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`;
554 if ($source_file_cvs_status =~ /^C /) {
555 &throw_fatal_error("Source file $source_file_path conflicts with the repository.");
556 }
557 if ($source_file_cvs_status =~ /^M /) {
558 &throw_fatal_error("Source file $source_file_path contains uncommitted changes.");
559 }
560 # }
561
562 return ($source_file, $target_file, $translation_file_type);
563}
564
565
566sub get_translation_data_for
567{
568 my ($target_language_code, $translation_file_key) = @_;
569
570 foreach my $translation_file (@$gti_translation_files) {
571 # If this isn't the correct translation file, move onto the next one
572 next if ($translation_file_key ne $translation_file->{'key'});
573
574 # Resolve the target language file
575 my $target_language_file = $translation_file->{'target_file'};
576 if ($target_language_file =~ /(\{.+\;.+\})/) {
577 my $unresolved_target_language_file_part = $1;
578
579 # Check for a special case for the target language code
580 if ($unresolved_target_language_file_part =~ /(\{|\;)$target_language_code:([^\;]+)(\;|\})/) {
581 my $resolved_target_language_file_part = $2;
582 $target_language_file =~ s/$unresolved_target_language_file_part/$resolved_target_language_file_part/;
583 }
584 # Otherwise use the last part as the default value
585 else {
586 my ($default_target_language_file_part) = $unresolved_target_language_file_part =~ /([^\;]+)\}/;
587 $target_language_file =~ s/$unresolved_target_language_file_part/\{$default_target_language_file_part\}/;
588 }
589 }
590
591 # Resolve instances of {iso_639_1_target_language_name}
592 my $iso_639_1_target_language_name = $iso639::fromiso639{$target_language_code};
593 $iso_639_1_target_language_name =~ tr/A-Z/a-z/ if $iso_639_1_target_language_name;
594 $target_language_file =~ s/\{iso_639_1_target_language_name\}/$iso_639_1_target_language_name/g;
595
596 # Resolve instances of {target_language_code}
597 $target_language_file =~ s/\{target_language_code\}/$target_language_code/g;
598
599 return ($translation_file->{'source_file'}, $target_language_file, $translation_file->{'file_type'});
600 }
601
602 return ();
603}
604
605
606sub read_file_lines
607{
608 my ($file_path) = @_;
609
610 if (!open(FILE_IN, "<$file_path")) {
611 &log_message("Note: Could not open file $file_path.");
612 return ();
613 }
614 my @file_lines = <FILE_IN>;
615 close(FILE_IN);
616
617 return @file_lines;
618}
619
620
621sub build_key_to_line_mapping
622{
623 my ($file_lines, $translation_file_type) = @_;
624 eval "return &build_key_to_line_mapping_for_${translation_file_type}(\@\$file_lines)";
625}
626
627
628sub build_key_to_text_mapping
629{
630 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
631
632 my %key_to_text_mapping = ();
633 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
634 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
635 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
636
637 my $chunk_text = @$file_lines[$chunk_starting_line];
638 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
639 $chunk_text .= @$file_lines[$l];
640 }
641
642 # Map from chunk key to text
643 eval "\$key_to_text_mapping{\${chunk_key}} = &import_chunk_from_${translation_file_type}(\$chunk_text)";
644 }
645
646 return %key_to_text_mapping;
647}
648
649
650sub build_key_to_last_update_date_mapping
651{
652 my ($file, $file_lines, $key_to_line_mapping, $translation_file_type) = @_;
653
654 # If the files aren't in CVS then we can't tell anything about what needs updating
655 # return () if ($translation_file_is_not_in_cvs);
656
657 # Build a mapping from key to CVS date
658 # Need to be careful with this mapping because the chunk keys won't necessarily all be valid
659 my %key_to_cvs_date_mapping = &build_key_to_cvs_date_mapping($file, $translation_file_type);
660
661 # Build a mapping from key to comment date
662 my %key_to_comment_date_mapping = &build_key_to_comment_date_mapping($file_lines, $key_to_line_mapping, $translation_file_type);
663
664 # Build a mapping from key to last update date (the latter of the CVS date and comment date)
665 my %key_to_last_update_date_mapping = ();
666 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
667 # Use the CVS date as a starting point
668 my $chunk_cvs_date = $key_to_cvs_date_mapping{$chunk_key};
669 $key_to_last_update_date_mapping{$chunk_key} = $chunk_cvs_date;
670
671 # If a comment date exists and it is after the CVS date, use that instead
672 my $chunk_comment_date = $key_to_comment_date_mapping{$chunk_key};
673 if (defined($chunk_comment_date) && (!defined($chunk_cvs_date) || &is_date_after($chunk_comment_date, $chunk_cvs_date))) {
674 $key_to_last_update_date_mapping{$chunk_key} = $chunk_comment_date;
675 }
676 }
677
678 return %key_to_last_update_date_mapping;
679}
680
681
682sub build_key_to_cvs_date_mapping
683{
684 my ($filename, $translation_file_type) = @_;
685
686 # Use CVS to annotate each line of the file with the date it was last edited
687 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
688 my $cvs_annotated_file = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root annotate -F $filename 2>/dev/null`;
689 my @cvs_annotated_file_lines = split(/\n/, $cvs_annotated_file);
690
691 my @cvs_annotated_file_lines_date = ();
692 foreach my $cvs_annotated_file_line (@cvs_annotated_file_lines) {
693 # Extract the date from the CVS annotation at the front
694 $cvs_annotated_file_line =~ s/^\S+\s+\(\S+\s+(\S+)\):\s//;
695 push(@cvs_annotated_file_lines_date, $1);
696 }
697
698 # Build a key to line mapping for the CVS annotated file, for matching the chunk key to the CVS date
699 my %key_to_line_mapping = &build_key_to_line_mapping(\@cvs_annotated_file_lines, $translation_file_type);
700
701 my %key_to_cvs_date_mapping = ();
702 foreach my $chunk_key (keys(%key_to_line_mapping)) {
703 my $chunk_starting_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[0];
704 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[1];
705
706 # Find the date this chunk was last edited, from the CVS annotation
707 my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line];
708 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
709 if (&is_date_after($cvs_annotated_file_lines_date[$l], $chunk_date)) {
710 # This part of the chunk has been updated more recently
711 $chunk_date = $cvs_annotated_file_lines_date[$l];
712 }
713 }
714
715 # Map from chunk key to CVS date
716 $key_to_cvs_date_mapping{$chunk_key} = $chunk_date;
717 }
718
719 return %key_to_cvs_date_mapping;
720}
721
722
723sub build_key_to_comment_date_mapping
724{
725 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
726
727 my %key_to_comment_date_mapping = ();
728 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
729 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
730 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
731
732 my $chunk_text = @$file_lines[$chunk_starting_line];
733 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
734 $chunk_text .= @$file_lines[$l];
735 }
736
737 # Map from chunk key to comment date
738 my $chunk_comment_date;
739 eval "\$chunk_comment_date = &get_${translation_file_type}_chunk_comment_date(\$chunk_text)";
740 $key_to_comment_date_mapping{$chunk_key} = $chunk_comment_date if (defined($chunk_comment_date));
741 }
742
743 return %key_to_comment_date_mapping;
744}
745
746
747sub determine_chunks_requiring_translation
748{
749 my $source_file_key_to_text_mapping = shift(@_);
750 my $target_file_key_to_text_mapping = shift(@_);
751
752 # Chunks needing translation are those in the source file with no translation in the target file
753 my @target_file_keys_requiring_translation = ();
754 foreach my $chunk_key (keys(%$source_file_key_to_text_mapping)) {
755 if ($source_file_key_to_text_mapping->{$chunk_key} && !$target_file_key_to_text_mapping->{$chunk_key}) {
756 # &log_message("Chunk with key $chunk_key needs translating.");
757 push(@target_file_keys_requiring_translation, $chunk_key);
758 }
759 }
760
761 return @target_file_keys_requiring_translation;
762}
763
764
765sub determine_chunks_requiring_updating
766{
767 my $source_file_key_to_last_update_date_mapping = shift(@_);
768 my $target_file_key_to_last_update_date_mapping = shift(@_);
769
770 # Chunks needing updating are those in the target file that have been more recently edited in the source file
771 my @target_file_keys_requiring_updating = ();
772 foreach my $chunk_key (keys(%$source_file_key_to_last_update_date_mapping)) {
773 my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping->{$chunk_key};
774 my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping->{$chunk_key};
775 if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
776 # &log_message("Chunk with key $chunk_key needs updating.");
777 push(@target_file_keys_requiring_updating, $chunk_key);
778 }
779 }
780
781 return @target_file_keys_requiring_updating;
782}
783
784
785sub is_chunk_automatically_translated
786{
787 my ($chunk_key, $translation_file_type) = @_;
788 eval "return &is_${translation_file_type}_chunk_automatically_translated(\$chunk_key)";
789}
790
791
792sub make_text_xml_safe
793{
794 my $text = shift(@_);
795 $text =~ s/\&/\&amp\;/g;
796 $text =~ s/\&amp\;lt\;/\&amp\;amp\;lt\;/g;
797 $text =~ s/\&amp\;gt\;/\&amp\;amp\;gt\;/g;
798 $text =~ s/</\&lt\;/g;
799 $text =~ s/>/\&gt\;/g;
800 return $text;
801}
802
803
804sub unmake_text_xml_safe
805{
806 my $text = shift(@_);
807 $text =~ s/\&lt\;/</g;
808 $text =~ s/\&gt\;/>/g;
809 $text =~ s/\&amp\;/\&/g;
810 return $text;
811}
812
813
814# Returns 1 if $date1 is after $date2, 0 otherwise
815sub is_date_after
816{
817 my ($date1, $date2) = @_;
818 my %months = ("Jan", 1, "Feb", 2, "Mar", 3, "Apr", 4, "May", 5, "Jun", 6,
819 "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12);
820
821 my @date1parts = split(/-/, $date1);
822 my @date2parts = split(/-/, $date2);
823
824 # Compare year - nasty because we have rolled over into a new century
825 my $year1 = $date1parts[2];
826 if ($year1 < 80) {
827 $year1 += 100;
828 }
829 my $year2 = $date2parts[2];
830 if ($year2 < 80) {
831 $year2 += 100;
832 }
833
834 # Compare year
835 if ($year1 > $year2) {
836 return 1;
837 }
838 elsif ($year1 == $year2) {
839 # Year is the same, so compare month
840 if ($months{$date1parts[1]} > $months{$date2parts[1]}) {
841 return 1;
842 }
843 elsif ($months{$date1parts[1]} == $months{$date2parts[1]}) {
844 # Month is the same, so compare day
845 if ($date1parts[0] > $date2parts[0]) {
846 return 1;
847 }
848 }
849 }
850
851 return 0;
852}
853
854
855# ==========================================================================================
856# MACROFILE FUNCTIONS
857
858sub build_key_to_line_mapping_for_macrofile
859{
860 my (@file_lines) = @_;
861
862 my $macro_package;
863 my %chunk_key_to_line_mapping = ();
864 # Process the contents of the file, line by line
865 for (my $i = 0; $i < scalar(@file_lines); $i++) {
866 my $line = $file_lines[$i];
867 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
868
869 # Check if a new package is being defined
870 if ($line =~ m/^package\s+(.+)/) {
871 $macro_package = $1;
872 }
873
874 # Line contains a macro name
875 elsif ($line =~ m/^(_\w+_)/) {
876 my $macro_key = $1;
877 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
878
879 # While there is still text of the macro to go...
880 my $startline = $i;
881 while ($line !~ /\}$/) {
882 $i++;
883 if ($i == scalar(@file_lines)) {
884 &throw_fatal_error("Could not find end of macro $macro_key.");
885 }
886 $line = $file_lines[$i];
887 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
888 }
889
890 # The chunk key consists of the package name and the macro key
891 my $chunk_key = $macro_package . "." . $macro_key;
892 # Map from chunk key to line
893 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
894 }
895
896 # Icon: line in format ## "image text" ## image_type ## macro_name ##
897 elsif ($line =~ m/^\#\# .* \#\# .* \#\# (.*) \#\#/) {
898 # The chunk key consists of package name and macro key
899 my $chunk_key = $macro_package . "." . $1;
900 # Map from chunk key to line
901 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
902 }
903 }
904
905 return %chunk_key_to_line_mapping;
906}
907
908
909sub import_chunk_from_macrofile
910{
911 my ($chunk_text) = @_;
912
913 # Is this an icon macro??
914 if ($chunk_text =~ /^\#\# (.*)/) {
915 # Extract image macro text
916 $chunk_text =~ /^\#\#\s+([^\#]+)\s+\#\#/;
917 $chunk_text = $1;
918
919 # Remove enclosing quotes
920 $chunk_text =~ s/^\"//;
921 $chunk_text =~ s/\"$//;
922 }
923
924 # No, so it must be a text macro
925 else {
926 # Remove macro key
927 $chunk_text =~ s/^_([^_]+)_(\s*)//;
928
929 # Remove language specifier
930 $chunk_text =~ s/^\[l=.*\](\s*)//;
931
932 # Remove braces enclosing text
933 $chunk_text =~ s/^{(\s*)((.|\n)*)}(\s*)(\#.+\s*)?/$2/;
934 }
935
936 return $chunk_text;
937}
938
939
940sub get_macrofile_chunk_comment_date
941{
942 my ($chunk_text) = @_;
943
944 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
945 if ($chunk_text =~ /\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$/i) {
946 return $1;
947 }
948
949 return undef;
950}
951
952
953sub is_macrofile_chunk_automatically_translated
954{
955 my ($chunk_key) = @_;
956
957 # The _httpiconX_, _widthX_ and _heightX_ image macros are automatically translated
958 if ($chunk_key =~ /\._(httpicon|width|height)/) {
959 return 1;
960 }
961
962 return 0;
963}
964
965
966# Use the source file to generate a target file that is formatted the same
967sub write_translated_macrofile
968{
969 my $source_file = shift(@_); # Not used
970 my @source_file_lines = @{shift(@_)};
971 my $source_file_key_to_text_mapping = shift(@_);
972 my $target_file = shift(@_);
973 my @target_file_lines = @{shift(@_)};
974 my $target_file_key_to_text_mapping = shift(@_);
975 my $target_file_key_to_comment_date_mapping = shift(@_);
976 my $target_language_code = shift(@_);
977
978 # Build a mapping from source file line to chunk key
979 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@source_file_lines);
980 my %source_file_line_to_key_mapping = ();
981 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
982 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
983 }
984 my @source_file_line_keys = (sort sort_by_line (keys(%source_file_line_to_key_mapping)));
985 my $source_file_line_number = 0;
986
987 # Build a mapping from target file line to chunk key
988 my %target_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@target_file_lines);
989 my %target_file_line_to_key_mapping = ();
990 foreach my $chunk_key (keys(%target_file_key_to_line_mapping)) {
991 $target_file_line_to_key_mapping{$target_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
992 }
993 my @target_file_line_keys = (sort sort_by_line (keys(%target_file_line_to_key_mapping)));
994
995 # Write the new target file
996 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
997 if (!open(TARGET_FILE, ">$target_file_path")) {
998 &throw_fatal_error("Could not write target file $target_file_path.");
999 }
1000
1001 # Use the header from the target file, to keep language and author information
1002 if (scalar(@target_file_line_keys) > 0) {
1003 my $target_file_line_number = 0;
1004 my $target_file_chunk_starting_line_number = (split(/-/, $target_file_line_keys[0]))[0];
1005 while ($target_file_line_number < $target_file_chunk_starting_line_number) {
1006 my $target_file_line = $target_file_lines[$target_file_line_number];
1007 last if ($target_file_line =~ /^\# -- Missing translation: /); # We don't want to get into the macros
1008 print TARGET_FILE $target_file_line;
1009 $target_file_line_number++;
1010 }
1011
1012 $source_file_line_number = (split(/-/, $source_file_line_keys[0]))[0];
1013 }
1014
1015 # Model the new target file on the source file, with the target file translations
1016 foreach my $line_key (@source_file_line_keys) {
1017 # Fill in the gaps before this chunk starts
1018 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1019 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1020 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1021 print TARGET_FILE $source_file_lines[$source_file_line_number];
1022 $source_file_line_number++;
1023 }
1024 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1025
1026 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1027 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1028 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1029
1030 my $macrofile_key = $chunk_key;
1031 $macrofile_key =~ s/^(.+?)\.//;
1032
1033 # If no translation exists for this chunk, show this, and move on
1034 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1035 print TARGET_FILE "# -- Missing translation: $macrofile_key\n";
1036 next;
1037 }
1038
1039 # Grab the source chunk text
1040 my $source_file_chunk = $source_file_lines[$source_file_chunk_starting_line_number];
1041 for (my $l = ($source_file_chunk_starting_line_number + 1); $l <= $source_file_chunk_finishing_line_number; $l++) {
1042 $source_file_chunk .= $source_file_lines[$l];
1043 }
1044
1045 # Is this an icon macro??
1046 if ($source_file_chunk =~ /^\#\# (.*)/) {
1047 # Escape any newline and question mark characters so the source text is replaced correctly
1048 $source_file_chunk_text =~ s/\\/\\\\/g;
1049 $source_file_chunk_text =~ s/\?/\\\?/g;
1050
1051 # Build the new target chunk from the source chunk
1052 my $target_file_chunk = $source_file_chunk;
1053 $target_file_chunk =~ s/$source_file_chunk_text/$target_file_chunk_text/;
1054 $target_file_chunk =~ s/(\s)*$//;
1055 print TARGET_FILE "$target_file_chunk";
1056 }
1057
1058 # No, it is just a normal text macro
1059 else {
1060 print TARGET_FILE "$macrofile_key [l=$target_language_code] {$target_file_chunk_text}";
1061 }
1062
1063 # Add the update date, if one exists
1064 if ($target_file_key_to_comment_date_mapping->{$chunk_key}) {
1065 print TARGET_FILE " # Updated " . $target_file_key_to_comment_date_mapping->{$chunk_key};
1066 }
1067 print TARGET_FILE "\n";
1068 }
1069
1070 close(TARGET_FILE);
1071}
1072
1073
1074sub sort_by_line
1075{
1076 return ((split(/-/, $a))[0] <=> (split(/-/, $b))[0]);
1077}
1078
1079
1080# ==========================================================================================
1081# RESOURCE BUNDLE FUNCTIONS
1082
1083sub build_key_to_line_mapping_for_resource_bundle
1084{
1085 my (@file_lines) = @_;
1086
1087 my %chunk_key_to_line_mapping = ();
1088 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1089 my $line = $file_lines[$i];
1090 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1091
1092 # Line contains a dictionary string
1093 if ($line =~ /^(\S+?):(.*)$/) {
1094 my $chunk_key = $1;
1095
1096 # Map from chunk key to line
1097 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1098 }
1099 }
1100
1101 return %chunk_key_to_line_mapping;
1102}
1103
1104
1105sub import_chunk_from_resource_bundle
1106{
1107 my ($chunk_text) = @_;
1108
1109 # Simple: just remove string key
1110 $chunk_text =~ s/^(\S+?):(\s*)//;
1111 $chunk_text =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1112 $chunk_text =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$//i;
1113
1114 return $chunk_text;
1115}
1116
1117
1118sub get_resource_bundle_chunk_comment_date
1119{
1120 my ($chunk_text) = @_;
1121
1122 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1123 if ($chunk_text =~ /\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$/i) {
1124 return $1;
1125 }
1126
1127 return undef;
1128}
1129
1130
1131sub is_resource_bundle_chunk_automatically_translated
1132{
1133 # No resource bundle chunks are automatically translated
1134 return 0;
1135}
1136
1137
1138sub write_translated_resource_bundle
1139{
1140 my $source_file = shift(@_); # Not used
1141 my @source_file_lines = @{shift(@_)};
1142 my $source_file_key_to_text_mapping = shift(@_);
1143 my $target_file = shift(@_);
1144 my @target_file_lines = @{shift(@_)}; # Not used
1145 my $target_file_key_to_text_mapping = shift(@_);
1146 my $target_file_key_to_comment_date_mapping = shift(@_);
1147 my $target_language_code = shift(@_); # Not used
1148
1149 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1150 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_resource_bundle(@source_file_lines);
1151 my %source_file_line_to_key_mapping = ();
1152 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1153 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1154 }
1155
1156 # Write the new target file
1157 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1158 if (!open(TARGET_FILE, ">$target_file_path")) {
1159 &throw_fatal_error("Could not write target file $target_file_path.");
1160 }
1161
1162 # Model the new target file on the source file, with the target file translations
1163 my $source_file_line_number = 0;
1164 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1165 # Fill in the gaps before this chunk starts
1166 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1167 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1168 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1169 print TARGET_FILE $source_file_lines[$source_file_line_number];
1170 $source_file_line_number++;
1171 }
1172 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1173
1174 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1175 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1176 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1177
1178 # If no translation exists for this chunk, show this, and move on
1179 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1180 print TARGET_FILE "# -- Missing translation: $chunk_key\n";
1181 next;
1182 }
1183
1184 print TARGET_FILE "$chunk_key:$target_file_chunk_text";
1185 if ($target_file_key_to_comment_date_mapping->{$chunk_key}) {
1186 print TARGET_FILE " # Updated " . $target_file_key_to_comment_date_mapping->{$chunk_key};
1187 }
1188 print TARGET_FILE "\n";
1189 }
1190
1191 close(TARGET_FILE);
1192}
1193
1194
1195# ==========================================================================================
1196# GREENSTONE XML FUNCTIONS
1197
1198sub build_key_to_line_mapping_for_greenstone_xml
1199{
1200 my (@file_lines) = @_;
1201
1202 my %chunk_key_to_line_mapping = ();
1203 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1204 my $line = $file_lines[$i];
1205 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1206
1207 # Line contains a string to translate
1208 if ($line =~ /^\s*<Text id=\"(.*?)\">/) {
1209 my $chunk_key = $1;
1210 $line =~ s/\s*$//; # Remove any nasty whitespace
1211 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d\"\/>$//;
1212
1213 # While there is still text of the string to go...
1214 my $startline = $i;
1215 while ($line !~ /<\/Text>$/) {
1216 $i++;
1217 if ($i == scalar(@file_lines)) {
1218 &throw_fatal_error("Could not find end of string $chunk_key.");
1219 }
1220 $line = $file_lines[$i];
1221 $line =~ s/\s*$//; # Remove any nasty whitespace
1222 }
1223
1224 # Map from chunk key to line
1225 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1226 }
1227 }
1228
1229 return %chunk_key_to_line_mapping;
1230}
1231
1232
1233sub import_chunk_from_greenstone_xml
1234{
1235 my ($chunk_text) = @_;
1236
1237 # Simple: just remove the Text tags
1238 $chunk_text =~ s/^\s*<Text id=\"(.*?)\">(\s*)//;
1239 $chunk_text =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d\"\/>$//;
1240 $chunk_text =~ s/<\/Text>$//;
1241
1242 return $chunk_text;
1243}
1244
1245
1246sub get_greenstone_xml_chunk_comment_date
1247{
1248 my ($chunk_text) = @_;
1249
1250 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1251 if ($chunk_text =~ /<Updated date=\"(\d?\d-\D\D\D-\d\d\d\d)\"\/>$/i) {
1252 return $1;
1253 }
1254
1255 return undef;
1256}
1257
1258
1259sub is_greenstone_xml_chunk_automatically_translated
1260{
1261 # No greenstone XML chunks are automatically translated
1262 return 0;
1263}
1264
1265
1266sub write_translated_greenstone_xml
1267{
1268 my $source_file = shift(@_); # Not used
1269 my @source_file_lines = @{shift(@_)};
1270 my $source_file_key_to_text_mapping = shift(@_);
1271 my $target_file = shift(@_);
1272 my @target_file_lines = @{shift(@_)}; # Not used
1273 my $target_file_key_to_text_mapping = shift(@_);
1274 my $target_file_key_to_comment_date_mapping = shift(@_);
1275 my $target_language_code = shift(@_); # Not used
1276
1277 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1278 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_greenstone_xml(@source_file_lines);
1279 my %source_file_line_to_key_mapping = ();
1280 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1281 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1282 }
1283
1284 # Write the new target file
1285 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1286 if (!open(TARGET_FILE, ">$target_file_path")) {
1287 &throw_fatal_error("Could not write target file $target_file_path.");
1288 }
1289
1290 # Model the new target file on the source file, with the target file translations
1291 my $source_file_line_number = 0;
1292 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1293 # Fill in the gaps before this chunk starts
1294 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1295 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1296 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1297 print TARGET_FILE $source_file_lines[$source_file_line_number];
1298 $source_file_line_number++;
1299 }
1300 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1301
1302 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1303 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1304 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1305 $target_file_chunk_text =~ s/(\n)*$//g;
1306
1307 # If no translation exists for this chunk, show this, and move on
1308 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1309 print TARGET_FILE "<!-- Missing translation: $chunk_key -->\n";
1310 next;
1311 }
1312
1313 print TARGET_FILE "<Text id=\"$chunk_key\">$target_file_chunk_text</Text>";
1314 if ($target_file_key_to_comment_date_mapping->{$chunk_key}) {
1315 print TARGET_FILE "<Updated date=\"" . $target_file_key_to_comment_date_mapping->{$chunk_key} . "\"\/>";
1316 }
1317 print TARGET_FILE "\n";
1318 }
1319
1320 # Fill in the end of the file
1321 while ($source_file_line_number < scalar(@source_file_lines)) {
1322 print TARGET_FILE $source_file_lines[$source_file_line_number];
1323 $source_file_line_number++;
1324 }
1325
1326 close(TARGET_FILE);
1327}
1328
1329
1330&main(@ARGV);
Note: See TracBrowser for help on using the repository browser.