source: gsdl/branches/gsdl-2.74/bin/script/gti.pl@ 14270

Last change on this file since 14270 was 14270, checked in by oranfry, 17 years ago

merged selected changes to the gsdl trunk since r14217 into the 2.74 branch

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# gti.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2005 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
33}
34
35
36use iso639;
37use strict;
38use util;
39
40
41#my $anonymous_cvs_root = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src";
42#my $anonymous_svn_root = "http://http://svn.greenstone.org/gsdl/trunk/";
43my $gsdl_root_directory = "$ENV{'GSDLHOME'}";
44my $gti_log_file = &util::filename_cat($gsdl_root_directory, "etc", "gti.log");
45my $source_language_code = "en"; # This is non-negiotable
46
47my $gti_translation_files =
48 [ # Greenstone macrofiles
49 { 'key' => "coredm",
50 'file_type' => "macrofile",
51 'source_file' => "macros/english.dm",
52 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
53 { 'key' => "auxdm",
54 'file_type' => "macrofile",
55 'source_file' => "macros/english2.dm",
56 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" },
57
58 # GLI dictionary
59 { 'key' => "glidict",
60 'file_type' => "resource_bundle",
61 'source_file' => "gli/classes/dictionary.properties",
62 'target_file' => "gli/classes/dictionary_{target_language_code}.properties" },
63
64 # GLI help
65 { 'key' => "glihelp",
66 'file_type' => "greenstone_xml",
67 'source_file' => "gli/help/en/help.xml",
68 'target_file' => "gli/help/{target_language_code}/help.xml" },
69
70 # Greenstone Perl modules
71 { 'key' => "perlmodules",
72 'file_type' => "resource_bundle",
73 'source_file' => "perllib/strings.properties",
74 'target_file' => "perllib/strings_{target_language_code}.properties" },
75
76 # Greenstone tutorial exercises
77 # { 'key' => "tutorials",
78 # 'file_type' => "greenstone_xml",
79 # 'source_file' => "gsdl-documentation/tutorials/xml-source/tutorial_en.xml",
80 # 'target_file' => "gsdl-documentation/tutorials/xml-source/tutorial_{target_language_code}.xml" },
81
82 # new Greenstone.org
83 { 'key' => "greenorg",
84 'file_type' => "resource_bundle",
85 'source_file' => "greenstoneorg/website/classes/Gsc.properties",
86 'target_file' => "greenstoneorg/website/classes/Gsc_{iso_639_1_target_language_name}.properties"
87 # 'file_type' => "macrofile",
88 # 'source_file' => "greenorg/macros/english.dm",
89 # 'target_file' => "greenorg/macros/{iso_639_1_target_language_name}.dm"
90 }
91 ];
92
93
94sub main
95{
96 # Get the command to process, and any arguments
97 my $gti_command = shift(@_);
98 my @gti_command_arguments = @_;
99
100 # Open the GTI log file for appending, or write to STDERR if that fails
101 if (!open(GTI_LOG, ">>$gti_log_file")) {
102 open(GTI_LOG, ">&STDERR");
103 }
104
105 # Log the command that launched this script
106 &log_message("Command: $0 @ARGV");
107
108 # Check that a command was supplied
109 if (!$gti_command) {
110 &throw_fatal_error("Missing command.");
111 }
112
113 # Process the command
114 if ($gti_command =~ /^get-all-chunks$/i) {
115 print &get_all_chunks(@gti_command_arguments);
116 }
117 if ($gti_command =~ /^get-first-n-chunks-requiring-work$/i) {
118 print &get_first_n_chunks_requiring_work(@gti_command_arguments);
119 }
120 if ($gti_command =~ /^get-language-status$/i) {
121 print &get_language_status(@gti_command_arguments);
122 }
123 if ($gti_command =~ /^search-chunks$/i) {
124 print &search_chunks(@gti_command_arguments);
125 }
126 if ($gti_command =~ /^submit-translations$/i) {
127 # This command cannot produce any output since it reads input
128 &submit_translations(@gti_command_arguments);
129 }
130
131 # The command was not recognized
132 # &throw_fatal_error("Unknown command \"$gti_command\".");
133}
134
135
136sub throw_fatal_error
137{
138 my $error_message = shift(@_);
139
140 # Write an XML error response
141 print "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
142 print "<GTIResponse>\n";
143 print " <GTIError time=\"" . time() . "\">" . $error_message . "</GTIError>\n";
144 print "</GTIResponse>\n";
145
146 # Log the error message, then die
147 &log_message("Error: $error_message");
148 die "\n";
149}
150
151
152sub log_message
153{
154 my $log_message = shift(@_);
155 print GTI_LOG time() . " -- " . $log_message . "\n";
156}
157
158
159sub get_all_chunks
160{
161 # The code of the target language (ensure it is lowercase)
162 my $target_language_code = lc(shift(@_));
163 # The key of the file to translate (ensure it is lowercase)
164 my $translation_file_key = lc(shift(@_));
165
166 # Check that the necessary arguments were supplied
167 if (!$target_language_code || !$translation_file_key) {
168 &throw_fatal_error("Missing command argument.");
169 }
170
171 # Get (and check) the translation configuration
172 my ($source_file, $target_file, $translation_file_type)
173 = &get_translation_configuration($target_language_code, $translation_file_key);
174
175 # Parse the source language and target language files
176 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
177 my @source_file_lines = &read_file_lines($source_file_path);
178 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
179
180 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
181 my @target_file_lines = &read_file_lines($target_file_path);
182 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
183
184 # Filter out any automatically translated chunks
185 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
186 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
187 delete $source_file_key_to_line_mapping{$chunk_key};
188 delete $target_file_key_to_line_mapping{$chunk_key};
189 }
190 }
191
192 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
193 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
194 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
195 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
196
197 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
198 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
199
200 # Form an XML response to the command
201 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
202 $xml_response .= "<GTIResponse>\n";
203 $xml_response .= " <TranslationFile"
204 . " key=\"" . $translation_file_key . "\""
205 . " target_file_path=\"" . $target_file . "\"\/>\n";
206
207 # Do all the chunks
208 $xml_response .= " <Chunks size=\"" . scalar(keys(%source_file_key_to_text_mapping)) . "\">\n";
209 foreach my $chunk_key (keys(%source_file_key_to_text_mapping)) {
210 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
211 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
212
213 $xml_response .= " <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
214 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
215 if (defined($target_file_key_to_text_mapping{$chunk_key})) {
216 my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
217 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
218 $xml_response .= " <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
219 }
220 else {
221 $xml_response .= " <TargetFileText></TargetFileText>\n";
222 }
223
224 $xml_response .= " </Chunk>\n";
225 }
226 $xml_response .= " </Chunks>\n";
227
228 $xml_response .= "</GTIResponse>\n";
229 return $xml_response;
230}
231
232
233sub get_first_n_chunks_requiring_work
234{
235 # The code of the target language (ensure it is lowercase)
236 my $target_language_code = lc(shift(@_));
237 # The key of the file to translate (ensure it is lowercase)
238 my $translation_file_key = lc(shift(@_));
239 # The number of chunks to return (defaults to one if not specified)
240 my $num_chunks_to_return = shift(@_) || "1";
241
242 # Check that the necessary arguments were supplied
243 if (!$target_language_code || !$translation_file_key) {
244 &throw_fatal_error("Missing command argument.");
245 }
246
247 # Get (and check) the translation configuration
248 my ($source_file, $target_file, $translation_file_type)
249 = &get_translation_configuration($target_language_code, $translation_file_key);
250
251 # Parse the source language and target language files
252 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
253 my @source_file_lines = &read_file_lines($source_file_path);
254 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
255
256 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
257 my @target_file_lines = &read_file_lines($target_file_path);
258 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
259
260 # Filter out any automatically translated chunks
261 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
262 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
263 delete $source_file_key_to_line_mapping{$chunk_key};
264 delete $target_file_key_to_line_mapping{$chunk_key};
265 }
266 }
267
268 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
269 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
270 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
271 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
272
273 # Determine the target file chunks requiring translation
274 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
275 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
276
277 # Determine the target file chunks requiring updating
278 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
279 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
280 my @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
281 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
282
283 # Form an XML response to the command
284 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
285 $xml_response .= "<GTIResponse>\n";
286 $xml_response .= " <TranslationFile"
287 . " key=\"" . $translation_file_key . "\""
288 . " target_file_path=\"" . $target_file . "\""
289 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
290 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
291 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
292
293 # Do chunks requiring translation first
294 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_translation)) {
295 $xml_response .= " <ChunksRequiringTranslation size=\"" . scalar(@target_file_keys_requiring_translation) . "\">\n";
296 }
297 else {
298 $xml_response .= " <ChunksRequiringTranslation size=\"" . $num_chunks_to_return . "\">\n";
299 }
300
301 foreach my $chunk_key (@target_file_keys_requiring_translation) {
302 last if ($num_chunks_to_return == 0);
303
304 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
305 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
306
307 if(!defined $source_file_chunk_date){
308 $source_file_chunk_date = "";
309 }
310
311 $xml_response .= " <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
312 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
313 $xml_response .= " <TargetFileText></TargetFileText>\n";
314 $xml_response .= " </Chunk>\n";
315
316 $num_chunks_to_return--;
317 }
318
319 $xml_response .= " </ChunksRequiringTranslation>\n";
320
321 # Then do chunks requiring updating
322 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_updating)) {
323 $xml_response .= " <ChunksRequiringUpdating size=\"" . scalar(@target_file_keys_requiring_updating) . "\">\n";
324 }
325 else {
326 $xml_response .= " <ChunksRequiringUpdating size=\"" . $num_chunks_to_return . "\">\n";
327 }
328
329 foreach my $chunk_key (@target_file_keys_requiring_updating) {
330 last if ($num_chunks_to_return == 0);
331
332 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
333 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
334 my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
335 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
336
337 if(!defined $source_file_chunk_date){
338 $source_file_chunk_date = "";
339 }
340
341 $xml_response .= " <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
342 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
343 $xml_response .= " <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
344 $xml_response .= " </Chunk>\n";
345
346 $num_chunks_to_return--;
347 }
348
349 $xml_response .= " </ChunksRequiringUpdating>\n";
350
351 $xml_response .= "</GTIResponse>\n";
352 return $xml_response;
353}
354
355
356sub get_language_status
357{
358 # The code of the target language (ensure it is lowercase)
359 my $target_language_code = lc(shift(@_));
360
361 # Check that the necessary arguments were supplied
362 if (!$target_language_code) {
363 &throw_fatal_error("Missing command argument.");
364 }
365
366 # Form an XML response to the command
367 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
368 $xml_response .= "<GTIResponse>\n";
369 $xml_response .= " <LanguageStatus code=\"$target_language_code\">\n";
370
371 foreach my $translation_file (@$gti_translation_files) {
372 # Get (and check) the translation configuration
373 my ($source_file, $target_file, $translation_file_type)
374 = &get_translation_configuration($target_language_code, $translation_file->{'key'});
375
376 # Parse the source language and target language files
377 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
378 my @source_file_lines = &read_file_lines($source_file_path);
379 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
380
381 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
382 my @target_file_lines = &read_file_lines($target_file_path);
383 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
384
385 # Filter out any automatically translated chunks
386 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
387 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
388 delete $source_file_key_to_line_mapping{$chunk_key};
389 delete $target_file_key_to_line_mapping{$chunk_key};
390 }
391 }
392
393 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
394 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
395 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
396 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
397
398 # Determine the target file chunks requiring translation
399 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
400 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
401
402 # Determine the target file chunks requiring updating
403 my @target_file_keys_requiring_updating = ();
404 if (-e $target_file_path) {
405 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
406 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
407 @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
408 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
409 }
410
411 $xml_response .= " <TranslationFile"
412 . " key=\"" . $translation_file->{'key'} . "\""
413 . " target_file_path=\"" . $target_file . "\""
414 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
415 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
416 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
417 }
418
419 $xml_response .= " </LanguageStatus>\n";
420
421 $xml_response .= "</GTIResponse>\n";
422 return $xml_response;
423}
424
425
426sub search_chunks
427{
428 # The code of the target language (ensure it is lowercase)
429 my $target_language_code = lc(shift(@_));
430 # The key of the file to translate (ensure it is lowercase)
431 my $translation_file_key = lc(shift(@_));
432 # The query string
433 my $query_string = join(' ', @_);
434
435 # Check that the necessary arguments were supplied
436 if (!$target_language_code || !$translation_file_key || !$query_string) {
437 &throw_fatal_error("Missing command argument.");
438 }
439
440 # Get (and check) the translation configuration
441 my ($source_file, $target_file, $translation_file_type)
442 = &get_translation_configuration($target_language_code, $translation_file_key);
443
444 # Parse the source language and target language files
445 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
446 my @source_file_lines = &read_file_lines($source_file_path);
447 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
448
449 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
450 my @target_file_lines = &read_file_lines($target_file_path);
451 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
452
453 # Filter out any automatically translated chunks
454 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
455 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
456 delete $source_file_key_to_line_mapping{$chunk_key};
457 delete $target_file_key_to_line_mapping{$chunk_key};
458 }
459 }
460
461 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
462 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
463 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
464 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
465
466 # Determine the target file chunks matching the query
467 my @target_file_keys_matching_query = ();
468 foreach my $chunk_key (keys(%target_file_key_to_text_mapping)) {
469 my $target_file_text = $target_file_key_to_text_mapping{$chunk_key};
470 if ($target_file_text =~ /$query_string/i) {
471 # &log_message("Chunk with key $chunk_key matches query.");
472 push(@target_file_keys_matching_query, $chunk_key);
473 }
474 }
475
476 # Form an XML response to the command
477 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
478 $xml_response .= "<GTIResponse>\n";
479
480 $xml_response .= " <ChunksMatchingQuery size=\"" . scalar(@target_file_keys_matching_query) . "\">\n";
481 foreach my $chunk_key (@target_file_keys_matching_query) {
482 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
483
484 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
485 $xml_response .= " <TargetFileText>$target_file_chunk_text</TargetFileText>\n";
486 $xml_response .= " </Chunk>\n";
487 }
488 $xml_response .= " </ChunksMatchingQuery>\n";
489
490 $xml_response .= "</GTIResponse>\n";
491 return $xml_response;
492}
493
494
495sub submit_translations
496{
497 # The code of the target language (ensure it is lowercase)
498 my $target_language_code = lc(shift(@_));
499 # The key of the file to translate (ensure it is lowercase)
500 my $translation_file_key = lc(shift(@_));
501 # The username of the translation submitter
502 my $submitter_username = shift(@_);
503 # Whether to submit a target chunk even if it hasn't changed
504 my $force_submission_flag = shift(@_);
505
506 # Check that the necessary arguments were supplied
507 if (!$target_language_code || !$translation_file_key || !$submitter_username) {
508 &log_message("Fatal error (but cannot be thrown): Missing command argument.");
509 die "\n";
510 }
511
512 # Get (and check) the translation configuration
513 my ($source_file, $target_file, $translation_file_type)
514 = &get_translation_configuration($target_language_code, $translation_file_key);
515
516 # Parse the source language and target language files
517 my @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
518 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
519 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
520 my %source_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
521 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
522
523 my @target_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $target_file));
524 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
525 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
526 my %target_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
527 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
528
529 # Submission date
530 my $day = (localtime)[3];
531 my $month = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[(localtime)[4]];
532 my $year = (localtime)[5] + 1900;
533 my $submission_date = "$day-$month-$year";
534
535 open(SUBMISSION, "-");
536 my @submission_lines = <SUBMISSION>;
537 close(SUBMISSION);
538
539 # Remove any nasty carriage returns
540 &log_message("Submission:");
541 foreach my $submission_line (@submission_lines) {
542 $submission_line =~ s/\r$//;
543 &log_message(" $submission_line");
544 }
545
546 my %source_file_key_to_submission_mapping = ();
547 my %target_file_key_to_submission_mapping = ();
548 for (my $i = 0; $i < scalar(@submission_lines); $i++) {
549 # Read source file part of submission
550 if ($submission_lines[$i] =~ /^\<SourceFileText key=\"(.+)\"\>/) {
551 my $chunk_key = $1;
552
553 # Read the source file text
554 my $source_file_chunk_text = "";
555 $i++;
556 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/SourceFileText\>/) {
557 $source_file_chunk_text .= $submission_lines[$i];
558 $i++;
559 }
560 $source_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
561 $source_file_chunk_text = &unmake_text_xml_safe($source_file_chunk_text);
562
563 &log_message("Source file key: $chunk_key");
564 &log_message("Source file text: $source_file_chunk_text");
565 $source_file_key_to_submission_mapping{$chunk_key} = $source_file_chunk_text;
566 }
567
568 # Read target file part of submission
569 if ($submission_lines[$i] =~ /^\<TargetFileText key=\"(.+)\"\>/) {
570 my $chunk_key = $1;
571
572 # Read the target file text
573 my $target_file_chunk_text = "";
574 $i++;
575 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/TargetFileText\>/) {
576 $target_file_chunk_text .= $submission_lines[$i];
577 $i++;
578 }
579 $target_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
580 $target_file_chunk_text = &unmake_text_xml_safe($target_file_chunk_text);
581
582 &log_message("Target file key: $chunk_key");
583 &log_message("Target file text: $target_file_chunk_text");
584 $target_file_key_to_submission_mapping{$chunk_key} = $target_file_chunk_text;
585 }
586 }
587
588 # -----------------------------------------
589 # Validate the translation submissions
590 # -----------------------------------------
591
592 # Check that the translations are valid
593 foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) {
594 # Make sure the submitted chunk still exists in the source file
595 if (!defined($source_file_key_to_text_mapping{$chunk_key})) {
596 &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission).");
597 delete $source_file_key_to_submission_mapping{$chunk_key};
598 delete $target_file_key_to_submission_mapping{$chunk_key};
599 next;
600 }
601
602 # Make sure the submitted source chunk matches the source file chunk
603 if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) {
604 &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission).");
605 &log_message("Submission source: $source_file_key_to_submission_mapping{$chunk_key}");
606 &log_message(" Source text: $source_file_key_to_text_mapping{$chunk_key}");
607 delete $source_file_key_to_submission_mapping{$chunk_key};
608 delete $target_file_key_to_submission_mapping{$chunk_key};
609 next;
610 }
611 }
612
613 # Apply the submitted translations
614 foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) {
615 # Only apply the submission if it is a change, unless -force_submission has been specified
616 if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) {
617 $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
618 $target_file_key_to_gti_comment_mapping{$chunk_key} = "Updated $submission_date by $submitter_username";
619 }
620 }
621
622 eval "&write_translated_${translation_file_type}(\$source_file, \\\@source_file_lines, \\\%source_file_key_to_text_mapping, \$target_file, \\\@target_file_lines, \\\%target_file_key_to_text_mapping, \\\%target_file_key_to_gti_comment_mapping, \$target_language_code)";
623}
624
625
626sub get_translation_configuration
627{
628 # Get the code of the target language
629 my $target_language_code = shift(@_);
630 # Get the key of the file to translate
631 my $translation_file_key = shift(@_);
632
633 # Read the translation data from the gti.cfg file
634 my ($source_file, $target_file, $translation_file_type) =
635 &get_translation_data_for($target_language_code, $translation_file_key);
636
637 # Check that the file to translate is defined in the gti.cfg file
638 if (!$source_file || !$target_file || !$translation_file_type) {
639 &throw_fatal_error("Missing or incomplete specification for translation file \"$translation_file_key\" in gti.pl.");
640 }
641
642 # Check that the source file exists
643 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
644 if (!-e $source_file_path) {
645 &throw_fatal_error("Source file $source_file_path does not exist.");
646 }
647
648 # Check that the source file is up to date
649 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
650 # unless ($translation_file_is_not_in_cvs) {
651 #my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`;
652 my $source_file_cvs_status = `cd $gsdl_root_directory; svn status $source_file 2>/dev/null`;
653 if ($source_file_cvs_status =~ /^C /) {
654 &throw_fatal_error("Source file $source_file_path conflicts with the repository.");
655 }
656 if ($source_file_cvs_status =~ /^M /) {
657 &throw_fatal_error("Source file $source_file_path contains uncommitted changes.");
658 }
659 # }
660
661 return ($source_file, $target_file, $translation_file_type);
662}
663
664
665sub get_translation_data_for
666{
667 my ($target_language_code, $translation_file_key) = @_;
668
669 foreach my $translation_file (@$gti_translation_files) {
670 # If this isn't the correct translation file, move onto the next one
671 next if ($translation_file_key ne $translation_file->{'key'});
672
673 # Resolve the target language file
674 my $target_language_file = $translation_file->{'target_file'};
675 if ($target_language_file =~ /(\{.+\;.+\})/) {
676 my $unresolved_target_language_file_part = $1;
677
678 # Check for a special case for the target language code
679 if ($unresolved_target_language_file_part =~ /(\{|\;)$target_language_code:([^\;]+)(\;|\})/) {
680 my $resolved_target_language_file_part = $2;
681 $target_language_file =~ s/$unresolved_target_language_file_part/$resolved_target_language_file_part/;
682 }
683 # Otherwise use the last part as the default value
684 else {
685 my ($default_target_language_file_part) = $unresolved_target_language_file_part =~ /([^\;]+)\}/;
686 $target_language_file =~ s/$unresolved_target_language_file_part/\{$default_target_language_file_part\}/;
687 }
688 }
689
690 # Resolve instances of {iso_639_1_target_language_name}
691 my $iso_639_1_target_language_name = $iso639::fromiso639{$target_language_code};
692 $iso_639_1_target_language_name =~ tr/A-Z/a-z/ if $iso_639_1_target_language_name;
693 $target_language_file =~ s/\{iso_639_1_target_language_name\}/$iso_639_1_target_language_name/g;
694
695 # Resolve instances of {target_language_code}
696 $target_language_file =~ s/\{target_language_code\}/$target_language_code/g;
697
698 return ($translation_file->{'source_file'}, $target_language_file, $translation_file->{'file_type'});
699 }
700
701 return ();
702}
703
704
705sub read_file_lines
706{
707 my ($file_path) = @_;
708
709 if (!open(FILE_IN, "<$file_path")) {
710 &log_message("Note: Could not open file $file_path.");
711 return ();
712 }
713 my @file_lines = <FILE_IN>;
714 close(FILE_IN);
715
716 return @file_lines;
717}
718
719
720sub build_key_to_line_mapping
721{
722 my ($file_lines, $translation_file_type) = @_;
723 eval "return &build_key_to_line_mapping_for_${translation_file_type}(\@\$file_lines)";
724}
725
726
727sub build_key_to_text_mapping
728{
729 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
730
731 my %key_to_text_mapping = ();
732 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
733 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
734 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
735
736 my $chunk_text = @$file_lines[$chunk_starting_line];
737 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
738 $chunk_text .= @$file_lines[$l];
739 }
740
741 # Map from chunk key to text
742 eval "\$key_to_text_mapping{\${chunk_key}} = &import_chunk_from_${translation_file_type}(\$chunk_text)";
743 }
744
745 return %key_to_text_mapping;
746}
747
748
749sub build_key_to_last_update_date_mapping
750{
751 my ($file, $file_lines, $key_to_line_mapping, $translation_file_type) = @_;
752
753 # If the files aren't in CVS then we can't tell anything about what needs updating
754 # return () if ($translation_file_is_not_in_cvs);
755
756 # Build a mapping from key to CVS date
757 # Need to be careful with this mapping because the chunk keys won't necessarily all be valid
758 my %key_to_cvs_date_mapping = &build_key_to_cvs_date_mapping($file, $translation_file_type);
759
760 # Build a mapping from key to comment date
761 my %key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping($file_lines, $key_to_line_mapping, $translation_file_type);
762
763 # Build a mapping from key to last update date (the latter of the CVS date and comment date)
764 my %key_to_last_update_date_mapping = ();
765 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
766 # Use the CVS date as a starting point
767 my $chunk_cvs_date = $key_to_cvs_date_mapping{$chunk_key};
768 $key_to_last_update_date_mapping{$chunk_key} = $chunk_cvs_date;
769
770 # If a comment date exists and it is after the CVS date, use that instead
771 # need to convert the comment date format to SVN format
772 my $chunk_gti_comment = $key_to_gti_comment_mapping{$chunk_key};
773 if (defined($chunk_gti_comment) && $chunk_gti_comment =~ /(\d?\d-\D\D\D-\d\d\d\d)/) {
774 my $chunk_comment_date = $1;
775 if ((!defined($chunk_cvs_date) || &is_date_after($chunk_comment_date, $chunk_cvs_date))) {
776 $key_to_last_update_date_mapping{$chunk_key} = $chunk_comment_date;
777 }
778 }
779 }
780
781 return %key_to_last_update_date_mapping;
782}
783
784
785sub build_key_to_cvs_date_mapping
786{
787 my ($filename, $translation_file_type) = @_;
788
789 # Use CVS to annotate each line of the file with the date it was last edited
790 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
791 # my $cvs_annotated_file = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root annotate -F $filename 2>/dev/null`;
792 # my $cvs_annotated_file = `cd $gsdl_root_directory; export PATH=.:/research/lh92/programs/subversion/bin; svn annotate -v --force $filename`;
793 my $cvs_annotated_file = `cd $gsdl_root_directory; svn annotate -v $filename`;
794
795 my @cvs_annotated_file_lines = split(/\n/, $cvs_annotated_file);
796
797 my @cvs_annotated_file_lines_date = ();
798 foreach my $cvs_annotated_file_line (@cvs_annotated_file_lines) {
799 # Extract the date from the CVS annotation at the front
800 # cvs format : 07-Jun-02
801 # svn format : 2007-07-16
802 # $cvs_annotated_file_line =~ s/^\S+\s+\(\S+\s+(\S+)\):\s//;
803 $cvs_annotated_file_line =~ s/^\s+\S+\s+\S+\s(\S+)//;
804
805 push(@cvs_annotated_file_lines_date, $1);
806
807 # trim extra date information in svn annotation format
808 # 15:42:49 +1200 (Wed, 21 Jun 2006)
809 $cvs_annotated_file_line =~ s/^\s+\S+\s\S+\s\((.+?)\)\s//;
810 }
811
812 # Build a key to line mapping for the CVS annotated file, for matching the chunk key to the CVS date
813 my %key_to_line_mapping = &build_key_to_line_mapping(\@cvs_annotated_file_lines, $translation_file_type);
814
815 my %key_to_cvs_date_mapping = ();
816 foreach my $chunk_key (keys(%key_to_line_mapping)) {
817 my $chunk_starting_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[0];
818 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[1];
819
820 # Find the date this chunk was last edited, from the CVS annotation
821 my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line];
822 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
823 if (&is_date_after($cvs_annotated_file_lines_date[$l], $chunk_date)) {
824 # This part of the chunk has been updated more recently
825 $chunk_date = $cvs_annotated_file_lines_date[$l];
826
827 }
828 }
829
830 # Map from chunk key to CVS date
831 $key_to_cvs_date_mapping{$chunk_key} = $chunk_date;
832 }
833
834 return %key_to_cvs_date_mapping;
835}
836
837
838sub build_key_to_gti_comment_mapping
839{
840 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
841
842 my %key_to_gti_comment_mapping = ();
843 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
844 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
845 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
846
847 my $chunk_text = @$file_lines[$chunk_starting_line];
848 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
849 $chunk_text .= @$file_lines[$l];
850 }
851
852 # Map from chunk key to GTI comment
853 my $chunk_gti_comment;
854 eval "\$chunk_gti_comment = &get_${translation_file_type}_chunk_gti_comment(\$chunk_text)";
855 $key_to_gti_comment_mapping{$chunk_key} = $chunk_gti_comment if (defined($chunk_gti_comment));
856 }
857
858 return %key_to_gti_comment_mapping;
859}
860
861
862sub determine_chunks_requiring_translation
863{
864 my $source_file_key_to_text_mapping = shift(@_);
865 my $target_file_key_to_text_mapping = shift(@_);
866
867 # Chunks needing translation are those in the source file with no translation in the target file
868 my @target_file_keys_requiring_translation = ();
869 foreach my $chunk_key (keys(%$source_file_key_to_text_mapping)) {
870 if ($source_file_key_to_text_mapping->{$chunk_key} && !$target_file_key_to_text_mapping->{$chunk_key}) {
871 # &log_message("Chunk with key $chunk_key needs translating.");
872 push(@target_file_keys_requiring_translation, $chunk_key);
873 }
874 }
875
876 return @target_file_keys_requiring_translation;
877}
878
879
880sub determine_chunks_requiring_updating
881{
882 my $source_file_key_to_last_update_date_mapping = shift(@_);
883 my $target_file_key_to_last_update_date_mapping = shift(@_);
884
885 # Chunks needing updating are those in the target file that have been more recently edited in the source file
886 my @target_file_keys_requiring_updating = ();
887 foreach my $chunk_key (keys(%$source_file_key_to_last_update_date_mapping)) {
888 my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping->{$chunk_key};
889 my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping->{$chunk_key};
890
891 # print "key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n";
892
893 if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
894 # &log_message("Chunk with key $chunk_key needs updating.");
895 push(@target_file_keys_requiring_updating, $chunk_key);
896 }
897 }
898
899 return @target_file_keys_requiring_updating;
900}
901
902
903sub is_chunk_automatically_translated
904{
905 my ($chunk_key, $translation_file_type) = @_;
906 eval "return &is_${translation_file_type}_chunk_automatically_translated(\$chunk_key)";
907}
908
909
910sub make_text_xml_safe
911{
912 my $text = shift(@_);
913 $text =~ s/\&/\&amp\;/g;
914 $text =~ s/\&amp\;lt\;/\&amp\;amp\;lt\;/g;
915 $text =~ s/\&amp\;gt\;/\&amp\;amp\;gt\;/g;
916 $text =~ s/\&amp\;rarr\;/\&amp\;amp\;rarr\;/g;
917 $text =~ s/\&amp\;mdash\;/\&amp\;amp\;mdash\;/g;
918 $text =~ s/</\&lt\;/g;
919 $text =~ s/>/\&gt\;/g;
920 return $text;
921}
922
923
924sub unmake_text_xml_safe
925{
926 my $text = shift(@_);
927 $text =~ s/\&lt\;/</g;
928 $text =~ s/\&gt\;/>/g;
929 $text =~ s/\&amp\;/\&/g;
930 return $text;
931}
932
933
934# Returns 1 if $date1 is after $date2, 0 otherwise
935sub is_date_after_cvs
936{
937 my ($date1, $date2) = @_;
938 my %months = ("Jan", 1, "Feb", 2, "Mar", 3, "Apr", 4, "May", 5, "Jun", 6,
939 "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12);
940
941 if(!defined $date1) {
942 return 1;
943 }
944
945 my @date1parts = split(/-/, $date1);
946 my @date2parts = split(/-/, $date2);
947
948 # Compare year - nasty because we have rolled over into a new century
949 my $year1 = $date1parts[2];
950 if ($year1 < 80) {
951 $year1 += 2000;
952 }
953 my $year2 = $date2parts[2];
954 if ($year2 < 80) {
955 $year2 += 2000;
956 }
957
958 # Compare year
959 if ($year1 > $year2) {
960 return 1;
961 }
962 elsif ($year1 == $year2) {
963 # Year is the same, so compare month
964 if ($months{$date1parts[1]} > $months{$date2parts[1]}) {
965 return 1;
966 }
967 elsif ($months{$date1parts[1]} == $months{$date2parts[1]}) {
968 # Month is the same, so compare day
969 if ($date1parts[0] > $date2parts[0]) {
970 return 1;
971 }
972 }
973 }
974
975 return 0;
976}
977
978sub is_date_after
979{
980 my ($date1, $date2) = @_;
981
982 if(!defined $date1) {
983 return 1;
984 }
985 if(!defined $date2) {
986 return 0;
987 }
988
989 # 16-Aug-2006
990 if($date1=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){
991 my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr", "04", "May", "05", "Jun", "06",
992 "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12");
993 $date1=$3 . "-" . $months{$2} . "-" . $1;
994 # print "** converted date1: $date1\n";
995 }
996 if($date2=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){
997 my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr", "04", "May", "05", "Jun", "06",
998 "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12");
999 $date2=$3 . "-" . $months{$2} . "-" . $1;
1000 # print "** converted date2: $date2\n";
1001 }
1002
1003
1004 # 2006-08-16
1005 my @date1parts = split(/-/, $date1);
1006 my @date2parts = split(/-/, $date2);
1007
1008 # Compare year
1009 if ($date1parts[0] > $date2parts[0]) {
1010 return 1;
1011 }
1012 elsif ($date1parts[0] == $date2parts[0]) {
1013 # Year is the same, so compare month
1014 if ($date1parts[1] > $date2parts[1]) {
1015 return 1;
1016 }
1017 elsif ($date1parts[1] == $date2parts[1]) {
1018 # Month is the same, so compare day
1019 if ($date1parts[2] > $date2parts[2]) {
1020 return 1;
1021 }
1022 }
1023 }
1024
1025 return 0;
1026}
1027
1028
1029# ==========================================================================================
1030# MACROFILE FUNCTIONS
1031
1032sub build_key_to_line_mapping_for_macrofile
1033{
1034 my (@file_lines) = @_;
1035
1036 my $macro_package;
1037 my %chunk_key_to_line_mapping = ();
1038 # Process the contents of the file, line by line
1039 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1040 my $line = $file_lines[$i];
1041 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1042
1043 # Check if a new package is being defined
1044 if ($line =~ m/^package\s+(.+)/) {
1045 $macro_package = $1;
1046 }
1047
1048 # Line contains a macro name
1049 elsif ($line =~ m/^(_\w+_)/) {
1050 my $macro_key = $1;
1051 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
1052
1053 # While there is still text of the macro to go...
1054 my $startline = $i;
1055 while ($line !~ /\}$/) {
1056 $i++;
1057 if ($i == scalar(@file_lines)) {
1058 &throw_fatal_error("Could not find end of macro $macro_key.");
1059 }
1060 $line = $file_lines[$i];
1061 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
1062 }
1063
1064 # The chunk key consists of the package name and the macro key
1065 my $chunk_key = $macro_package . "." . $macro_key;
1066 # Map from chunk key to line
1067 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1068 }
1069
1070 # Icon: line in format ## "image text" ## image_type ## macro_name ##
1071 elsif ($line =~ m/^\#\# .* \#\# .* \#\# (.*) \#\#/) {
1072 # The chunk key consists of package name and macro key
1073 my $chunk_key = $macro_package . "." . $1;
1074 # Map from chunk key to line
1075 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1076 }
1077 }
1078
1079 return %chunk_key_to_line_mapping;
1080}
1081
1082
1083sub import_chunk_from_macrofile
1084{
1085 my ($chunk_text) = @_;
1086
1087 # Is this an icon macro??
1088 if ($chunk_text =~ /^\#\# (.*)/) {
1089 # Extract image macro text
1090 $chunk_text =~ /^\#\#\s+([^\#]+)\s+\#\#/;
1091 $chunk_text = $1;
1092
1093 # Remove enclosing quotes
1094 $chunk_text =~ s/^\"//;
1095 $chunk_text =~ s/\"$//;
1096 }
1097
1098 # No, so it must be a text macro
1099 else {
1100 # Remove macro key
1101 $chunk_text =~ s/^_([^_]+)_(\s*)//;
1102
1103 # Remove language specifier
1104 $chunk_text =~ s/^\[l=.*\](\s*)//;
1105
1106 # Remove braces enclosing text
1107 $chunk_text =~ s/^{(\s*)((.|\n)*)}(\s*)(\#.+\s*)?/$2/;
1108 }
1109
1110 return $chunk_text;
1111}
1112
1113
1114sub get_macrofile_chunk_gti_comment
1115{
1116 my ($chunk_text) = @_;
1117
1118 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1119 if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
1120 return $1;
1121 }
1122
1123 return undef;
1124}
1125
1126
1127sub is_macrofile_chunk_automatically_translated
1128{
1129 my ($chunk_key) = @_;
1130
1131 # The _httpiconX_, _widthX_ and _heightX_ image macros are automatically translated
1132 if ($chunk_key =~ /\._(httpicon|width|height)/) {
1133 return 1;
1134 }
1135
1136 return 0;
1137}
1138
1139
1140# Use the source file to generate a target file that is formatted the same
1141sub write_translated_macrofile
1142{
1143 my $source_file = shift(@_); # Not used
1144 my @source_file_lines = @{shift(@_)};
1145 my $source_file_key_to_text_mapping = shift(@_);
1146 my $target_file = shift(@_);
1147 my @target_file_lines = @{shift(@_)};
1148 my $target_file_key_to_text_mapping = shift(@_);
1149 my $target_file_key_to_gti_comment_mapping = shift(@_);
1150 my $target_language_code = shift(@_);
1151
1152 # Build a mapping from source file line to chunk key
1153 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@source_file_lines);
1154 my %source_file_line_to_key_mapping = ();
1155 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1156 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1157 }
1158 my @source_file_line_keys = (sort sort_by_line (keys(%source_file_line_to_key_mapping)));
1159 my $source_file_line_number = 0;
1160
1161 # Build a mapping from target file line to chunk key
1162 my %target_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@target_file_lines);
1163 my %target_file_line_to_key_mapping = ();
1164 foreach my $chunk_key (keys(%target_file_key_to_line_mapping)) {
1165 $target_file_line_to_key_mapping{$target_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1166 }
1167 my @target_file_line_keys = (sort sort_by_line (keys(%target_file_line_to_key_mapping)));
1168
1169 # Write the new target file
1170 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1171 if (!open(TARGET_FILE, ">$target_file_path")) {
1172 &throw_fatal_error("Could not write target file $target_file_path.");
1173 }
1174
1175 # Use the header from the target file, to keep language and author information
1176 if (scalar(@target_file_line_keys) > 0) {
1177 my $target_file_line_number = 0;
1178 my $target_file_chunk_starting_line_number = (split(/-/, $target_file_line_keys[0]))[0];
1179 while ($target_file_line_number < $target_file_chunk_starting_line_number) {
1180 my $target_file_line = $target_file_lines[$target_file_line_number];
1181 last if ($target_file_line =~ /^\# -- Missing translation: /); # We don't want to get into the macros
1182 print TARGET_FILE $target_file_line;
1183 $target_file_line_number++;
1184 }
1185
1186 $source_file_line_number = (split(/-/, $source_file_line_keys[0]))[0];
1187 }
1188
1189 # Model the new target file on the source file, with the target file translations
1190 foreach my $line_key (@source_file_line_keys) {
1191 # Fill in the gaps before this chunk starts
1192 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1193 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1194 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1195 print TARGET_FILE $source_file_lines[$source_file_line_number];
1196 $source_file_line_number++;
1197 }
1198 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1199
1200 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1201 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1202 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1203
1204 my $macrofile_key = $chunk_key;
1205 $macrofile_key =~ s/^(.+?)\.//;
1206
1207 # If no translation exists for this chunk, show this, and move on
1208 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1209 print TARGET_FILE "# -- Missing translation: $macrofile_key\n";
1210 next;
1211 }
1212
1213 # Grab the source chunk text
1214 my $source_file_chunk = $source_file_lines[$source_file_chunk_starting_line_number];
1215 for (my $l = ($source_file_chunk_starting_line_number + 1); $l <= $source_file_chunk_finishing_line_number; $l++) {
1216 $source_file_chunk .= $source_file_lines[$l];
1217 }
1218
1219 # Is this an icon macro??
1220 if ($source_file_chunk =~ /^\#\# (.*)/) {
1221 # Escape any newline and question mark characters so the source text is replaced correctly
1222 $source_file_chunk_text =~ s/\\/\\\\/g;
1223 $source_file_chunk_text =~ s/\?/\\\?/g;
1224
1225 # Build the new target chunk from the source chunk
1226 my $target_file_chunk = $source_file_chunk;
1227 $target_file_chunk =~ s/$source_file_chunk_text/$target_file_chunk_text/;
1228 $target_file_chunk =~ s/(\s)*$//;
1229 print TARGET_FILE "$target_file_chunk";
1230 }
1231
1232 # No, it is just a normal text macro
1233 else {
1234 print TARGET_FILE "$macrofile_key [l=$target_language_code] {$target_file_chunk_text}";
1235 }
1236
1237 # Add the "updated" comment, if one exists
1238 if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1239 print TARGET_FILE " # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
1240 }
1241 print TARGET_FILE "\n";
1242 }
1243
1244 close(TARGET_FILE);
1245}
1246
1247
1248sub sort_by_line
1249{
1250 return ((split(/-/, $a))[0] <=> (split(/-/, $b))[0]);
1251}
1252
1253
1254# ==========================================================================================
1255# RESOURCE BUNDLE FUNCTIONS
1256
1257sub build_key_to_line_mapping_for_resource_bundle
1258{
1259 my (@file_lines) = @_;
1260
1261 my %chunk_key_to_line_mapping = ();
1262 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1263 my $line = $file_lines[$i];
1264 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1265
1266 # Line contains a dictionary string
1267 if ($line =~ /^(\S+?):(.*)$/) {
1268 my $chunk_key = $1;
1269
1270 # Map from chunk key to line
1271 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1272 }
1273 }
1274
1275 return %chunk_key_to_line_mapping;
1276}
1277
1278
1279sub import_chunk_from_resource_bundle
1280{
1281 my ($chunk_text) = @_;
1282
1283 # Simple: just remove string key
1284 $chunk_text =~ s/^(\S+?):(\s*)//;
1285 $chunk_text =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1286 $chunk_text =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d.*)\s*$//i;
1287
1288 return $chunk_text;
1289}
1290
1291
1292sub get_resource_bundle_chunk_gti_comment
1293{
1294 my ($chunk_text) = @_;
1295
1296 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1297 if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
1298 return $1;
1299 }
1300
1301 return undef;
1302}
1303
1304
1305sub is_resource_bundle_chunk_automatically_translated
1306{
1307 # No resource bundle chunks are automatically translated
1308 return 0;
1309}
1310
1311
1312sub write_translated_resource_bundle
1313{
1314 my $source_file = shift(@_); # Not used
1315 my @source_file_lines = @{shift(@_)};
1316 my $source_file_key_to_text_mapping = shift(@_);
1317 my $target_file = shift(@_);
1318 my @target_file_lines = @{shift(@_)}; # Not used
1319 my $target_file_key_to_text_mapping = shift(@_);
1320 my $target_file_key_to_gti_comment_mapping = shift(@_);
1321 my $target_language_code = shift(@_); # Not used
1322
1323 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1324 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_resource_bundle(@source_file_lines);
1325 my %source_file_line_to_key_mapping = ();
1326 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1327 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1328 }
1329
1330 # Write the new target file
1331 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1332 if (!open(TARGET_FILE, ">$target_file_path")) {
1333 &throw_fatal_error("Could not write target file $target_file_path.");
1334 }
1335
1336 # Model the new target file on the source file, with the target file translations
1337 my $source_file_line_number = 0;
1338 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1339 # Fill in the gaps before this chunk starts
1340 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1341 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1342 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1343 print TARGET_FILE $source_file_lines[$source_file_line_number];
1344 $source_file_line_number++;
1345 }
1346 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1347
1348 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1349 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1350 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1351
1352 # If no translation exists for this chunk, show this, and move on
1353 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1354 print TARGET_FILE "# -- Missing translation: $chunk_key\n";
1355 next;
1356 }
1357
1358 print TARGET_FILE "$chunk_key:$target_file_chunk_text";
1359 if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1360 print TARGET_FILE " # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
1361 }
1362 print TARGET_FILE "\n";
1363 }
1364
1365 close(TARGET_FILE);
1366}
1367
1368
1369# ==========================================================================================
1370# GREENSTONE XML FUNCTIONS
1371
1372sub build_key_to_line_mapping_for_greenstone_xml
1373{
1374 my (@file_lines) = @_;
1375
1376 my %chunk_key_to_line_mapping = ();
1377 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1378 my $line = $file_lines[$i];
1379 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1380
1381 # Line contains a string to translate
1382 if ($line =~ /^\s*<Text id=\"(.*?)\">/) {
1383 my $chunk_key = $1;
1384 $line =~ s/\s*$//; # Remove any nasty whitespace
1385 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1386
1387 # While there is still text of the string to go...
1388 my $startline = $i;
1389 while ($line !~ /<\/Text>$/) {
1390 $i++;
1391 if ($i == scalar(@file_lines)) {
1392 &throw_fatal_error("Could not find end of string $chunk_key.");
1393 }
1394 $line = $file_lines[$i];
1395 $line =~ s/\s*$//; # Remove any nasty whitespace
1396 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1397 }
1398
1399 # Map from chunk key to line
1400 if (!defined($chunk_key_to_line_mapping{$chunk_key})) {
1401 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1402 }
1403 else {
1404 &throw_fatal_error("Duplicate key $chunk_key.");
1405 }
1406 }
1407 }
1408
1409 return %chunk_key_to_line_mapping;
1410}
1411
1412
1413sub import_chunk_from_greenstone_xml
1414{
1415 my ($chunk_text) = @_;
1416
1417 # Simple: just remove the Text tags
1418 $chunk_text =~ s/^\s*<Text id=\"(.*?)\">(\s*)//;
1419 $chunk_text =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1420 $chunk_text =~ s/<\/Text>$//;
1421
1422 return $chunk_text;
1423}
1424
1425
1426sub get_greenstone_xml_chunk_gti_comment
1427{
1428 my ($chunk_text) = @_;
1429
1430 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1431 if ($chunk_text =~ /<Updated date=\"(\d?\d-\D\D\D-\d\d\d\d.*)\"\/>$/i) {
1432 return $1;
1433 }
1434
1435 return undef;
1436}
1437
1438
1439sub is_greenstone_xml_chunk_automatically_translated
1440{
1441 # No greenstone XML chunks are automatically translated
1442 return 0;
1443}
1444
1445
1446sub write_translated_greenstone_xml
1447{
1448 my $source_file = shift(@_); # Not used
1449 my @source_file_lines = @{shift(@_)};
1450 my $source_file_key_to_text_mapping = shift(@_);
1451 my $target_file = shift(@_);
1452 my @target_file_lines = @{shift(@_)}; # Not used
1453 my $target_file_key_to_text_mapping = shift(@_);
1454 my $target_file_key_to_gti_comment_mapping = shift(@_);
1455 my $target_language_code = shift(@_); # Not used
1456
1457 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1458 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_greenstone_xml(@source_file_lines);
1459 my %source_file_line_to_key_mapping = ();
1460 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1461 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1462 }
1463
1464 # Write the new target file
1465 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1466 if (!open(TARGET_FILE, ">$target_file_path")) {
1467 &throw_fatal_error("Could not write target file $target_file_path.");
1468 }
1469
1470 # Model the new target file on the source file, with the target file translations
1471 my $source_file_line_number = 0;
1472 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1473 # Fill in the gaps before this chunk starts
1474 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1475 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1476 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1477 print TARGET_FILE $source_file_lines[$source_file_line_number];
1478 $source_file_line_number++;
1479 }
1480 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1481
1482 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1483 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1484 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1485 $target_file_chunk_text =~ s/(\n)*$//g;
1486
1487 # If no translation exists for this chunk, show this, and move on
1488 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1489 print TARGET_FILE "<!-- Missing translation: $chunk_key -->\n";
1490 next;
1491 }
1492
1493 print TARGET_FILE "<Text id=\"$chunk_key\">$target_file_chunk_text</Text>";
1494 if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1495 my $chunk_gti_comment = $target_file_key_to_gti_comment_mapping->{$chunk_key};
1496 $chunk_gti_comment =~ s/^Updated //;
1497 print TARGET_FILE "<Updated date=\"" . $chunk_gti_comment . "\"\/>";
1498 }
1499 print TARGET_FILE "\n";
1500 }
1501
1502 # Fill in the end of the file
1503 while ($source_file_line_number < scalar(@source_file_lines)) {
1504 print TARGET_FILE $source_file_lines[$source_file_line_number];
1505 $source_file_line_number++;
1506 }
1507
1508 close(TARGET_FILE);
1509}
1510
1511
1512&main(@ARGV);
Note: See TracBrowser for help on using the repository browser.