source: gsdl/trunk/bin/script/gti.pl@ 14258

Last change on this file since 14258 was 14258, checked in by anna, 17 years ago

Fixed bug in using svn annotate command, and bug in comparing dates different formats (svn date format and gti comment format).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# gti.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2005 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
33}
34
35
36use iso639;
37use strict;
38use util;
39
40
41#my $anonymous_cvs_root = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src";
42#my $anonymous_svn_root = "http://http://svn.greenstone.org/gsdl/trunk/";
43my $gsdl_root_directory = "$ENV{'GSDLHOME'}";
44my $gti_log_file = &util::filename_cat($gsdl_root_directory, "etc", "gti.log");
45my $source_language_code = "en"; # This is non-negiotable
46
47my $gti_translation_files =
48 [ # Greenstone macrofiles
49 { 'key' => "coredm",
50 'file_type' => "macrofile",
51 'source_file' => "macros/english.dm",
52 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}.dm" },
53 { 'key' => "auxdm",
54 'file_type' => "macrofile",
55 'source_file' => "macros/english2.dm",
56 'target_file' => "macros/{bn:bengali;fa:farsi;gd:gaelic;id:indo;lv:latvian;pt-br:port-br;pt-pt:port-pt;zh-tr:chinese-trad;iso_639_1_target_language_name}2.dm" },
57
58 # GLI dictionary
59 { 'key' => "glidict",
60 'file_type' => "resource_bundle",
61 'source_file' => "gli/classes/dictionary.properties",
62 'target_file' => "gli/classes/dictionary_{target_language_code}.properties" },
63
64 # GLI help
65 { 'key' => "glihelp",
66 'file_type' => "greenstone_xml",
67 'source_file' => "gli/help/en/help.xml",
68 'target_file' => "gli/help/{target_language_code}/help.xml" },
69
70 # Greenstone Perl modules
71 { 'key' => "perlmodules",
72 'file_type' => "resource_bundle",
73 'source_file' => "perllib/strings.properties",
74 'target_file' => "perllib/strings_{target_language_code}.properties" },
75
76 # Greenstone tutorial exercises
77 # { 'key' => "tutorials",
78 # 'file_type' => "greenstone_xml",
79 # 'source_file' => "gsdl-documentation/tutorials/xml-source/tutorial_en.xml",
80 # 'target_file' => "gsdl-documentation/tutorials/xml-source/tutorial_{target_language_code}.xml" },
81
82 # new Greenstone.org
83 { 'key' => "greenorg",
84 'file_type' => "resource_bundle",
85 'source_file' => "greenstoneorg/website/classes/Gsc.properties",
86 'target_file' => "greenstoneorg/website/classes/Gsc_{iso_639_1_target_language_name}.properties"
87 # 'file_type' => "macrofile",
88 # 'source_file' => "greenorg/macros/english.dm",
89 # 'target_file' => "greenorg/macros/{iso_639_1_target_language_name}.dm"
90 }
91 ];
92
93
94sub main
95{
96 # Get the command to process, and any arguments
97 my $gti_command = shift(@_);
98 my @gti_command_arguments = @_;
99
100 # Open the GTI log file for appending, or write to STDERR if that fails
101 if (!open(GTI_LOG, ">>$gti_log_file")) {
102 open(GTI_LOG, ">&STDERR");
103 }
104
105 # Log the command that launched this script
106 &log_message("Command: $0 @ARGV");
107
108 # Check that a command was supplied
109 if (!$gti_command) {
110 &throw_fatal_error("Missing command.");
111 }
112
113 # Process the command
114 if ($gti_command =~ /^get-all-chunks$/i) {
115 print &get_all_chunks(@gti_command_arguments);
116 }
117 if ($gti_command =~ /^get-first-n-chunks-requiring-work$/i) {
118 print &get_first_n_chunks_requiring_work(@gti_command_arguments);
119 }
120 if ($gti_command =~ /^get-language-status$/i) {
121 print &get_language_status(@gti_command_arguments);
122 }
123 if ($gti_command =~ /^search-chunks$/i) {
124 print &search_chunks(@gti_command_arguments);
125 }
126 if ($gti_command =~ /^submit-translations$/i) {
127 # This command cannot produce any output since it reads input
128 &submit_translations(@gti_command_arguments);
129 }
130
131 # The command was not recognized
132 # &throw_fatal_error("Unknown command \"$gti_command\".");
133}
134
135
136sub throw_fatal_error
137{
138 my $error_message = shift(@_);
139
140 # Write an XML error response
141 print "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
142 print "<GTIResponse>\n";
143 print " <GTIError time=\"" . time() . "\">" . $error_message . "</GTIError>\n";
144 print "</GTIResponse>\n";
145
146 # Log the error message, then die
147 &log_message("Error: $error_message");
148 die "\n";
149}
150
151
152sub log_message
153{
154 my $log_message = shift(@_);
155 print GTI_LOG time() . " -- " . $log_message . "\n";
156}
157
158
159sub get_all_chunks
160{
161 # The code of the target language (ensure it is lowercase)
162 my $target_language_code = lc(shift(@_));
163 # The key of the file to translate (ensure it is lowercase)
164 my $translation_file_key = lc(shift(@_));
165
166 # Check that the necessary arguments were supplied
167 if (!$target_language_code || !$translation_file_key) {
168 &throw_fatal_error("Missing command argument.");
169 }
170
171 # Get (and check) the translation configuration
172 my ($source_file, $target_file, $translation_file_type)
173 = &get_translation_configuration($target_language_code, $translation_file_key);
174
175 # Parse the source language and target language files
176 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
177 my @source_file_lines = &read_file_lines($source_file_path);
178 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
179
180 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
181 my @target_file_lines = &read_file_lines($target_file_path);
182 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
183
184 # Filter out any automatically translated chunks
185 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
186 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
187 delete $source_file_key_to_line_mapping{$chunk_key};
188 delete $target_file_key_to_line_mapping{$chunk_key};
189 }
190 }
191
192 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
193 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
194 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
195 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
196
197 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
198 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
199
200 # Form an XML response to the command
201 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
202 $xml_response .= "<GTIResponse>\n";
203 $xml_response .= " <TranslationFile"
204 . " key=\"" . $translation_file_key . "\""
205 . " target_file_path=\"" . $target_file . "\"\/>\n";
206
207 # Do all the chunks
208 $xml_response .= " <Chunks size=\"" . scalar(keys(%source_file_key_to_text_mapping)) . "\">\n";
209 foreach my $chunk_key (keys(%source_file_key_to_text_mapping)) {
210 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
211 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
212
213 $xml_response .= " <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
214 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
215 if (defined($target_file_key_to_text_mapping{$chunk_key})) {
216 my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
217 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
218 $xml_response .= " <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
219 }
220 else {
221 $xml_response .= " <TargetFileText></TargetFileText>\n";
222 }
223
224 $xml_response .= " </Chunk>\n";
225 }
226 $xml_response .= " </Chunks>\n";
227
228 $xml_response .= "</GTIResponse>\n";
229 return $xml_response;
230}
231
232
233sub get_first_n_chunks_requiring_work
234{
235 # The code of the target language (ensure it is lowercase)
236 my $target_language_code = lc(shift(@_));
237 # The key of the file to translate (ensure it is lowercase)
238 my $translation_file_key = lc(shift(@_));
239 # The number of chunks to return (defaults to one if not specified)
240 my $num_chunks_to_return = shift(@_) || "1";
241
242 # Check that the necessary arguments were supplied
243 if (!$target_language_code || !$translation_file_key) {
244 &throw_fatal_error("Missing command argument.");
245 }
246
247 # Get (and check) the translation configuration
248 my ($source_file, $target_file, $translation_file_type)
249 = &get_translation_configuration($target_language_code, $translation_file_key);
250
251 # Parse the source language and target language files
252 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
253 my @source_file_lines = &read_file_lines($source_file_path);
254 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
255
256 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
257 my @target_file_lines = &read_file_lines($target_file_path);
258 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
259
260 # Filter out any automatically translated chunks
261 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
262 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
263 delete $source_file_key_to_line_mapping{$chunk_key};
264 delete $target_file_key_to_line_mapping{$chunk_key};
265 }
266 }
267
268 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
269 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
270 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
271 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
272
273 # Determine the target file chunks requiring translation
274 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
275 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
276
277 # Determine the target file chunks requiring updating
278 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
279 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
280 my @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
281 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
282
283 # Form an XML response to the command
284 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
285 $xml_response .= "<GTIResponse>\n";
286 $xml_response .= " <TranslationFile"
287 . " key=\"" . $translation_file_key . "\""
288 . " target_file_path=\"" . $target_file . "\""
289 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
290 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
291 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
292
293 # Do chunks requiring translation first
294 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_translation)) {
295 $xml_response .= " <ChunksRequiringTranslation size=\"" . scalar(@target_file_keys_requiring_translation) . "\">\n";
296 }
297 else {
298 $xml_response .= " <ChunksRequiringTranslation size=\"" . $num_chunks_to_return . "\">\n";
299 }
300
301 foreach my $chunk_key (@target_file_keys_requiring_translation) {
302 last if ($num_chunks_to_return == 0);
303
304 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
305 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
306
307 if(!defined $source_file_chunk_date){
308 $source_file_chunk_date = "";
309 }
310
311 $xml_response .= " <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
312 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
313 $xml_response .= " <TargetFileText></TargetFileText>\n";
314 $xml_response .= " </Chunk>\n";
315
316 $num_chunks_to_return--;
317 }
318
319 $xml_response .= " </ChunksRequiringTranslation>\n";
320
321 # Then do chunks requiring updating
322 if ($num_chunks_to_return > scalar(@target_file_keys_requiring_updating)) {
323 $xml_response .= " <ChunksRequiringUpdating size=\"" . scalar(@target_file_keys_requiring_updating) . "\">\n";
324 }
325 else {
326 $xml_response .= " <ChunksRequiringUpdating size=\"" . $num_chunks_to_return . "\">\n";
327 }
328
329 foreach my $chunk_key (@target_file_keys_requiring_updating) {
330 last if ($num_chunks_to_return == 0);
331
332 my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key};
333 my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key});
334 my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key};
335 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
336
337 if(!defined $source_file_chunk_date){
338 $source_file_chunk_date = "";
339 }
340
341 $xml_response .= " <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";
342 $xml_response .= " <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";
343 $xml_response .= " <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n";
344 $xml_response .= " </Chunk>\n";
345
346 $num_chunks_to_return--;
347 }
348
349 $xml_response .= " </ChunksRequiringUpdating>\n";
350
351 $xml_response .= "</GTIResponse>\n";
352 return $xml_response;
353}
354
355
356sub get_language_status
357{
358 # The code of the target language (ensure it is lowercase)
359 my $target_language_code = lc(shift(@_));
360
361 # Check that the necessary arguments were supplied
362 if (!$target_language_code) {
363 &throw_fatal_error("Missing command argument.");
364 }
365
366 # Form an XML response to the command
367 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
368 $xml_response .= "<GTIResponse>\n";
369 $xml_response .= " <LanguageStatus code=\"$target_language_code\">\n";
370
371 foreach my $translation_file (@$gti_translation_files) {
372 # Get (and check) the translation configuration
373 my ($source_file, $target_file, $translation_file_type)
374 = &get_translation_configuration($target_language_code, $translation_file->{'key'});
375
376 # Parse the source language and target language files
377 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
378 my @source_file_lines = &read_file_lines($source_file_path);
379 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
380
381 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
382 my @target_file_lines = &read_file_lines($target_file_path);
383 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
384
385 # Filter out any automatically translated chunks
386 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
387 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
388 delete $source_file_key_to_line_mapping{$chunk_key};
389 delete $target_file_key_to_line_mapping{$chunk_key};
390 }
391 }
392
393 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
394 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
395 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
396 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
397
398 # Determine the target file chunks requiring translation
399 my @target_file_keys_requiring_translation = &determine_chunks_requiring_translation(\%source_file_key_to_text_mapping, \%target_file_key_to_text_mapping);
400 &log_message("Number of target chunks requiring translation: " . scalar(@target_file_keys_requiring_translation));
401
402 # Determine the target file chunks requiring updating
403 my @target_file_keys_requiring_updating = ();
404 if (-e $target_file_path) {
405 my %source_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($source_file, \@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
406 my %target_file_key_to_last_update_date_mapping = &build_key_to_last_update_date_mapping($target_file, \@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
407 @target_file_keys_requiring_updating = &determine_chunks_requiring_updating(\%source_file_key_to_last_update_date_mapping, \%target_file_key_to_last_update_date_mapping);
408 &log_message("Number of target chunks requiring updating: " . scalar(@target_file_keys_requiring_updating));
409 }
410
411 $xml_response .= " <TranslationFile"
412 . " key=\"" . $translation_file->{'key'} . "\""
413 . " target_file_path=\"" . $target_file . "\""
414 . " num_chunks_translated=\"" . (scalar(keys(%source_file_key_to_text_mapping)) - scalar(@target_file_keys_requiring_translation)) . "\""
415 . " num_chunks_requiring_translation=\"" . scalar(@target_file_keys_requiring_translation) . "\""
416 . " num_chunks_requiring_updating=\"" . scalar(@target_file_keys_requiring_updating) . "\"\/>\n";
417 }
418
419 $xml_response .= " </LanguageStatus>\n";
420
421 $xml_response .= "</GTIResponse>\n";
422 return $xml_response;
423}
424
425
426sub search_chunks
427{
428 # The code of the target language (ensure it is lowercase)
429 my $target_language_code = lc(shift(@_));
430 # The key of the file to translate (ensure it is lowercase)
431 my $translation_file_key = lc(shift(@_));
432 # The query string
433 my $query_string = join(' ', @_);
434
435 # Check that the necessary arguments were supplied
436 if (!$target_language_code || !$translation_file_key || !$query_string) {
437 &throw_fatal_error("Missing command argument.");
438 }
439
440 # Get (and check) the translation configuration
441 my ($source_file, $target_file, $translation_file_type)
442 = &get_translation_configuration($target_language_code, $translation_file_key);
443
444 # Parse the source language and target language files
445 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
446 my @source_file_lines = &read_file_lines($source_file_path);
447 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
448
449 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
450 my @target_file_lines = &read_file_lines($target_file_path);
451 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
452
453 # Filter out any automatically translated chunks
454 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
455 if (&is_chunk_automatically_translated($chunk_key, $translation_file_type)) {
456 delete $source_file_key_to_line_mapping{$chunk_key};
457 delete $target_file_key_to_line_mapping{$chunk_key};
458 }
459 }
460
461 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
462 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
463 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
464 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
465
466 # Determine the target file chunks matching the query
467 my @target_file_keys_matching_query = ();
468 foreach my $chunk_key (keys(%target_file_key_to_text_mapping)) {
469 my $target_file_text = $target_file_key_to_text_mapping{$chunk_key};
470 if ($target_file_text =~ /$query_string/i) {
471 # &log_message("Chunk with key $chunk_key matches query.");
472 push(@target_file_keys_matching_query, $chunk_key);
473 }
474 }
475
476 # Form an XML response to the command
477 my $xml_response = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
478 $xml_response .= "<GTIResponse>\n";
479
480 $xml_response .= " <ChunksMatchingQuery size=\"" . scalar(@target_file_keys_matching_query) . "\">\n";
481 foreach my $chunk_key (@target_file_keys_matching_query) {
482 my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key});
483
484 $xml_response .= " <Chunk key=\"$chunk_key\">\n";
485 $xml_response .= " <TargetFileText>$target_file_chunk_text</TargetFileText>\n";
486 $xml_response .= " </Chunk>\n";
487 }
488 $xml_response .= " </ChunksMatchingQuery>\n";
489
490 $xml_response .= "</GTIResponse>\n";
491 return $xml_response;
492}
493
494
495sub submit_translations
496{
497 # The code of the target language (ensure it is lowercase)
498 my $target_language_code = lc(shift(@_));
499 # The key of the file to translate (ensure it is lowercase)
500 my $translation_file_key = lc(shift(@_));
501 # The username of the translation submitter
502 my $submitter_username = shift(@_);
503 # Whether to submit a target chunk even if it hasn't changed
504 my $force_submission_flag = shift(@_);
505
506 # Check that the necessary arguments were supplied
507 if (!$target_language_code || !$translation_file_key || !$submitter_username) {
508 &log_message("Fatal error (but cannot be thrown): Missing command argument.");
509 die "\n";
510 }
511
512 # Get (and check) the translation configuration
513 my ($source_file, $target_file, $translation_file_type)
514 = &get_translation_configuration($target_language_code, $translation_file_key);
515
516 # Parse the source language and target language files
517 my @source_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $source_file));
518 my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type);
519 my %source_file_key_to_text_mapping = &build_key_to_text_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
520 my %source_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@source_file_lines, \%source_file_key_to_line_mapping, $translation_file_type);
521 &log_message("Number of source chunks: " . scalar(keys(%source_file_key_to_text_mapping)));
522
523 my @target_file_lines = &read_file_lines(&util::filename_cat($gsdl_root_directory, $target_file));
524 my %target_file_key_to_line_mapping = &build_key_to_line_mapping(\@target_file_lines, $translation_file_type);
525 my %target_file_key_to_text_mapping = &build_key_to_text_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
526 my %target_file_key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping(\@target_file_lines, \%target_file_key_to_line_mapping, $translation_file_type);
527 &log_message("Number of target chunks: " . scalar(keys(%target_file_key_to_text_mapping)));
528
529 # Submission date
530 my $day = (localtime)[3];
531 my $month = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[(localtime)[4]];
532 my $year = (localtime)[5] + 1900;
533 my $submission_date = "$day-$month-$year";
534
535 open(SUBMISSION, "-");
536 my @submission_lines = <SUBMISSION>;
537 close(SUBMISSION);
538
539 # Remove any nasty carriage returns
540 &log_message("Submission:");
541 foreach my $submission_line (@submission_lines) {
542 $submission_line =~ s/\r$//;
543 &log_message(" $submission_line");
544 }
545
546 my %source_file_key_to_submission_mapping = ();
547 my %target_file_key_to_submission_mapping = ();
548 for (my $i = 0; $i < scalar(@submission_lines); $i++) {
549 # Read source file part of submission
550 if ($submission_lines[$i] =~ /^\<SourceFileText key=\"(.+)\"\>/) {
551 my $chunk_key = $1;
552
553 # Read the source file text
554 my $source_file_chunk_text = "";
555 $i++;
556 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/SourceFileText\>/) {
557 $source_file_chunk_text .= $submission_lines[$i];
558 $i++;
559 }
560 $source_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
561 $source_file_chunk_text = &unmake_text_xml_safe($source_file_chunk_text);
562
563 &log_message("Source file key: $chunk_key");
564 &log_message("Source file text: $source_file_chunk_text");
565 $source_file_key_to_submission_mapping{$chunk_key} = $source_file_chunk_text;
566 }
567
568 # Read target file part of submission
569 if ($submission_lines[$i] =~ /^\<TargetFileText key=\"(.+)\"\>/) {
570 my $chunk_key = $1;
571
572 # Read the target file text
573 my $target_file_chunk_text = "";
574 $i++;
575 while ($i < scalar(@submission_lines) && $submission_lines[$i] !~ /^\<\/TargetFileText\>/) {
576 $target_file_chunk_text .= $submission_lines[$i];
577 $i++;
578 }
579 $target_file_chunk_text =~ s/\n$//; # Strip the extra newline character added
580 $target_file_chunk_text = &unmake_text_xml_safe($target_file_chunk_text);
581
582 &log_message("Target file key: $chunk_key");
583 &log_message("Target file text: $target_file_chunk_text");
584 $target_file_key_to_submission_mapping{$chunk_key} = $target_file_chunk_text;
585 }
586 }
587
588 # -----------------------------------------
589 # Validate the translation submissions
590 # -----------------------------------------
591
592 # Check that the translations are valid
593 foreach my $chunk_key (keys(%source_file_key_to_submission_mapping)) {
594 # Make sure the submitted chunk still exists in the source file
595 if (!defined($source_file_key_to_text_mapping{$chunk_key})) {
596 &log_message("Warning: Source chunk $chunk_key no longer exists (ignoring submission).");
597 delete $source_file_key_to_submission_mapping{$chunk_key};
598 delete $target_file_key_to_submission_mapping{$chunk_key};
599 next;
600 }
601
602 # Make sure the submitted source chunk matches the source file chunk
603 if ($source_file_key_to_submission_mapping{$chunk_key} ne &unmake_text_xml_safe($source_file_key_to_text_mapping{$chunk_key})) {
604 &log_message("Warning: Source chunk $chunk_key has changed (ignoring submission).");
605 &log_message("Submission source: $source_file_key_to_submission_mapping{$chunk_key}");
606 &log_message(" Source text: $source_file_key_to_text_mapping{$chunk_key}");
607 delete $source_file_key_to_submission_mapping{$chunk_key};
608 delete $target_file_key_to_submission_mapping{$chunk_key};
609 next;
610 }
611 }
612
613 # Apply the submitted translations
614 foreach my $chunk_key (keys(%target_file_key_to_submission_mapping)) {
615 # Only apply the submission if it is a change, unless -force_submission has been specified
616 if ($force_submission_flag || !defined($target_file_key_to_text_mapping{$chunk_key}) || $target_file_key_to_submission_mapping{$chunk_key} ne $target_file_key_to_text_mapping{$chunk_key}) {
617 $target_file_key_to_text_mapping{$chunk_key} = $target_file_key_to_submission_mapping{$chunk_key};
618 $target_file_key_to_gti_comment_mapping{$chunk_key} = "Updated $submission_date by $submitter_username";
619 }
620 }
621
622 eval "&write_translated_${translation_file_type}(\$source_file, \\\@source_file_lines, \\\%source_file_key_to_text_mapping, \$target_file, \\\@target_file_lines, \\\%target_file_key_to_text_mapping, \\\%target_file_key_to_gti_comment_mapping, \$target_language_code)";
623}
624
625
626sub get_translation_configuration
627{
628 # Get the code of the target language
629 my $target_language_code = shift(@_);
630 # Get the key of the file to translate
631 my $translation_file_key = shift(@_);
632
633 # Read the translation data from the gti.cfg file
634 my ($source_file, $target_file, $translation_file_type) =
635 &get_translation_data_for($target_language_code, $translation_file_key);
636
637 # Check that the file to translate is defined in the gti.cfg file
638 if (!$source_file || !$target_file || !$translation_file_type) {
639 &throw_fatal_error("Missing or incomplete specification for translation file \"$translation_file_key\" in gti.pl.");
640 }
641
642 # Check that the source file exists
643 my $source_file_path = &util::filename_cat($gsdl_root_directory, $source_file);
644 if (!-e $source_file_path) {
645 &throw_fatal_error("Source file $source_file_path does not exist.");
646 }
647
648 # Check that the source file is up to date
649 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
650 # unless ($translation_file_is_not_in_cvs) {
651 #my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`;
652 my $source_file_cvs_status = `cd $gsdl_root_directory; svn status $source_file 2>/dev/null`;
653 if ($source_file_cvs_status =~ /^C /) {
654 &throw_fatal_error("Source file $source_file_path conflicts with the repository.");
655 }
656 if ($source_file_cvs_status =~ /^M /) {
657 &throw_fatal_error("Source file $source_file_path contains uncommitted changes.");
658 }
659 # }
660
661 return ($source_file, $target_file, $translation_file_type);
662}
663
664
665sub get_translation_data_for
666{
667 my ($target_language_code, $translation_file_key) = @_;
668
669 foreach my $translation_file (@$gti_translation_files) {
670 # If this isn't the correct translation file, move onto the next one
671 next if ($translation_file_key ne $translation_file->{'key'});
672
673 # Resolve the target language file
674 my $target_language_file = $translation_file->{'target_file'};
675 if ($target_language_file =~ /(\{.+\;.+\})/) {
676 my $unresolved_target_language_file_part = $1;
677
678 # Check for a special case for the target language code
679 if ($unresolved_target_language_file_part =~ /(\{|\;)$target_language_code:([^\;]+)(\;|\})/) {
680 my $resolved_target_language_file_part = $2;
681 $target_language_file =~ s/$unresolved_target_language_file_part/$resolved_target_language_file_part/;
682 }
683 # Otherwise use the last part as the default value
684 else {
685 my ($default_target_language_file_part) = $unresolved_target_language_file_part =~ /([^\;]+)\}/;
686 $target_language_file =~ s/$unresolved_target_language_file_part/\{$default_target_language_file_part\}/;
687 }
688 }
689
690 # Resolve instances of {iso_639_1_target_language_name}
691 my $iso_639_1_target_language_name = $iso639::fromiso639{$target_language_code};
692 $iso_639_1_target_language_name =~ tr/A-Z/a-z/ if $iso_639_1_target_language_name;
693 $target_language_file =~ s/\{iso_639_1_target_language_name\}/$iso_639_1_target_language_name/g;
694
695 # Resolve instances of {target_language_code}
696 $target_language_file =~ s/\{target_language_code\}/$target_language_code/g;
697
698 return ($translation_file->{'source_file'}, $target_language_file, $translation_file->{'file_type'});
699 }
700
701 return ();
702}
703
704
705sub read_file_lines
706{
707 my ($file_path) = @_;
708
709 if (!open(FILE_IN, "<$file_path")) {
710 &log_message("Note: Could not open file $file_path.");
711 return ();
712 }
713 my @file_lines = <FILE_IN>;
714 close(FILE_IN);
715
716 return @file_lines;
717}
718
719
720sub build_key_to_line_mapping
721{
722 my ($file_lines, $translation_file_type) = @_;
723 eval "return &build_key_to_line_mapping_for_${translation_file_type}(\@\$file_lines)";
724}
725
726
727sub build_key_to_text_mapping
728{
729 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
730
731 my %key_to_text_mapping = ();
732 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
733 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
734 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
735
736 my $chunk_text = @$file_lines[$chunk_starting_line];
737 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
738 $chunk_text .= @$file_lines[$l];
739 }
740
741 # Map from chunk key to text
742 eval "\$key_to_text_mapping{\${chunk_key}} = &import_chunk_from_${translation_file_type}(\$chunk_text)";
743 }
744
745 return %key_to_text_mapping;
746}
747
748
749sub build_key_to_last_update_date_mapping
750{
751 my ($file, $file_lines, $key_to_line_mapping, $translation_file_type) = @_;
752
753 # If the files aren't in CVS then we can't tell anything about what needs updating
754 # return () if ($translation_file_is_not_in_cvs);
755
756 # Build a mapping from key to CVS date
757 # Need to be careful with this mapping because the chunk keys won't necessarily all be valid
758 my %key_to_cvs_date_mapping = &build_key_to_cvs_date_mapping($file, $translation_file_type);
759
760 # Build a mapping from key to comment date
761 my %key_to_gti_comment_mapping = &build_key_to_gti_comment_mapping($file_lines, $key_to_line_mapping, $translation_file_type);
762
763 # Build a mapping from key to last update date (the latter of the CVS date and comment date)
764 my %key_to_last_update_date_mapping = ();
765 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
766 # Use the CVS date as a starting point
767 my $chunk_cvs_date = $key_to_cvs_date_mapping{$chunk_key};
768 $key_to_last_update_date_mapping{$chunk_key} = $chunk_cvs_date;
769
770 # If a comment date exists and it is after the CVS date, use that instead
771 # need to convert the comment date format to SVN format
772 my $chunk_gti_comment = $key_to_gti_comment_mapping{$chunk_key};
773 if (defined($chunk_gti_comment) && $chunk_gti_comment =~ /(\d?\d-\D\D\D-\d\d\d\d)/) {
774 my $chunk_comment_date = $1;
775 if ((!defined($chunk_cvs_date) || &is_date_after($chunk_comment_date, $chunk_cvs_date))) {
776 $key_to_last_update_date_mapping{$chunk_key} = $chunk_comment_date;
777 }
778 }
779 }
780
781 return %key_to_last_update_date_mapping;
782}
783
784
785sub build_key_to_cvs_date_mapping
786{
787 my ($filename, $translation_file_type) = @_;
788
789 # Use CVS to annotate each line of the file with the date it was last edited
790 # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist
791 # my $cvs_annotated_file = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root annotate -F $filename 2>/dev/null`;
792 # my $cvs_annotated_file = `cd $gsdl_root_directory; export PATH=.:/research/lh92/programs/subversion/bin; svn annotate -v --force $filename`;
793 my $cvs_annotated_file = `cd $gsdl_root_directory; svn annotate -v $filename`;
794
795 my @cvs_annotated_file_lines = split(/\n/, $cvs_annotated_file);
796
797 my @cvs_annotated_file_lines_date = ();
798 foreach my $cvs_annotated_file_line (@cvs_annotated_file_lines) {
799 # Extract the date from the CVS annotation at the front
800 # cvs format : 07-Jun-02
801 # svn format : 2007-07-16
802 # $cvs_annotated_file_line =~ s/^\S+\s+\(\S+\s+(\S+)\):\s//;
803 $cvs_annotated_file_line =~ s/^\s+\S+\s+\S+\s(\S+)//;
804
805 push(@cvs_annotated_file_lines_date, $1);
806
807 # trim extra date information in svn annotation format
808 # 15:42:49 +1200 (Wed, 21 Jun 2006)
809 $cvs_annotated_file_line =~ s/^\s+\S+\s\S+\s\((.+?)\)\s//;
810 }
811
812 # Build a key to line mapping for the CVS annotated file, for matching the chunk key to the CVS date
813 my %key_to_line_mapping = &build_key_to_line_mapping(\@cvs_annotated_file_lines, $translation_file_type);
814
815 my %key_to_cvs_date_mapping = ();
816 foreach my $chunk_key (keys(%key_to_line_mapping)) {
817 my $chunk_starting_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[0];
818 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[1];
819
820 # Find the date this chunk was last edited, from the CVS annotation
821 my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line];
822 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
823 if (&is_date_after($cvs_annotated_file_lines_date[$l], $chunk_date)) {
824 # This part of the chunk has been updated more recently
825 $chunk_date = $cvs_annotated_file_lines_date[$l];
826
827 }
828 }
829
830 # Map from chunk key to CVS date
831 $key_to_cvs_date_mapping{$chunk_key} = $chunk_date;
832 }
833
834 return %key_to_cvs_date_mapping;
835}
836
837
838sub build_key_to_gti_comment_mapping
839{
840 my ($file_lines, $key_to_line_mapping, $translation_file_type) = @_;
841
842 my %key_to_gti_comment_mapping = ();
843 foreach my $chunk_key (keys(%$key_to_line_mapping)) {
844 my $chunk_starting_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[0];
845 my $chunk_finishing_line = (split(/-/, $key_to_line_mapping->{$chunk_key}))[1];
846
847 my $chunk_text = @$file_lines[$chunk_starting_line];
848 for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) {
849 $chunk_text .= @$file_lines[$l];
850 }
851
852 # Map from chunk key to GTI comment
853 my $chunk_gti_comment;
854 eval "\$chunk_gti_comment = &get_${translation_file_type}_chunk_gti_comment(\$chunk_text)";
855 $key_to_gti_comment_mapping{$chunk_key} = $chunk_gti_comment if (defined($chunk_gti_comment));
856 }
857
858 return %key_to_gti_comment_mapping;
859}
860
861
862sub determine_chunks_requiring_translation
863{
864 my $source_file_key_to_text_mapping = shift(@_);
865 my $target_file_key_to_text_mapping = shift(@_);
866
867 # Chunks needing translation are those in the source file with no translation in the target file
868 my @target_file_keys_requiring_translation = ();
869 foreach my $chunk_key (keys(%$source_file_key_to_text_mapping)) {
870 if ($source_file_key_to_text_mapping->{$chunk_key} && !$target_file_key_to_text_mapping->{$chunk_key}) {
871 # &log_message("Chunk with key $chunk_key needs translating.");
872 push(@target_file_keys_requiring_translation, $chunk_key);
873 }
874 }
875
876 return @target_file_keys_requiring_translation;
877}
878
879
880sub determine_chunks_requiring_updating
881{
882 my $source_file_key_to_last_update_date_mapping = shift(@_);
883 my $target_file_key_to_last_update_date_mapping = shift(@_);
884
885 # Chunks needing updating are those in the target file that have been more recently edited in the source file
886 my @target_file_keys_requiring_updating = ();
887 foreach my $chunk_key (keys(%$source_file_key_to_last_update_date_mapping)) {
888 my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping->{$chunk_key};
889 my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping->{$chunk_key};
890
891 # print "key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n";
892
893 if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) {
894 # &log_message("Chunk with key $chunk_key needs updating.");
895 push(@target_file_keys_requiring_updating, $chunk_key);
896 }
897 }
898
899 return @target_file_keys_requiring_updating;
900}
901
902
903sub is_chunk_automatically_translated
904{
905 my ($chunk_key, $translation_file_type) = @_;
906 eval "return &is_${translation_file_type}_chunk_automatically_translated(\$chunk_key)";
907}
908
909
910sub make_text_xml_safe
911{
912 my $text = shift(@_);
913 $text =~ s/\&/\&amp\;/g;
914 $text =~ s/\&amp\;lt\;/\&amp\;amp\;lt\;/g;
915 $text =~ s/\&amp\;gt\;/\&amp\;amp\;gt\;/g;
916 $text =~ s/\&amp\;rarr\;/\&amp\;amp\;rarr\;/g;
917 $text =~ s/\&amp\;mdash\;/\&amp\;amp\;mdash\;/g;
918 $text =~ s/</\&lt\;/g;
919 $text =~ s/>/\&gt\;/g;
920 return $text;
921}
922
923
924sub unmake_text_xml_safe
925{
926 my $text = shift(@_);
927 $text =~ s/\&lt\;/</g;
928 $text =~ s/\&gt\;/>/g;
929 $text =~ s/\&amp\;/\&/g;
930 return $text;
931}
932
933
934# Returns 1 if $date1 is after $date2, 0 otherwise
935sub is_date_after_cvs
936{
937 my ($date1, $date2) = @_;
938 my %months = ("Jan", 1, "Feb", 2, "Mar", 3, "Apr", 4, "May", 5, "Jun", 6,
939 "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12);
940
941 if(!defined $date1) {
942 return 1;
943 }
944
945 my @date1parts = split(/-/, $date1);
946 my @date2parts = split(/-/, $date2);
947
948 # Compare year - nasty because we have rolled over into a new century
949 my $year1 = $date1parts[2];
950 if ($year1 < 80) {
951 $year1 += 2000;
952 }
953 my $year2 = $date2parts[2];
954 if ($year2 < 80) {
955 $year2 += 2000;
956 }
957
958 # Compare year
959 if ($year1 > $year2) {
960 return 1;
961 }
962 elsif ($year1 == $year2) {
963 # Year is the same, so compare month
964 if ($months{$date1parts[1]} > $months{$date2parts[1]}) {
965 return 1;
966 }
967 elsif ($months{$date1parts[1]} == $months{$date2parts[1]}) {
968 # Month is the same, so compare day
969 if ($date1parts[0] > $date2parts[0]) {
970 return 1;
971 }
972 }
973 }
974
975 return 0;
976}
977
978sub is_date_after
979{
980 my ($date1, $date2) = @_;
981
982 if(!defined $date1) {
983 return 1;
984 }
985 if(!defined $date2) {
986 return 0;
987 }
988
989 # 16-Aug-2006
990 if($date1=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){
991 my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr", "04", "May", "05", "Jun", "06",
992 "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12");
993 $date1=$3 . "-" . $months{$2} . "-" . $1;
994 # print "** converted date1: $date1\n";
995 }
996 if($date2=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){
997 my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr", "04", "May", "05", "Jun", "06",
998 "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12");
999 $date2=$3 . "-" . $months{$2} . "-" . $1;
1000 # print "** converted date2: $date2\n";
1001 }
1002
1003
1004 # 2006-08-16
1005 my @date1parts = split(/-/, $date1);
1006 my @date2parts = split(/-/, $date2);
1007
1008 # Compare year
1009 if ($date1parts[0] > $date2parts[0]) {
1010 return 1;
1011 }
1012 elsif ($date1parts[0] == $date2parts[0]) {
1013 # Year is the same, so compare month
1014 if ($date1parts[1] > $date2parts[1]) {
1015 return 1;
1016 }
1017 elsif ($date1parts[1] == $date2parts[1]) {
1018 # Month is the same, so compare day
1019 if ($date1parts[2] > $date2parts[2]) {
1020 return 1;
1021 }
1022 }
1023 }
1024
1025 return 0;
1026}
1027
1028
1029# ==========================================================================================
1030# MACROFILE FUNCTIONS
1031
1032sub build_key_to_line_mapping_for_macrofile
1033{
1034 my (@file_lines) = @_;
1035
1036 my $macro_package;
1037 my %chunk_key_to_line_mapping = ();
1038 # Process the contents of the file, line by line
1039 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1040 my $line = $file_lines[$i];
1041 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1042
1043 # Check if a new package is being defined
1044 if ($line =~ m/^package\s+(.+)/) {
1045 $macro_package = $1;
1046 }
1047
1048 # Line contains a macro name
1049 elsif ($line =~ m/^(_\w+_)/) {
1050 my $macro_key = $1;
1051 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
1052
1053 # While there is still text of the macro to go...
1054 my $startline = $i;
1055 while ($line !~ /\}$/) {
1056 $i++;
1057 if ($i == scalar(@file_lines)) {
1058 &throw_fatal_error("Could not find end of macro $macro_key.");
1059 }
1060 $line = $file_lines[$i];
1061 $line =~ s/\s*([^\\]\#[^\}]+)?$//; # Remove any comments and nasty whitespace
1062 }
1063
1064 # The chunk key consists of the package name and the macro key
1065 my $chunk_key = $macro_package . "." . $macro_key;
1066 # Map from chunk key to line
1067 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1068 }
1069
1070 # Icon: line in format ## "image text" ## image_type ## macro_name ##
1071 elsif ($line =~ m/^\#\# .* \#\# .* \#\# (.*) \#\#/) {
1072 # The chunk key consists of package name and macro key
1073 my $chunk_key = $macro_package . "." . $1;
1074 # Map from chunk key to line
1075 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1076 }
1077 }
1078
1079 return %chunk_key_to_line_mapping;
1080}
1081
1082
1083sub import_chunk_from_macrofile
1084{
1085 my ($chunk_text) = @_;
1086
1087 # Is this an icon macro??
1088 if ($chunk_text =~ /^\#\# (.*)/) {
1089 # Extract image macro text
1090 $chunk_text =~ /^\#\#\s+([^\#]+)\s+\#\#/;
1091 $chunk_text = $1;
1092
1093 # Remove enclosing quotes
1094 $chunk_text =~ s/^\"//;
1095 $chunk_text =~ s/\"$//;
1096 }
1097
1098 # No, so it must be a text macro
1099 else {
1100 # Remove macro key
1101 $chunk_text =~ s/^_([^_]+)_(\s*)//;
1102
1103 # Remove language specifier
1104 $chunk_text =~ s/^\[l=.*\](\s*)//;
1105
1106 # Remove braces enclosing text
1107 $chunk_text =~ s/^{(\s*)((.|\n)*)}(\s*)(\#.+\s*)?/$2/;
1108 }
1109
1110 return $chunk_text;
1111}
1112
1113
1114sub get_macrofile_chunk_gti_comment
1115{
1116 my ($chunk_text) = @_;
1117
1118 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1119 if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
1120 return $1;
1121 }
1122
1123 return undef;
1124}
1125
1126
1127sub is_macrofile_chunk_automatically_translated
1128{
1129 my ($chunk_key) = @_;
1130
1131 # The _httpiconX_, _widthX_ and _heightX_ image macros are automatically translated
1132 if ($chunk_key =~ /\._(httpicon|width|height)/) {
1133 return 1;
1134 }
1135
1136 return 0;
1137}
1138
1139
1140# Use the source file to generate a target file that is formatted the same
1141sub write_translated_macrofile
1142{
1143 my $source_file = shift(@_); # Not used
1144 my @source_file_lines = @{shift(@_)};
1145 my $source_file_key_to_text_mapping = shift(@_);
1146 my $target_file = shift(@_);
1147 my @target_file_lines = @{shift(@_)};
1148 my $target_file_key_to_text_mapping = shift(@_);
1149 my $target_file_key_to_gti_comment_mapping = shift(@_);
1150 my $target_language_code = shift(@_);
1151
1152 # Build a mapping from source file line to chunk key
1153 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@source_file_lines);
1154 my %source_file_line_to_key_mapping = ();
1155 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1156 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1157 }
1158 my @source_file_line_keys = (sort sort_by_line (keys(%source_file_line_to_key_mapping)));
1159 my $source_file_line_number = 0;
1160
1161 # Build a mapping from target file line to chunk key
1162 my %target_file_key_to_line_mapping = &build_key_to_line_mapping_for_macrofile(@target_file_lines);
1163 my %target_file_line_to_key_mapping = ();
1164 foreach my $chunk_key (keys(%target_file_key_to_line_mapping)) {
1165 $target_file_line_to_key_mapping{$target_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1166 }
1167 my @target_file_line_keys = (sort sort_by_line (keys(%target_file_line_to_key_mapping)));
1168
1169 # Write the new target file
1170 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1171 if (!open(TARGET_FILE, ">$target_file_path")) {
1172 &throw_fatal_error("Could not write target file $target_file_path.");
1173 }
1174
1175 # Use the header from the target file, to keep language and author information
1176 if (scalar(@target_file_line_keys) > 0) {
1177 my $target_file_line_number = 0;
1178 my $target_file_chunk_starting_line_number = (split(/-/, $target_file_line_keys[0]))[0];
1179 while ($target_file_line_number < $target_file_chunk_starting_line_number) {
1180 my $target_file_line = $target_file_lines[$target_file_line_number];
1181 last if ($target_file_line =~ /^\# -- Missing translation: /); # We don't want to get into the macros
1182 print TARGET_FILE $target_file_line;
1183 $target_file_line_number++;
1184 }
1185
1186 $source_file_line_number = (split(/-/, $source_file_line_keys[0]))[0];
1187 }
1188
1189 # Model the new target file on the source file, with the target file translations
1190 foreach my $line_key (@source_file_line_keys) {
1191 # Fill in the gaps before this chunk starts
1192 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1193 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1194 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1195 print TARGET_FILE $source_file_lines[$source_file_line_number];
1196 $source_file_line_number++;
1197 }
1198 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1199
1200 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1201 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1202 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1203
1204 my $macrofile_key = $chunk_key;
1205 $macrofile_key =~ s/^(.+?)\.//;
1206
1207 # If no translation exists for this chunk, show this, and move on
1208 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1209 print TARGET_FILE "# -- Missing translation: $macrofile_key\n";
1210 next;
1211 }
1212
1213 # Grab the source chunk text
1214 my $source_file_chunk = $source_file_lines[$source_file_chunk_starting_line_number];
1215 for (my $l = ($source_file_chunk_starting_line_number + 1); $l <= $source_file_chunk_finishing_line_number; $l++) {
1216 $source_file_chunk .= $source_file_lines[$l];
1217 }
1218
1219 # Is this an icon macro??
1220 if ($source_file_chunk =~ /^\#\# (.*)/) {
1221 # Escape any newline and question mark characters so the source text is replaced correctly
1222 $source_file_chunk_text =~ s/\\/\\\\/g;
1223 $source_file_chunk_text =~ s/\?/\\\?/g;
1224
1225 # Build the new target chunk from the source chunk
1226 my $target_file_chunk = $source_file_chunk;
1227 $target_file_chunk =~ s/$source_file_chunk_text/$target_file_chunk_text/;
1228 $target_file_chunk =~ s/(\s)*$//;
1229 print TARGET_FILE "$target_file_chunk";
1230 }
1231
1232 # No, it is just a normal text macro
1233 else {
1234 print TARGET_FILE "$macrofile_key [l=$target_language_code] {$target_file_chunk_text}";
1235 }
1236
1237 # Add the "updated" comment, if one exists
1238 if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1239 print TARGET_FILE " # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
1240 }
1241 print TARGET_FILE "\n";
1242 }
1243
1244 close(TARGET_FILE);
1245}
1246
1247
1248sub sort_by_line
1249{
1250 return ((split(/-/, $a))[0] <=> (split(/-/, $b))[0]);
1251}
1252
1253
1254# ==========================================================================================
1255# RESOURCE BUNDLE FUNCTIONS
1256
1257sub build_key_to_line_mapping_for_resource_bundle
1258{
1259 my (@file_lines) = @_;
1260
1261 my %chunk_key_to_line_mapping = ();
1262 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1263 my $line = $file_lines[$i];
1264 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1265
1266 # Line contains a dictionary string
1267 if ($line =~ /^(\S+?):(.*)$/) {
1268 my $chunk_key = $1;
1269
1270 # Map from chunk key to line
1271 $chunk_key_to_line_mapping{$chunk_key} = $i . "-" . $i;
1272 }
1273 }
1274
1275 return %chunk_key_to_line_mapping;
1276}
1277
1278
1279sub import_chunk_from_resource_bundle
1280{
1281 my ($chunk_text) = @_;
1282
1283 # Simple: just remove string key
1284 $chunk_text =~ s/^(\S+?):(\s*)//;
1285 $chunk_text =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1286 $chunk_text =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d.*)\s*$//i;
1287
1288 return $chunk_text;
1289}
1290
1291
1292sub get_resource_bundle_chunk_gti_comment
1293{
1294 my ($chunk_text) = @_;
1295
1296 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1297 if ($chunk_text =~ /\#\s+(Updated\s+\d?\d-\D\D\D-\d\d\d\d.*)\s*$/i) {
1298 return $1;
1299 }
1300
1301 return undef;
1302}
1303
1304
1305sub is_resource_bundle_chunk_automatically_translated
1306{
1307 # No resource bundle chunks are automatically translated
1308 return 0;
1309}
1310
1311
1312sub write_translated_resource_bundle
1313{
1314 my $source_file = shift(@_); # Not used
1315 my @source_file_lines = @{shift(@_)};
1316 my $source_file_key_to_text_mapping = shift(@_);
1317 my $target_file = shift(@_);
1318 my @target_file_lines = @{shift(@_)}; # Not used
1319 my $target_file_key_to_text_mapping = shift(@_);
1320 my $target_file_key_to_gti_comment_mapping = shift(@_);
1321 my $target_language_code = shift(@_); # Not used
1322
1323 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1324 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_resource_bundle(@source_file_lines);
1325 my %source_file_line_to_key_mapping = ();
1326 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1327 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1328 }
1329
1330 # Write the new target file
1331 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1332 if (!open(TARGET_FILE, ">$target_file_path")) {
1333 &throw_fatal_error("Could not write target file $target_file_path.");
1334 }
1335
1336 # Model the new target file on the source file, with the target file translations
1337 my $source_file_line_number = 0;
1338 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1339 # Fill in the gaps before this chunk starts
1340 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1341 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1342 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1343 print TARGET_FILE $source_file_lines[$source_file_line_number];
1344 $source_file_line_number++;
1345 }
1346 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1347
1348 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1349 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1350 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1351
1352 # If no translation exists for this chunk, show this, and move on
1353 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1354 print TARGET_FILE "# -- Missing translation: $chunk_key\n";
1355 next;
1356 }
1357
1358 print TARGET_FILE "$chunk_key:$target_file_chunk_text";
1359 if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1360 print TARGET_FILE " # " . $target_file_key_to_gti_comment_mapping->{$chunk_key};
1361 }
1362 print TARGET_FILE "\n";
1363 }
1364
1365 close(TARGET_FILE);
1366}
1367
1368
1369# ==========================================================================================
1370# GREENSTONE XML FUNCTIONS
1371
1372sub build_key_to_line_mapping_for_greenstone_xml
1373{
1374 my (@file_lines) = @_;
1375
1376 my %chunk_key_to_line_mapping = ();
1377 for (my $i = 0; $i < scalar(@file_lines); $i++) {
1378 my $line = $file_lines[$i];
1379 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
1380
1381 # Line contains a string to translate
1382 if ($line =~ /^\s*<Text id=\"(.*?)\">/) {
1383 my $chunk_key = $1;
1384 $line =~ s/\s*$//; # Remove any nasty whitespace
1385 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1386
1387 # While there is still text of the string to go...
1388 my $startline = $i;
1389 while ($line !~ /<\/Text>$/) {
1390 $i++;
1391 if ($i == scalar(@file_lines)) {
1392 &throw_fatal_error("Could not find end of string $chunk_key.");
1393 }
1394 $line = $file_lines[$i];
1395 $line =~ s/\s*$//; # Remove any nasty whitespace
1396 $line =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1397 }
1398
1399 # Map from chunk key to line
1400 if (!defined($chunk_key_to_line_mapping{$chunk_key})) {
1401 $chunk_key_to_line_mapping{$chunk_key} = $startline . "-" . $i;
1402 }
1403 else {
1404 &throw_fatal_error("Duplicate key $chunk_key.");
1405 }
1406 }
1407 }
1408
1409 return %chunk_key_to_line_mapping;
1410}
1411
1412
1413sub import_chunk_from_greenstone_xml
1414{
1415 my ($chunk_text) = @_;
1416
1417 # Simple: just remove the Text tags
1418 $chunk_text =~ s/^\s*<Text id=\"(.*?)\">(\s*)//;
1419 $chunk_text =~ s/<Updated date=\"\d?\d-\D\D\D-\d\d\d\d.*\"\/>$//;
1420 $chunk_text =~ s/<\/Text>$//;
1421
1422 return $chunk_text;
1423}
1424
1425
1426sub get_greenstone_xml_chunk_gti_comment
1427{
1428 my ($chunk_text) = @_;
1429
1430 # Check for an "Updated DD-MMM-YYYY" comment at the end of the chunk
1431 if ($chunk_text =~ /<Updated date=\"(\d?\d-\D\D\D-\d\d\d\d.*)\"\/>$/i) {
1432 return $1;
1433 }
1434
1435 return undef;
1436}
1437
1438
1439sub is_greenstone_xml_chunk_automatically_translated
1440{
1441 # No greenstone XML chunks are automatically translated
1442 return 0;
1443}
1444
1445
1446sub write_translated_greenstone_xml
1447{
1448 my $source_file = shift(@_); # Not used
1449 my @source_file_lines = @{shift(@_)};
1450 my $source_file_key_to_text_mapping = shift(@_);
1451 my $target_file = shift(@_);
1452 my @target_file_lines = @{shift(@_)}; # Not used
1453 my $target_file_key_to_text_mapping = shift(@_);
1454 my $target_file_key_to_gti_comment_mapping = shift(@_);
1455 my $target_language_code = shift(@_); # Not used
1456
1457 # Build a mapping from chunk key to source file line, and from source file line to chunk key
1458 my %source_file_key_to_line_mapping = &build_key_to_line_mapping_for_greenstone_xml(@source_file_lines);
1459 my %source_file_line_to_key_mapping = ();
1460 foreach my $chunk_key (keys(%source_file_key_to_line_mapping)) {
1461 $source_file_line_to_key_mapping{$source_file_key_to_line_mapping{$chunk_key}} = $chunk_key;
1462 }
1463
1464 # Write the new target file
1465 my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file);
1466 if (!open(TARGET_FILE, ">$target_file_path")) {
1467 &throw_fatal_error("Could not write target file $target_file_path.");
1468 }
1469
1470 # Model the new target file on the source file, with the target file translations
1471 my $source_file_line_number = 0;
1472 foreach my $line_key (sort sort_by_line (keys(%source_file_line_to_key_mapping))) {
1473 # Fill in the gaps before this chunk starts
1474 my $source_file_chunk_starting_line_number = (split(/-/, $line_key))[0];
1475 my $source_file_chunk_finishing_line_number = (split(/-/, $line_key))[1];
1476 while ($source_file_line_number < $source_file_chunk_starting_line_number) {
1477 print TARGET_FILE $source_file_lines[$source_file_line_number];
1478 $source_file_line_number++;
1479 }
1480 $source_file_line_number = $source_file_chunk_finishing_line_number + 1;
1481
1482 my $chunk_key = $source_file_line_to_key_mapping{$line_key};
1483 my $source_file_chunk_text = $source_file_key_to_text_mapping->{$chunk_key};
1484 my $target_file_chunk_text = $target_file_key_to_text_mapping->{$chunk_key} || "";
1485 $target_file_chunk_text =~ s/(\n)*$//g;
1486
1487 # If no translation exists for this chunk, show this, and move on
1488 if ($source_file_chunk_text ne "" && $target_file_chunk_text eq "") {
1489 print TARGET_FILE "<!-- Missing translation: $chunk_key -->\n";
1490 next;
1491 }
1492
1493 print TARGET_FILE "<Text id=\"$chunk_key\">$target_file_chunk_text</Text>";
1494 if ($target_file_key_to_gti_comment_mapping->{$chunk_key}) {
1495 my $chunk_gti_comment = $target_file_key_to_gti_comment_mapping->{$chunk_key};
1496 $chunk_gti_comment =~ s/^Updated //;
1497 print TARGET_FILE "<Updated date=\"" . $chunk_gti_comment . "\"\/>";
1498 }
1499 print TARGET_FILE "\n";
1500 }
1501
1502 # Fill in the end of the file
1503 while ($source_file_line_number < scalar(@source_file_lines)) {
1504 print TARGET_FILE $source_file_lines[$source_file_line_number];
1505 $source_file_line_number++;
1506 }
1507
1508 close(TARGET_FILE);
1509}
1510
1511
1512&main(@ARGV);
Note: See TracBrowser for help on using the repository browser.