source: trunk/gsdl/perllib/translator.pm@ 7909

Last change on this file since 7909 was 5276, checked in by mdewsnip, 21 years ago

Perl module containing various methods used by the different parts of the Greenstone Translator Interface.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.5 KB
Line 
1###########################################################################
2#
3# translator.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26
27# Common functions used by the different parts of the translator
28package translator;
29
30
31use util;
32use GDBM_File;
33
34
35my $gsdldir = "$ENV{'GSDLHOME'}";
36my $maincfgfile = &util::filename_cat($gsdldir, "etc", "main.cfg");
37my $translatordir = &util::filename_cat($gsdldir, "translator");
38my $translatorcfgfile = &util::filename_cat($gsdldir, "etc", "translator.cfg");
39
40my $cvsroot = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src";
41
42
43sub get_translator_dir
44{
45 return $translatordir;
46}
47
48
49sub get_cvs_root
50{
51 return $cvsroot;
52}
53
54
55sub read_languages_from_main_cfg_file
56{
57 local %name2abbrhash = ();
58
59 # Read the main.cfg file
60 if (!open(MAIN_CFG_FILE, "<$maincfgfile")) {
61 print STDERR "Error: Could not open $maincfgfile.\n";
62 die "\n";
63 }
64 while (<MAIN_CFG_FILE>) {
65 local $line = $_;
66 chomp($line);
67
68 # Parse the lines containing a language definition
69 if ($line =~ m/^Language\s+/i) {
70 local @lineparts = split(/\s+/, $line);
71 local ($langabbr) = ($lineparts[1] =~ m/^shortname=(.*)$/);
72 local ($langname) = ($lineparts[2] =~ m/^longname=(.*)$/);
73
74 # Casefold language name and code
75 $langabbr =~ tr/A-Z/a-z/;
76 $langname =~ tr/A-Z/a-z/;
77
78 $name2abbrhash{$langname} = $langabbr;
79 }
80 }
81 close(MAIN_CFG_FILE);
82
83 return %name2abbrhash;
84}
85
86
87sub read_translation_files_from_config_file
88{
89 local %name2deschash = ();
90
91 # Read the translator config file
92 if (!open(CONFIG_FILE, "<$translatorcfgfile")) {
93 print STDERR "Error: Could not open $translatorcfgfile.\n";
94 die "\n";
95 }
96 while (<CONFIG_FILE>) {
97 local $line = $_;
98 chomp($line);
99
100 # Check if a file is being defined
101 if ($line =~ m/^File\s+/i) {
102 local @lineparts = split(/\s+/, $line);
103 local ($fileid) = ($lineparts[1] =~ m/^id=(.*)$/i);
104 local ($filedesc) = ($lineparts[2] =~ m/^description=(.*)$/i);
105 $name2deschash{$fileid} = $filedesc;
106 }
107 }
108 close(CONFIG_FILE);
109
110 return %name2deschash;
111}
112
113
114sub read_file_data_from_config_file
115{
116 my ($sourcelang, $sourceabbr, $targetlang, $targetabbr, $file) = @_;
117
118 # Read the translator config file
119 if (!open(CONFIG_FILE, "<$translatorcfgfile")) {
120 print STDERR "Error: Could not open $translatorcfgfile.\n";
121 die "\n";
122 }
123 while (<CONFIG_FILE>) {
124 local $line = $_;
125 chomp($line);
126
127 # Check if a file is being defined
128 if ($line =~ m/^File\s+/i) {
129 local @lineparts = split(/\s+/, $line);
130 local ($fileid) = ($lineparts[1] =~ m/^id=(.*)$/);
131
132 # If this is the file we want, return its details
133 if ($fileid eq $file) {
134 local ($filedesc) = ($lineparts[2] =~ m/^description=(.*)$/);
135 local ($sourcefile) = ($lineparts[3] =~ m/^sourcefile=(.*)$/);
136 local ($targetfile) = ($lineparts[4] =~ m/^targetfile=(.*)$/);
137 local ($filetype) = ($lineparts[5] =~ m/^type=(.*)$/);
138
139 # Resolve instances of {sourcelang}, {sourceabbr}, {targetlang}, {targetabbr}
140 $sourcefile =~ s/\{sourcelang\}/$sourcelang/g;
141 $sourcefile =~ s/\{sourceabbr\}/$sourceabbr/g;
142 $targetfile =~ s/\{targetlang\}/$targetlang/g;
143 $targetfile =~ s/\{targetabbr\}/$targetabbr/g;
144
145 close(CONFIG_FILE);
146 return ($sourcefile, $targetfile, $filetype);
147 }
148 }
149 }
150 close(CONFIG_FILE);
151
152 return ();
153}
154
155
156sub get_translation_file_path
157{
158 my ($translationdir, $relativefilepath) = @_;
159
160 # Extract the file name from the relative file path
161 $relativefilepath =~ /([^\\\/]+)$/;
162 return &util::filename_cat($translationdir, $1);
163}
164
165
166sub process_file
167{
168 my ($filepath, $filetype) = @_;
169
170 # Build a mapping from key to lines
171 local %linehash = &build_key_to_line_mapping($filepath, $filetype);
172
173 # Build a mapping from chunk key to chunk text, and return
174 return &build_key_to_text_mapping($filepath, $filetype, %linehash);
175}
176
177
178sub build_key_to_line_mapping
179{
180 my ($filepath, $filetype) = @_;
181
182 if ($filetype =~ /^macrofile/) {
183 return &build_key_to_line_mapping_for_macrofile($filepath);
184 }
185 if ($filetype =~ /^resourcebundle/) {
186 return &build_key_to_line_mapping_for_resource_bundle($filepath);
187 }
188
189 print STDERR "Error: Unknown file type $filetype.\n";
190 die "\n";
191}
192
193
194sub build_key_to_text_mapping
195{
196 my ($filepath, $filetype, %linehash) = @_;
197
198 if (!open(FILE_IN, "<$filepath")) {
199 print STDERR "Error: Could not open file $filepath.\n";
200 die "\n";
201 }
202 local @lines = <FILE_IN>;
203 close FILE_IN;
204
205 my %chunks = ();
206
207 foreach $key (keys(%linehash)) {
208 # Chunk lines in linehash start at 0
209 local $chunkstart = (split(/-/, $linehash{$key}))[0];
210 local $chunkfinish = (split(/-/, $linehash{$key}))[1];
211
212 local $chunktext = $lines[$chunkstart];
213 for ($l = ($chunkstart + 1); $l <= $chunkfinish; $l++) {
214 $chunktext .= $lines[$l];
215 }
216
217 # Map from chunk key to (formatted) text
218 $chunks{$key} = &import_chunk($chunktext, $filetype);
219 }
220
221 return %chunks;
222}
223
224
225sub import_chunk
226{
227 local ($chunktext, $filetype) = @_;
228
229 if ($filetype =~ /^macrofile/) {
230 return &import_chunk_from_macrofile($chunktext);
231 }
232 if ($filetype =~ /^resourcebundle/) {
233 return &import_chunk_from_resource_bundle($chunktext);
234 }
235
236 print STDERR "Error: Unknown file type $filetype.\n";
237 die "\n";
238}
239
240
241sub refresh_translation
242{
243 local ($translationdir, $sourcefile, $filetype) = @_;
244
245
246 # -----------------------------------------
247 # Process the source language file
248 # -----------------------------------------
249
250 # Parse the backup source language file, building a mapping from chunk key to chunk text
251 local $sourcepath = &get_translation_file_path($translationdir, $sourcefile);
252 local %backuphash = &process_file($sourcepath, $filetype);
253 print STDOUT "Number of backup chunks: " . scalar(keys(%backuphash)) . "\n";
254
255 # Remove the (possibly out of date) source file
256 local $hotsourcepath = &util::filename_cat($gsdldir, $sourcefile);
257# &util::rm($hotsourcepath);
258
259 # Get the latest version of the source file from CVS
260# local $sourceupdate = `cd $gsdldir; cvs -d $cvsroot update $sourcefile`;
261# if ($sourceupdate !~ /^U /) {
262# print STDERR "Error: Could not update source file.\n";
263# die "\n";
264# }
265
266 # Check that the source file now exists
267 if (!-e $hotsourcepath) {
268 print STDERR "Error: Source file $hotsourcepath does not exist.\n";
269 die "\n";
270 }
271
272 # Copy the new version of the source file to become the backup file
273 &util::cp($hotsourcepath, $sourcepath);
274
275 # Parse the new source language file, building a mapping from chunk key to chunk text
276 local %sourcehash = &process_file($sourcepath, $filetype);
277 print STDOUT "Number of source chunks: " . scalar(keys(%sourcehash)) . "\n";
278
279
280 # -----------------------------------------
281 # Rebuild the translation database
282 # -----------------------------------------
283
284 # Open the translation database for this file
285 local $translationdb = &util::filename_cat($translationdir, "translation.db");
286 local %translationdata = ();
287 tie(%translationdata, "GDBM_File", $translationdb, GDBM_WRCREAT, 0666);
288
289 # Remove translation data for any chunks that no longer exist
290 foreach $key (keys(%translationdata)) {
291 local $sourcekey = $key;
292 if ($sourcekey =~ /\*$/) {
293 chop($sourcekey);
294 }
295
296 # If the chunk doesn't exist in the new source file, remove it from the database
297 if (!defined($sourcehash{$sourcekey})) {
298 print STDOUT "Chunk with key $key no longer exists!\n";
299 delete $translationdata{$key};
300 }
301 }
302
303 # Update the translation database to reflect the new source file
304 foreach $key (keys(%sourcehash)) {
305 # Add translation data for any new chunks
306 if (!defined($backuphash{$key})) {
307 print STDOUT "Chunk with key $key is new!\n";
308 $translationdata{$key} = "";
309 }
310
311 # Source chunks that have changed since last time must have their translations updated
312 if ($backuphash{$key} && $backuphash{$key} ne $sourcehash{$key}) {
313 print STDOUT "Chunk with key $key needs updating!\n";
314 $translationdata{$key . "*"} = "";
315 }
316 }
317
318 # Close the old translation database
319 untie(%translationdata);
320}
321
322
323# ==========================================================================================
324
325sub build_key_to_line_mapping_for_macrofile
326{
327 local ($filepath) = @_;
328
329 # Open file for reading
330 if (!open(FILE_IN, "<$filepath")) {
331 print STDERR "Error: Could not open macrofile $filepath.\n";
332 die "\n";
333 }
334 local @lines = <FILE_IN>;
335 close FILE_IN;
336
337 # Initialise some local variables
338 local $currpackage;
339 local %linehash = ();
340
341 # Process the contents of the file, line by line
342 for ($i = 0; $i < scalar(@lines); $i++) {
343 local $line = $lines[$i];
344 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
345
346 # Check if a new package is being defined
347 if ($line =~ m/^package\s+(.+)/) {
348 $currpackage = $1;
349 }
350
351 # Line contains a macro name
352 elsif ($line =~ m/^(_\w+_)/) {
353 local $macroname = $1;
354
355 # While there is still text of the macro to go...
356 local $startline = $i;
357 while ($line !~ /.*\}/) {
358 $i++;
359 $line = $lines[$i];
360 }
361
362 # The key consists of the package name and the macro name
363 local $key = $currpackage . "::" . $macroname;
364 # Map from key to line
365 $linehash{$key} = $startline . "-" . $i;
366 }
367
368 # Icon: line in format ## "sometext" ## macro ## macroname ##
369 elsif ($line =~ m/^\#\# (.*)/) {
370 local $macroname = $1;
371
372 # If the macro text contains a new line it will run over two lines
373 local $startline = $i;
374 unless ($line =~ m/^\#\# .*\#\#/) {
375 $line = $lines[++$i];
376 $macroname .= $line;
377 }
378
379 # Split the image macro header on ##
380 local @names = split(/\s*\#\#\s*/, $macroname);
381 # Overwrite macroname with macroname from ## ... ## ... ## HERE ###
382 $macroname = $names[(scalar @names) - 1];
383
384 # Read the rest of the text associated with the image macro
385 while ($line !~ /^\s*$/) {
386 $i++;
387 $line = $lines[$i];
388 }
389
390 # The key consists of package name and macro name
391 local $key = $currpackage . "::" . $macroname;
392 # Map from key to line
393 $linehash{$key} = $startline . "-" . ($i - 1);
394 }
395 }
396
397 return %linehash;
398}
399
400
401sub import_chunk_from_macrofile
402{
403 local ($chunktext) = @_;
404
405 # Is this an icon macro??
406 if ($chunktext =~ m/^\#\# (.*)/) {
407 # Extract image macro text
408 $chunktext =~ s/^\#\# ((.|\n)+) \#\# (.+) \#\# (.|\n)+/$1/;
409
410 # Remove enclosing quotes
411 $chunktext =~ s/^\s*\"((.|\n)+)\"\s*/$1/;
412 }
413
414 # No, so it must be a text macro
415 else {
416 # Remove macro key
417 $chunktext =~ s/^_([^_]+)_(\s*)//;
418
419 # Remove language specifier
420 $chunktext =~ s/^\[l=.*\](\s*)//;
421
422 # Remove braces enclosing text
423 $chunktext =~ s/^{(\s*)((.|\n)*)}(\s*)/$2/;
424 }
425
426 return $chunktext;
427}
428
429
430# ------------------------------------------------------------------------------------------
431
432sub build_key_to_line_mapping_for_resource_bundle
433{
434 local ($filepath) = @_;
435
436 # Open file for reading
437 if (!open(FILE_IN, "<$filepath")) {
438 print STDERR "Error: Could not open resource bundle $filepath.\n";
439 die "\n";
440 }
441 local @lines = <FILE_IN>;
442 close FILE_IN;
443
444 # Initialise some local variables
445 local %linehash = ();
446
447 # Process the contents of the file, line by line
448 for ($i = 0; $i < scalar(@lines); $i++) {
449 local $line = $lines[$i];
450 $line =~ s/(\s*)$//; # Remove any nasty whitespace, carriage returns etc.
451
452 # Line contains a dictionary string
453 if ($line =~ m/^((\w|\.)+):/) {
454 local $key = $1;
455
456 # Map from key to line
457 $linehash{$key} = $i . "-" . $i;
458 }
459 }
460
461 return %linehash;
462}
463
464
465sub import_chunk_from_resource_bundle
466{
467 local ($chunktext) = @_;
468
469 # Simple: just remove string key
470 $chunktext =~ s/^((\w|\.)+)://;
471
472 return $chunktext;
473}
474
475
4761;
Note: See TracBrowser for help on using the repository browser.