source: trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm@ 10419

Last change on this file since 10419 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 13.7 KB
Line 
1###########################################################################
2#
3# ConvertToRogPlug.pm -- plugin that inherits from RogPlug
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package ConvertToRogPlug;
29
30use BasPlug;
31use RogPlug;
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34
35sub BEGIN {
36 @ConvertToRogPlug::ISA = ('RogPlug');
37}
38
39my $arguments = [
40 ];
41my $options = { 'name' => "ConvertToRogPlug",
42 'desc' => "{ConvertToRogPlug.desc}",
43 'abstract' => "yes",
44 'inherits' => "yes" };
45
46sub new {
47 my ($class) = shift (@_);
48 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
49 push(@$pluginlist, $class);
50
51 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
52 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
53
54 my $self = (defined $hashArgOptLists)? new RogPlug($pluginlist,$inputargs,$hashArgOptLists): new RogPlug($pluginlist,$inputargs);
55
56 $self->{'convert_to'} = "Rog";
57 $self->{'convert_to_ext'} = "rog";
58
59 return bless $self, $class;
60}
61
62
63sub begin {
64 my $self = shift (@_);
65
66 $self->SUPER::begin(@_);
67
68 $self->{'docnum'} = 0;
69}
70
71sub end {
72 my ($self) = @_;
73
74 # nothing to do, but keep symmetric with begin function
75 $self->SUPER::end(@_);
76}
77
78
79# Run conversion utility on the input file.
80#
81# The conversion takes place in a collection specific 'tmp' directory so
82# that we don't accidentally damage the input.
83#
84# The desired output type is indicated by $output_ext. This is usually
85# something like "html" or "word", but can be "best" (or the empty string)
86# to indicate that the conversion utility should do the best it can.
87
88sub tmp_area_convert_file {
89 my $self = shift (@_);
90 my ($output_ext, $input_filename, $textref) = @_;
91
92 my $outhandle = $self->{'outhandle'};
93 my $convert_to = $self->{'convert_to'};
94 my $failhandle = $self->{'failhandle'};
95
96 # softlink to collection tmp dir
97 my $tmp_dirname
98 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
99 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
100
101 # derive tmp filename from input filename
102 my ($tailname, $dirname, $suffix)
103 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
104
105 # Remove any white space from filename -- no risk of name collision, and
106 # makes later conversion by utils simpler. Leave spaces in path...
107 $tailname =~ s/\s+//g;
108
109 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
110
111 &util::soft_link($input_filename, $tmp_filename);
112
113 my $verbosity = $self->{'verbosity'};
114 if ($verbosity > 0) {
115 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
116 }
117
118 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
119
120 # Execute the conversion command and get the type of the result,
121 # making sure the converter gives us the appropriate output type
122 my $output_type = lc($convert_to);
123 my $cmd = "perl -S gsMusicConvert.pl -verbose $verbosity -errlog \"$errlog\" -output $output_type \"$tmp_filename\"";
124 $output_type = `$cmd`;
125
126 # remove symbolic link to original file
127 &util::rm($tmp_filename);
128
129 # Check STDERR here
130 chomp $output_type;
131 if ($output_type eq "fail") {
132 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
133 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
134 $self->{'num_not_processed'} ++;
135 if (-s "$errlog") {
136 open(ERRLOG, "$errlog");
137 while (<ERRLOG>) {
138 print $outhandle "$_";
139 }
140 print $outhandle "\n";
141 close ERRLOG;
142 }
143 &util::rm("$errlog") if (-e "$errlog");
144 return "";
145 }
146
147 # store the *actual* output type and return the output filename
148 # it's possible we requested conversion to html, but only to text succeeded
149
150 $self->{'convert_to_ext'} = $output_type;
151 $self->{'converted_to'} = "Rog";
152
153 my $output_filename = $tmp_filename;
154
155 $output_filename =~ s/$suffix$//;
156
157 return $output_filename;
158}
159
160
161# Remove collection specific tmp directory and all its contents.
162
163sub cleanup_tmp_area {
164 my $self = shift (@_);
165
166 my $tmp_dirname
167 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
168 &util::rm_r($tmp_dirname);
169 &util::mk_dir($tmp_dirname);
170}
171
172
173# Exact copy of read_rog_record from RogPlug
174# Needed for FILE in right scope
175
176sub read_rog_record
177{
178 my ($self,$file_buffer, $docnum, $seclevel) = @_;
179
180 my $next_line = $file_buffer->{'next_line'};
181
182 return 0 if (!defined $next_line);
183
184 if ($next_line eq "")
185 {
186 my $line;
187 while(defined($line=<FILE>))
188 {
189 $line =~ s/\r$//;
190 $file_buffer->{'line_no'}++;
191 next if ($line =~ m/^\#/);
192 $next_line = $line;
193 last;
194 }
195 }
196
197 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
198 {
199 print STDERR "Error: Malformed Rog file: $next_line";
200 return 0;
201 }
202 else
203 {
204 # init default values
205 $file_buffer->{'song'}->{'tempo'} = 120;
206 $file_buffer->{'song'}->{'ks_type'} = 0;
207 $file_buffer->{'song'}->{'ks_num'} = 0;
208 $file_buffer->{'song'}->{'metadata'} = [];
209 $file_buffer->{'song'}->{'content'} = "";
210
211 $file_buffer->{'song'}->{'subcol'} = $2;
212 $file_buffer->{'song'}->{'title'} = $4;
213 $file_buffer->{'song'}->{'tval'} = $6;
214
215 chomp($next_line);
216 my $content = $next_line;
217 if (defined $docnum)
218 {
219 $content.= " $docnum $seclevel";
220 }
221 $content .= "\n";
222
223 $file_buffer->{'song'}->{'content'} = $content;
224
225
226 my $line;
227 while(defined($line=<FILE>))
228 {
229 $line =~ s/\r$//;
230
231 $file_buffer->{'line_no'}++;
232 next if ($line =~ m/^\#/);
233
234 if ($line =~ m/^song/)
235 {
236 $file_buffer->{'next_line'} = $line;
237 return 1;
238 }
239 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
240 {
241 $file_buffer->{'song'}->{'tempo'} = $2;
242 $file_buffer->{'song'}->{'content'} .= $line;
243 }
244 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
245 {
246 $file_buffer->{'song'}->{'ks_type'} = $2;
247 $file_buffer->{'song'}->{'ks_num'} = $4;
248 $file_buffer->{'song'}->{'content'} .= $line;
249 }
250 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
251 {
252 $file_buffer->{'song'}->{'ts_numer'} = $2;
253 $file_buffer->{'song'}->{'ts_denom'} = $4;
254 $file_buffer->{'song'}->{'content'} .= $line;
255 }
256 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
257 {
258 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
259 $file_buffer->{'song'}->{'content'} .= $line;
260 }
261 else
262 {
263 $file_buffer->{'song'}->{'content'} .= $line;
264 }
265 }
266
267 $file_buffer->{'next_line'} = undef;
268 }
269
270 return 1;
271}
272
273# Override RogPlug function so rog files are stored as sections (not docs)
274
275sub process_rog_record
276{
277 my ($self,$doc_obj,$cursection,$song) = @_;
278
279 $cursection =
280 $doc_obj->insert_section($cursection);
281 $self->{'docnum'}++;
282
283 my $title = $song->{'title'};
284 my $title_safe = $title;
285 $title_safe =~ s/\'/\\\\&apos;/g;
286
287 # add metadata
288 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
289 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
290 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
291 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
292 $doc_obj->add_metadata($cursection, "Title", $title);
293 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
294 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
295
296 foreach my $md ( @{$song->{'metadata'}} )
297 {
298 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
299 }
300
301 # add contents as text
302 $doc_obj->add_text($cursection,$song->{'content'});
303
304 return $cursection;
305}
306
307
308
309# Override BasPlug read
310# We don't want to get language encoding stuff until after we've converted
311# our file to Rog format
312sub read {
313 my $self = shift (@_);
314 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
315# if ($self->is_recursive()) {
316# die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
317# }
318
319 my $outhandle = $self->{'outhandle'};
320
321 my $filename = $file;
322 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
323
324 if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
325 $self->{'num_blocked'} ++;
326 return 0;
327 }
328 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
329 return undef;
330 }
331 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
332
333 # read in file ($text will be in utf8)
334 my $text = "";
335
336 my $output_ext = $self->{'convert_to_ext'};
337 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
338
339 if ("$conv_filename" eq "") {return 0;} # allows continue on errors
340 $self->{'conv_filename'} = $conv_filename;
341
342
343 # create a new document
344 #my $doc_obj = new doc ($conv_filename, "indexed_doc");
345 # the original filename is used now
346 my $doc_obj = new doc ($filename, "indexed_doc");
347 # the converted filename is set separately
348 $doc_obj->set_converted_filename($conv_filename);
349
350 my $topsection = $doc_obj->get_top_section();
351 my $cursection = $topsection;
352
353 $self->{'docnum'}++;
354 my $docnum = $self->{'docnum'};
355
356 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
357 my ($filemeta) = $file =~ /([^\\\/]+)$/;
358 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
359 if ($self->{'cover_image'}) {
360 $self->associate_cover_image($doc_obj, $filename);
361 }
362 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
363 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename));
364
365 my $track_no = "1";
366 my $rog_filename = "$conv_filename$track_no.$output_ext";
367 while (1)
368 {
369 last unless open (FILE, $rog_filename) ;
370
371 my $file_buffer = { line_no => 0, next_line => "", song => {} };
372
373 while ($self->read_rog_record($file_buffer, $docnum, $track_no))
374 {
375 my $song = $file_buffer->{'song'};
376 my $content = $song->{'content'};
377 $content =~ s/^song\w+(.*)$/song $1 X.$track_no/;
378
379 $cursection
380 = $self->process_rog_record($doc_obj,$cursection,
381 $file_buffer->{'song'});
382 }
383
384 close FILE;
385
386 $track_no++;
387 $rog_filename = "$conv_filename$track_no.$output_ext";
388 }
389
390 print STDERR "\n";
391
392 # include any metadata passed in from previous plugins
393 # note that this metadata is associated with the top level section
394 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
395 # do plugin specific processing of doc_obj
396 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
397 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
398 return -1;
399 }
400 # do any automatic metadata extraction
401 $self->auto_extract_metadata ($doc_obj);
402 # add an OID
403 $doc_obj->set_OID();
404
405 my $oid = $doc_obj->get_OID();
406 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
407
408 $doc_obj->add_utf8_metadata ($topsection, "audiolink", $appletlink);
409 $doc_obj->add_utf8_metadata ($topsection, "audioicon", "_iconaudio_");
410 $doc_obj->add_utf8_metadata ($topsection, "/audiolink", "</a>");
411
412 # if no title metadata defined, set it to filename minus extension
413 my $existing_title = $doc_obj->get_metadata_element($topsection,"Title");
414 if (!defined $existing_title)
415 {
416 my $title = $doc_obj->get_metadata_element($topsection,"Source");
417 $title =~ s/\..*?$//g;
418 $doc_obj->add_utf8_metadata ($topsection, "Title", $title);
419
420 my $title_safe = $title;
421 $title_safe =~ s/\'/\\\\&apos;/g;
422 $doc_obj->add_utf8_metadata ($topsection, "TitleSafe", $title_safe);
423 }
424
425 # process the document
426 $processor->process($doc_obj);
427 $self->cleanup_tmp_area();
428
429 $self->{'num_processed'} ++;
430
431 return 1;
432}
433
434
435# do plugin specific processing of doc_obj for HTML type
436sub process_type {
437 my $self = shift (@_);
438 my ($doc_ext, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
439
440 my $conv_filename = $self->{'conv_filename'};
441 my $tmp_dirname = File::Basename::dirname($conv_filename);
442 my $tmp_tailname = File::Basename::basename($conv_filename);
443
444 my $converted_to = $self->{'converted_to'};
445 my $ret_val = 1;
446
447# $ret_val = &RogPlug::process($self, $textref, $pluginfo,
448# $tmp_dirname, $tmp_tailname,
449# $metadata, $doc_obj);
450
451 # associate original file with doc object
452 my $cursection = $doc_obj->get_top_section();
453 my $filename = &util::filename_cat($base_dir, $file);
454 $doc_obj->associate_file($filename, "doc.$doc_ext", undef, $cursection);
455
456 my $doclink = "<a href=\"_httpcollection_/index/assoc/[archivedir]/doc.$doc_ext\">";
457 $doc_obj->add_utf8_metadata ($cursection, "srclink", $doclink);
458 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
459 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
460 return $ret_val;
461}
462
4631;
Note: See TracBrowser for help on using the repository browser.