source: trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm@ 5096

Last change on this file since 5096 was 4843, checked in by mdewsnip, 21 years ago

Added check to ConvertToRogPlug creation so that 'pluginfo.pl ConvertToRogPlug' doesn't crash and burn (because there is no target format specified).

  • Property svn:keywords set to Author Date Id Revision
File size: 13.9 KB
Line 
1###########################################################################
2#
3# ConvertToRogPlug.pm -- plugin that inherits from RogPlug
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package ConvertToRogPlug;
29
30use BasPlug;
31use RogPlug;
32
33
34sub BEGIN {
35 @ISA = ('RogPlug');
36}
37
38my $options = { 'name' => "ConvertToRogPlug",
39 'desc' => "A plugin that inherits from RogPlug.",
40 'inherits' => "Yes" };
41
42sub print_usage {
43 my ($plugin_name) = @_;
44
45 # for when this function is called directly by pluginfo.pl
46 if (ref ($plugin_name)) {
47 $plugin_name = ref ($plugin_name);
48 }
49
50 print STDERR "\n usage: plugin $plugin_name [options]\n\n";
51 print STDERR " options:\n";
52 print STDERR " [none at present]\n";
53}
54
55sub parse_args
56{
57 my $class = shift (@_);
58 my ($args) = @_;
59
60 my $plugin_name = $class;
61 $plugin_name =~ s/\.pm$//;
62
63 if (!parsargv::parse($args,
64 "allow_extra_options")) {
65
66 print STDERR "\nIncorrect options passed to $plugin_name, ";
67 print STDERR "check your collect.cfg configuration file\n";
68 &print_usage($plugin_name);
69 die "\n";
70 }
71
72 return ($plugin_name);
73}
74
75sub new {
76 my $class = shift (@_);
77 if ($class eq "ConvertToRogPlug" && defined $_[0]) {$class = shift (@_);}
78 my $self;
79 # parsargv::parse might modify the list, so we do this by creating a copy
80 # of the argument list.
81 my @arglist = @_;
82 my ($plugin_name) = $class->parse_args(\@_);
83
84 $self = new RogPlug ($class, @arglist);
85 $self->{'convert_to'} = "Rog";
86 $self->{'convert_to_ext'} = "rog";
87
88 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
89 my $option_list = $self->{'option_list'};
90 push( @{$option_list}, $options );
91
92 return bless $self, $class;
93}
94
95
96sub begin {
97 my $self = shift (@_);
98
99 $self->SUPER::begin(@_);
100
101 $self->{'docnum'} = 0;
102}
103
104sub end {
105 my ($self) = @_;
106
107 # nothing to do, but keep symmetric with begin function
108 $self->SUPER::end(@_);
109}
110
111
112# Run conversion utility on the input file.
113#
114# The conversion takes place in a collection specific 'tmp' directory so
115# that we don't accidentally damage the input.
116#
117# The desired output type is indicated by $output_ext. This is usually
118# something like "html" or "word", but can be "best" (or the empty string)
119# to indicate that the conversion utility should do the best it can.
120
121sub tmp_area_convert_file {
122 my $self = shift (@_);
123 my ($output_ext, $input_filename, $textref) = @_;
124
125 my $outhandle = $self->{'outhandle'};
126 my $convert_to = $self->{'convert_to'};
127 my $failhandle = $self->{'failhandle'};
128
129 # softlink to collection tmp dir
130 my $tmp_dirname
131 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
132 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
133
134 # derive tmp filename from input filename
135 my ($tailname, $dirname, $suffix)
136 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
137
138 # Remove any white space from filename -- no risk of name collision, and
139 # makes later conversion by utils simpler. Leave spaces in path...
140 $tailname =~ s/\s+//g;
141
142 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
143
144 &util::soft_link($input_filename, $tmp_filename);
145
146 my $verbosity = $self->{'verbosity'};
147 if ($verbosity > 0) {
148 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
149 }
150
151 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
152
153 # Execute the conversion command and get the type of the result,
154 # making sure the converter gives us the appropriate output type
155 my $output_type = lc($convert_to);
156 my $cmd = "perl -S gsMusicConvert.pl -verbose $verbosity -errlog \"$errlog\" -output $output_type \"$tmp_filename\"";
157 $output_type = `$cmd`;
158
159 # remove symbolic link to original file
160 &util::rm($tmp_filename);
161
162 # Check STDERR here
163 chomp $output_type;
164 if ($output_type eq "fail") {
165 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
166 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
167 $self->{'num_not_processed'} ++;
168 if (-s "$errlog") {
169 open(ERRLOG, "$errlog");
170 while (<ERRLOG>) {
171 print $outhandle "$_";
172 }
173 print $outhandle "\n";
174 close ERRLOG;
175 }
176 &util::rm("$errlog") if (-e "$errlog");
177 return "";
178 }
179
180 # store the *actual* output type and return the output filename
181 # it's possible we requested conversion to html, but only to text succeeded
182
183 $self->{'convert_to_ext'} = $output_type;
184 $self->{'converted_to'} = "Rog";
185
186 my $output_filename = $tmp_filename;
187
188 $output_filename =~ s/$suffix$//;
189
190 return $output_filename;
191}
192
193
194# Remove collection specific tmp directory and all its contents.
195
196sub cleanup_tmp_area {
197 my $self = shift (@_);
198
199 my $tmp_dirname
200 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
201 &util::rm_r($tmp_dirname);
202 &util::mk_dir($tmp_dirname);
203}
204
205
206# Exact copy of read_rog_record from RogPlug
207# Needed for FILE in right scope
208
209sub read_rog_record
210{
211 my ($self,$file_buffer, $docnum, $seclevel) = @_;
212
213 my $next_line = $file_buffer->{'next_line'};
214
215 return 0 if (!defined $next_line);
216
217 if ($next_line eq "")
218 {
219 my $line;
220 while(defined($line=<FILE>))
221 {
222 $line =~ s/\r$//;
223 $file_buffer->{'line_no'}++;
224 next if ($line =~ m/^\#/);
225 $next_line = $line;
226 last;
227 }
228 }
229
230 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
231 {
232 print STDERR "Error: Malformed Rog file: $next_line";
233 return 0;
234 }
235 else
236 {
237 # init default values
238 $file_buffer->{'song'}->{'tempo'} = 120;
239 $file_buffer->{'song'}->{'ks_type'} = 0;
240 $file_buffer->{'song'}->{'ks_num'} = 0;
241 $file_buffer->{'song'}->{'metadata'} = [];
242 $file_buffer->{'song'}->{'content'} = "";
243
244 $file_buffer->{'song'}->{'subcol'} = $2;
245 $file_buffer->{'song'}->{'title'} = $4;
246 $file_buffer->{'song'}->{'tval'} = $6;
247
248 chomp($next_line);
249 my $content = $next_line;
250 if (defined $docnum)
251 {
252 $content.= " $docnum $seclevel";
253 }
254 $content .= "\n";
255
256 $file_buffer->{'song'}->{'content'} = $content;
257
258
259 my $line;
260 while(defined($line=<FILE>))
261 {
262 $line =~ s/\r$//;
263
264 $file_buffer->{'line_no'}++;
265 next if ($line =~ m/^\#/);
266
267 if ($line =~ m/^song/)
268 {
269 $file_buffer->{'next_line'} = $line;
270 return 1;
271 }
272 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
273 {
274 $file_buffer->{'song'}->{'tempo'} = $2;
275 $file_buffer->{'song'}->{'content'} .= $line;
276 }
277 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
278 {
279 $file_buffer->{'song'}->{'ks_type'} = $2;
280 $file_buffer->{'song'}->{'ks_num'} = $4;
281 $file_buffer->{'song'}->{'content'} .= $line;
282 }
283 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
284 {
285 $file_buffer->{'song'}->{'ts_numer'} = $2;
286 $file_buffer->{'song'}->{'ts_denom'} = $4;
287 $file_buffer->{'song'}->{'content'} .= $line;
288 }
289 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
290 {
291 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
292 $file_buffer->{'song'}->{'content'} .= $line;
293 }
294 else
295 {
296 $file_buffer->{'song'}->{'content'} .= $line;
297 }
298 }
299
300 $file_buffer->{'next_line'} = undef;
301 }
302
303 return 1;
304}
305
306# Override RogPlug function so rog files are stored as sections (not docs)
307
308sub process_rog_record
309{
310 my ($self,$doc_obj,$cursection,$song) = @_;
311
312 $cursection =
313 $doc_obj->insert_section($cursection);
314 $self->{'docnum'}++;
315
316 my $title = $song->{'title'};
317 my $title_safe = $title;
318 $title_safe =~ s/\'/\\\\&apos;/g;
319
320 # add metadata
321 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
322 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
323 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
324 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
325 $doc_obj->add_metadata($cursection, "Title", $title);
326 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
327 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
328
329 foreach $md ( @{$song->{'metadata'}} )
330 {
331 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
332 }
333
334 # add contents as text
335 $doc_obj->add_text($cursection,$song->{'content'});
336
337 return $cursection;
338}
339
340
341
342# Override BasPlug read
343# We don't want to get language encoding stuff until after we've converted
344# our file to Rog format
345sub read {
346 my $self = shift (@_);
347 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
348# if ($self->is_recursive()) {
349# die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
350# }
351
352 my $outhandle = $self->{'outhandle'};
353
354 my $filename = $file;
355 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
356
357 if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
358 $self->{'num_blocked'} ++;
359 return 0;
360 }
361 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
362 return undef;
363 }
364 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
365
366 # read in file ($text will be in utf8)
367 my $text = "";
368
369 my $output_ext = $self->{'convert_to_ext'};
370 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
371
372 if ("$conv_filename" eq "") {return 0;} # allows continue on errors
373 $self->{'conv_filename'} = $conv_filename;
374
375
376 # create a new document
377 my $doc_obj = new doc ($conv_filename, "indexed_doc");
378 my $topsection = $doc_obj->get_top_section();
379 my $cursection = $topsection;
380
381 $self->{'docnum'}++;
382 my $docnum = $self->{'docnum'};
383
384 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
385 my ($filemeta) = $file =~ /([^\\\/]+)$/;
386 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
387 if ($self->{'cover_image'}) {
388 $self->associate_cover_image($doc_obj, $filename);
389 }
390
391
392 my $track_no = "1";
393 my $rog_filename = "$conv_filename$track_no.$output_ext";
394 while (1)
395 {
396 last unless open (FILE, $rog_filename) ;
397
398 my $file_buffer = { line_no => 0, next_line => "", song => {} };
399
400 while ($self->read_rog_record($file_buffer, $docnum, $track_no))
401 {
402 my $song = $file_buffer->{'song'};
403 my $content = $song->{'content'};
404 $content =~ s/^song\w+(.*)$/song $1 X.$track_no/;
405
406 $cursection
407 = $self->process_rog_record($doc_obj,$cursection,
408 $file_buffer->{'song'});
409 }
410
411 close FILE;
412
413 $track_no++;
414 $rog_filename = "$conv_filename$track_no.$output_ext";
415 }
416
417 print STDERR "\n";
418
419 # include any metadata passed in from previous plugins
420 # note that this metadata is associated with the top level section
421 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
422 # do plugin specific processing of doc_obj
423 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
424 # do any automatic metadata extraction
425 $self->auto_extract_metadata ($doc_obj);
426 # add an OID
427 $doc_obj->set_OID();
428
429 my $oid = $doc_obj->get_OID();
430 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
431
432 $doc_obj->add_utf8_metadata ($topsection, "audiolink", $appletlink);
433 $doc_obj->add_utf8_metadata ($topsection, "audioicon", "_iconaudio_");
434 $doc_obj->add_utf8_metadata ($topsection, "/audiolink", "</a>");
435
436 # if no title metadata defined, set it to filename minus extension
437 my $existing_title = $doc_obj->get_metadata_element($topsection,"Title");
438 if (!defined $existing_title)
439 {
440 my $title = $doc_obj->get_metadata_element($topsection,"Source");
441 $title =~ s/\..*?$//g;
442 $doc_obj->add_utf8_metadata ($topsection, "Title", $title);
443
444 my $title_safe = $title;
445 $title_safe =~ s/\'/\\\\&apos;/g;
446 $doc_obj->add_utf8_metadata ($topsection, "TitleSafe", $title_safe);
447 }
448
449 # process the document
450 $processor->process($doc_obj);
451 $self->cleanup_tmp_area();
452
453 $self->{'num_processed'} ++;
454
455 return 1;
456}
457
458
459# do plugin specific processing of doc_obj for HTML type
460sub process_type {
461 my $self = shift (@_);
462 my ($doc_ext, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
463
464 my $conv_filename = $self->{'conv_filename'};
465 my $tmp_dirname = File::Basename::dirname($conv_filename);
466 my $tmp_tailname = File::Basename::basename($conv_filename);
467
468 my $converted_to = $self->{'converted_to'};
469 my $ret_val = 1;
470
471# $ret_val = &RogPlug::process($self, $textref, $pluginfo,
472# $tmp_dirname, $tmp_tailname,
473# $metadata, $doc_obj);
474
475 # associate original file with doc object
476 my $cursection = $doc_obj->get_top_section();
477 my $filename = &util::filename_cat($base_dir, $file);
478 $doc_obj->associate_file($filename, "doc.$doc_ext", undef, $cursection);
479
480 my $doclink = "<a href=\"_httpcollection_/index/assoc/[archivedir]/doc.$doc_ext\">";
481 $doc_obj->add_utf8_metadata ($cursection, "srclink", $doclink);
482 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
483 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
484 return $ret_val;
485}
486
4871;
Note: See TracBrowser for help on using the repository browser.