source: trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm@ 4785

Last change on this file since 4785 was 4745, checked in by mdewsnip, 21 years ago

Uncommented a line which shouldn't have been committed commented.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.9 KB
Line 
1###########################################################################
2#
3# ConvertToRogPlug.pm -- plugin that inherits from RogPlug
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package ConvertToRogPlug;
29
30use BasPlug;
31use RogPlug;
32
33
34sub BEGIN {
35 @ISA = ('RogPlug');
36}
37
38my $options = { 'name' => "ConvertToRogPlug",
39 'desc' => "A plugin that inherits from RogPlug.",
40 'inherits' => "Yes" };
41
42sub print_usage {
43 my ($plugin_name) = @_;
44
45 # for when this function is called directly by pluginfo.pl
46 if (ref ($plugin_name)) {
47 $plugin_name = ref ($plugin_name);
48 }
49
50 print STDERR "\n usage: plugin $plugin_name [options]\n\n";
51 print STDERR " options:\n";
52 print STDERR " [none at present]\n";
53}
54
55sub parse_args
56{
57 my $class = shift (@_);
58 my ($args) = @_;
59
60 my $plugin_name = $class;
61 $plugin_name =~ s/\.pm$//;
62
63 if (!parsargv::parse($args,
64 "allow_extra_options")) {
65
66 print STDERR "\nIncorrect options passed to $plugin_name, ";
67 print STDERR "check your collect.cfg configuration file\n";
68 &print_usage($plugin_name);
69 die "\n";
70 }
71
72 return ($plugin_name);
73}
74
75sub new {
76 my $class = shift (@_);
77 if ($class eq "ConvertToRogPlug") {$class = shift (@_);}
78 my $self;
79 # parsargv::parse might modify the list, so we do this by creating a copy
80 # of the argument list.
81 my @arglist = @_;
82 my ($plugin_name) = $class->parse_args(\@_);
83
84 $self = new RogPlug ($class, @arglist);
85 $self->{'convert_to'} = "Rog";
86 $self->{'convert_to_ext'} = "rog";
87
88 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
89 my $option_list = $self->{'option_list'};
90 push( @{$option_list}, $options );
91
92 return bless $self, $class;
93}
94
95
96sub begin {
97 my $self = shift (@_);
98
99 $self->SUPER::begin(@_);
100
101 $self->{'docnum'} = 0;
102}
103
104sub end {
105 my ($self) = @_;
106
107 # nothing to do, but keep symmetric with begin function
108 $self->SUPER::end(@_);
109}
110
111
112# Run conversion utility on the input file.
113#
114# The conversion takes place in a collection specific 'tmp' directory so
115# that we don't accidentally damage the input.
116#
117# The desired output type is indicated by $output_ext. This is usually
118# something like "html" or "word", but can be "best" (or the empty string)
119# to indicate that the conversion utility should do the best it can.
120
121sub tmp_area_convert_file {
122 my $self = shift (@_);
123 my ($output_ext, $input_filename, $textref) = @_;
124
125 my $outhandle = $self->{'outhandle'};
126 my $convert_to = $self->{'convert_to'};
127 my $failhandle = $self->{'failhandle'};
128
129 # softlink to collection tmp dir
130 my $tmp_dirname
131 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
132 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
133
134 # derive tmp filename from input filename
135 my ($tailname, $dirname, $suffix)
136 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
137
138 # Remove any white space from filename -- no risk of name collision, and
139 # makes later conversion by utils simpler. Leave spaces in path...
140 $tailname =~ s/\s+//g;
141
142 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
143
144 &util::soft_link($input_filename, $tmp_filename);
145
146 my $verbosity = $self->{'verbosity'};
147 if ($verbosity > 0) {
148 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
149 }
150
151 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
152
153 # Execute the conversion command and get the type of the result,
154 # making sure the converter gives us the appropriate output type
155 my $output_type = lc($convert_to);
156 my $cmd = "perl -S gsMusicConvert.pl -verbose $verbosity -errlog \"$errlog\" -output $output_type \"$tmp_filename\"";
157 $output_type = `$cmd`;
158
159 # remove symbolic link to original file
160 &util::rm($tmp_filename);
161
162 # Check STDERR here
163 chomp $output_type;
164 if ($output_type eq "fail") {
165 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
166 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
167 $self->{'num_not_processed'} ++;
168 if (-s "$errlog") {
169 open(ERRLOG, "$errlog");
170 while (<ERRLOG>) {
171 print $outhandle "$_";
172 }
173 print $outhandle "\n";
174 close ERRLOG;
175 }
176 &util::rm("$errlog") if (-e "$errlog");
177 return "";
178 }
179
180 # store the *actual* output type and return the output filename
181 # it's possible we requested conversion to html, but only to text succeeded
182
183 $self->{'convert_to_ext'} = $output_type;
184 $self->{'converted_to'} = "Rog";
185
186 my $output_filename = $tmp_filename;
187
188 $output_filename =~ s/$suffix$//;
189
190 return $output_filename;
191}
192
193
194# Remove collection specific tmp directory and all its contents.
195
196sub cleanup_tmp_area {
197 my $self = shift (@_);
198
199 my $tmp_dirname
200 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
201 &util::rm_r($tmp_dirname);
202 &util::mk_dir($tmp_dirname);
203}
204
205
206# Exact copy of read_rog_record from RogPlug
207# Needed for FILE in right scope
208
209sub read_rog_record
210{
211 my ($self,$file_buffer, $docnum, $seclevel) = @_;
212
213 my $next_line = $file_buffer->{'next_line'};
214
215 return 0 if (!defined $next_line);
216
217 if ($next_line eq "")
218 {
219 my $line;
220 while(defined($line=<FILE>))
221 {
222 $line =~ s/\r$//;
223 $file_buffer->{'line_no'}++;
224 next if ($line =~ m/^\#/);
225 $next_line = $line;
226 last;
227 }
228 }
229
230 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
231 {
232 print STDERR "Error: Malformed Rog file: $next_line";
233 return 0;
234 }
235 else
236 {
237 # init default values
238 $file_buffer->{'song'}->{'tempo'} = 120;
239 $file_buffer->{'song'}->{'ks_type'} = 0;
240 $file_buffer->{'song'}->{'ks_num'} = 0;
241 $file_buffer->{'song'}->{'metadata'} = [];
242 $file_buffer->{'song'}->{'content'} = "";
243
244 $file_buffer->{'song'}->{'subcol'} = $2;
245 $file_buffer->{'song'}->{'title'} = $4;
246 $file_buffer->{'song'}->{'tval'} = $6;
247
248 chomp($next_line);
249 my $content = $next_line;
250 if (defined $docnum)
251 {
252 $content.= " $docnum $seclevel";
253 }
254 $content .= "\n";
255
256 $file_buffer->{'song'}->{'content'} = $content;
257
258
259 my $line;
260 while(defined($line=<FILE>))
261 {
262 $line =~ s/\r$//;
263
264 $file_buffer->{'line_no'}++;
265 next if ($line =~ m/^\#/);
266
267 if ($line =~ m/^song/)
268 {
269 $file_buffer->{'next_line'} = $line;
270 return 1;
271 }
272 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
273 {
274 $file_buffer->{'song'}->{'tempo'} = $2;
275 $file_buffer->{'song'}->{'content'} .= $line;
276 }
277 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
278 {
279 $file_buffer->{'song'}->{'ks_type'} = $2;
280 $file_buffer->{'song'}->{'ks_num'} = $4;
281 $file_buffer->{'song'}->{'content'} .= $line;
282 }
283 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
284 {
285 $file_buffer->{'song'}->{'ts_numer'} = $2;
286 $file_buffer->{'song'}->{'ts_denom'} = $4;
287 $file_buffer->{'song'}->{'content'} .= $line;
288 }
289 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
290 {
291 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
292 $file_buffer->{'song'}->{'content'} .= $line;
293 }
294 else
295 {
296 $file_buffer->{'song'}->{'content'} .= $line;
297 }
298 }
299
300 $file_buffer->{'next_line'} = undef;
301 }
302
303 return 1;
304}
305
306# Override RogPlug function so rog files are stored as sections (not docs)
307
308sub process_rog_record
309{
310 my ($self,$doc_obj,$cursection,$song) = @_;
311
312 $cursection =
313 $doc_obj->insert_section($cursection);
314 $self->{'docnum'}++;
315
316 my $title = $song->{'title'};
317 my $title_safe = $title;
318 $title_safe =~ s/\'/\\\\&apos;/g;
319
320 # add metadata
321 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
322 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
323 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
324 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
325 $doc_obj->add_metadata($cursection, "Title", $title);
326 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
327 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
328
329 foreach $md ( @{$song->{'metadata'}} )
330 {
331 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
332 }
333
334 # add contents as text
335 $doc_obj->add_text($cursection,$song->{'content'});
336
337 return $cursection;
338}
339
340
341
342# Override BasPlug read
343# We don't want to get language encoding stuff until after we've converted
344# our file to Rog format
345sub read {
346 my $self = shift (@_);
347 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
348# if ($self->is_recursive()) {
349# die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
350# }
351
352 my $outhandle = $self->{'outhandle'};
353
354 my $filename = $file;
355 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
356
357 if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
358 $self->{'num_blocked'} ++;
359 return 0;
360 }
361 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
362 return undef;
363 }
364 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
365
366 # read in file ($text will be in utf8)
367 my $text = "";
368
369 my $output_ext = $self->{'convert_to_ext'};
370 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
371
372 if ("$conv_filename" eq "") {return 0;} # allows continue on errors
373 $self->{'conv_filename'} = $conv_filename;
374
375
376 # create a new document
377 my $doc_obj = new doc ($conv_filename, "indexed_doc");
378 my $topsection = $doc_obj->get_top_section();
379 my $cursection = $topsection;
380
381 $self->{'docnum'}++;
382 my $docnum = $self->{'docnum'};
383
384 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
385 my ($filemeta) = $file =~ /([^\\\/]+)$/;
386 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
387 if ($self->{'cover_image'}) {
388 $self->associate_cover_image($doc_obj, $filename);
389 }
390
391
392 my $track_no = "1";
393 my $rog_filename = "$conv_filename$track_no.$output_ext";
394 while (1)
395 {
396 last unless open (FILE, $rog_filename) ;
397
398 my $file_buffer = { line_no => 0, next_line => "", song => {} };
399
400 while ($self->read_rog_record($file_buffer, $docnum, $track_no))
401 {
402 my $song = $file_buffer->{'song'};
403 my $content = $song->{'content'};
404 $content =~ s/^song\w+(.*)$/song $1 X.$track_no/;
405
406 $cursection
407 = $self->process_rog_record($doc_obj,$cursection,
408 $file_buffer->{'song'});
409 }
410
411 close FILE;
412
413 $track_no++;
414 $rog_filename = "$conv_filename$track_no.$output_ext";
415 }
416
417 print STDERR "\n";
418
419 # include any metadata passed in from previous plugins
420 # note that this metadata is associated with the top level section
421 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
422 # do plugin specific processing of doc_obj
423 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
424 # do any automatic metadata extraction
425 $self->auto_extract_metadata ($doc_obj);
426 # add an OID
427 $doc_obj->set_OID();
428
429 my $oid = $doc_obj->get_OID();
430 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
431
432 $doc_obj->add_utf8_metadata ($topsection, "audiolink", $appletlink);
433 $doc_obj->add_utf8_metadata ($topsection, "audioicon", "_iconaudio_");
434 $doc_obj->add_utf8_metadata ($topsection, "/audiolink", "</a>");
435
436 # if no title metadata defined, set it to filename minus extension
437 my $existing_title = $doc_obj->get_metadata_element($topsection,"Title");
438 if (!defined $existing_title)
439 {
440 my $title = $doc_obj->get_metadata_element($topsection,"Source");
441 $title =~ s/\..*?$//g;
442 $doc_obj->add_utf8_metadata ($topsection, "Title", $title);
443
444 my $title_safe = $title;
445 $title_safe =~ s/\'/\\\\&apos;/g;
446 $doc_obj->add_utf8_metadata ($topsection, "TitleSafe", $title_safe);
447 }
448
449 # process the document
450 $processor->process($doc_obj);
451 $self->cleanup_tmp_area();
452
453 $self->{'num_processed'} ++;
454
455 return 1;
456}
457
458
459# do plugin specific processing of doc_obj for HTML type
460sub process_type {
461 my $self = shift (@_);
462 my ($doc_ext, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
463
464 my $conv_filename = $self->{'conv_filename'};
465 my $tmp_dirname = File::Basename::dirname($conv_filename);
466 my $tmp_tailname = File::Basename::basename($conv_filename);
467
468 my $converted_to = $self->{'converted_to'};
469 my $ret_val = 1;
470
471# $ret_val = &RogPlug::process($self, $textref, $pluginfo,
472# $tmp_dirname, $tmp_tailname,
473# $metadata, $doc_obj);
474
475 # associate original file with doc object
476 my $cursection = $doc_obj->get_top_section();
477 my $filename = &util::filename_cat($base_dir, $file);
478 $doc_obj->associate_file($filename, "doc.$doc_ext", undef, $cursection);
479
480 my $doclink = "<a href=\"_httpcollection_/index/assoc/[archivedir]/doc.$doc_ext\">";
481 $doc_obj->add_utf8_metadata ($cursection, "srclink", $doclink);
482 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
483 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
484 return $ret_val;
485}
486
4871;
Note: See TracBrowser for help on using the repository browser.