source: trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm@ 7336

Last change on this file since 7336 was 7243, checked in by kjdon, 20 years ago

David said these were abstract plugins so set abstract to yes - GLI won't display them

  • Property svn:keywords set to Author Date Id Revision
File size: 14.1 KB
Line 
1###########################################################################
2#
3# ConvertToRogPlug.pm -- plugin that inherits from RogPlug
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package ConvertToRogPlug;
29
30use BasPlug;
31use RogPlug;
32
33
34sub BEGIN {
35 @ISA = ('RogPlug');
36}
37
38my $options = { 'name' => "ConvertToRogPlug",
39 'desc' => "{ConvertToRogPlug.desc}",
40 'abstract' => "yes",
41 'inherits' => "yes" };
42
43sub print_usage {
44 my ($plugin_name) = @_;
45
46 # for when this function is called directly by pluginfo.pl
47 if (ref ($plugin_name)) {
48 $plugin_name = ref ($plugin_name);
49 }
50
51 print STDERR "\n usage: plugin $plugin_name [options]\n\n";
52 print STDERR " options:\n";
53 print STDERR " [none at present]\n";
54}
55
56sub parse_args
57{
58 my $class = shift (@_);
59 my ($args) = @_;
60
61 my $plugin_name = $class;
62 $plugin_name =~ s/\.pm$//;
63
64 if (!parsargv::parse($args,
65 "allow_extra_options")) {
66
67 print STDERR "\nIncorrect options passed to $plugin_name, ";
68 print STDERR "check your collect.cfg configuration file\n";
69 &print_usage($plugin_name);
70 die "\n";
71 }
72
73 return ($plugin_name);
74}
75
76sub new {
77 my $class = shift (@_);
78 if ($class eq "ConvertToRogPlug" && defined $_[0]) {$class = shift (@_);}
79 my $self;
80 # parsargv::parse might modify the list, so we do this by creating a copy
81 # of the argument list.
82 my @arglist = @_;
83 my ($plugin_name) = $class->parse_args(\@_);
84
85 $self = new RogPlug ($class, @arglist);
86 $self->{'convert_to'} = "Rog";
87 $self->{'convert_to_ext'} = "rog";
88 $self->{'plugin_type'} = "ConvertToRogPlug";
89 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
90 my $option_list = $self->{'option_list'};
91 push( @{$option_list}, $options );
92
93 return bless $self, $class;
94}
95
96
97sub begin {
98 my $self = shift (@_);
99
100 $self->SUPER::begin(@_);
101
102 $self->{'docnum'} = 0;
103}
104
105sub end {
106 my ($self) = @_;
107
108 # nothing to do, but keep symmetric with begin function
109 $self->SUPER::end(@_);
110}
111
112
113# Run conversion utility on the input file.
114#
115# The conversion takes place in a collection specific 'tmp' directory so
116# that we don't accidentally damage the input.
117#
118# The desired output type is indicated by $output_ext. This is usually
119# something like "html" or "word", but can be "best" (or the empty string)
120# to indicate that the conversion utility should do the best it can.
121
122sub tmp_area_convert_file {
123 my $self = shift (@_);
124 my ($output_ext, $input_filename, $textref) = @_;
125
126 my $outhandle = $self->{'outhandle'};
127 my $convert_to = $self->{'convert_to'};
128 my $failhandle = $self->{'failhandle'};
129
130 # softlink to collection tmp dir
131 my $tmp_dirname
132 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
133 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
134
135 # derive tmp filename from input filename
136 my ($tailname, $dirname, $suffix)
137 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
138
139 # Remove any white space from filename -- no risk of name collision, and
140 # makes later conversion by utils simpler. Leave spaces in path...
141 $tailname =~ s/\s+//g;
142
143 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
144
145 &util::soft_link($input_filename, $tmp_filename);
146
147 my $verbosity = $self->{'verbosity'};
148 if ($verbosity > 0) {
149 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
150 }
151
152 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
153
154 # Execute the conversion command and get the type of the result,
155 # making sure the converter gives us the appropriate output type
156 my $output_type = lc($convert_to);
157 my $cmd = "perl -S gsMusicConvert.pl -verbose $verbosity -errlog \"$errlog\" -output $output_type \"$tmp_filename\"";
158 $output_type = `$cmd`;
159
160 # remove symbolic link to original file
161 &util::rm($tmp_filename);
162
163 # Check STDERR here
164 chomp $output_type;
165 if ($output_type eq "fail") {
166 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
167 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
168 $self->{'num_not_processed'} ++;
169 if (-s "$errlog") {
170 open(ERRLOG, "$errlog");
171 while (<ERRLOG>) {
172 print $outhandle "$_";
173 }
174 print $outhandle "\n";
175 close ERRLOG;
176 }
177 &util::rm("$errlog") if (-e "$errlog");
178 return "";
179 }
180
181 # store the *actual* output type and return the output filename
182 # it's possible we requested conversion to html, but only to text succeeded
183
184 $self->{'convert_to_ext'} = $output_type;
185 $self->{'converted_to'} = "Rog";
186
187 my $output_filename = $tmp_filename;
188
189 $output_filename =~ s/$suffix$//;
190
191 return $output_filename;
192}
193
194
195# Remove collection specific tmp directory and all its contents.
196
197sub cleanup_tmp_area {
198 my $self = shift (@_);
199
200 my $tmp_dirname
201 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
202 &util::rm_r($tmp_dirname);
203 &util::mk_dir($tmp_dirname);
204}
205
206
207# Exact copy of read_rog_record from RogPlug
208# Needed for FILE in right scope
209
210sub read_rog_record
211{
212 my ($self,$file_buffer, $docnum, $seclevel) = @_;
213
214 my $next_line = $file_buffer->{'next_line'};
215
216 return 0 if (!defined $next_line);
217
218 if ($next_line eq "")
219 {
220 my $line;
221 while(defined($line=<FILE>))
222 {
223 $line =~ s/\r$//;
224 $file_buffer->{'line_no'}++;
225 next if ($line =~ m/^\#/);
226 $next_line = $line;
227 last;
228 }
229 }
230
231 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
232 {
233 print STDERR "Error: Malformed Rog file: $next_line";
234 return 0;
235 }
236 else
237 {
238 # init default values
239 $file_buffer->{'song'}->{'tempo'} = 120;
240 $file_buffer->{'song'}->{'ks_type'} = 0;
241 $file_buffer->{'song'}->{'ks_num'} = 0;
242 $file_buffer->{'song'}->{'metadata'} = [];
243 $file_buffer->{'song'}->{'content'} = "";
244
245 $file_buffer->{'song'}->{'subcol'} = $2;
246 $file_buffer->{'song'}->{'title'} = $4;
247 $file_buffer->{'song'}->{'tval'} = $6;
248
249 chomp($next_line);
250 my $content = $next_line;
251 if (defined $docnum)
252 {
253 $content.= " $docnum $seclevel";
254 }
255 $content .= "\n";
256
257 $file_buffer->{'song'}->{'content'} = $content;
258
259
260 my $line;
261 while(defined($line=<FILE>))
262 {
263 $line =~ s/\r$//;
264
265 $file_buffer->{'line_no'}++;
266 next if ($line =~ m/^\#/);
267
268 if ($line =~ m/^song/)
269 {
270 $file_buffer->{'next_line'} = $line;
271 return 1;
272 }
273 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
274 {
275 $file_buffer->{'song'}->{'tempo'} = $2;
276 $file_buffer->{'song'}->{'content'} .= $line;
277 }
278 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
279 {
280 $file_buffer->{'song'}->{'ks_type'} = $2;
281 $file_buffer->{'song'}->{'ks_num'} = $4;
282 $file_buffer->{'song'}->{'content'} .= $line;
283 }
284 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
285 {
286 $file_buffer->{'song'}->{'ts_numer'} = $2;
287 $file_buffer->{'song'}->{'ts_denom'} = $4;
288 $file_buffer->{'song'}->{'content'} .= $line;
289 }
290 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
291 {
292 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
293 $file_buffer->{'song'}->{'content'} .= $line;
294 }
295 else
296 {
297 $file_buffer->{'song'}->{'content'} .= $line;
298 }
299 }
300
301 $file_buffer->{'next_line'} = undef;
302 }
303
304 return 1;
305}
306
307# Override RogPlug function so rog files are stored as sections (not docs)
308
309sub process_rog_record
310{
311 my ($self,$doc_obj,$cursection,$song) = @_;
312
313 $cursection =
314 $doc_obj->insert_section($cursection);
315 $self->{'docnum'}++;
316
317 my $title = $song->{'title'};
318 my $title_safe = $title;
319 $title_safe =~ s/\'/\\\\&apos;/g;
320
321 # add metadata
322 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
323 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
324 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
325 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
326 $doc_obj->add_metadata($cursection, "Title", $title);
327 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
328 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
329
330 foreach $md ( @{$song->{'metadata'}} )
331 {
332 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
333 }
334
335 # add contents as text
336 $doc_obj->add_text($cursection,$song->{'content'});
337
338 return $cursection;
339}
340
341
342
343# Override BasPlug read
344# We don't want to get language encoding stuff until after we've converted
345# our file to Rog format
346sub read {
347 my $self = shift (@_);
348 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
349# if ($self->is_recursive()) {
350# die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
351# }
352
353 my $outhandle = $self->{'outhandle'};
354
355 my $filename = $file;
356 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
357
358 if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
359 $self->{'num_blocked'} ++;
360 return 0;
361 }
362 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
363 return undef;
364 }
365 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
366
367 # read in file ($text will be in utf8)
368 my $text = "";
369
370 my $output_ext = $self->{'convert_to_ext'};
371 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
372
373 if ("$conv_filename" eq "") {return 0;} # allows continue on errors
374 $self->{'conv_filename'} = $conv_filename;
375
376
377 # create a new document
378 my $doc_obj = new doc ($conv_filename, "indexed_doc");
379 my $topsection = $doc_obj->get_top_section();
380 my $cursection = $topsection;
381
382 $self->{'docnum'}++;
383 my $docnum = $self->{'docnum'};
384
385 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
386 my ($filemeta) = $file =~ /([^\\\/]+)$/;
387 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
388 if ($self->{'cover_image'}) {
389 $self->associate_cover_image($doc_obj, $filename);
390 }
391 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "$self->{'plugin_type'}", "1");
392
393
394 my $track_no = "1";
395 my $rog_filename = "$conv_filename$track_no.$output_ext";
396 while (1)
397 {
398 last unless open (FILE, $rog_filename) ;
399
400 my $file_buffer = { line_no => 0, next_line => "", song => {} };
401
402 while ($self->read_rog_record($file_buffer, $docnum, $track_no))
403 {
404 my $song = $file_buffer->{'song'};
405 my $content = $song->{'content'};
406 $content =~ s/^song\w+(.*)$/song $1 X.$track_no/;
407
408 $cursection
409 = $self->process_rog_record($doc_obj,$cursection,
410 $file_buffer->{'song'});
411 }
412
413 close FILE;
414
415 $track_no++;
416 $rog_filename = "$conv_filename$track_no.$output_ext";
417 }
418
419 print STDERR "\n";
420
421 # include any metadata passed in from previous plugins
422 # note that this metadata is associated with the top level section
423 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
424 # do plugin specific processing of doc_obj
425 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
426 # do any automatic metadata extraction
427 $self->auto_extract_metadata ($doc_obj);
428 # add an OID
429 $doc_obj->set_OID();
430
431 my $oid = $doc_obj->get_OID();
432 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
433
434 $doc_obj->add_utf8_metadata ($topsection, "audiolink", $appletlink);
435 $doc_obj->add_utf8_metadata ($topsection, "audioicon", "_iconaudio_");
436 $doc_obj->add_utf8_metadata ($topsection, "/audiolink", "</a>");
437
438 # if no title metadata defined, set it to filename minus extension
439 my $existing_title = $doc_obj->get_metadata_element($topsection,"Title");
440 if (!defined $existing_title)
441 {
442 my $title = $doc_obj->get_metadata_element($topsection,"Source");
443 $title =~ s/\..*?$//g;
444 $doc_obj->add_utf8_metadata ($topsection, "Title", $title);
445
446 my $title_safe = $title;
447 $title_safe =~ s/\'/\\\\&apos;/g;
448 $doc_obj->add_utf8_metadata ($topsection, "TitleSafe", $title_safe);
449 }
450
451 # process the document
452 $processor->process($doc_obj);
453 $self->cleanup_tmp_area();
454
455 $self->{'num_processed'} ++;
456
457 return 1;
458}
459
460
461# do plugin specific processing of doc_obj for HTML type
462sub process_type {
463 my $self = shift (@_);
464 my ($doc_ext, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
465
466 my $conv_filename = $self->{'conv_filename'};
467 my $tmp_dirname = File::Basename::dirname($conv_filename);
468 my $tmp_tailname = File::Basename::basename($conv_filename);
469
470 my $converted_to = $self->{'converted_to'};
471 my $ret_val = 1;
472
473# $ret_val = &RogPlug::process($self, $textref, $pluginfo,
474# $tmp_dirname, $tmp_tailname,
475# $metadata, $doc_obj);
476
477 # associate original file with doc object
478 my $cursection = $doc_obj->get_top_section();
479 my $filename = &util::filename_cat($base_dir, $file);
480 $doc_obj->associate_file($filename, "doc.$doc_ext", undef, $cursection);
481
482 my $doclink = "<a href=\"_httpcollection_/index/assoc/[archivedir]/doc.$doc_ext\">";
483 $doc_obj->add_utf8_metadata ($cursection, "srclink", $doclink);
484 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
485 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
486 return $ret_val;
487}
488
4891;
Note: See TracBrowser for help on using the repository browser.