source: trunk/gsdl/perllib/plugins/ConvertToRogPlug.pm@ 7830

Last change on this file since 7830 was 7571, checked in by kjdon, 20 years ago

origianl filename is now used for gsdlsourcefilename, and converted filenmae is set as gsdlconvertedfilename

  • Property svn:keywords set to Author Date Id Revision
File size: 14.0 KB
Line 
1###########################################################################
2#
3# ConvertToRogPlug.pm -- plugin that inherits from RogPlug
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package ConvertToRogPlug;
29
30use BasPlug;
31use RogPlug;
32
33
34sub BEGIN {
35 @ISA = ('RogPlug');
36}
37
38my $options = { 'name' => "ConvertToRogPlug",
39 'desc' => "{ConvertToRogPlug.desc}",
40 'abstract' => "yes",
41 'inherits' => "yes" };
42
43
44sub parse_args
45{
46 my $class = shift (@_);
47 my ($args) = @_;
48
49 my $plugin_name = $class;
50 $plugin_name =~ s/\.pm$//;
51
52 if (!parsargv::parse($args,
53 "allow_extra_options")) {
54
55 print STDERR "\nIncorrect options passed to $plugin_name, ";
56 print STDERR "check your collect.cfg configuration file\n";
57 $self->print_txt_usage(""); # Use default resource bundle
58 die "\n";
59 }
60
61 return ($plugin_name);
62}
63
64sub new {
65 my $class = shift (@_);
66 if ($class eq "ConvertToRogPlug" && defined $_[0]) {$class = shift (@_);}
67 my $self;
68 # parsargv::parse might modify the list, so we do this by creating a copy
69 # of the argument list.
70 my @arglist = @_;
71 my ($plugin_name) = $class->parse_args(\@_);
72
73 $self = new RogPlug ($class, @arglist);
74 $self->{'convert_to'} = "Rog";
75 $self->{'convert_to_ext'} = "rog";
76 $self->{'plugin_type'} = "ConvertToRogPlug";
77 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
78 my $option_list = $self->{'option_list'};
79 push( @{$option_list}, $options );
80
81 return bless $self, $class;
82}
83
84
85sub begin {
86 my $self = shift (@_);
87
88 $self->SUPER::begin(@_);
89
90 $self->{'docnum'} = 0;
91}
92
93sub end {
94 my ($self) = @_;
95
96 # nothing to do, but keep symmetric with begin function
97 $self->SUPER::end(@_);
98}
99
100
101# Run conversion utility on the input file.
102#
103# The conversion takes place in a collection specific 'tmp' directory so
104# that we don't accidentally damage the input.
105#
106# The desired output type is indicated by $output_ext. This is usually
107# something like "html" or "word", but can be "best" (or the empty string)
108# to indicate that the conversion utility should do the best it can.
109
110sub tmp_area_convert_file {
111 my $self = shift (@_);
112 my ($output_ext, $input_filename, $textref) = @_;
113
114 my $outhandle = $self->{'outhandle'};
115 my $convert_to = $self->{'convert_to'};
116 my $failhandle = $self->{'failhandle'};
117
118 # softlink to collection tmp dir
119 my $tmp_dirname
120 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
121 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
122
123 # derive tmp filename from input filename
124 my ($tailname, $dirname, $suffix)
125 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
126
127 # Remove any white space from filename -- no risk of name collision, and
128 # makes later conversion by utils simpler. Leave spaces in path...
129 $tailname =~ s/\s+//g;
130
131 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
132
133 &util::soft_link($input_filename, $tmp_filename);
134
135 my $verbosity = $self->{'verbosity'};
136 if ($verbosity > 0) {
137 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
138 }
139
140 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
141
142 # Execute the conversion command and get the type of the result,
143 # making sure the converter gives us the appropriate output type
144 my $output_type = lc($convert_to);
145 my $cmd = "perl -S gsMusicConvert.pl -verbose $verbosity -errlog \"$errlog\" -output $output_type \"$tmp_filename\"";
146 $output_type = `$cmd`;
147
148 # remove symbolic link to original file
149 &util::rm($tmp_filename);
150
151 # Check STDERR here
152 chomp $output_type;
153 if ($output_type eq "fail") {
154 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
155 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
156 $self->{'num_not_processed'} ++;
157 if (-s "$errlog") {
158 open(ERRLOG, "$errlog");
159 while (<ERRLOG>) {
160 print $outhandle "$_";
161 }
162 print $outhandle "\n";
163 close ERRLOG;
164 }
165 &util::rm("$errlog") if (-e "$errlog");
166 return "";
167 }
168
169 # store the *actual* output type and return the output filename
170 # it's possible we requested conversion to html, but only to text succeeded
171
172 $self->{'convert_to_ext'} = $output_type;
173 $self->{'converted_to'} = "Rog";
174
175 my $output_filename = $tmp_filename;
176
177 $output_filename =~ s/$suffix$//;
178
179 return $output_filename;
180}
181
182
183# Remove collection specific tmp directory and all its contents.
184
185sub cleanup_tmp_area {
186 my $self = shift (@_);
187
188 my $tmp_dirname
189 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
190 &util::rm_r($tmp_dirname);
191 &util::mk_dir($tmp_dirname);
192}
193
194
195# Exact copy of read_rog_record from RogPlug
196# Needed for FILE in right scope
197
198sub read_rog_record
199{
200 my ($self,$file_buffer, $docnum, $seclevel) = @_;
201
202 my $next_line = $file_buffer->{'next_line'};
203
204 return 0 if (!defined $next_line);
205
206 if ($next_line eq "")
207 {
208 my $line;
209 while(defined($line=<FILE>))
210 {
211 $line =~ s/\r$//;
212 $file_buffer->{'line_no'}++;
213 next if ($line =~ m/^\#/);
214 $next_line = $line;
215 last;
216 }
217 }
218
219 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
220 {
221 print STDERR "Error: Malformed Rog file: $next_line";
222 return 0;
223 }
224 else
225 {
226 # init default values
227 $file_buffer->{'song'}->{'tempo'} = 120;
228 $file_buffer->{'song'}->{'ks_type'} = 0;
229 $file_buffer->{'song'}->{'ks_num'} = 0;
230 $file_buffer->{'song'}->{'metadata'} = [];
231 $file_buffer->{'song'}->{'content'} = "";
232
233 $file_buffer->{'song'}->{'subcol'} = $2;
234 $file_buffer->{'song'}->{'title'} = $4;
235 $file_buffer->{'song'}->{'tval'} = $6;
236
237 chomp($next_line);
238 my $content = $next_line;
239 if (defined $docnum)
240 {
241 $content.= " $docnum $seclevel";
242 }
243 $content .= "\n";
244
245 $file_buffer->{'song'}->{'content'} = $content;
246
247
248 my $line;
249 while(defined($line=<FILE>))
250 {
251 $line =~ s/\r$//;
252
253 $file_buffer->{'line_no'}++;
254 next if ($line =~ m/^\#/);
255
256 if ($line =~ m/^song/)
257 {
258 $file_buffer->{'next_line'} = $line;
259 return 1;
260 }
261 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
262 {
263 $file_buffer->{'song'}->{'tempo'} = $2;
264 $file_buffer->{'song'}->{'content'} .= $line;
265 }
266 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
267 {
268 $file_buffer->{'song'}->{'ks_type'} = $2;
269 $file_buffer->{'song'}->{'ks_num'} = $4;
270 $file_buffer->{'song'}->{'content'} .= $line;
271 }
272 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
273 {
274 $file_buffer->{'song'}->{'ts_numer'} = $2;
275 $file_buffer->{'song'}->{'ts_denom'} = $4;
276 $file_buffer->{'song'}->{'content'} .= $line;
277 }
278 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
279 {
280 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
281 $file_buffer->{'song'}->{'content'} .= $line;
282 }
283 else
284 {
285 $file_buffer->{'song'}->{'content'} .= $line;
286 }
287 }
288
289 $file_buffer->{'next_line'} = undef;
290 }
291
292 return 1;
293}
294
295# Override RogPlug function so rog files are stored as sections (not docs)
296
297sub process_rog_record
298{
299 my ($self,$doc_obj,$cursection,$song) = @_;
300
301 $cursection =
302 $doc_obj->insert_section($cursection);
303 $self->{'docnum'}++;
304
305 my $title = $song->{'title'};
306 my $title_safe = $title;
307 $title_safe =~ s/\'/\\\\&apos;/g;
308
309 # add metadata
310 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
311 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
312 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
313 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
314 $doc_obj->add_metadata($cursection, "Title", $title);
315 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
316 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
317
318 foreach $md ( @{$song->{'metadata'}} )
319 {
320 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
321 }
322
323 # add contents as text
324 $doc_obj->add_text($cursection,$song->{'content'});
325
326 return $cursection;
327}
328
329
330
331# Override BasPlug read
332# We don't want to get language encoding stuff until after we've converted
333# our file to Rog format
334sub read {
335 my $self = shift (@_);
336 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
337# if ($self->is_recursive()) {
338# die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
339# }
340
341 my $outhandle = $self->{'outhandle'};
342
343 my $filename = $file;
344 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
345
346 if ($self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/) {
347 $self->{'num_blocked'} ++;
348 return 0;
349 }
350 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
351 return undef;
352 }
353 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
354
355 # read in file ($text will be in utf8)
356 my $text = "";
357
358 my $output_ext = $self->{'convert_to_ext'};
359 my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
360
361 if ("$conv_filename" eq "") {return 0;} # allows continue on errors
362 $self->{'conv_filename'} = $conv_filename;
363
364
365 # create a new document
366 #my $doc_obj = new doc ($conv_filename, "indexed_doc");
367 # the original filename is used now
368 my $doc_obj = new doc ($filename, "indexed_doc");
369 # the converted filename is set separately
370 $doc_obj->set_converted_filename($conv_filename);
371
372 my $topsection = $doc_obj->get_top_section();
373 my $cursection = $topsection;
374
375 $self->{'docnum'}++;
376 my $docnum = $self->{'docnum'};
377
378 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
379 my ($filemeta) = $file =~ /([^\\\/]+)$/;
380 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
381 if ($self->{'cover_image'}) {
382 $self->associate_cover_image($doc_obj, $filename);
383 }
384 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
385
386
387 my $track_no = "1";
388 my $rog_filename = "$conv_filename$track_no.$output_ext";
389 while (1)
390 {
391 last unless open (FILE, $rog_filename) ;
392
393 my $file_buffer = { line_no => 0, next_line => "", song => {} };
394
395 while ($self->read_rog_record($file_buffer, $docnum, $track_no))
396 {
397 my $song = $file_buffer->{'song'};
398 my $content = $song->{'content'};
399 $content =~ s/^song\w+(.*)$/song $1 X.$track_no/;
400
401 $cursection
402 = $self->process_rog_record($doc_obj,$cursection,
403 $file_buffer->{'song'});
404 }
405
406 close FILE;
407
408 $track_no++;
409 $rog_filename = "$conv_filename$track_no.$output_ext";
410 }
411
412 print STDERR "\n";
413
414 # include any metadata passed in from previous plugins
415 # note that this metadata is associated with the top level section
416 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
417 # do plugin specific processing of doc_obj
418 return -1 unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
419 # do any automatic metadata extraction
420 $self->auto_extract_metadata ($doc_obj);
421 # add an OID
422 $doc_obj->set_OID();
423
424 my $oid = $doc_obj->get_OID();
425 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
426
427 $doc_obj->add_utf8_metadata ($topsection, "audiolink", $appletlink);
428 $doc_obj->add_utf8_metadata ($topsection, "audioicon", "_iconaudio_");
429 $doc_obj->add_utf8_metadata ($topsection, "/audiolink", "</a>");
430
431 # if no title metadata defined, set it to filename minus extension
432 my $existing_title = $doc_obj->get_metadata_element($topsection,"Title");
433 if (!defined $existing_title)
434 {
435 my $title = $doc_obj->get_metadata_element($topsection,"Source");
436 $title =~ s/\..*?$//g;
437 $doc_obj->add_utf8_metadata ($topsection, "Title", $title);
438
439 my $title_safe = $title;
440 $title_safe =~ s/\'/\\\\&apos;/g;
441 $doc_obj->add_utf8_metadata ($topsection, "TitleSafe", $title_safe);
442 }
443
444 # process the document
445 $processor->process($doc_obj);
446 $self->cleanup_tmp_area();
447
448 $self->{'num_processed'} ++;
449
450 return 1;
451}
452
453
454# do plugin specific processing of doc_obj for HTML type
455sub process_type {
456 my $self = shift (@_);
457 my ($doc_ext, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
458
459 my $conv_filename = $self->{'conv_filename'};
460 my $tmp_dirname = File::Basename::dirname($conv_filename);
461 my $tmp_tailname = File::Basename::basename($conv_filename);
462
463 my $converted_to = $self->{'converted_to'};
464 my $ret_val = 1;
465
466# $ret_val = &RogPlug::process($self, $textref, $pluginfo,
467# $tmp_dirname, $tmp_tailname,
468# $metadata, $doc_obj);
469
470 # associate original file with doc object
471 my $cursection = $doc_obj->get_top_section();
472 my $filename = &util::filename_cat($base_dir, $file);
473 $doc_obj->associate_file($filename, "doc.$doc_ext", undef, $cursection);
474
475 my $doclink = "<a href=\"_httpcollection_/index/assoc/[archivedir]/doc.$doc_ext\">";
476 $doc_obj->add_utf8_metadata ($cursection, "srclink", $doclink);
477 $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
478 $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
479 return $ret_val;
480}
481
4821;
Note: See TracBrowser for help on using the repository browser.