source: main/trunk/greenstone2/perllib/plugins/CONTENTdmPlugin.pm@ 22862

Last change on this file since 22862 was 22862, checked in by kjdon, 14 years ago

changed a comment

  • Property svn:keywords set to Author Date Id Revision
File size: 22.5 KB
Line 
1###########################################################################
2#
3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25package CONTENTdmPlugin;
26
27use ConvertBinaryFile;
28use ReadXMLFile;
29
30use unicode;
31use ghtml;
32
33use strict;
34no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37use XMLParser;
38
39# inherit ReadXMLFile for the apply_xslt method
40sub BEGIN {
41 @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
42}
43
44
45my $convert_to_list =
46 [
47# { 'name' => "auto",
48# 'desc' => "{ConvertBinaryFile.convert_to.auto}" },
49# { 'name' => "html",
50# 'desc' => "{ConvertBinaryFile.convert_to.html}" },
51# { 'name' => "text",
52# 'desc' => "{ConvertBinaryFile.convert_to.text}" },
53 { 'name' => "pagedimg",
54 'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
55 ];
56
57
58
59my $arguments =
60 [
61 { 'name' => "convert_to",
62 'desc' => "{ConvertBinaryFile.convert_to}",
63 'type' => "enum",
64 'reqd' => "yes",
65 'list' => $convert_to_list,
66 'deft' => "html" },
67 { 'name' => "xslt",
68 'desc' => "{ReadXMLFile.xslt}",
69 'type' => "string",
70 'deft' => "",
71 'reqd' => "no" },
72 { 'name' => "process_exp",
73 'desc' => "{BasePlugin.process_exp}",
74 'type' => "regexp",
75 'deft' => &get_default_process_exp(),
76 'reqd' => "no" },
77 { 'name' => "block_exp",
78 'desc' => "{BasePlugin.block_exp}",
79 'type' => "regexp",
80 'deft' => &get_default_block_exp() }
81];
82
83my $options = { 'name' => "CONTENTdmPlugin",
84 'desc' => "{CONTENTdmPlugin.desc}",
85 'abstract' => "no",
86 'inherits' => "yes",
87 # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
88 'srcreplaceable' => "no",
89 'args' => $arguments };
90
91sub new {
92 my ($class) = shift (@_);
93 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
94 push(@$pluginlist, $class);
95
96 push(@$inputargs,"-title_sub");
97 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
98
99 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
100 push(@{$hashArgOptLists->{"OptList"}},$options);
101
102 my @arg_array = @$inputargs;
103 my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
104
105 if ($self->{'info_only'}) {
106 # don't worry about any options etc
107 return bless $self, $class;
108 }
109
110 my $parser = new XML::Parser('Style' => 'Stream',
111 'Pkg' => 'ReadXMLFile',
112 'PluginObj' => $self,
113 'Handlers' => {'Char' => \&ReadXMLFile::Char,
114 'XMLDecl' => \&ReadXMLFile::XMLDecl,
115 'Entity' => \&ReadXMLFile::Entity,
116 'Doctype' => \&ReadXMLFile::Doctype,
117 'Default' => \&ReadXMLFile::Default
118 });
119 $self->{'parser'} = $parser;
120
121
122 $self->{'rdf_desc'} = undef;
123 $self->{'about_key'} = undef;
124 $self->{'metadata_name'} = undef;
125 $self->{'metadata_value'} = undef;
126
127 # do we only allow one option??
128 $self->{'convert_to'} = "pagedimg";
129 $self->{'convert_to_plugin'} = "PagedImagePlugin";
130 $self->{'convert_to_ext'} = "jpg";
131
132 my $secondary_plugin_name = $self->{'convert_to_plugin'};
133 my $secondary_plugin_options = $self->{'secondary_plugin_options'};
134
135 if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
136 $secondary_plugin_options->{$secondary_plugin_name} = [];
137 }
138 my $specific_options = $secondary_plugin_options->{$secondary_plugin_name};
139
140 push(@$specific_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
141 push(@$specific_options, "-create_thumbnail", "true", "-create_screenview", "true");
142 push(@$specific_options, "-file_rename_method", "none");
143 push(@$specific_options, "-processing_tmp_files");
144
145# my $secondary_plugin_options = $self->{'secondary_plugin_options'};
146
147# if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
148# $secondary_plugin_options->{'PagedImagePlugin'} = [];
149# }
150# my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
151# push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
152# push(@$pagedimg_options, "-create_thumbnail", "true", "-create_screenview", "true");
153# push(@$pagedimg_options, "-file_rename_method", "none");
154# push(@$pagedimg_options, "-processing_tmp_files");
155 $self = bless $self, $class;
156 $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
157 return $self;
158}
159
160sub get_default_process_exp {
161 my $self = shift (@_);
162
163 return q^(?i)\.rdf$^;
164}
165
166sub get_default_block_exp {
167 return q^(?i)\.(jpg|jpeg|gif)$^;
168}
169
170
171
172sub rdf_desc_to_id
173{
174 my $self = shift (@_);
175 my ($rdf_desc) = @_;
176
177 my $rdf_id = {};
178
179 # initialise any .cpd (=complex multi page) structures
180
181 foreach my $about_key (keys %{$rdf_desc}) {
182 if ($about_key =~ m/\.cpd$/) {
183 my $about = $rdf_desc->{$about_key};
184 my $id = $about->{'dc:identifier'};
185
186 if ($id =~ m/^\s*$/) {
187 # missing id, make one up based on about attribute
188
189 my ($tailname, $dirname, $suffix)
190 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
191
192 $id = "about:$tailname";
193 }
194
195 $rdf_id->{$id} = $about;
196 $rdf_id->{$id}->{'ex:filename'} = $about_key;
197 $rdf_id->{$id}->{'ex:type'} = "complex";
198 $rdf_id->{$id}->{'pages'} = [];
199 }
200
201 }
202
203 # now add in *non* .cpd items
204
205 foreach my $about_key (keys %{$rdf_desc}) {
206 if ($about_key !~ m/\.cpd$/) {
207 my $about = $rdf_desc->{$about_key};
208 my $id = $about->{'dc:identifier'};
209
210
211 if ($id =~ m/^\s*$/) {
212 # missing id, make one up based on about attribute
213
214 my ($tailname, $dirname, $suffix)
215 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
216
217 $id = "about:$tailname";
218 }
219
220 if (defined $rdf_id->{$id}) {
221 $about->{'ex:filename'} = $about_key;
222
223 # dealing with complex multi-page situation
224 # Add to existing structure
225
226 my $pages = $rdf_id->{$id}->{'pages'};
227 push(@$pages,$about)
228 }
229 else {
230 # New entry
231
232 $rdf_id->{$id} = $about;
233 $rdf_id->{$id}->{'ex:type'} = "simple";
234 $rdf_id->{$id}->{'ex:filename'} = $about_key;
235 }
236 }
237
238 }
239
240 return $rdf_id;
241}
242
243
244sub metadata_table_txt_file
245{
246 my $self = shift (@_);
247 my ($output_root,$page_num) = @_;
248
249 my $txt_filename = $output_root."_page_$page_num.txt";
250
251 my ($tailname, $dirname, $suffix)
252 = &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");
253
254 my $txt_file = "$tailname$suffix";
255
256 return $txt_file;
257}
258
259
260sub output_metadata_table
261{
262 my $self = shift (@_);
263 my ($page,$page_num,$tmp_dirname,$txt_file) = @_;
264
265 my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);
266
267 open(TOUT,">$txt_filename")
268 || die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
269
270 print TOUT $page->{'MetadataTable'};
271 delete $page->{'MetadataTable'};
272
273 close (TOUT);
274}
275
276
277sub rdf_id_to_item_file
278{
279 my $self = shift (@_);
280 my ($rdf_id,$tmp_dirname,$output_root) = @_;
281
282 my $item_file_list = [];
283
284 foreach my $id (keys %{$rdf_id}) {
285
286 my $id_safe = $id;
287 $id_safe =~ s/ /-/g;
288
289 my $output_filename = $output_root."_$id_safe.item";
290 open(FOUT,">$output_filename")
291 || die "Unable to open $output_filename: $!\n";
292
293
294 print FOUT "<PagedDocument>\n";
295
296 my $rdf_doc = $rdf_id->{$id};
297 foreach my $metadata_name (keys %$rdf_doc) {
298
299
300 next if ($metadata_name eq "pages");
301
302 my $metadata_value = $rdf_doc->{$metadata_name};
303
304 # convert ns:name to ns.Name
305 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
306
307 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
308 }
309
310 if ($rdf_doc->{'ex:type'} eq "complex") {
311 my $pages = $rdf_doc->{'pages'};
312 my $page_num = 1;
313
314 foreach my $page (@$pages) {
315
316 my $imgfile = $page->{'ex:filename'};
317 if ($imgfile =~ m/(http|ftp):/) {
318 $imgfile = "empty.jpg";
319 }
320 else {
321 $imgfile = &util::filename_cat("..","import",$imgfile);
322 }
323
324 my $txt_file
325 = $self->metadata_table_txt_file($output_root,$page_num);
326
327 $self->output_metadata_table($page,$page_num,
328 $tmp_dirname,$txt_file);
329
330
331 print FOUT " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
332
333 foreach my $metadata_name (keys %$page) {
334
335 my $metadata_value = $rdf_doc->{$metadata_name};
336 # convert ns:name to ns.Name
337 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
338
339 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
340 }
341
342
343 $page_num++;
344
345
346 print FOUT " </Page>\n";
347 }
348 }
349 else {
350 # simple
351 # duplicate top-level metadata for now plus image to bind to
352
353 my $imgfile = $rdf_doc->{'ex:filename'};
354 if ($imgfile =~ m/(http|ftp):/) {
355 $imgfile = "empty.jpg";
356 }
357 else {
358 $imgfile = &util::filename_cat("..","import",$imgfile);
359 }
360
361
362 my $txt_file = $self->metadata_table_txt_file($output_root,1);
363 $self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);
364
365 print FOUT " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
366 foreach my $metadata_name (keys %$rdf_doc) {
367
368 my $metadata_value = $rdf_doc->{$metadata_name};
369
370 # convert ns:name to ns.Name
371 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
372
373 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
374 }
375 print FOUT " </Page>\n";
376
377 }
378
379 print FOUT "</PagedDocument>\n";
380 close(FOUT);
381
382 push(@$item_file_list,$output_filename);
383
384 }
385
386
387 return $item_file_list;
388}
389
390
391
392sub xml_area_convert_file
393{
394 my $self = shift (@_);
395 my ($input_filename, $tmp_dirname, $output_root) = @_;
396
397 eval {
398 # Build up hash table/tree of all records
399
400 my $xslt = $self->{'xslt'};
401 if (defined $xslt && ($xslt ne "")) {
402 # perform xslt
403 my $transformed_xml = $self->apply_xslt($xslt,$input_filename);
404
405 open(TOUT,">/tmp/tout.xml")
406 || die "Unable to open /tmp/tout.xml: $!\n";
407 print TOUT $transformed_xml;
408 close(TOUT);
409
410
411 # feed transformed file (now in memory as string) into XML parser
412 $self->{'parser'}->parse($transformed_xml);
413 }
414 else {
415 $self->{'parser'}->parsefile($input_filename);
416 }
417 };
418
419 if ($@) {
420
421 # parsefile may either croak somewhere in XML::Parser (e.g. because
422 # the document is not well formed) or die somewhere in XMLPlug or a
423 # derived plugin (e.g. because we're attempting to process a
424 # document whose DOCTYPE is not meant for this plugin). For the
425 # first case we'll print a warning and continue, for the second
426 # we'll just continue quietly
427
428 print STDERR "**** Error is: $@\n";
429
430 my $file = $self->{'file'};
431
432 my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
433 if (defined $msg) {
434 my $outhandle = $self->{'outhandle'};
435 my $plugin_name = ref ($self);
436 print $outhandle "$plugin_name failed to process $file ($msg)\n";
437 }
438
439 my $gli = $self->{'gli'};
440
441 # reset ourself for the next document
442 $self->{'section_level'}=0;
443 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
444 return ("fail",undef); # error during processing
445 }
446
447 my $rdf_desc = $self->{'rdf_desc'};
448
449# foreach my $about_key (keys %{$rdf_desc}) {
450# my $about = $rdf_desc->{$about_key};
451# foreach my $metadata_name (keys %{$about}) {
452#
453# my $metadata_value = $about->{$metadata_name};
454## print STDERR " $metadata_name: $metadata_value\n";
455# }
456# }
457
458
459 # Merge entries with same name
460
461
462 my $merged_rdf_id = $self->rdf_desc_to_id($rdf_desc);
463
464# foreach my $about_key (keys %{$merged_rdf_id}) {
465# my $about = $merged_rdf_id->{$about_key};
466# foreach my $metadata_name (keys %{$about}) {
467#
468# my $metadata_value = $about->{$metadata_name};
469## print STDERR " $metadata_name: $metadata_value\n";
470# }
471# }
472
473
474
475 my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,$tmp_dirname,
476 $output_root);
477
478 return ("item",$item_files);
479}
480
481
482# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
483# to CONTENTdm
484#
485# A better (i.e. in the future) solution would be to see if this can be
486# shifted into gsConvert.pl so there is no need to override the
487# default tmp_area_convert_file()
488
489
490sub tmp_area_convert_file {
491 my $self = shift (@_);
492 my ($output_ext, $input_filename, $textref) = @_;
493
494 # is textref ever used?!?
495
496 my $outhandle = $self->{'outhandle'};
497 my $convert_to = $self->{'convert_to'};
498 my $failhandle = $self->{'failhandle'};
499 my $convert_to_ext = $self->{'convert_to_ext'};
500
501 # softlink to collection tmp dir
502 my $tmp_dirname
503 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
504 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
505
506 # derive tmp filename from input filename
507 my ($tailname, $dirname, $suffix)
508 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
509
510 # Remove any white space from filename -- no risk of name collision, and
511 # makes later conversion by utils simpler. Leave spaces in path...
512 # tidy up the filename with space, dot, hyphen between
513 $tailname =~ s/\s+//g;
514 $tailname =~ s/\.+//g;
515 $tailname =~ s/\-+//g;
516
517 $tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
518 $suffix = lc($suffix);
519 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
520
521 &util::soft_link($input_filename, $tmp_filename);
522 my $verbosity = $self->{'verbosity'};
523 if ($verbosity > 0) {
524 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
525 }
526
527 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
528
529 # call xml_area_convert_file rather than gsConvert.pl
530
531 my $output_root = &util::filename_cat($tmp_dirname, "$tailname");
532
533 my ($output_type,$item_files)
534 = $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);
535
536
537 my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
538 my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");
539
540 print STDERR "***** No source image identified with item\n";
541
542 print STDERR "***** Using default \"no image available\" $fakeimg_filename -> $fakeimg_tmp_filename\n";
543
544 &util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);
545
546 # continue as before ...
547
548 # remove symbolic link to original file
549 &util::rm($tmp_filename);
550
551 # Check STDERR here
552 chomp $output_type;
553 if ($output_type eq "fail") {
554 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
555 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
556 $self->{'num_not_processed'} ++;
557 if (-s "$errlog") {
558 open(ERRLOG, "$errlog");
559 while (<ERRLOG>) {
560 print $outhandle "$_";
561 }
562 print $outhandle "\n";
563 close ERRLOG;
564 }
565 &util::rm("$errlog") if (-e "$errlog");
566 return [];
567 }
568
569 # store the *actual* output type and return the output filename
570 # it's possible we requested conversion to html, but only to text succeeded
571 #$self->{'convert_to_ext'} = $output_type;
572 if ($output_type =~ /html/i) {
573 $self->{'converted_to'} = "HTML";
574 } elsif ($output_type =~ /te?xt/i) {
575 $self->{'converted_to'} = "Text";
576 } elsif ($output_type =~ /item/i){
577 $self->{'converted_to'} = "PagedImage";
578 }
579
580
581 return $item_files;
582}
583
584
585
586
587# Override ConvertBinaryFile (ie BasePlugin) read
588# Needed so multiple .item files generated are sent down secondary plugin
589# and the resulting doc_objs all processed.
590
591sub read {
592 my $self = shift (@_);
593 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
594
595
596 $self->{'gli'} = $gli;
597 $self->{'file'} = $file;
598
599 my $successful_rv = -1;
600
601 my $outhandle = $self->{'outhandle'};
602
603 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
604 return undef unless $self->can_process_this_file($filename_full_path);
605
606 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
607
608 # read() deviates at this point from ConvertBinaryFile
609 # Need to work with list of filename returned
610
611 my $output_ext = $self->{'convert_to_ext'};
612 my $conv_filename_list = [];
613
614 $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename_full_path);
615
616 if (scalar(@$conv_filename_list)==0) {
617 return -1;
618 } # had an error, will be passed down pipeline
619
620 foreach my $conv_filename ( @$conv_filename_list ) {
621 if (! -e "$conv_filename") {return -1;}
622 $self->{'conv_filename'} = $conv_filename; # is this used anywhere?
623 $self->convert_post_process($conv_filename);
624
625 my $secondary_plugins = $self->{'secondary_plugins'};
626 my $num_secondary_plugins = scalar(keys %$secondary_plugins);
627
628 if ($num_secondary_plugins == 0) {
629 print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
630 return 0; # effectively block it
631 }
632
633 my @plugin_names = keys %$secondary_plugins;
634 my $plugin_name = shift @plugin_names;
635
636 if ($num_secondary_plugins > 1) {
637 print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name\n.";
638 }
639
640 my $secondary_plugin = $secondary_plugins->{$plugin_name};
641
642 # note: metadata is not carried on to the next level
643 my ($rv,$doc_obj)
644 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
645 $block_hash, $metadata, $processor, $maxdocs, $total_count,
646 $gli);
647
648 print STDERR "**** $conv_filename => returned rv = $rv\n";
649
650 if ((defined $rv) && ($rv>=0)) {
651 $successful_rv = 1;
652 }
653
654 # Override previous gsdlsourcefilename set by secondary plugin
655 my $collect_file = &util::filename_within_collection($filename_full_path);
656 my $collect_conv_file = &util::filename_within_collection($conv_filename);
657 $doc_obj->set_source_filename ($collect_file, $self->{'file_rename_method'});
658 $doc_obj->set_converted_filename($collect_conv_file);
659
660 my ($filemeta) = $file =~ /([^\\\/]+)$/;
661 $self->set_Source_metadata($doc_obj, $filemeta);
662 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
663 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
664
665 if ($self->{'cover_image'}) {
666 $self->associate_cover_image($doc_obj, $filename_full_path);
667 }
668
669 # do plugin specific processing of doc_obj
670 unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
671 print STDERR "***** process returned undef: $base_dir $file\n";
672 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
673 return -1;
674 }
675 # do any automatic metadata extraction
676 $self->auto_extract_metadata ($doc_obj);
677
678 # have we found a Title??
679 $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);
680
681 # add an OID
682 $self->add_OID($doc_obj);
683 # process the document
684 $processor->process($doc_obj);
685
686 $self->{'num_processed'} ++;
687 }
688
689 return $successful_rv;
690}
691
692sub process {
693
694 return 1;
695}
696
697# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
698sub process_old {
699 my $self = shift (@_);
700 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
701
702
703 my $secondary_plugins = $self->{'secondary_plugins'};
704 my @plugin_names = keys %$secondary_plugins;
705 my $plugin_name = shift @plugin_names; # already checked there is only one
706
707 my $secondary_plugin = $secondary_plugins->{$plugin_name};
708
709 my $result = $secondary_plugin->process(@_);
710
711 return $result;
712}
713
714
715# Called at the beginning of the XML document.
716sub xml_start_document {
717 my $self = shift(@_);
718 my ($expat) = @_;
719
720 $self->{'rdf_desc'} = {};
721}
722
723
724# Called for DOCTYPE declarations - use die to bail out if this doctype
725# is not meant for this plugin
726sub xml_doctype {
727 my $self = shift(@_);
728 my ($expat, $name, $sysid, $pubid, $internal) = @_;
729
730 die "" if ($name !~ /^rdf:RDF$/);
731
732 my $outhandle = $self->{'outhandle'};
733 print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
734
735}
736
737# Called for every start tag. The $_ variable will contain a copy of the
738# tag and the %_ variable will contain the element's attributes.
739sub xml_start_tag {
740 my $self = shift(@_);
741 my ($expat, $element) = @_;
742
743 if ($element eq "rdf:Description") {
744
745 my $about_key = $_{'about'};
746
747 my $rdf_desc = $self->{'rdf_desc'};
748 $rdf_desc->{$about_key} = {};
749
750 $self->{'about_key'} = $about_key;
751 $self->{'index_text'} = "";
752 $self->{'pp_text'} = "<table width=\"100%\">\n";
753
754
755 }
756 elsif (defined $self->{'about_key'}) {
757 $self->{'metadata_name'} = $element;
758 $self->{'metadata_value'} = "";
759 }
760
761}
762
763# Called for every end tag. The $_ variable will contain a copy of the tag.
764sub xml_end_tag {
765 my $self = shift(@_);
766 my ($expat, $element) = @_;
767
768 if ($element eq "rdf:Description") {
769 $self->{'pp_text'} .= "</table>\n";
770 ## ghtml::htmlsafe($self->{'pp_text'});
771
772
773 my $about_key = $self->{'about_key'};
774 my $about = $self->{'rdf_desc'}->{$about_key};
775 $about->{'IndexText'} = $self->{'index_text'};
776 $about->{'MetadataTable'} = $self->{'pp_text'};
777
778
779 $self->{'about_key'} = undef;
780 $self->{'index_text'} = undef;
781 $self->{'pp_text'} = undef;
782
783 }
784 elsif (defined $self->{'metadata_name'}) {
785 my $metadata_name = $self->{'metadata_name'};
786 if ($element eq $metadata_name) {
787 my $metadata_value = $self->{'metadata_value'};
788
789 my $about_key = $self->{'about_key'};
790 my $about = $self->{'rdf_desc'}->{$about_key};
791 $about->{$metadata_name} = $metadata_value;
792
793 $self->{'index_text'} .= "$metadata_value\n";
794 $self->{'pp_text'} .= " <tr><td>$metadata_name</td><td>$metadata_value</td></tr>\n";
795
796 $self->{'metadata_name'} = undef;
797 $self->{'metadata_value'} = undef;
798 }
799 }
800}
801
802# Called just before start or end tags with accumulated non-markup text in
803# the $_ variable.
804sub xml_text {
805 my $self = shift(@_);
806 my ($expat) = @_;
807
808 if (defined $self->{'metadata_name'}) {
809 $self->{'metadata_value'} .= $_;
810 }
811}
812
813# Called at the end of the XML document.
814sub xml_end_document {
815 my $self = shift(@_);
816 my ($expat) = @_;
817}
818
819
8201;
Note: See TracBrowser for help on using the repository browser.