source: gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm@ 15872

Last change on this file since 15872 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Alos, I haven't really tested anything yet...

  • Property svn:keywords set to Author Date Id Revision
File size: 20.9 KB
Line 
1###########################################################################
2#
3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25package CONTENTdmPlugin;
26
27use ConvertBinaryFile;
28use ReadXMLFile;
29
30use unicode;
31use ghtml;
32
33use strict;
34no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37use XMLParser;
38
39# inherit ReadXMLFile for the apply_xslt method
40sub BEGIN {
41 @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
42}
43
44
45my $convert_to_list =
46 [ { 'name' => "auto",
47 'desc' => "{ConvertBinaryFile.convert_to.auto}" },
48 { 'name' => "html",
49 'desc' => "{ConvertBinaryFile.convert_to.html}" },
50 { 'name' => "text",
51 'desc' => "{ConvertBinaryFile.convert_to.text}" },
52 { 'name' => "pagedimg",
53 'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
54 ];
55
56
57
58my $arguments =
59 [
60 { 'name' => "convert_to",
61 'desc' => "{ConvertBinaryFile.convert_to}",
62 'type' => "enum",
63 'reqd' => "yes",
64 'list' => $convert_to_list,
65 'deft' => "html" },
66 { 'name' => "xslt",
67 'desc' => "{ReadXMLFile.xslt}",
68 'type' => "string",
69 'deft' => "",
70 'reqd' => "no" },
71 { 'name' => "process_exp",
72 'desc' => "{BasePlugin.process_exp}",
73 'type' => "regexp",
74 'deft' => &get_default_process_exp(),
75 'reqd' => "no" },
76 { 'name' => "block_exp",
77 'desc' => "{BasePlugin.block_exp}",
78 'type' => "regexp",
79 'deft' => &get_default_block_exp() }
80];
81
82my $options = { 'name' => "CONTENTdmPlugin",
83 'desc' => "{CONTENTdmPlugin.desc}",
84 'abstract' => "no",
85 'inherits' => "yes",
86 # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
87 'srcreplaceable' => "no",
88 'args' => $arguments };
89
90sub new {
91 my ($class) = shift (@_);
92 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
93 push(@$pluginlist, $class);
94
95 push(@$inputargs,"-title_sub");
96 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
97
98 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
99 push(@{$hashArgOptLists->{"OptList"}},$options);
100
101 my @arg_array = @$inputargs;
102 my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
103
104 if ($self->{'info_only'}) {
105 # don't worry about any options etc
106 return bless $self, $class;
107 }
108
109 my $parser = new XML::Parser('Style' => 'Stream',
110 'Pkg' => 'CONTENTdmPlugin',
111 'PluginObj' => $self,
112 'Handlers' => {'Char' => \&Char,
113 'XMLDecl' => \&XMLDecl,
114 'Entity' => \&Entity,
115 'Doctype' => \&Doctype,
116 'Default' => \&Default
117 });
118 $self->{'parser'} = $parser;
119 $self->{'rdf_desc'} = undef;
120 $self->{'about_key'} = undef;
121 $self->{'metadata_name'} = undef;
122 $self->{'metadata_value'} = undef;
123
124 $self->{'convert_to'} = "PagedImage";
125 my $secondary_plugin_options = $self->{'secondary_plugin_options'};
126
127 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
128 $secondary_plugin_options->{'PagedImagePlugin'} = [];
129 }
130 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
131 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
132 push(@$pagedimg_options, "-thumbnail", "-screenview");
133
134
135 $self = bless $self, $class;
136 $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
137 return $self;
138}
139
140sub get_default_process_exp {
141 my $self = shift (@_);
142
143 return q^(?i)\.rdf$^;
144}
145
146sub get_default_block_exp {
147 return q^(?i)\.(jpg|jpeg|gif)$^;
148}
149
150
151
152sub rdf_desc_to_id
153{
154 my $self = shift (@_);
155 my ($rdf_desc) = @_;
156
157 my $rdf_id = {};
158
159 # initialise any .cpd (=complex multi page) structures
160
161 foreach my $about_key (keys %{$rdf_desc}) {
162 if ($about_key =~ m/\.cpd$/) {
163 my $about = $rdf_desc->{$about_key};
164 my $id = $about->{'dc:identifier'};
165
166 if ($id =~ m/^\s*$/) {
167 # missing id, make one up based on about attribute
168
169 my ($tailname, $dirname, $suffix)
170 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
171
172 $id = "about:$tailname";
173 }
174
175 $rdf_id->{$id} = $about;
176 $rdf_id->{$id}->{'ex:filename'} = $about_key;
177 $rdf_id->{$id}->{'ex:type'} = "complex";
178 $rdf_id->{$id}->{'pages'} = [];
179 }
180
181 }
182
183 # now add in *non* .cpd items
184
185 foreach my $about_key (keys %{$rdf_desc}) {
186 if ($about_key !~ m/\.cpd$/) {
187 my $about = $rdf_desc->{$about_key};
188 my $id = $about->{'dc:identifier'};
189
190
191 if ($id =~ m/^\s*$/) {
192 # missing id, make one up based on about attribute
193
194 my ($tailname, $dirname, $suffix)
195 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
196
197 $id = "about:$tailname";
198 }
199
200 if (defined $rdf_id->{$id}) {
201 $about->{'ex:filename'} = $about_key;
202
203 # dealing with complex multi-page situation
204 # Add to existing structure
205
206 my $pages = $rdf_id->{$id}->{'pages'};
207 push(@$pages,$about)
208 }
209 else {
210 # New entry
211
212 $rdf_id->{$id} = $about;
213 $rdf_id->{$id}->{'ex:type'} = "simple";
214 $rdf_id->{$id}->{'ex:filename'} = $about_key;
215 }
216 }
217
218 }
219
220 return $rdf_id;
221}
222
223
224sub metadata_table_txt_file
225{
226 my $self = shift (@_);
227 my ($output_root,$page_num) = @_;
228
229 my $txt_filename = $output_root."_page_$page_num.txt";
230
231 my ($tailname, $dirname, $suffix)
232 = &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");
233
234 my $txt_file = "$tailname.$suffix";
235
236 return $txt_file;
237}
238
239
240sub output_metadata_table
241{
242 my $self = shift (@_);
243 my ($page,$page_num,$tmp_dirname,$txt_file) = @_;
244
245 my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);
246
247 open(TOUT,">$txt_filename")
248 || die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
249
250 print TOUT $page->{'MetadataTable'};
251 delete $page->{'MetadataTable'};
252
253 close (TOUT);
254}
255
256
257sub rdf_id_to_item_file
258{
259 my $self = shift (@_);
260 my ($rdf_id,$tmp_dirname,$output_root) = @_;
261
262 my $item_file_list = [];
263
264 foreach my $id (keys %{$rdf_id}) {
265
266 my $id_safe = $id;
267 $id_safe =~ s/ /-/g;
268
269 my $output_filename = $output_root."_$id_safe.item";
270 open(FOUT,">$output_filename")
271 || die "Unable to open $output_filename: $!\n";
272
273
274 print FOUT "<PagedDocument>\n";
275
276 my $rdf_doc = $rdf_id->{$id};
277 foreach my $metadata_name (keys %$rdf_doc) {
278
279
280 next if ($metadata_name eq "pages");
281
282 my $metadata_value = $rdf_doc->{$metadata_name};
283
284 # convert ns:name to ns.Name
285 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
286
287 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
288 }
289
290 if ($rdf_doc->{'ex:type'} eq "complex") {
291 my $pages = $rdf_doc->{'pages'};
292 my $page_num = 1;
293
294 foreach my $page (@$pages) {
295
296 my $imgfile = $page->{'ex:filename'};
297 if ($imgfile =~ m/(http|ftp):/) {
298 $imgfile = "empty.jpg";
299 }
300 else {
301 $imgfile = &util::filename_cat("..","import",$imgfile);
302 }
303
304 my $txt_file
305 = $self->metadata_table_txt_file($output_root,$page_num);
306
307 $self->output_metadata_table($page,$page_num,
308 $tmp_dirname,$txt_file);
309
310
311 print FOUT " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
312
313 foreach my $metadata_name (keys %$page) {
314
315 my $metadata_value = $rdf_doc->{$metadata_name};
316 # convert ns:name to ns.Name
317 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
318
319 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
320 }
321
322
323 $page_num++;
324
325
326 print FOUT " </Page>\n";
327 }
328 }
329 else {
330 # simple
331 # duplicate top-level metadata for now plus image to bind to
332
333 my $imgfile = $rdf_doc->{'ex:filename'};
334 if ($imgfile =~ m/(http|ftp):/) {
335 $imgfile = "empty.jpg";
336 }
337 else {
338 $imgfile = &util::filename_cat("..","import",$imgfile);
339 $imgfile = "../import/".$imgfile;
340 }
341
342
343 my $txt_file = $self->metadata_table_txt_file($output_root,1);
344 $self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);
345
346 print FOUT " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
347 foreach my $metadata_name (keys %$rdf_doc) {
348
349 my $metadata_value = $rdf_doc->{$metadata_name};
350
351 # convert ns:name to ns.Name
352 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
353
354 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
355 }
356 print FOUT " </Page>\n";
357
358 }
359
360 print FOUT "</PagedDocument>\n";
361 close(FOUT);
362
363 push(@$item_file_list,$output_filename);
364
365 }
366
367
368 return $item_file_list;
369}
370
371
372
373sub xml_area_convert_file
374{
375 my $self = shift (@_);
376 my ($input_filename, $tmp_dirname, $output_root) = @_;
377
378 eval {
379 # Build up hash table/tree of all records
380
381 my $xslt = $self->{'xslt'};
382 if (defined $xslt && ($xslt ne "")) {
383 # perform xslt
384 my $transformed_xml = $self->apply_xslt($xslt,$input_filename);
385
386 # feed transformed file (now in memory as string) into XML parser
387 $self->{'parser'}->parse($transformed_xml);
388 }
389 else {
390 $self->{'parser'}->parsefile($input_filename);
391 }
392 };
393
394 if ($@) {
395
396 # parsefile may either croak somewhere in XML::Parser (e.g. because
397 # the document is not well formed) or die somewhere in XMLPlug or a
398 # derived plugin (e.g. because we're attempting to process a
399 # document whose DOCTYPE is not meant for this plugin). For the
400 # first case we'll print a warning and continue, for the second
401 # we'll just continue quietly
402
403 print STDERR "**** Error is: $@\n";
404
405 my $file = $self->{'file'};
406
407 my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
408 if (defined $msg) {
409 my $outhandle = $self->{'outhandle'};
410 my $plugin_name = ref ($self);
411 print $outhandle "$plugin_name failed to process $file ($msg)\n";
412 }
413
414 my $gli = $self->{'gli'};
415
416 # reset ourself for the next document
417 $self->{'section_level'}=0;
418 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
419 return ("fail",undef); # error during processing
420 }
421
422 my $rdf_desc = $self->{'rdf_desc'};
423
424# foreach my $about_key (keys %{$rdf_desc}) {
425# my $about = $rdf_desc->{$about_key};
426# foreach my $metadata_name (keys %{$about}) {
427#
428# my $metadata_value = $about->{$metadata_name};
429## print STDERR " $metadata_name: $metadata_value\n";
430# }
431# }
432
433
434 # Merge entries with same name
435
436
437 my $merged_rdf_id = $self->rdf_desc_to_id($rdf_desc);
438
439# foreach my $about_key (keys %{$merged_rdf_id}) {
440# my $about = $merged_rdf_id->{$about_key};
441# foreach my $metadata_name (keys %{$about}) {
442#
443# my $metadata_value = $about->{$metadata_name};
444## print STDERR " $metadata_name: $metadata_value\n";
445# }
446# }
447
448
449
450 my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,$tmp_dirname,
451 $output_root);
452
453 return ("item",$item_files);
454}
455
456
457# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
458# to CONTENTdm
459#
460# A better (i.e. in the future) solution would be to see if this can be
461# shifted into gsConvert.pl so there is no need to override the
462# default tmp_area_convert_file()
463
464
465sub tmp_area_convert_file {
466 my $self = shift (@_);
467 my ($output_ext, $input_filename, $textref) = @_;
468
469 # is textref ever used?!?
470
471 my $outhandle = $self->{'outhandle'};
472 my $convert_to = $self->{'convert_to'};
473 my $failhandle = $self->{'failhandle'};
474 my $convert_to_ext = $self->{'convert_to_ext'};
475
476 # softlink to collection tmp dir
477 my $tmp_dirname
478 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
479 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
480
481 # derive tmp filename from input filename
482 my ($tailname, $dirname, $suffix)
483 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
484
485 # Remove any white space from filename -- no risk of name collision, and
486 # makes later conversion by utils simpler. Leave spaces in path...
487 # tidy up the filename with space, dot, hyphen between
488 $tailname =~ s/\s+//g;
489 $tailname =~ s/\.+//g;
490 $tailname =~ s/\-+//g;
491 $suffix = lc($suffix);
492 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
493
494 &util::soft_link($input_filename, $tmp_filename);
495 my $verbosity = $self->{'verbosity'};
496 if ($verbosity > 0) {
497 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
498 }
499
500 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
501
502 # call xml_area_convert_file rather than gsConvert.pl
503
504 my $output_root = &util::filename_cat($tmp_dirname, "$tailname");
505
506 my ($output_type,$item_files)
507 = $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);
508
509
510 my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
511 my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");
512
513 print STDERR "***** Setting up fake filename $fakeimg_filename -> $fakeimg_tmp_filename\n";
514
515 &util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);
516
517 # continue as before ...
518
519 # remove symbolic link to original file
520 &util::rm($tmp_filename);
521
522 # Check STDERR here
523 chomp $output_type;
524 if ($output_type eq "fail") {
525 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
526 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
527 $self->{'num_not_processed'} ++;
528 if (-s "$errlog") {
529 open(ERRLOG, "$errlog");
530 while (<ERRLOG>) {
531 print $outhandle "$_";
532 }
533 print $outhandle "\n";
534 close ERRLOG;
535 }
536 &util::rm("$errlog") if (-e "$errlog");
537 return [];
538 }
539
540 # store the *actual* output type and return the output filename
541 # it's possible we requested conversion to html, but only to text succeeded
542 #$self->{'convert_to_ext'} = $output_type;
543 if ($output_type =~ /html/i) {
544 $self->{'converted_to'} = "HTML";
545 } elsif ($output_type =~ /te?xt/i) {
546 $self->{'converted_to'} = "Text";
547 } elsif ($output_type =~ /item/i){
548 $self->{'converted_to'} = "PagedImage";
549 }
550
551
552 return $item_files;
553}
554
555
556
557
558# Override ConvertBinaryFile read
559# Needed so multiple .item files generate are sent down secondary plugin
560
561sub read {
562 my $self = shift (@_);
563 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
564
565 $self->{'gli'} = $gli;
566 $self->{'file'} = $file;
567
568 my $successful_rv = -1;
569
570 my $outhandle = $self->{'outhandle'};
571
572 my ($block_status,$filename) = $self->read_block(@_);
573 return $block_status if ((!defined $block_status) || ($block_status==0));
574 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
575
576 # read() deviates at this point from ConvertBinaryFile
577 # Need to work with list of filename returned
578
579 my $output_ext = $self->{'convert_to_ext'};
580 my $conv_filename_list = [];
581
582 $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename);
583
584 if (scalar(@$conv_filename_list)==0) {
585 return -1;
586 } # had an error, will be passed down pipeline
587
588 foreach my $conv_filename ( @$conv_filename_list ) {
589 if (! -e "$conv_filename") {return -1;}
590 $self->{'conv_filename'} = $conv_filename; # is this used anywhere?
591 $self->convert_post_process($conv_filename);
592
593 my $secondary_plugins = $self->{'secondary_plugins'};
594 my $num_secondary_plugins = scalar(keys %$secondary_plugins);
595
596 if ($num_secondary_plugins == 0) {
597 print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
598 return 0; # effectively block it
599 }
600
601 my @plugin_names = keys %$secondary_plugins;
602 my $plugin_name = shift @plugin_names;
603
604 if ($num_secondary_plugins > 1) {
605 print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name\n.";
606 }
607
608 my $secondary_plugin = $secondary_plugins->{$plugin_name};
609
610 # note: metadata is not carried on to the next level
611 my ($rv,$doc_obj)
612 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
613 $metadata, $processor, $maxdocs, $total_count,
614 $gli);
615
616 if ((defined $rv) && ($rv>=0)) {
617 $successful_rv = 1;
618 }
619
620 # Override previous gsdlsourcefilename set by secondary plugin
621 my $collect_file = &util::filename_within_collection($filename);
622 my $collect_conv_file = &util::filename_within_collection($conv_filename);
623 $doc_obj->set_source_filename ($collect_file);
624 $doc_obj->set_converted_filename($collect_conv_file);
625
626 my ($filemeta) = $file =~ /([^\\\/]+)$/;
627 $self->set_Source_metadata($doc_obj, $filemeta);
628 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
629 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
630
631 if ($self->{'cover_image'}) {
632 $self->associate_cover_image($doc_obj, $filename);
633 }
634
635 # do plugin specific processing of doc_obj
636 unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
637 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
638 return -1;
639 }
640 # do any automatic metadata extraction
641 $self->auto_extract_metadata ($doc_obj);
642
643 # have we found a Title??
644 $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);
645
646 # add an OID
647 $self->add_OID($doc_obj);
648 # process the document
649 $processor->process($doc_obj);
650
651 $self->{'num_processed'} ++;
652 }
653
654 return $successful_rv;
655}
656
657sub process {
658
659}
660# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
661sub process_old {
662 my $self = shift (@_);
663 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
664
665
666 my $secondary_plugins = $self->{'secondary_plugins'};
667 my @plugin_names = keys %$secondary_plugins;
668 my $plugin_name = shift @plugin_names; # already checked there is only one
669
670 my $secondary_plugin = $secondary_plugins->{$plugin_name};
671
672 my $result = $secondary_plugin->process(@_);
673
674 return $result;
675}
676
677
678# Called at the beginning of the XML document.
679sub xml_start_document {
680 my $self = shift(@_);
681 my ($expat) = @_;
682
683 $self->{'rdf_desc'} = {};
684}
685
686
687# Called for DOCTYPE declarations - use die to bail out if this doctype
688# is not meant for this plugin
689sub xml_doctype {
690 my $self = shift(@_);
691 my ($expat, $name, $sysid, $pubid, $internal) = @_;
692
693 die "" if ($name !~ /^rdf:RDF$/);
694
695 my $outhandle = $self->{'outhandle'};
696 print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
697
698}
699
700# Called for every start tag. The $_ variable will contain a copy of the
701# tag and the %_ variable will contain the element's attributes.
702sub xml_start_tag {
703 my $self = shift(@_);
704 my ($expat, $element) = @_;
705
706 if ($element eq "rdf:Description") {
707
708 my $about_key = $_{'about'};
709
710 my $rdf_desc = $self->{'rdf_desc'};
711 $rdf_desc->{$about_key} = {};
712
713 $self->{'about_key'} = $about_key;
714 $self->{'index_text'} = "";
715 $self->{'pp_text'} = "<table width=\"100%\">\n";
716
717
718 }
719 elsif (defined $self->{'about_key'}) {
720 $self->{'metadata_name'} = $element;
721 $self->{'metadata_value'} = "";
722 }
723
724}
725
726# Called for every end tag. The $_ variable will contain a copy of the tag.
727sub xml_end_tag {
728 my $self = shift(@_);
729 my ($expat, $element) = @_;
730
731 if ($element eq "rdf:Description") {
732 $self->{'pp_text'} .= "</table>\n";
733 ## ghtml::htmlsafe($self->{'pp_text'});
734
735
736 my $about_key = $self->{'about_key'};
737 my $about = $self->{'rdf_desc'}->{$about_key};
738 $about->{'IndexText'} = $self->{'index_text'};
739 $about->{'MetadataTable'} = $self->{'pp_text'};
740
741
742 $self->{'about_key'} = undef;
743 $self->{'index_text'} = undef;
744 $self->{'pp_text'} = undef;
745
746 }
747 elsif (defined $self->{'metadata_name'}) {
748 my $metadata_name = $self->{'metadata_name'};
749 if ($element eq $metadata_name) {
750 my $metadata_value = $self->{'metadata_value'};
751
752 my $about_key = $self->{'about_key'};
753 my $about = $self->{'rdf_desc'}->{$about_key};
754 $about->{$metadata_name} = $metadata_value;
755
756 $self->{'index_text'} .= "$metadata_value\n";
757 $self->{'pp_text'} .= " <tr><td>$metadata_name</td><td>$metadata_value</td></tr>\n";
758
759 $self->{'metadata_name'} = undef;
760 $self->{'metadata_value'} = undef;
761 }
762 }
763}
764
765# Called just before start or end tags with accumulated non-markup text in
766# the $_ variable.
767sub xml_text {
768 my $self = shift(@_);
769 my ($expat) = @_;
770
771 if (defined $self->{'metadata_name'}) {
772 $self->{'metadata_value'} .= $_;
773 }
774}
775
776# Called at the end of the XML document.
777sub xml_end_document {
778 my $self = shift(@_);
779 my ($expat) = @_;
780
781}
782
783
7841;
Note: See TracBrowser for help on using the repository browser.