source: gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm@ 15925

Last change on this file since 15925 was 15925, checked in by kjdon, 16 years ago

use the proper options for PagedImagePlugin

  • Property svn:keywords set to Author Date Id Revision
File size: 20.9 KB
Line 
1###########################################################################
2#
3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25package CONTENTdmPlugin;
26
27use ConvertBinaryFile;
28use ReadXMLFile;
29
30use unicode;
31use ghtml;
32
33use strict;
34no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37use XMLParser;
38
39# inherit ReadXMLFile for the apply_xslt method
40sub BEGIN {
41 @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
42}
43
44
45my $convert_to_list =
46 [ { 'name' => "auto",
47 'desc' => "{ConvertBinaryFile.convert_to.auto}" },
48 { 'name' => "html",
49 'desc' => "{ConvertBinaryFile.convert_to.html}" },
50 { 'name' => "text",
51 'desc' => "{ConvertBinaryFile.convert_to.text}" },
52 { 'name' => "pagedimg",
53 'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
54 ];
55
56
57
58my $arguments =
59 [
60 { 'name' => "convert_to",
61 'desc' => "{ConvertBinaryFile.convert_to}",
62 'type' => "enum",
63 'reqd' => "yes",
64 'list' => $convert_to_list,
65 'deft' => "html" },
66 { 'name' => "xslt",
67 'desc' => "{ReadXMLFile.xslt}",
68 'type' => "string",
69 'deft' => "",
70 'reqd' => "no" },
71 { 'name' => "process_exp",
72 'desc' => "{BasePlugin.process_exp}",
73 'type' => "regexp",
74 'deft' => &get_default_process_exp(),
75 'reqd' => "no" },
76 { 'name' => "block_exp",
77 'desc' => "{BasePlugin.block_exp}",
78 'type' => "regexp",
79 'deft' => &get_default_block_exp() }
80];
81
82my $options = { 'name' => "CONTENTdmPlugin",
83 'desc' => "{CONTENTdmPlugin.desc}",
84 'abstract' => "no",
85 'inherits' => "yes",
86 # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
87 'srcreplaceable' => "no",
88 'args' => $arguments };
89
90sub new {
91 my ($class) = shift (@_);
92 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
93 push(@$pluginlist, $class);
94
95 push(@$inputargs,"-title_sub");
96 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
97
98 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
99 push(@{$hashArgOptLists->{"OptList"}},$options);
100
101 my @arg_array = @$inputargs;
102 my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
103
104 if ($self->{'info_only'}) {
105 # don't worry about any options etc
106 return bless $self, $class;
107 }
108
109 my $parser = new XML::Parser('Style' => 'Stream',
110 'Pkg' => 'CONTENTdmPlugin',
111 'PluginObj' => $self,
112 'Handlers' => {'Char' => \&Char,
113 'XMLDecl' => \&XMLDecl,
114 'Entity' => \&Entity,
115 'Doctype' => \&Doctype,
116 'Default' => \&Default
117 });
118 $self->{'parser'} = $parser;
119 $self->{'rdf_desc'} = undef;
120 $self->{'about_key'} = undef;
121 $self->{'metadata_name'} = undef;
122 $self->{'metadata_value'} = undef;
123
124 $self->{'convert_to'} = "PagedImage";
125 my $secondary_plugin_options = $self->{'secondary_plugin_options'};
126
127 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
128 $secondary_plugin_options->{'PagedImagePlugin'} = [];
129 }
130 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
131 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
132 push(@$pagedimg_options, "-create_thumbnail", "true", "-create_screenview", "true");
133
134
135 $self = bless $self, $class;
136 $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
137 return $self;
138}
139
140sub get_default_process_exp {
141 my $self = shift (@_);
142
143 return q^(?i)\.rdf$^;
144}
145
146sub get_default_block_exp {
147 return q^(?i)\.(jpg|jpeg|gif)$^;
148}
149
150
151
152sub rdf_desc_to_id
153{
154 my $self = shift (@_);
155 my ($rdf_desc) = @_;
156
157 my $rdf_id = {};
158
159 # initialise any .cpd (=complex multi page) structures
160
161 foreach my $about_key (keys %{$rdf_desc}) {
162 if ($about_key =~ m/\.cpd$/) {
163 my $about = $rdf_desc->{$about_key};
164 my $id = $about->{'dc:identifier'};
165
166 if ($id =~ m/^\s*$/) {
167 # missing id, make one up based on about attribute
168
169 my ($tailname, $dirname, $suffix)
170 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
171
172 $id = "about:$tailname";
173 }
174
175 $rdf_id->{$id} = $about;
176 $rdf_id->{$id}->{'ex:filename'} = $about_key;
177 $rdf_id->{$id}->{'ex:type'} = "complex";
178 $rdf_id->{$id}->{'pages'} = [];
179 }
180
181 }
182
183 # now add in *non* .cpd items
184
185 foreach my $about_key (keys %{$rdf_desc}) {
186 if ($about_key !~ m/\.cpd$/) {
187 my $about = $rdf_desc->{$about_key};
188 my $id = $about->{'dc:identifier'};
189
190
191 if ($id =~ m/^\s*$/) {
192 # missing id, make one up based on about attribute
193
194 my ($tailname, $dirname, $suffix)
195 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
196
197 $id = "about:$tailname";
198 }
199
200 if (defined $rdf_id->{$id}) {
201 $about->{'ex:filename'} = $about_key;
202
203 # dealing with complex multi-page situation
204 # Add to existing structure
205
206 my $pages = $rdf_id->{$id}->{'pages'};
207 push(@$pages,$about)
208 }
209 else {
210 # New entry
211
212 $rdf_id->{$id} = $about;
213 $rdf_id->{$id}->{'ex:type'} = "simple";
214 $rdf_id->{$id}->{'ex:filename'} = $about_key;
215 }
216 }
217
218 }
219
220 return $rdf_id;
221}
222
223
224sub metadata_table_txt_file
225{
226 my $self = shift (@_);
227 my ($output_root,$page_num) = @_;
228
229 my $txt_filename = $output_root."_page_$page_num.txt";
230
231 my ($tailname, $dirname, $suffix)
232 = &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");
233
234 my $txt_file = "$tailname.$suffix";
235
236 return $txt_file;
237}
238
239
240sub output_metadata_table
241{
242 my $self = shift (@_);
243 my ($page,$page_num,$tmp_dirname,$txt_file) = @_;
244
245 my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);
246
247 open(TOUT,">$txt_filename")
248 || die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
249
250 print TOUT $page->{'MetadataTable'};
251 delete $page->{'MetadataTable'};
252
253 close (TOUT);
254}
255
256
257sub rdf_id_to_item_file
258{
259 my $self = shift (@_);
260 my ($rdf_id,$tmp_dirname,$output_root) = @_;
261
262 my $item_file_list = [];
263
264 foreach my $id (keys %{$rdf_id}) {
265
266 my $id_safe = $id;
267 $id_safe =~ s/ /-/g;
268
269 my $output_filename = $output_root."_$id_safe.item";
270 open(FOUT,">$output_filename")
271 || die "Unable to open $output_filename: $!\n";
272
273
274 print FOUT "<PagedDocument>\n";
275
276 my $rdf_doc = $rdf_id->{$id};
277 foreach my $metadata_name (keys %$rdf_doc) {
278
279
280 next if ($metadata_name eq "pages");
281
282 my $metadata_value = $rdf_doc->{$metadata_name};
283
284 # convert ns:name to ns.Name
285 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
286
287 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
288 }
289
290 if ($rdf_doc->{'ex:type'} eq "complex") {
291 my $pages = $rdf_doc->{'pages'};
292 my $page_num = 1;
293
294 foreach my $page (@$pages) {
295
296 my $imgfile = $page->{'ex:filename'};
297 if ($imgfile =~ m/(http|ftp):/) {
298 $imgfile = "empty.jpg";
299 }
300 else {
301 $imgfile = &util::filename_cat("..","import",$imgfile);
302 }
303
304 my $txt_file
305 = $self->metadata_table_txt_file($output_root,$page_num);
306
307 $self->output_metadata_table($page,$page_num,
308 $tmp_dirname,$txt_file);
309
310
311 print FOUT " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
312
313 foreach my $metadata_name (keys %$page) {
314
315 my $metadata_value = $rdf_doc->{$metadata_name};
316 # convert ns:name to ns.Name
317 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
318
319 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
320 }
321
322
323 $page_num++;
324
325
326 print FOUT " </Page>\n";
327 }
328 }
329 else {
330 # simple
331 # duplicate top-level metadata for now plus image to bind to
332
333 my $imgfile = $rdf_doc->{'ex:filename'};
334 if ($imgfile =~ m/(http|ftp):/) {
335 $imgfile = "empty.jpg";
336 }
337 else {
338 $imgfile = &util::filename_cat("..","import",$imgfile);
339 $imgfile = "../import/".$imgfile;
340 }
341
342
343 my $txt_file = $self->metadata_table_txt_file($output_root,1);
344 $self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);
345
346 print FOUT " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
347 foreach my $metadata_name (keys %$rdf_doc) {
348
349 my $metadata_value = $rdf_doc->{$metadata_name};
350
351 # convert ns:name to ns.Name
352 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
353
354 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
355 }
356 print FOUT " </Page>\n";
357
358 }
359
360 print FOUT "</PagedDocument>\n";
361 close(FOUT);
362
363 push(@$item_file_list,$output_filename);
364
365 }
366
367
368 return $item_file_list;
369}
370
371
372
373sub xml_area_convert_file
374{
375 my $self = shift (@_);
376 my ($input_filename, $tmp_dirname, $output_root) = @_;
377
378 eval {
379 # Build up hash table/tree of all records
380
381 my $xslt = $self->{'xslt'};
382 if (defined $xslt && ($xslt ne "")) {
383 # perform xslt
384 my $transformed_xml = $self->apply_xslt($xslt,$input_filename);
385
386 # feed transformed file (now in memory as string) into XML parser
387 $self->{'parser'}->parse($transformed_xml);
388 }
389 else {
390 $self->{'parser'}->parsefile($input_filename);
391 }
392 };
393
394 if ($@) {
395
396 # parsefile may either croak somewhere in XML::Parser (e.g. because
397 # the document is not well formed) or die somewhere in XMLPlug or a
398 # derived plugin (e.g. because we're attempting to process a
399 # document whose DOCTYPE is not meant for this plugin). For the
400 # first case we'll print a warning and continue, for the second
401 # we'll just continue quietly
402
403 print STDERR "**** Error is: $@\n";
404
405 my $file = $self->{'file'};
406
407 my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
408 if (defined $msg) {
409 my $outhandle = $self->{'outhandle'};
410 my $plugin_name = ref ($self);
411 print $outhandle "$plugin_name failed to process $file ($msg)\n";
412 }
413
414 my $gli = $self->{'gli'};
415
416 # reset ourself for the next document
417 $self->{'section_level'}=0;
418 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
419 return ("fail",undef); # error during processing
420 }
421
422 my $rdf_desc = $self->{'rdf_desc'};
423
424# foreach my $about_key (keys %{$rdf_desc}) {
425# my $about = $rdf_desc->{$about_key};
426# foreach my $metadata_name (keys %{$about}) {
427#
428# my $metadata_value = $about->{$metadata_name};
429## print STDERR " $metadata_name: $metadata_value\n";
430# }
431# }
432
433
434 # Merge entries with same name
435
436
437 my $merged_rdf_id = $self->rdf_desc_to_id($rdf_desc);
438
439# foreach my $about_key (keys %{$merged_rdf_id}) {
440# my $about = $merged_rdf_id->{$about_key};
441# foreach my $metadata_name (keys %{$about}) {
442#
443# my $metadata_value = $about->{$metadata_name};
444## print STDERR " $metadata_name: $metadata_value\n";
445# }
446# }
447
448
449
450 my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,$tmp_dirname,
451 $output_root);
452
453 return ("item",$item_files);
454}
455
456
457# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
458# to CONTENTdm
459#
460# A better (i.e. in the future) solution would be to see if this can be
461# shifted into gsConvert.pl so there is no need to override the
462# default tmp_area_convert_file()
463
464
465sub tmp_area_convert_file {
466 my $self = shift (@_);
467 my ($output_ext, $input_filename, $textref) = @_;
468
469 # is textref ever used?!?
470
471 my $outhandle = $self->{'outhandle'};
472 my $convert_to = $self->{'convert_to'};
473 my $failhandle = $self->{'failhandle'};
474 my $convert_to_ext = $self->{'convert_to_ext'};
475
476 # softlink to collection tmp dir
477 my $tmp_dirname
478 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
479 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
480
481 # derive tmp filename from input filename
482 my ($tailname, $dirname, $suffix)
483 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
484
485 # Remove any white space from filename -- no risk of name collision, and
486 # makes later conversion by utils simpler. Leave spaces in path...
487 # tidy up the filename with space, dot, hyphen between
488 $tailname =~ s/\s+//g;
489 $tailname =~ s/\.+//g;
490 $tailname =~ s/\-+//g;
491 $suffix = lc($suffix);
492 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
493
494 &util::soft_link($input_filename, $tmp_filename);
495 my $verbosity = $self->{'verbosity'};
496 if ($verbosity > 0) {
497 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
498 }
499
500 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
501
502 # call xml_area_convert_file rather than gsConvert.pl
503
504 my $output_root = &util::filename_cat($tmp_dirname, "$tailname");
505
506 my ($output_type,$item_files)
507 = $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);
508
509
510 my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
511 my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");
512
513 print STDERR "***** Setting up fake filename $fakeimg_filename -> $fakeimg_tmp_filename\n";
514
515 &util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);
516
517 # continue as before ...
518
519 # remove symbolic link to original file
520 &util::rm($tmp_filename);
521
522 # Check STDERR here
523 chomp $output_type;
524 if ($output_type eq "fail") {
525 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
526 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
527 $self->{'num_not_processed'} ++;
528 if (-s "$errlog") {
529 open(ERRLOG, "$errlog");
530 while (<ERRLOG>) {
531 print $outhandle "$_";
532 }
533 print $outhandle "\n";
534 close ERRLOG;
535 }
536 &util::rm("$errlog") if (-e "$errlog");
537 return [];
538 }
539
540 # store the *actual* output type and return the output filename
541 # it's possible we requested conversion to html, but only to text succeeded
542 #$self->{'convert_to_ext'} = $output_type;
543 if ($output_type =~ /html/i) {
544 $self->{'converted_to'} = "HTML";
545 } elsif ($output_type =~ /te?xt/i) {
546 $self->{'converted_to'} = "Text";
547 } elsif ($output_type =~ /item/i){
548 $self->{'converted_to'} = "PagedImage";
549 }
550
551
552 return $item_files;
553}
554
555
556
557
558# Override ConvertBinaryFile read
559# Needed so multiple .item files generate are sent down secondary plugin
560
561sub read {
562 my $self = shift (@_);
563 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
564
565 $self->{'gli'} = $gli;
566 $self->{'file'} = $file;
567
568 my $successful_rv = -1;
569
570 my $outhandle = $self->{'outhandle'};
571
572 my ($block_status,$filename) = $self->read_block(@_);
573 return $block_status if ((!defined $block_status) || ($block_status==0));
574 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
575
576 # read() deviates at this point from ConvertBinaryFile
577 # Need to work with list of filename returned
578
579 my $output_ext = $self->{'convert_to_ext'};
580 my $conv_filename_list = [];
581
582 $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename);
583
584 if (scalar(@$conv_filename_list)==0) {
585 return -1;
586 } # had an error, will be passed down pipeline
587
588 foreach my $conv_filename ( @$conv_filename_list ) {
589 if (! -e "$conv_filename") {return -1;}
590 $self->{'conv_filename'} = $conv_filename; # is this used anywhere?
591 $self->convert_post_process($conv_filename);
592
593 my $secondary_plugins = $self->{'secondary_plugins'};
594 my $num_secondary_plugins = scalar(keys %$secondary_plugins);
595
596 if ($num_secondary_plugins == 0) {
597 print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
598 return 0; # effectively block it
599 }
600
601 my @plugin_names = keys %$secondary_plugins;
602 my $plugin_name = shift @plugin_names;
603
604 if ($num_secondary_plugins > 1) {
605 print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name\n.";
606 }
607
608 my $secondary_plugin = $secondary_plugins->{$plugin_name};
609
610 # note: metadata is not carried on to the next level
611 my ($rv,$doc_obj)
612 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
613 $metadata, $processor, $maxdocs, $total_count,
614 $gli);
615
616 if ((defined $rv) && ($rv>=0)) {
617 $successful_rv = 1;
618 }
619
620 # Override previous gsdlsourcefilename set by secondary plugin
621 my $collect_file = &util::filename_within_collection($filename);
622 my $collect_conv_file = &util::filename_within_collection($conv_filename);
623 $doc_obj->set_source_filename ($collect_file);
624 $doc_obj->set_converted_filename($collect_conv_file);
625
626 my ($filemeta) = $file =~ /([^\\\/]+)$/;
627 $self->set_Source_metadata($doc_obj, $filemeta);
628 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
629 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
630
631 if ($self->{'cover_image'}) {
632 $self->associate_cover_image($doc_obj, $filename);
633 }
634
635 # do plugin specific processing of doc_obj
636 unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
637 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
638 return -1;
639 }
640 # do any automatic metadata extraction
641 $self->auto_extract_metadata ($doc_obj);
642
643 # have we found a Title??
644 $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);
645
646 # add an OID
647 $self->add_OID($doc_obj);
648 # process the document
649 $processor->process($doc_obj);
650
651 $self->{'num_processed'} ++;
652 }
653
654 return $successful_rv;
655}
656
657sub process {
658
659}
660# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
661sub process_old {
662 my $self = shift (@_);
663 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
664
665
666 my $secondary_plugins = $self->{'secondary_plugins'};
667 my @plugin_names = keys %$secondary_plugins;
668 my $plugin_name = shift @plugin_names; # already checked there is only one
669
670 my $secondary_plugin = $secondary_plugins->{$plugin_name};
671
672 my $result = $secondary_plugin->process(@_);
673
674 return $result;
675}
676
677
678# Called at the beginning of the XML document.
679sub xml_start_document {
680 my $self = shift(@_);
681 my ($expat) = @_;
682
683 $self->{'rdf_desc'} = {};
684}
685
686
687# Called for DOCTYPE declarations - use die to bail out if this doctype
688# is not meant for this plugin
689sub xml_doctype {
690 my $self = shift(@_);
691 my ($expat, $name, $sysid, $pubid, $internal) = @_;
692
693 die "" if ($name !~ /^rdf:RDF$/);
694
695 my $outhandle = $self->{'outhandle'};
696 print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
697
698}
699
700# Called for every start tag. The $_ variable will contain a copy of the
701# tag and the %_ variable will contain the element's attributes.
702sub xml_start_tag {
703 my $self = shift(@_);
704 my ($expat, $element) = @_;
705
706 if ($element eq "rdf:Description") {
707
708 my $about_key = $_{'about'};
709
710 my $rdf_desc = $self->{'rdf_desc'};
711 $rdf_desc->{$about_key} = {};
712
713 $self->{'about_key'} = $about_key;
714 $self->{'index_text'} = "";
715 $self->{'pp_text'} = "<table width=\"100%\">\n";
716
717
718 }
719 elsif (defined $self->{'about_key'}) {
720 $self->{'metadata_name'} = $element;
721 $self->{'metadata_value'} = "";
722 }
723
724}
725
726# Called for every end tag. The $_ variable will contain a copy of the tag.
727sub xml_end_tag {
728 my $self = shift(@_);
729 my ($expat, $element) = @_;
730
731 if ($element eq "rdf:Description") {
732 $self->{'pp_text'} .= "</table>\n";
733 ## ghtml::htmlsafe($self->{'pp_text'});
734
735
736 my $about_key = $self->{'about_key'};
737 my $about = $self->{'rdf_desc'}->{$about_key};
738 $about->{'IndexText'} = $self->{'index_text'};
739 $about->{'MetadataTable'} = $self->{'pp_text'};
740
741
742 $self->{'about_key'} = undef;
743 $self->{'index_text'} = undef;
744 $self->{'pp_text'} = undef;
745
746 }
747 elsif (defined $self->{'metadata_name'}) {
748 my $metadata_name = $self->{'metadata_name'};
749 if ($element eq $metadata_name) {
750 my $metadata_value = $self->{'metadata_value'};
751
752 my $about_key = $self->{'about_key'};
753 my $about = $self->{'rdf_desc'}->{$about_key};
754 $about->{$metadata_name} = $metadata_value;
755
756 $self->{'index_text'} .= "$metadata_value\n";
757 $self->{'pp_text'} .= " <tr><td>$metadata_name</td><td>$metadata_value</td></tr>\n";
758
759 $self->{'metadata_name'} = undef;
760 $self->{'metadata_value'} = undef;
761 }
762 }
763}
764
765# Called just before start or end tags with accumulated non-markup text in
766# the $_ variable.
767sub xml_text {
768 my $self = shift(@_);
769 my ($expat) = @_;
770
771 if (defined $self->{'metadata_name'}) {
772 $self->{'metadata_value'} .= $_;
773 }
774}
775
776# Called at the end of the XML document.
777sub xml_end_document {
778 my $self = shift(@_);
779 my ($expat) = @_;
780
781}
782
783
7841;
Note: See TracBrowser for help on using the repository browser.