source: gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm@ 18406

Last change on this file since 18406 was 18406, checked in by ak19, 15 years ago

Modified srcreplaceable plugins (plugins which operate on docs where the source file can be replaced with their converted htmls) to set the file_rename_method to none for secondary plugins (for Text, HTML, and PagedImage plugins) so that the file is not renamed several times.

  • Property svn:keywords set to Author Date Id Revision
File size: 21.2 KB
Line 
1###########################################################################
2#
3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25package CONTENTdmPlugin;
26
27use ConvertBinaryFile;
28use ReadXMLFile;
29
30use unicode;
31use ghtml;
32
33use strict;
34no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37use XMLParser;
38
39# inherit ReadXMLFile for the apply_xslt method
40sub BEGIN {
41 @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
42}
43
44
45my $convert_to_list =
46 [ { 'name' => "auto",
47 'desc' => "{ConvertBinaryFile.convert_to.auto}" },
48 { 'name' => "html",
49 'desc' => "{ConvertBinaryFile.convert_to.html}" },
50 { 'name' => "text",
51 'desc' => "{ConvertBinaryFile.convert_to.text}" },
52 { 'name' => "pagedimg",
53 'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
54 ];
55
56
57
58my $arguments =
59 [
60 { 'name' => "convert_to",
61 'desc' => "{ConvertBinaryFile.convert_to}",
62 'type' => "enum",
63 'reqd' => "yes",
64 'list' => $convert_to_list,
65 'deft' => "html" },
66 { 'name' => "xslt",
67 'desc' => "{ReadXMLFile.xslt}",
68 'type' => "string",
69 'deft' => "",
70 'reqd' => "no" },
71 { 'name' => "process_exp",
72 'desc' => "{BasePlugin.process_exp}",
73 'type' => "regexp",
74 'deft' => &get_default_process_exp(),
75 'reqd' => "no" },
76 { 'name' => "block_exp",
77 'desc' => "{BasePlugin.block_exp}",
78 'type' => "regexp",
79 'deft' => &get_default_block_exp() }
80];
81
82my $options = { 'name' => "CONTENTdmPlugin",
83 'desc' => "{CONTENTdmPlugin.desc}",
84 'abstract' => "no",
85 'inherits' => "yes",
86 # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
87 'srcreplaceable' => "no",
88 'args' => $arguments };
89
90sub new {
91 my ($class) = shift (@_);
92 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
93 push(@$pluginlist, $class);
94
95 push(@$inputargs,"-title_sub");
96 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
97
98 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
99 push(@{$hashArgOptLists->{"OptList"}},$options);
100
101 my @arg_array = @$inputargs;
102 my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
103
104 if ($self->{'info_only'}) {
105 # don't worry about any options etc
106 return bless $self, $class;
107 }
108
109 my $parser = new XML::Parser('Style' => 'Stream',
110 'Pkg' => 'CONTENTdmPlugin',
111 'PluginObj' => $self,
112 'Handlers' => {'Char' => \&Char,
113 'XMLDecl' => \&XMLDecl,
114 'Entity' => \&Entity,
115 'Doctype' => \&Doctype,
116 'Default' => \&Default
117 });
118 $self->{'parser'} = $parser;
119 $self->{'rdf_desc'} = undef;
120 $self->{'about_key'} = undef;
121 $self->{'metadata_name'} = undef;
122 $self->{'metadata_value'} = undef;
123
124 $self->{'convert_to'} = "PagedImage";
125 my $secondary_plugin_options = $self->{'secondary_plugin_options'};
126
127 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
128 $secondary_plugin_options->{'PagedImagePlugin'} = [];
129 }
130 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
131 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
132 push(@$pagedimg_options, "-create_thumbnail", "true", "-create_screenview", "true");
133 push(@$pagedimg_options, "-file_rename_method", "none");
134
135 $self = bless $self, $class;
136 $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
137 return $self;
138}
139
140sub get_default_process_exp {
141 my $self = shift (@_);
142
143 return q^(?i)\.rdf$^;
144}
145
146sub get_default_block_exp {
147 return q^(?i)\.(jpg|jpeg|gif)$^;
148}
149
150
151
152sub rdf_desc_to_id
153{
154 my $self = shift (@_);
155 my ($rdf_desc) = @_;
156
157 my $rdf_id = {};
158
159 # initialise any .cpd (=complex multi page) structures
160
161 foreach my $about_key (keys %{$rdf_desc}) {
162 if ($about_key =~ m/\.cpd$/) {
163 my $about = $rdf_desc->{$about_key};
164 my $id = $about->{'dc:identifier'};
165
166 if ($id =~ m/^\s*$/) {
167 # missing id, make one up based on about attribute
168
169 my ($tailname, $dirname, $suffix)
170 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
171
172 $id = "about:$tailname";
173 }
174
175 $rdf_id->{$id} = $about;
176 $rdf_id->{$id}->{'ex:filename'} = $about_key;
177 $rdf_id->{$id}->{'ex:type'} = "complex";
178 $rdf_id->{$id}->{'pages'} = [];
179 }
180
181 }
182
183 # now add in *non* .cpd items
184
185 foreach my $about_key (keys %{$rdf_desc}) {
186 if ($about_key !~ m/\.cpd$/) {
187 my $about = $rdf_desc->{$about_key};
188 my $id = $about->{'dc:identifier'};
189
190
191 if ($id =~ m/^\s*$/) {
192 # missing id, make one up based on about attribute
193
194 my ($tailname, $dirname, $suffix)
195 = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
196
197 $id = "about:$tailname";
198 }
199
200 if (defined $rdf_id->{$id}) {
201 $about->{'ex:filename'} = $about_key;
202
203 # dealing with complex multi-page situation
204 # Add to existing structure
205
206 my $pages = $rdf_id->{$id}->{'pages'};
207 push(@$pages,$about)
208 }
209 else {
210 # New entry
211
212 $rdf_id->{$id} = $about;
213 $rdf_id->{$id}->{'ex:type'} = "simple";
214 $rdf_id->{$id}->{'ex:filename'} = $about_key;
215 }
216 }
217
218 }
219
220 return $rdf_id;
221}
222
223
224sub metadata_table_txt_file
225{
226 my $self = shift (@_);
227 my ($output_root,$page_num) = @_;
228
229 my $txt_filename = $output_root."_page_$page_num.txt";
230
231 my ($tailname, $dirname, $suffix)
232 = &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");
233
234 my $txt_file = "$tailname.$suffix";
235
236 return $txt_file;
237}
238
239
240sub output_metadata_table
241{
242 my $self = shift (@_);
243 my ($page,$page_num,$tmp_dirname,$txt_file) = @_;
244
245 my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);
246
247 open(TOUT,">$txt_filename")
248 || die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
249
250 print TOUT $page->{'MetadataTable'};
251 delete $page->{'MetadataTable'};
252
253 close (TOUT);
254}
255
256
257sub rdf_id_to_item_file
258{
259 my $self = shift (@_);
260 my ($rdf_id,$tmp_dirname,$output_root) = @_;
261
262 my $item_file_list = [];
263
264 foreach my $id (keys %{$rdf_id}) {
265
266 my $id_safe = $id;
267 $id_safe =~ s/ /-/g;
268
269 my $output_filename = $output_root."_$id_safe.item";
270 open(FOUT,">$output_filename")
271 || die "Unable to open $output_filename: $!\n";
272
273
274 print FOUT "<PagedDocument>\n";
275
276 my $rdf_doc = $rdf_id->{$id};
277 foreach my $metadata_name (keys %$rdf_doc) {
278
279
280 next if ($metadata_name eq "pages");
281
282 my $metadata_value = $rdf_doc->{$metadata_name};
283
284 # convert ns:name to ns.Name
285 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
286
287 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
288 }
289
290 if ($rdf_doc->{'ex:type'} eq "complex") {
291 my $pages = $rdf_doc->{'pages'};
292 my $page_num = 1;
293
294 foreach my $page (@$pages) {
295
296 my $imgfile = $page->{'ex:filename'};
297 if ($imgfile =~ m/(http|ftp):/) {
298 $imgfile = "empty.jpg";
299 }
300 else {
301 $imgfile = &util::filename_cat("..","import",$imgfile);
302 }
303
304 my $txt_file
305 = $self->metadata_table_txt_file($output_root,$page_num);
306
307 $self->output_metadata_table($page,$page_num,
308 $tmp_dirname,$txt_file);
309
310
311 print FOUT " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
312
313 foreach my $metadata_name (keys %$page) {
314
315 my $metadata_value = $rdf_doc->{$metadata_name};
316 # convert ns:name to ns.Name
317 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
318
319 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
320 }
321
322
323 $page_num++;
324
325
326 print FOUT " </Page>\n";
327 }
328 }
329 else {
330 # simple
331 # duplicate top-level metadata for now plus image to bind to
332
333 my $imgfile = $rdf_doc->{'ex:filename'};
334 if ($imgfile =~ m/(http|ftp):/) {
335 $imgfile = "empty.jpg";
336 }
337 else {
338 $imgfile = &util::filename_cat("..","import",$imgfile);
339 $imgfile = "../import/".$imgfile;
340 }
341
342
343 my $txt_file = $self->metadata_table_txt_file($output_root,1);
344 $self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);
345
346 print FOUT " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
347 foreach my $metadata_name (keys %$rdf_doc) {
348
349 my $metadata_value = $rdf_doc->{$metadata_name};
350
351 # convert ns:name to ns.Name
352 $metadata_name =~ s/^(.*?):(.*)/$1\.\u$2/;
353
354 print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
355 }
356 print FOUT " </Page>\n";
357
358 }
359
360 print FOUT "</PagedDocument>\n";
361 close(FOUT);
362
363 push(@$item_file_list,$output_filename);
364
365 }
366
367
368 return $item_file_list;
369}
370
371
372
373sub xml_area_convert_file
374{
375 my $self = shift (@_);
376 my ($input_filename, $tmp_dirname, $output_root) = @_;
377
378 eval {
379 # Build up hash table/tree of all records
380
381 my $xslt = $self->{'xslt'};
382 if (defined $xslt && ($xslt ne "")) {
383 # perform xslt
384 my $transformed_xml = $self->apply_xslt($xslt,$input_filename);
385
386 # feed transformed file (now in memory as string) into XML parser
387 $self->{'parser'}->parse($transformed_xml);
388 }
389 else {
390 $self->{'parser'}->parsefile($input_filename);
391 }
392 };
393
394 if ($@) {
395
396 # parsefile may either croak somewhere in XML::Parser (e.g. because
397 # the document is not well formed) or die somewhere in XMLPlug or a
398 # derived plugin (e.g. because we're attempting to process a
399 # document whose DOCTYPE is not meant for this plugin). For the
400 # first case we'll print a warning and continue, for the second
401 # we'll just continue quietly
402
403 print STDERR "**** Error is: $@\n";
404
405 my $file = $self->{'file'};
406
407 my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
408 if (defined $msg) {
409 my $outhandle = $self->{'outhandle'};
410 my $plugin_name = ref ($self);
411 print $outhandle "$plugin_name failed to process $file ($msg)\n";
412 }
413
414 my $gli = $self->{'gli'};
415
416 # reset ourself for the next document
417 $self->{'section_level'}=0;
418 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
419 return ("fail",undef); # error during processing
420 }
421
422 my $rdf_desc = $self->{'rdf_desc'};
423
424# foreach my $about_key (keys %{$rdf_desc}) {
425# my $about = $rdf_desc->{$about_key};
426# foreach my $metadata_name (keys %{$about}) {
427#
428# my $metadata_value = $about->{$metadata_name};
429## print STDERR " $metadata_name: $metadata_value\n";
430# }
431# }
432
433
434 # Merge entries with same name
435
436
437 my $merged_rdf_id = $self->rdf_desc_to_id($rdf_desc);
438
439# foreach my $about_key (keys %{$merged_rdf_id}) {
440# my $about = $merged_rdf_id->{$about_key};
441# foreach my $metadata_name (keys %{$about}) {
442#
443# my $metadata_value = $about->{$metadata_name};
444## print STDERR " $metadata_name: $metadata_value\n";
445# }
446# }
447
448
449
450 my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,$tmp_dirname,
451 $output_root);
452
453 return ("item",$item_files);
454}
455
456
457# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
458# to CONTENTdm
459#
460# A better (i.e. in the future) solution would be to see if this can be
461# shifted into gsConvert.pl so there is no need to override the
462# default tmp_area_convert_file()
463
464
465sub tmp_area_convert_file {
466 my $self = shift (@_);
467 my ($output_ext, $input_filename, $textref) = @_;
468
469 # is textref ever used?!?
470
471 my $outhandle = $self->{'outhandle'};
472 my $convert_to = $self->{'convert_to'};
473 my $failhandle = $self->{'failhandle'};
474 my $convert_to_ext = $self->{'convert_to_ext'};
475
476 # softlink to collection tmp dir
477 my $tmp_dirname
478 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
479 &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
480
481 # derive tmp filename from input filename
482 my ($tailname, $dirname, $suffix)
483 = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
484
485 # Remove any white space from filename -- no risk of name collision, and
486 # makes later conversion by utils simpler. Leave spaces in path...
487 # tidy up the filename with space, dot, hyphen between
488 $tailname =~ s/\s+//g;
489 $tailname =~ s/\.+//g;
490 $tailname =~ s/\-+//g;
491
492 $tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
493 $suffix = lc($suffix);
494 my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
495
496 &util::soft_link($input_filename, $tmp_filename);
497 my $verbosity = $self->{'verbosity'};
498 if ($verbosity > 0) {
499 print $outhandle "Converting $tailname$suffix to $convert_to format\n";
500 }
501
502 my $errlog = &util::filename_cat($tmp_dirname, "err.log");
503
504 # call xml_area_convert_file rather than gsConvert.pl
505
506 my $output_root = &util::filename_cat($tmp_dirname, "$tailname");
507
508 my ($output_type,$item_files)
509 = $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);
510
511
512 my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
513 my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");
514
515 print STDERR "***** Setting up fake filename $fakeimg_filename -> $fakeimg_tmp_filename\n";
516
517 &util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);
518
519 # continue as before ...
520
521 # remove symbolic link to original file
522 &util::rm($tmp_filename);
523
524 # Check STDERR here
525 chomp $output_type;
526 if ($output_type eq "fail") {
527 print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
528 print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
529 $self->{'num_not_processed'} ++;
530 if (-s "$errlog") {
531 open(ERRLOG, "$errlog");
532 while (<ERRLOG>) {
533 print $outhandle "$_";
534 }
535 print $outhandle "\n";
536 close ERRLOG;
537 }
538 &util::rm("$errlog") if (-e "$errlog");
539 return [];
540 }
541
542 # store the *actual* output type and return the output filename
543 # it's possible we requested conversion to html, but only to text succeeded
544 #$self->{'convert_to_ext'} = $output_type;
545 if ($output_type =~ /html/i) {
546 $self->{'converted_to'} = "HTML";
547 } elsif ($output_type =~ /te?xt/i) {
548 $self->{'converted_to'} = "Text";
549 } elsif ($output_type =~ /item/i){
550 $self->{'converted_to'} = "PagedImage";
551 }
552
553
554 return $item_files;
555}
556
557
558
559
560# Override ConvertBinaryFile read
561# Needed so multiple .item files generated are sent down secondary plugin
562
563sub read {
564 my $self = shift (@_);
565 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
566
567 $self->{'gli'} = $gli;
568 $self->{'file'} = $file;
569
570 my $successful_rv = -1;
571
572 my $outhandle = $self->{'outhandle'};
573
574 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
575 return undef unless $self->can_process_this_file($filename_full_path);
576
577 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
578
579 # read() deviates at this point from ConvertBinaryFile
580 # Need to work with list of filename returned
581
582 my $output_ext = $self->{'convert_to_ext'};
583 my $conv_filename_list = [];
584
585 $conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename_full_path);
586
587 if (scalar(@$conv_filename_list)==0) {
588 return -1;
589 } # had an error, will be passed down pipeline
590
591 foreach my $conv_filename ( @$conv_filename_list ) {
592 if (! -e "$conv_filename") {return -1;}
593 $self->{'conv_filename'} = $conv_filename; # is this used anywhere?
594 $self->convert_post_process($conv_filename);
595
596 my $secondary_plugins = $self->{'secondary_plugins'};
597 my $num_secondary_plugins = scalar(keys %$secondary_plugins);
598
599 if ($num_secondary_plugins == 0) {
600 print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
601 return 0; # effectively block it
602 }
603
604 my @plugin_names = keys %$secondary_plugins;
605 my $plugin_name = shift @plugin_names;
606
607 if ($num_secondary_plugins > 1) {
608 print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name\n.";
609 }
610
611 my $secondary_plugin = $secondary_plugins->{$plugin_name};
612
613 # note: metadata is not carried on to the next level
614 my ($rv,$doc_obj)
615 = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
616 $block_hash, $metadata, $processor, $maxdocs, $total_count,
617 $gli);
618
619 if ((defined $rv) && ($rv>=0)) {
620 $successful_rv = 1;
621 }
622
623 # Override previous gsdlsourcefilename set by secondary plugin
624 my $collect_file = &util::filename_within_collection($filename_full_path);
625 my $collect_conv_file = &util::filename_within_collection($conv_filename);
626 $doc_obj->set_source_filename ($collect_file, $self->{'file_rename_method'});
627 $doc_obj->set_converted_filename($collect_conv_file);
628
629 my ($filemeta) = $file =~ /([^\\\/]+)$/;
630 $self->set_Source_metadata($doc_obj, $filemeta);
631 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
632 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
633
634 if ($self->{'cover_image'}) {
635 $self->associate_cover_image($doc_obj, $filename_full_path);
636 }
637
638 # do plugin specific processing of doc_obj
639 unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
640 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
641 return -1;
642 }
643 # do any automatic metadata extraction
644 $self->auto_extract_metadata ($doc_obj);
645
646 # have we found a Title??
647 $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);
648
649 # add an OID
650 $self->add_OID($doc_obj);
651 # process the document
652 $processor->process($doc_obj);
653
654 $self->{'num_processed'} ++;
655 }
656
657 return $successful_rv;
658}
659
660sub process {
661
662}
663# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
664sub process_old {
665 my $self = shift (@_);
666 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
667
668
669 my $secondary_plugins = $self->{'secondary_plugins'};
670 my @plugin_names = keys %$secondary_plugins;
671 my $plugin_name = shift @plugin_names; # already checked there is only one
672
673 my $secondary_plugin = $secondary_plugins->{$plugin_name};
674
675 my $result = $secondary_plugin->process(@_);
676
677 return $result;
678}
679
680
681# Called at the beginning of the XML document.
682sub xml_start_document {
683 my $self = shift(@_);
684 my ($expat) = @_;
685
686 $self->{'rdf_desc'} = {};
687}
688
689
690# Called for DOCTYPE declarations - use die to bail out if this doctype
691# is not meant for this plugin
692sub xml_doctype {
693 my $self = shift(@_);
694 my ($expat, $name, $sysid, $pubid, $internal) = @_;
695
696 die "" if ($name !~ /^rdf:RDF$/);
697
698 my $outhandle = $self->{'outhandle'};
699 print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
700
701}
702
703# Called for every start tag. The $_ variable will contain a copy of the
704# tag and the %_ variable will contain the element's attributes.
705sub xml_start_tag {
706 my $self = shift(@_);
707 my ($expat, $element) = @_;
708
709 if ($element eq "rdf:Description") {
710
711 my $about_key = $_{'about'};
712
713 my $rdf_desc = $self->{'rdf_desc'};
714 $rdf_desc->{$about_key} = {};
715
716 $self->{'about_key'} = $about_key;
717 $self->{'index_text'} = "";
718 $self->{'pp_text'} = "<table width=\"100%\">\n";
719
720
721 }
722 elsif (defined $self->{'about_key'}) {
723 $self->{'metadata_name'} = $element;
724 $self->{'metadata_value'} = "";
725 }
726
727}
728
729# Called for every end tag. The $_ variable will contain a copy of the tag.
730sub xml_end_tag {
731 my $self = shift(@_);
732 my ($expat, $element) = @_;
733
734 if ($element eq "rdf:Description") {
735 $self->{'pp_text'} .= "</table>\n";
736 ## ghtml::htmlsafe($self->{'pp_text'});
737
738
739 my $about_key = $self->{'about_key'};
740 my $about = $self->{'rdf_desc'}->{$about_key};
741 $about->{'IndexText'} = $self->{'index_text'};
742 $about->{'MetadataTable'} = $self->{'pp_text'};
743
744
745 $self->{'about_key'} = undef;
746 $self->{'index_text'} = undef;
747 $self->{'pp_text'} = undef;
748
749 }
750 elsif (defined $self->{'metadata_name'}) {
751 my $metadata_name = $self->{'metadata_name'};
752 if ($element eq $metadata_name) {
753 my $metadata_value = $self->{'metadata_value'};
754
755 my $about_key = $self->{'about_key'};
756 my $about = $self->{'rdf_desc'}->{$about_key};
757 $about->{$metadata_name} = $metadata_value;
758
759 $self->{'index_text'} .= "$metadata_value\n";
760 $self->{'pp_text'} .= " <tr><td>$metadata_name</td><td>$metadata_value</td></tr>\n";
761
762 $self->{'metadata_name'} = undef;
763 $self->{'metadata_value'} = undef;
764 }
765 }
766}
767
768# Called just before start or end tags with accumulated non-markup text in
769# the $_ variable.
770sub xml_text {
771 my $self = shift(@_);
772 my ($expat) = @_;
773
774 if (defined $self->{'metadata_name'}) {
775 $self->{'metadata_value'} .= $_;
776 }
777}
778
779# Called at the end of the XML document.
780sub xml_end_document {
781 my $self = shift(@_);
782 my ($expat) = @_;
783
784}
785
786
7871;
Note: See TracBrowser for help on using the repository browser.