Context Navigation

source: gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm@ 15925

Last change on this file since 15925 was 15925, checked in by kjdon, 16 years ago
use the proper options for PagedImagePlugin
Property svn:keywords set to `Author Date Id Revision`
File size: 20.9 KB

Line
1	###########################################################################
2	#
3	# CONTENTdmPlugin.pm -- plugin that converts CONTENTdm RDF exports into paged-image (.item) documents
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999-2001 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25	package CONTENTdmPlugin;
26
27	use ConvertBinaryFile;
28	use ReadXMLFile;
29
30	use unicode;
31	use ghtml;
32
33	use strict;
34	no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37	use XMLParser;
38
# Set up inheritance at compile time. ConvertBinaryFile is the primary base
# class; ReadXMLFile is listed as well so that its apply_xslt method can be
# reached through method resolution.
BEGIN {
    @CONTENTdmPlugin::ISA = qw(ConvertBinaryFile ReadXMLFile);
}
43
44
# Allowed values for the -convert_to option. Each 'desc' is a key in braces
# rather than literal text.
my $convert_to_list = [
    {   name => 'auto',
        desc => '{ConvertBinaryFile.convert_to.auto}',
    },
    {   name => 'html',
        desc => '{ConvertBinaryFile.convert_to.html}',
    },
    {   name => 'text',
        desc => '{ConvertBinaryFile.convert_to.text}',
    },
    {   name => 'pagedimg',
        desc => '{ConvertBinaryFile.convert_to.pagedimg}',
    },
];
55
56
57
# Option descriptions contributed by this plugin. The defaults for
# process_exp and block_exp are computed by the subs defined further down
# in this file.
my $arguments = [
    {   name => 'convert_to',
        desc => '{ConvertBinaryFile.convert_to}',
        type => 'enum',
        reqd => 'yes',
        list => $convert_to_list,
        deft => 'html',
    },
    {   name => 'xslt',
        desc => '{ReadXMLFile.xslt}',
        type => 'string',
        deft => '',
        reqd => 'no',
    },
    {   name => 'process_exp',
        desc => '{BasePlugin.process_exp}',
        type => 'regexp',
        deft => get_default_process_exp(),
        reqd => 'no',
    },
    {   name => 'block_exp',
        desc => '{BasePlugin.block_exp}',
        type => 'regexp',
        deft => get_default_block_exp(),
    },
];
81
# Plugin description record; pushed onto the caller's "OptList" by new().
my $options = {
    name     => 'CONTENTdmPlugin',
    desc     => '{CONTENTdmPlugin.desc}',
    abstract => 'no',
    inherits => 'yes',

    # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose
    # source document cannot be replaced by a GS-generated html file.
    srcreplaceable => 'no',
    args           => $arguments,
};
89
# Constructor.
#
# Parameters:
#   $class           - class name being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - array ref of command-line style arguments; a
#                      "-title_sub" option and its pattern are appended
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries
#
# Returns the blessed plugin object. When the base constructor reports
# 'info_only', the object is returned straight away without a parser or
# secondary plugins.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # Pattern matching an optional leading "Page <n>" and/or a leading "1 ".
    push(@$inputargs, "-title_sub");
    push(@$inputargs, '^(Page\s+\d+)?(\s*1\s+)?');

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Stream-style parser. The plugin object is passed along as 'PluginObj'.
    my $parser = XML::Parser->new('Style' => 'Stream',
                                  'Pkg' => 'CONTENTdmPlugin',
                                  'PluginObj' => $self,
                                  'Handlers' => {'Char' => \&Char,
                                                 'XMLDecl' => \&XMLDecl,
                                                 'Entity' => \&Entity,
                                                 'Doctype' => \&Doctype,
                                                 'Default' => \&Default
                                  });
    $self->{'parser'} = $parser;

    # Parse state, filled in by the xml_* callbacks.
    $self->{'rdf_desc'} = undef;
    $self->{'about_key'} = undef;
    $self->{'metadata_name'} = undef;
    $self->{'metadata_value'} = undef;

    # The conversion target is always PagedImage, whatever -convert_to said.
    $self->{'convert_to'} = "PagedImage";
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}) {
        $secondary_plugin_options->{'PagedImagePlugin'} = [];
    }
    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
    push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    push(@$pagedimg_options, "-create_thumbnail", "true", "-create_screenview", "true");

    $self = bless $self, $class;
    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);
    return $self;
}
139
# Default process_exp: a case-insensitive match on file names ending in
# ".rdf". Returned as a pattern string, not a compiled regex.
sub get_default_process_exp {
    my ($self) = @_;

    return '(?i)\.rdf$';
}
145
# Default block_exp: a case-insensitive match on file names ending in
# .jpg, .jpeg or .gif. Returned as a pattern string, not a compiled regex.
#
# The alternation uses unescaped '|'. With escaped pipes the pattern would
# only match the literal text "jpg|jpeg|gif" and block nothing.
sub get_default_block_exp {
    return q^(?i)\.(jpg|jpeg|gif)$^;
}
149
150
151
# Work out the document id for one rdf:Description record.
# Uses the record's dc:identifier; when that is missing or blank, an id of
# the form "about:<name>" is made up from the about key with its directory
# and final extension removed.
sub _rdf_id_for_about
{
    my ($about_key, $about) = @_;

    my $id = $about->{'dc:identifier'};

    if (!defined $id || $id =~ m/^\s*$/) {
        # missing id, make one up based on about attribute
        my ($tailname, $dirname, $suffix)
            = &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");

        $id = "about:$tailname";
    }

    return $id;
}

# Regroup the parsed records, which are keyed by their about value, into
# documents keyed by id.
#
# Parameters:
#   $rdf_desc - hash ref: about key -> hash ref of metadata for that record
#
# Returns a hash ref: id -> record. Each record additionally carries
#   'ex:filename' - the about key it came from
#   'ex:type'     - "complex" for a .cpd record, otherwise "simple"
#   'pages'       - (complex only) array ref of the non-.cpd records that
#                   share the same id
#
# The record hashes of $rdf_desc are reused and modified, not copied.
# Keys are visited in sorted order so that the page order of a complex
# document does not depend on hash ordering.
sub rdf_desc_to_id
{
    my $self = shift (@_);
    my ($rdf_desc) = @_;

    my $rdf_id = {};

    my @about_keys = sort keys %{ $rdf_desc || {} };

    # initialise any .cpd (=complex multi page) structures
    foreach my $about_key (grep { m/\.cpd$/ } @about_keys) {
        my $about = $rdf_desc->{$about_key};
        my $id = _rdf_id_for_about($about_key, $about);

        $rdf_id->{$id} = $about;
        $rdf_id->{$id}->{'ex:filename'} = $about_key;
        $rdf_id->{$id}->{'ex:type'} = "complex";
        $rdf_id->{$id}->{'pages'} = [];
    }

    # now add in non .cpd items
    foreach my $about_key (grep { !m/\.cpd$/ } @about_keys) {
        my $about = $rdf_desc->{$about_key};
        my $id = _rdf_id_for_about($about_key, $about);

        my $existing = $rdf_id->{$id};

        if (defined $existing) {
            $about->{'ex:filename'} = $about_key;

            # dealing with complex multi-page situation
            # Add to existing structure
            my $pages = $existing->{'pages'};
            if (defined $pages) {
                push(@$pages, $about);
            }
            # else: the id is already taken by another simple record. The
            # earlier record is kept and this one is not added anywhere.
        }
        else {
            # New entry
            $rdf_id->{$id} = $about;
            $rdf_id->{$id}->{'ex:type'} = "simple";
            $rdf_id->{$id}->{'ex:filename'} = $about_key;
        }
    }

    return $rdf_id;
}
222
223
# Build the name (without directory) of the text file that holds the
# metadata table for one page.
#
# Parameters:
#   $output_root - path prefix; only its last path component ends up in
#                  the result
#   $page_num    - page number to embed in the name
#
# Returns "<last component of output_root>_page_<page_num>.txt".
sub metadata_table_txt_file
{
    my $self = shift (@_);
    my ($output_root,$page_num) = @_;

    my $txt_filename = $output_root."_page_$page_num.txt";

    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");

    # $suffix already starts with the dot, so no separator is added here
    # (joining with "." would produce "name..txt").
    my $txt_file = "$tailname$suffix";

    return $txt_file;
}
238
239
# Write a record's 'MetadataTable' entry to a text file and remove that
# entry from the record.
#
# Parameters:
#   $page        - hash ref holding a 'MetadataTable' entry; the entry is
#                  deleted from the hash once written
#   $page_num    - page number (not used in the body)
#   $tmp_dirname - directory to write into
#   $txt_file    - file name within $tmp_dirname
#
# Dies if the file cannot be opened or closed.
sub output_metadata_table
{
    my $self = shift (@_);
    my ($page,$page_num,$tmp_dirname,$txt_file) = @_;

    my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);

    open(my $tout, '>', $txt_filename)
        || die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";

    my $table = $page->{'MetadataTable'};
    print {$tout} (defined $table ? $table : "");
    delete $page->{'MetadataTable'};

    # buffered write errors only show up at close time
    close($tout)
        || die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
}
255
256
# Map a record's 'ex:filename' to the imgfile value used in an item file:
# "empty.jpg" for http:/ftp: locations, otherwise the file name placed
# under ../import.
sub _item_image_path
{
    my ($filename) = @_;

    if ($filename =~ m/(?:http|ftp):/) {
        return "empty.jpg";
    }

    return &util::filename_cat("..","import",$filename);
}

# Print one <Metadata> line per entry of $metadata to $fh. The 'pages'
# entry (the page list of a complex document) is skipped. Names of the form
# ns:name are written as ns.Name.
# NOTE(review): values are written without XML escaping -- confirm whether
# the reader of .item files expects that.
sub _print_item_metadata
{
    my ($fh, $metadata) = @_;

    foreach my $key (sort keys %$metadata) {
        next if ($key eq "pages");

        my $metadata_value = $metadata->{$key};
        $metadata_value = "" unless defined $metadata_value;

        # convert ns:name to ns.Name
        my $metadata_name = $key;
        $metadata_name =~ s/^([^:]*):(.)/${1}.\u$2/;

        print {$fh} " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
    }
}

# Write one .item file (a <PagedDocument>) per document in $rdf_id, plus
# one metadata-table text file per page.
#
# Parameters:
#   $rdf_id      - hash ref as returned by rdf_desc_to_id
#   $tmp_dirname - directory the per-page text files are written into
#   $output_root - path prefix for generated files; the item file for a
#                  document is "<output_root>_<id>.item", with spaces in
#                  the id replaced by '-'
#
# Returns an array ref of the item file names written, in sorted id order.
# Dies if an item file cannot be opened or closed.
#
# A "complex" document gets one <Page> per entry of its 'pages' list. A
# "simple" document gets a single <Page> that repeats its own metadata.
sub rdf_id_to_item_file
{
    my $self = shift (@_);
    my ($rdf_id,$tmp_dirname,$output_root) = @_;

    my $item_file_list = [];

    foreach my $id (sort keys %{$rdf_id}) {

        my $id_safe = $id;
        $id_safe =~ s/ /-/g;

        # Per-document prefix. It is also used for the page text files so
        # that two documents do not overwrite each other's page files.
        my $doc_root = $output_root."_$id_safe";

        my $output_filename = "$doc_root.item";
        open(my $fout, '>', $output_filename)
            || die "Unable to open $output_filename: $!\n";

        print {$fout} "<PagedDocument>\n";

        my $rdf_doc = $rdf_id->{$id};
        _print_item_metadata($fout, $rdf_doc);

        if ($rdf_doc->{'ex:type'} eq "complex") {
            my $pages = $rdf_doc->{'pages'};
            my $page_num = 1;

            foreach my $page (@$pages) {

                my $imgfile = _item_image_path($page->{'ex:filename'});

                my $txt_file
                    = $self->metadata_table_txt_file($doc_root,$page_num);

                # writes the table out and removes it from $page, so it
                # is not repeated in the page metadata below
                $self->output_metadata_table($page,$page_num,
                                             $tmp_dirname,$txt_file);

                print {$fout} " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";

                # the page's own metadata (not the parent document's)
                _print_item_metadata($fout, $page);

                print {$fout} " </Page>\n";

                $page_num++;
            }
        }
        else {
            # simple
            # duplicate top-level metadata for now plus image to bind to

            my $imgfile = _item_image_path($rdf_doc->{'ex:filename'});

            my $txt_file = $self->metadata_table_txt_file($doc_root,1);
            $self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);

            print {$fout} " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
            _print_item_metadata($fout, $rdf_doc);
            print {$fout} " </Page>\n";
        }

        print {$fout} "</PagedDocument>\n";
        close($fout)
            || die "Unable to close $output_filename: $!\n";

        push(@$item_file_list,$output_filename);
    }

    return $item_file_list;
}
370
371
372
# Parse one RDF file and turn its records into .item files.
#
# Parameters:
#   $input_filename - file to parse
#   $tmp_dirname    - directory for the generated per-page text files
#   $output_root    - path prefix for generated files
#
# Returns a two-element list: ("item", <array ref of item file names>) on
# success, or ("fail", undef) when parsing died.
sub xml_area_convert_file
{
    my ($self, $input_filename, $tmp_dirname, $output_root) = @_;

    # Build up hash table/tree of all records. The parser callbacks collect
    # them in $self->{'rdf_desc'}.
    eval {
        my $stylesheet = $self->{'xslt'};

        if (!defined $stylesheet || $stylesheet eq "") {
            $self->{'parser'}->parsefile($input_filename);
        }
        else {
            # perform xslt, then feed the transformed document (now in
            # memory as a string) into the XML parser
            my $transformed_xml = $self->apply_xslt($stylesheet, $input_filename);
            $self->{'parser'}->parse($transformed_xml);
        }
    };

    my $parse_error = $@;

    if ($parse_error) {
        # Parsing may croak inside XML::Parser (e.g. the document is not
        # well formed) or die in one of this plugin's callbacks (e.g. the
        # DOCTYPE is not meant for this plugin). Only a croak message is
        # reported on the plugin's output handle.

        print STDERR "**** Error is: $parse_error\n";

        my $file = $self->{'file'};

        my ($msg) = $parse_error =~ /Carp::croak\(\'(.*?)\'\)/;
        if (defined $msg) {
            my $outhandle = $self->{'outhandle'};
            my $plugin_name = ref ($self);
            print $outhandle "$plugin_name failed to process $file ($msg)\n";
        }

        my $gli = $self->{'gli'};

        # reset ourself for the next document
        $self->{'section_level'}=0;
        print STDERR "<ProcessingError n='$file'>\n" if ($gli);

        return ("fail",undef); # error during processing
    }

    # Merge entries with same name
    my $merged_rdf_id = $self->rdf_desc_to_id($self->{'rdf_desc'});

    my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,
                                                $tmp_dirname,
                                                $output_root);

    return ("item",$item_files);
}
455
456
# Override ConvertBinaryFile tmp_area_convert_file() to provide a solution
# specific to CONTENTdm: the input is converted by xml_area_convert_file()
# in this module rather than by gsConvert.pl.
#
# A better (i.e. in the future) solution would be to see if this can be
# shifted into gsConvert.pl so there is no need to override the
# default tmp_area_convert_file()
#
# Parameters:
#   $output_ext     - not used in the body
#   $input_filename - file to convert
#   $textref        - not used in the body
#
# Returns an array ref of generated item file names, or an empty array ref
# when the conversion failed.
sub tmp_area_convert_file {
    my $self = shift (@_);
    my ($output_ext, $input_filename, $textref) = @_;

    my $outhandle = $self->{'outhandle'};
    my $convert_to = $self->{'convert_to'};
    my $failhandle = $self->{'failhandle'};

    # softlink to collection tmp dir
    my $tmp_dirname
        = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);

    # derive tmp filename from input filename
    my ($tailname, $dirname, $suffix)
        = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");

    # Remove white space, dots and hyphens from the file name -- makes
    # later conversion by utils simpler. Leave spaces in path...
    $tailname =~ s/\s+//g;
    $tailname =~ s/\.+//g;
    $tailname =~ s/\-+//g;
    $suffix = lc($suffix);
    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");

    &util::soft_link($input_filename, $tmp_filename);
    my $verbosity = $self->{'verbosity'};
    if ($verbosity > 0) {
        print $outhandle "Converting $tailname$suffix to $convert_to format\n";
    }

    my $errlog = &util::filename_cat($tmp_dirname, "err.log");

    # call xml_area_convert_file rather than gsConvert.pl

    my $output_root = &util::filename_cat($tmp_dirname, "$tailname");

    my ($output_type,$item_files)
        = $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);

    # Items whose image location is an http:/ftp: address refer to
    # "empty.jpg", so link that file from the source directory into the
    # tmp directory.
    my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
    my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");

    print STDERR "***** Setting up fake filename $fakeimg_filename -> $fakeimg_tmp_filename\n";

    &util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);

    # continue as before ...

    # remove symbolic link to original file
    &util::rm($tmp_filename);

    # Check STDERR here
    chomp $output_type;
    if ($output_type eq "fail") {
        print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
        print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
        $self->{'num_not_processed'} ++;
        if (-s "$errlog") {
            # copy the error log to the output handle; an unreadable log
            # is simply skipped
            if (open(my $errlog_fh, '<', $errlog)) {
                while (my $line = <$errlog_fh>) {
                    print $outhandle "$line";
                }
                close($errlog_fh);
            }
            print $outhandle "\n";
        }
        &util::rm("$errlog") if (-e "$errlog");
        return [];
    }

    # store the actual output type and return the output filenames
    # it's possible we requested conversion to html, but only to text succeeded
    #$self->{'convert_to_ext'} = $output_type;
    if ($output_type =~ /html/i) {
        $self->{'converted_to'} = "HTML";
    } elsif ($output_type =~ /te?xt/i) {
        $self->{'converted_to'} = "Text";
    } elsif ($output_type =~ /item/i){
        $self->{'converted_to'} = "PagedImage";
    }

    return $item_files;
}
554
555
556
557
# Override ConvertBinaryFile read.
# Needed so that each of the multiple .item files generated from one input
# file is sent down the secondary plugin.
#
# Parameters: ($pluginfo, $base_dir, $file, $metadata, $processor,
#              $maxdocs, $total_count, $gli)
#
# Returns:
#   the status from read_block when that is undefined or 0;
#   -1 when conversion produced nothing, a converted file is missing,
#      process() failed, or no item was read successfully;
#    0 when no secondary plugin is available;
#    1 when at least one item was read by the secondary plugin.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    $self->{'gli'} = $gli;
    $self->{'file'} = $file;

    my $successful_rv = -1;

    my $outhandle = $self->{'outhandle'};

    my ($block_status,$filename) = $self->read_block(@_);
    return $block_status if ((!defined $block_status) || ($block_status==0));
    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up

    # read() deviates at this point from ConvertBinaryFile
    # Need to work with list of filename returned

    my $output_ext = $self->{'convert_to_ext'};
    my $conv_filename_list
        = $self->tmp_area_convert_file($output_ext, $filename);

    if (!defined $conv_filename_list || scalar(@$conv_filename_list)==0) {
        return -1;
    } # had an error, will be passed down pipeline

    foreach my $conv_filename ( @$conv_filename_list ) {
        if (! -e "$conv_filename") {return -1;}
        $self->{'conv_filename'} = $conv_filename; # is this used anywhere?
        $self->convert_post_process($conv_filename);

        my $secondary_plugins = $self->{'secondary_plugins'};
        my $num_secondary_plugins = scalar(keys %$secondary_plugins);

        if ($num_secondary_plugins == 0) {
            print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
            return 0; # effectively block it
        }

        my @plugin_names = keys %$secondary_plugins;
        my $plugin_name = shift @plugin_names;

        if ($num_secondary_plugins > 1) {
            print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name.\n";
        }

        my $secondary_plugin = $secondary_plugins->{$plugin_name};

        # note: metadata is not carried on to the next level
        my ($rv,$doc_obj)
            = $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
                                                    $metadata, $processor, $maxdocs, $total_count,
                                                    $gli);

        # Without a document object there is nothing to post-process;
        # move on to the next item instead of calling methods on undef.
        if (!defined $doc_obj) {
            print $outhandle "Warning: $plugin_name returned no document for $conv_filename. Skipping it\n";
            next;
        }

        if ((defined $rv) && ($rv>=0)) {
            $successful_rv = 1;
        }

        # Override previous gsdlsourcefilename set by secondary plugin
        my $collect_file = &util::filename_within_collection($filename);
        my $collect_conv_file = &util::filename_within_collection($conv_filename);
        $doc_obj->set_source_filename ($collect_file);
        $doc_obj->set_converted_filename($collect_conv_file);

        my ($filemeta) = $file =~ /([^\\\/]+)$/;
        $self->set_Source_metadata($doc_obj, $filemeta);
        $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
        $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));

        if ($self->{'cover_image'}) {
            $self->associate_cover_image($doc_obj, $filename);
        }

        # do plugin specific processing of doc_obj
        unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
            print STDERR "<ProcessingError n='$file'>\n" if ($gli);
            return -1;
        }
        # do any automatic metadata extraction
        $self->auto_extract_metadata ($doc_obj);

        # have we found a Title??
        $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);

        # add an OID
        $self->add_OID($doc_obj);
        # process the document
        $processor->process($doc_obj);

        $self->{'num_processed'} ++;
    }

    return $successful_rv;
}
656
# Plugin-specific processing hook called from read(). There is nothing to
# do here, so it just reports success.
#
# It must return a defined value: read() treats an undefined result as a
# processing error and returns -1, and an empty sub body yields undef in
# scalar context.
sub process {
    return 1;
}
# Do we need this? The secondary plugin's process would already have been
# called as part of read_into_doc_obj.
#
# Forwards all arguments after $self to the process method of the first
# secondary plugin and returns that call's result (evaluated in scalar
# context).
sub process_old {
    my ($self, @forwarded_args) = @_;

    my $secondary_plugins = $self->{'secondary_plugins'};

    # already checked there is only one
    my ($plugin_name) = keys %$secondary_plugins;

    return scalar($secondary_plugins->{$plugin_name}->process(@forwarded_args));
}
676
677
# Called at the beginning of the XML document.
# Starts with an empty record table so that nothing is carried over from a
# previously parsed file.
sub xml_start_document {
    my ($self, $expat) = @_;

    $self->{'rdf_desc'} = {};
}
685
686
# Called for DOCTYPE declarations. Dies (with an empty message) when the
# doctype name is not exactly "rdf:RDF", i.e. the document is not meant for
# this plugin. Otherwise announces the file on the output handle when
# verbosity is above 1.
sub xml_doctype {
    my ($self, $expat, $name, $sysid, $pubid, $internal) = @_;

    unless ($name =~ /^rdf:RDF$/) {
        die "";
    }

    my $outhandle = $self->{'outhandle'};
    if ($self->{'verbosity'} > 1) {
        print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n";
    }
}
699
# Called for every start tag. The $_ variable will contain a copy of the
# tag and the %_ variable will contain the element's attributes.
#
# An rdf:Description element opens a new record keyed by its 'about'
# attribute and resets the text accumulated for it. Any other element seen
# while a record is open starts a new metadata name/value pair.
sub xml_start_tag {
    my ($self, $expat, $element) = @_;

    if ($element eq "rdf:Description") {
        my $record_key = $_{'about'};

        my $records = $self->{'rdf_desc'};
        $records->{$record_key} = {};

        @{$self}{'about_key', 'index_text', 'pp_text'}
            = ($record_key, "", "<table width=\"100%\">\n");
    }
    elsif (defined $self->{'about_key'}) {
        @{$self}{'metadata_name', 'metadata_value'} = ($element, "");
    }
}
725
# Called for every end tag. The $_ variable will contain a copy of the tag.
#
# The end of rdf:Description closes the current record: the accumulated
# index text and HTML table are stored on it as 'IndexText' and
# 'MetadataTable', and the per-record state is cleared. The end of the
# metadata element currently being collected stores its value on the
# record and appends it to the index text and to the table.
sub xml_end_tag {
    my ($self, $expat, $element) = @_;

    if ($element eq "rdf:Description") {
        $self->{'pp_text'} .= "</table>\n";

        my $record_key = $self->{'about_key'};
        my $record = $self->{'rdf_desc'}->{$record_key};
        $record->{'IndexText'} = $self->{'index_text'};
        $record->{'MetadataTable'} = $self->{'pp_text'};

        @{$self}{'about_key', 'index_text', 'pp_text'} = (undef, undef, undef);
    }
    elsif (defined $self->{'metadata_name'}) {
        my $name = $self->{'metadata_name'};

        if ($element eq $name) {
            my $value = $self->{'metadata_value'};

            my $record_key = $self->{'about_key'};
            my $record = $self->{'rdf_desc'}->{$record_key};
            $record->{$name} = $value;

            $self->{'index_text'} .= "$value\n";
            $self->{'pp_text'} .= " <tr><td>$name</td><td>$value</td></tr>\n";

            @{$self}{'metadata_name', 'metadata_value'} = (undef, undef);
        }
    }
}
764
# Called just before start or end tags with accumulated non-markup text in
# the $_ variable. The text is appended to the value being collected, but
# only while a metadata element is open.
sub xml_text {
    my ($self, $expat) = @_;

    $self->{'metadata_value'} .= $_ if (defined $self->{'metadata_name'});
}
775
# Called at the end of the XML document. Nothing needs doing here: the
# collected records are picked up from the object by the code that started
# the parse.
sub xml_end_document {
    my ($self, @callback_args) = @_;
    my ($expat) = @callback_args;
}


1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: