Context Navigation

source: gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm@ 15872

Last change on this file since 15872 was 15872, checked in by kjdon, 16 years ago
plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Alos, I haven't really tested anything yet...
Property svn:keywords set to `Author Date Id Revision`
File size: 20.9 KB

Line
1	###########################################################################
2	#
3	# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999-2001 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25	package CONTENTdmPlugin;
26
27	use ConvertBinaryFile;
28	use ReadXMLFile;
29
30	use unicode;
31	use ghtml;
32
33	use strict;
34	no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37	use XMLParser;
38
39	# inherit ReadXMLFile for the apply_xslt method
40	sub BEGIN {
41	@CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
42	}
43
44
45	my $convert_to_list =
46	[ { 'name' => "auto",
47	'desc' => "{ConvertBinaryFile.convert_to.auto}" },
48	{ 'name' => "html",
49	'desc' => "{ConvertBinaryFile.convert_to.html}" },
50	{ 'name' => "text",
51	'desc' => "{ConvertBinaryFile.convert_to.text}" },
52	{ 'name' => "pagedimg",
53	'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
54	];
55
56
57
58	my $arguments =
59	[
60	{ 'name' => "convert_to",
61	'desc' => "{ConvertBinaryFile.convert_to}",
62	'type' => "enum",
63	'reqd' => "yes",
64	'list' => $convert_to_list,
65	'deft' => "html" },
66	{ 'name' => "xslt",
67	'desc' => "{ReadXMLFile.xslt}",
68	'type' => "string",
69	'deft' => "",
70	'reqd' => "no" },
71	{ 'name' => "process_exp",
72	'desc' => "{BasePlugin.process_exp}",
73	'type' => "regexp",
74	'deft' => &get_default_process_exp(),
75	'reqd' => "no" },
76	{ 'name' => "block_exp",
77	'desc' => "{BasePlugin.block_exp}",
78	'type' => "regexp",
79	'deft' => &get_default_block_exp() }
80	];
81
82	my $options = { 'name' => "CONTENTdmPlugin",
83	'desc' => "{CONTENTdmPlugin.desc}",
84	'abstract' => "no",
85	'inherits' => "yes",
86	# CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
87	'srcreplaceable' => "no",
88	'args' => $arguments };
89
90	sub new {
91	my ($class) = shift (@_);
92	my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
93	push(@$pluginlist, $class);
94
95	push(@$inputargs,"-title_sub");
96	push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
97
98	push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
99	push(@{$hashArgOptLists->{"OptList"}},$options);
100
101	my @arg_array = @$inputargs;
102	my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
103
104	if ($self->{'info_only'}) {
105	# don't worry about any options etc
106	return bless $self, $class;
107	}
108
109	my $parser = new XML::Parser('Style' => 'Stream',
110	'Pkg' => 'CONTENTdmPlugin',
111	'PluginObj' => $self,
112	'Handlers' => {'Char' => \&Char,
113	'XMLDecl' => \&XMLDecl,
114	'Entity' => \&Entity,
115	'Doctype' => \&Doctype,
116	'Default' => \&Default
117	});
118	$self->{'parser'} = $parser;
119	$self->{'rdf_desc'} = undef;
120	$self->{'about_key'} = undef;
121	$self->{'metadata_name'} = undef;
122	$self->{'metadata_value'} = undef;
123
124	$self->{'convert_to'} = "PagedImage";
125	my $secondary_plugin_options = $self->{'secondary_plugin_options'};
126
127	if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
128	$secondary_plugin_options->{'PagedImagePlugin'} = [];
129	}
130	my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
131	push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
132	push(@$pagedimg_options, "-thumbnail", "-screenview");
133
134
135	$self = bless $self, $class;
136	$self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
137	return $self;
138	}
139
140	sub get_default_process_exp {
141	my $self = shift (@_);
142
143	return q^(?i)\.rdf$^;
144	}
145
146	sub get_default_block_exp {
147	return q^(?i)\.(jpg\|jpeg\|gif)$^;
148	}
149
150
151
152	sub rdf_desc_to_id
153	{
154	my $self = shift (@_);
155	my ($rdf_desc) = @_;
156
157	my $rdf_id = {};
158
159	# initialise any .cpd (=complex multi page) structures
160
161	foreach my $about_key (keys %{$rdf_desc}) {
162	if ($about_key =~ m/\.cpd$/) {
163	my $about = $rdf_desc->{$about_key};
164	my $id = $about->{'dc:identifier'};
165
166	if ($id =~ m/^\s*$/) {
167	# missing id, make one up based on about attribute
168
169	my ($tailname, $dirname, $suffix)
170	= &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
171
172	$id = "about:$tailname";
173	}
174
175	$rdf_id->{$id} = $about;
176	$rdf_id->{$id}->{'ex:filename'} = $about_key;
177	$rdf_id->{$id}->{'ex:type'} = "complex";
178	$rdf_id->{$id}->{'pages'} = [];
179	}
180
181	}
182
183	# now add in non .cpd items
184
185	foreach my $about_key (keys %{$rdf_desc}) {
186	if ($about_key !~ m/\.cpd$/) {
187	my $about = $rdf_desc->{$about_key};
188	my $id = $about->{'dc:identifier'};
189
190
191	if ($id =~ m/^\s*$/) {
192	# missing id, make one up based on about attribute
193
194	my ($tailname, $dirname, $suffix)
195	= &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
196
197	$id = "about:$tailname";
198	}
199
200	if (defined $rdf_id->{$id}) {
201	$about->{'ex:filename'} = $about_key;
202
203	# dealing with complex multi-page situation
204	# Add to existing structure
205
206	my $pages = $rdf_id->{$id}->{'pages'};
207	push(@$pages,$about)
208	}
209	else {
210	# New entry
211
212	$rdf_id->{$id} = $about;
213	$rdf_id->{$id}->{'ex:type'} = "simple";
214	$rdf_id->{$id}->{'ex:filename'} = $about_key;
215	}
216	}
217
218	}
219
220	return $rdf_id;
221	}
222
223
224	sub metadata_table_txt_file
225	{
226	my $self = shift (@_);
227	my ($output_root,$page_num) = @_;
228
229	my $txt_filename = $output_root."_page_$page_num.txt";
230
231	my ($tailname, $dirname, $suffix)
232	= &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");
233
234	my $txt_file = "$tailname.$suffix";
235
236	return $txt_file;
237	}
238
239
240	sub output_metadata_table
241	{
242	my $self = shift (@_);
243	my ($page,$page_num,$tmp_dirname,$txt_file) = @_;
244
245	my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);
246
247	open(TOUT,">$txt_filename")
248	\|\| die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
249
250	print TOUT $page->{'MetadataTable'};
251	delete $page->{'MetadataTable'};
252
253	close (TOUT);
254	}
255
256
257	sub rdf_id_to_item_file
258	{
259	my $self = shift (@_);
260	my ($rdf_id,$tmp_dirname,$output_root) = @_;
261
262	my $item_file_list = [];
263
264	foreach my $id (keys %{$rdf_id}) {
265
266	my $id_safe = $id;
267	$id_safe =~ s/ /-/g;
268
269	my $output_filename = $output_root."_$id_safe.item";
270	open(FOUT,">$output_filename")
271	\|\| die "Unable to open $output_filename: $!\n";
272
273
274	print FOUT "<PagedDocument>\n";
275
276	my $rdf_doc = $rdf_id->{$id};
277	foreach my $metadata_name (keys %$rdf_doc) {
278
279
280	next if ($metadata_name eq "pages");
281
282	my $metadata_value = $rdf_doc->{$metadata_name};
283
284	# convert ns:name to ns.Name
285	$metadata_name =~ s/^(.?):(.)/$1\.\u$2/;
286
287	print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
288	}
289
290	if ($rdf_doc->{'ex:type'} eq "complex") {
291	my $pages = $rdf_doc->{'pages'};
292	my $page_num = 1;
293
294	foreach my $page (@$pages) {
295
296	my $imgfile = $page->{'ex:filename'};
297	if ($imgfile =~ m/(http\|ftp):/) {
298	$imgfile = "empty.jpg";
299	}
300	else {
301	$imgfile = &util::filename_cat("..","import",$imgfile);
302	}
303
304	my $txt_file
305	= $self->metadata_table_txt_file($output_root,$page_num);
306
307	$self->output_metadata_table($page,$page_num,
308	$tmp_dirname,$txt_file);
309
310
311	print FOUT " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
312
313	foreach my $metadata_name (keys %$page) {
314
315	my $metadata_value = $rdf_doc->{$metadata_name};
316	# convert ns:name to ns.Name
317	$metadata_name =~ s/^(.?):(.)/$1\.\u$2/;
318
319	print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
320	}
321
322
323	$page_num++;
324
325
326	print FOUT " </Page>\n";
327	}
328	}
329	else {
330	# simple
331	# duplicate top-level metadata for now plus image to bind to
332
333	my $imgfile = $rdf_doc->{'ex:filename'};
334	if ($imgfile =~ m/(http\|ftp):/) {
335	$imgfile = "empty.jpg";
336	}
337	else {
338	$imgfile = &util::filename_cat("..","import",$imgfile);
339	$imgfile = "../import/".$imgfile;
340	}
341
342
343	my $txt_file = $self->metadata_table_txt_file($output_root,1);
344	$self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);
345
346	print FOUT " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
347	foreach my $metadata_name (keys %$rdf_doc) {
348
349	my $metadata_value = $rdf_doc->{$metadata_name};
350
351	# convert ns:name to ns.Name
352	$metadata_name =~ s/^(.?):(.)/$1\.\u$2/;
353
354	print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
355	}
356	print FOUT " </Page>\n";
357
358	}
359
360	print FOUT "</PagedDocument>\n";
361	close(FOUT);
362
363	push(@$item_file_list,$output_filename);
364
365	}
366
367
368	return $item_file_list;
369	}
370
371
372
373	sub xml_area_convert_file
374	{
375	my $self = shift (@_);
376	my ($input_filename, $tmp_dirname, $output_root) = @_;
377
378	eval {
379	# Build up hash table/tree of all records
380
381	my $xslt = $self->{'xslt'};
382	if (defined $xslt && ($xslt ne "")) {
383	# perform xslt
384	my $transformed_xml = $self->apply_xslt($xslt,$input_filename);
385
386	# feed transformed file (now in memory as string) into XML parser
387	$self->{'parser'}->parse($transformed_xml);
388	}
389	else {
390	$self->{'parser'}->parsefile($input_filename);
391	}
392	};
393
394	if ($@) {
395
396	# parsefile may either croak somewhere in XML::Parser (e.g. because
397	# the document is not well formed) or die somewhere in XMLPlug or a
398	# derived plugin (e.g. because we're attempting to process a
399	# document whose DOCTYPE is not meant for this plugin). For the
400	# first case we'll print a warning and continue, for the second
401	# we'll just continue quietly
402
403	print STDERR "**** Error is: $@\n";
404
405	my $file = $self->{'file'};
406
407	my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
408	if (defined $msg) {
409	my $outhandle = $self->{'outhandle'};
410	my $plugin_name = ref ($self);
411	print $outhandle "$plugin_name failed to process $file ($msg)\n";
412	}
413
414	my $gli = $self->{'gli'};
415
416	# reset ourself for the next document
417	$self->{'section_level'}=0;
418	print STDERR "<ProcessingError n='$file'>\n" if ($gli);
419	return ("fail",undef); # error during processing
420	}
421
422	my $rdf_desc = $self->{'rdf_desc'};
423
424	# foreach my $about_key (keys %{$rdf_desc}) {
425	# my $about = $rdf_desc->{$about_key};
426	# foreach my $metadata_name (keys %{$about}) {
427	#
428	# my $metadata_value = $about->{$metadata_name};
429	## print STDERR " $metadata_name: $metadata_value\n";
430	# }
431	# }
432
433
434	# Merge entries with same name
435
436
437	my $merged_rdf_id = $self->rdf_desc_to_id($rdf_desc);
438
439	# foreach my $about_key (keys %{$merged_rdf_id}) {
440	# my $about = $merged_rdf_id->{$about_key};
441	# foreach my $metadata_name (keys %{$about}) {
442	#
443	# my $metadata_value = $about->{$metadata_name};
444	## print STDERR " $metadata_name: $metadata_value\n";
445	# }
446	# }
447
448
449
450	my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,$tmp_dirname,
451	$output_root);
452
453	return ("item",$item_files);
454	}
455
456
457	# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
458	# to CONTENTdm
459	#
460	# A better (i.e. in the future) solution would be to see if this can be
461	# shifted into gsConvert.pl so there is no need to override the
462	# default tmp_area_convert_file()
463
464
465	sub tmp_area_convert_file {
466	my $self = shift (@_);
467	my ($output_ext, $input_filename, $textref) = @_;
468
469	# is textref ever used?!?
470
471	my $outhandle = $self->{'outhandle'};
472	my $convert_to = $self->{'convert_to'};
473	my $failhandle = $self->{'failhandle'};
474	my $convert_to_ext = $self->{'convert_to_ext'};
475
476	# softlink to collection tmp dir
477	my $tmp_dirname
478	= &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
479	&util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
480
481	# derive tmp filename from input filename
482	my ($tailname, $dirname, $suffix)
483	= &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
484
485	# Remove any white space from filename -- no risk of name collision, and
486	# makes later conversion by utils simpler. Leave spaces in path...
487	# tidy up the filename with space, dot, hyphen between
488	$tailname =~ s/\s+//g;
489	$tailname =~ s/\.+//g;
490	$tailname =~ s/\-+//g;
491	$suffix = lc($suffix);
492	my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
493
494	&util::soft_link($input_filename, $tmp_filename);
495	my $verbosity = $self->{'verbosity'};
496	if ($verbosity > 0) {
497	print $outhandle "Converting $tailname$suffix to $convert_to format\n";
498	}
499
500	my $errlog = &util::filename_cat($tmp_dirname, "err.log");
501
502	# call xml_area_convert_file rather than gsConvert.pl
503
504	my $output_root = &util::filename_cat($tmp_dirname, "$tailname");
505
506	my ($output_type,$item_files)
507	= $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);
508
509
510	my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
511	my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");
512
513	print STDERR "***** Setting up fake filename $fakeimg_filename -> $fakeimg_tmp_filename\n";
514
515	&util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);
516
517	# continue as before ...
518
519	# remove symbolic link to original file
520	&util::rm($tmp_filename);
521
522	# Check STDERR here
523	chomp $output_type;
524	if ($output_type eq "fail") {
525	print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
526	print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
527	$self->{'num_not_processed'} ++;
528	if (-s "$errlog") {
529	open(ERRLOG, "$errlog");
530	while (<ERRLOG>) {
531	print $outhandle "$_";
532	}
533	print $outhandle "\n";
534	close ERRLOG;
535	}
536	&util::rm("$errlog") if (-e "$errlog");
537	return [];
538	}
539
540	# store the actual output type and return the output filename
541	# it's possible we requested conversion to html, but only to text succeeded
542	#$self->{'convert_to_ext'} = $output_type;
543	if ($output_type =~ /html/i) {
544	$self->{'converted_to'} = "HTML";
545	} elsif ($output_type =~ /te?xt/i) {
546	$self->{'converted_to'} = "Text";
547	} elsif ($output_type =~ /item/i){
548	$self->{'converted_to'} = "PagedImage";
549	}
550
551
552	return $item_files;
553	}
554
555
556
557
558	# Override ConvertBinaryFile read
559	# Needed so multiple .item files generate are sent down secondary plugin
560
561	sub read {
562	my $self = shift (@_);
563	my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
564
565	$self->{'gli'} = $gli;
566	$self->{'file'} = $file;
567
568	my $successful_rv = -1;
569
570	my $outhandle = $self->{'outhandle'};
571
572	my ($block_status,$filename) = $self->read_block(@_);
573	return $block_status if ((!defined $block_status) \|\| ($block_status==0));
574	$file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
575
576	# read() deviates at this point from ConvertBinaryFile
577	# Need to work with list of filename returned
578
579	my $output_ext = $self->{'convert_to_ext'};
580	my $conv_filename_list = [];
581
582	$conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename);
583
584	if (scalar(@$conv_filename_list)==0) {
585	return -1;
586	} # had an error, will be passed down pipeline
587
588	foreach my $conv_filename ( @$conv_filename_list ) {
589	if (! -e "$conv_filename") {return -1;}
590	$self->{'conv_filename'} = $conv_filename; # is this used anywhere?
591	$self->convert_post_process($conv_filename);
592
593	my $secondary_plugins = $self->{'secondary_plugins'};
594	my $num_secondary_plugins = scalar(keys %$secondary_plugins);
595
596	if ($num_secondary_plugins == 0) {
597	print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
598	return 0; # effectively block it
599	}
600
601	my @plugin_names = keys %$secondary_plugins;
602	my $plugin_name = shift @plugin_names;
603
604	if ($num_secondary_plugins > 1) {
605	print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name\n.";
606	}
607
608	my $secondary_plugin = $secondary_plugins->{$plugin_name};
609
610	# note: metadata is not carried on to the next level
611	my ($rv,$doc_obj)
612	= $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
613	$metadata, $processor, $maxdocs, $total_count,
614	$gli);
615
616	if ((defined $rv) && ($rv>=0)) {
617	$successful_rv = 1;
618	}
619
620	# Override previous gsdlsourcefilename set by secondary plugin
621	my $collect_file = &util::filename_within_collection($filename);
622	my $collect_conv_file = &util::filename_within_collection($conv_filename);
623	$doc_obj->set_source_filename ($collect_file);
624	$doc_obj->set_converted_filename($collect_conv_file);
625
626	my ($filemeta) = $file =~ /([^\\\/]+)$/;
627	$self->set_Source_metadata($doc_obj, $filemeta);
628	$doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
629	$doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
630
631	if ($self->{'cover_image'}) {
632	$self->associate_cover_image($doc_obj, $filename);
633	}
634
635	# do plugin specific processing of doc_obj
636	unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
637	print STDERR "<ProcessingError n='$file'>\n" if ($gli);
638	return -1;
639	}
640	# do any automatic metadata extraction
641	$self->auto_extract_metadata ($doc_obj);
642
643	# have we found a Title??
644	$self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);
645
646	# add an OID
647	$self->add_OID($doc_obj);
648	# process the document
649	$processor->process($doc_obj);
650
651	$self->{'num_processed'} ++;
652	}
653
654	return $successful_rv;
655	}
656
657	sub process {
658
659	}
660	# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
661	sub process_old {
662	my $self = shift (@_);
663	my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
664
665
666	my $secondary_plugins = $self->{'secondary_plugins'};
667	my @plugin_names = keys %$secondary_plugins;
668	my $plugin_name = shift @plugin_names; # already checked there is only one
669
670	my $secondary_plugin = $secondary_plugins->{$plugin_name};
671
672	my $result = $secondary_plugin->process(@_);
673
674	return $result;
675	}
676
677
678	# Called at the beginning of the XML document.
679	sub xml_start_document {
680	my $self = shift(@_);
681	my ($expat) = @_;
682
683	$self->{'rdf_desc'} = {};
684	}
685
686
687	# Called for DOCTYPE declarations - use die to bail out if this doctype
688	# is not meant for this plugin
689	sub xml_doctype {
690	my $self = shift(@_);
691	my ($expat, $name, $sysid, $pubid, $internal) = @_;
692
693	die "" if ($name !~ /^rdf:RDF$/);
694
695	my $outhandle = $self->{'outhandle'};
696	print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
697
698	}
699
700	# Called for every start tag. The $_ variable will contain a copy of the
701	# tag and the %_ variable will contain the element's attributes.
702	sub xml_start_tag {
703	my $self = shift(@_);
704	my ($expat, $element) = @_;
705
706	if ($element eq "rdf:Description") {
707
708	my $about_key = $_{'about'};
709
710	my $rdf_desc = $self->{'rdf_desc'};
711	$rdf_desc->{$about_key} = {};
712
713	$self->{'about_key'} = $about_key;
714	$self->{'index_text'} = "";
715	$self->{'pp_text'} = "<table width=\"100%\">\n";
716
717
718	}
719	elsif (defined $self->{'about_key'}) {
720	$self->{'metadata_name'} = $element;
721	$self->{'metadata_value'} = "";
722	}
723
724	}
725
726	# Called for every end tag. The $_ variable will contain a copy of the tag.
727	sub xml_end_tag {
728	my $self = shift(@_);
729	my ($expat, $element) = @_;
730
731	if ($element eq "rdf:Description") {
732	$self->{'pp_text'} .= "</table>\n";
733	## ghtml::htmlsafe($self->{'pp_text'});
734
735
736	my $about_key = $self->{'about_key'};
737	my $about = $self->{'rdf_desc'}->{$about_key};
738	$about->{'IndexText'} = $self->{'index_text'};
739	$about->{'MetadataTable'} = $self->{'pp_text'};
740
741
742	$self->{'about_key'} = undef;
743	$self->{'index_text'} = undef;
744	$self->{'pp_text'} = undef;
745
746	}
747	elsif (defined $self->{'metadata_name'}) {
748	my $metadata_name = $self->{'metadata_name'};
749	if ($element eq $metadata_name) {
750	my $metadata_value = $self->{'metadata_value'};
751
752	my $about_key = $self->{'about_key'};
753	my $about = $self->{'rdf_desc'}->{$about_key};
754	$about->{$metadata_name} = $metadata_value;
755
756	$self->{'index_text'} .= "$metadata_value\n";
757	$self->{'pp_text'} .= " <tr><td>$metadata_name</td><td>$metadata_value</td></tr>\n";
758
759	$self->{'metadata_name'} = undef;
760	$self->{'metadata_value'} = undef;
761	}
762	}
763	}
764
765	# Called just before start or end tags with accumulated non-markup text in
766	# the $_ variable.
767	sub xml_text {
768	my $self = shift(@_);
769	my ($expat) = @_;
770
771	if (defined $self->{'metadata_name'}) {
772	$self->{'metadata_value'} .= $_;
773	}
774	}
775
776	# Called at the end of the XML document.
777	sub xml_end_document {
778	my $self = shift(@_);
779	my ($expat) = @_;
780
781	}
782
783
784	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: