Context Navigation

source: gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm@ 18406

Last change on this file since 18406 was 18406, checked in by ak19, 15 years ago
Modified srcreplaceable plugins (plugins which operate on docs where the source file can be replaced with their converted htmls) to set the file_rename_method to none for secondary plugins (for Text, HTML, and PagedImage plugins) so that the file is not renamed several times.
Property svn:keywords set to `Author Date Id Revision`
File size: 21.2 KB

Line
1	###########################################################################
2	#
3	# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999-2001 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25	package CONTENTdmPlugin;
26
27	use ConvertBinaryFile;
28	use ReadXMLFile;
29
30	use unicode;
31	use ghtml;
32
33	use strict;
34	no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
35
36
37	use XMLParser;
38
39	# inherit ReadXMLFile for the apply_xslt method
40	sub BEGIN {
41	@CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
42	}
43
44
45	my $convert_to_list =
46	[ { 'name' => "auto",
47	'desc' => "{ConvertBinaryFile.convert_to.auto}" },
48	{ 'name' => "html",
49	'desc' => "{ConvertBinaryFile.convert_to.html}" },
50	{ 'name' => "text",
51	'desc' => "{ConvertBinaryFile.convert_to.text}" },
52	{ 'name' => "pagedimg",
53	'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
54	];
55
56
57
58	my $arguments =
59	[
60	{ 'name' => "convert_to",
61	'desc' => "{ConvertBinaryFile.convert_to}",
62	'type' => "enum",
63	'reqd' => "yes",
64	'list' => $convert_to_list,
65	'deft' => "html" },
66	{ 'name' => "xslt",
67	'desc' => "{ReadXMLFile.xslt}",
68	'type' => "string",
69	'deft' => "",
70	'reqd' => "no" },
71	{ 'name' => "process_exp",
72	'desc' => "{BasePlugin.process_exp}",
73	'type' => "regexp",
74	'deft' => &get_default_process_exp(),
75	'reqd' => "no" },
76	{ 'name' => "block_exp",
77	'desc' => "{BasePlugin.block_exp}",
78	'type' => "regexp",
79	'deft' => &get_default_block_exp() }
80	];
81
82	my $options = { 'name' => "CONTENTdmPlugin",
83	'desc' => "{CONTENTdmPlugin.desc}",
84	'abstract' => "no",
85	'inherits' => "yes",
86	# CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
87	'srcreplaceable' => "no",
88	'args' => $arguments };
89
90	sub new {
91	my ($class) = shift (@_);
92	my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
93	push(@$pluginlist, $class);
94
95	push(@$inputargs,"-title_sub");
96	push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
97
98	push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
99	push(@{$hashArgOptLists->{"OptList"}},$options);
100
101	my @arg_array = @$inputargs;
102	my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
103
104	if ($self->{'info_only'}) {
105	# don't worry about any options etc
106	return bless $self, $class;
107	}
108
109	my $parser = new XML::Parser('Style' => 'Stream',
110	'Pkg' => 'CONTENTdmPlugin',
111	'PluginObj' => $self,
112	'Handlers' => {'Char' => \&Char,
113	'XMLDecl' => \&XMLDecl,
114	'Entity' => \&Entity,
115	'Doctype' => \&Doctype,
116	'Default' => \&Default
117	});
118	$self->{'parser'} = $parser;
119	$self->{'rdf_desc'} = undef;
120	$self->{'about_key'} = undef;
121	$self->{'metadata_name'} = undef;
122	$self->{'metadata_value'} = undef;
123
124	$self->{'convert_to'} = "PagedImage";
125	my $secondary_plugin_options = $self->{'secondary_plugin_options'};
126
127	if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
128	$secondary_plugin_options->{'PagedImagePlugin'} = [];
129	}
130	my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
131	push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
132	push(@$pagedimg_options, "-create_thumbnail", "true", "-create_screenview", "true");
133	push(@$pagedimg_options, "-file_rename_method", "none");
134
135	$self = bless $self, $class;
136	$self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
137	return $self;
138	}
139
140	sub get_default_process_exp {
141	my $self = shift (@_);
142
143	return q^(?i)\.rdf$^;
144	}
145
146	sub get_default_block_exp {
147	return q^(?i)\.(jpg\|jpeg\|gif)$^;
148	}
149
150
151
152	sub rdf_desc_to_id
153	{
154	my $self = shift (@_);
155	my ($rdf_desc) = @_;
156
157	my $rdf_id = {};
158
159	# initialise any .cpd (=complex multi page) structures
160
161	foreach my $about_key (keys %{$rdf_desc}) {
162	if ($about_key =~ m/\.cpd$/) {
163	my $about = $rdf_desc->{$about_key};
164	my $id = $about->{'dc:identifier'};
165
166	if ($id =~ m/^\s*$/) {
167	# missing id, make one up based on about attribute
168
169	my ($tailname, $dirname, $suffix)
170	= &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
171
172	$id = "about:$tailname";
173	}
174
175	$rdf_id->{$id} = $about;
176	$rdf_id->{$id}->{'ex:filename'} = $about_key;
177	$rdf_id->{$id}->{'ex:type'} = "complex";
178	$rdf_id->{$id}->{'pages'} = [];
179	}
180
181	}
182
183	# now add in non .cpd items
184
185	foreach my $about_key (keys %{$rdf_desc}) {
186	if ($about_key !~ m/\.cpd$/) {
187	my $about = $rdf_desc->{$about_key};
188	my $id = $about->{'dc:identifier'};
189
190
191	if ($id =~ m/^\s*$/) {
192	# missing id, make one up based on about attribute
193
194	my ($tailname, $dirname, $suffix)
195	= &File::Basename::fileparse($about_key, "\\.[^\\.]+\$");
196
197	$id = "about:$tailname";
198	}
199
200	if (defined $rdf_id->{$id}) {
201	$about->{'ex:filename'} = $about_key;
202
203	# dealing with complex multi-page situation
204	# Add to existing structure
205
206	my $pages = $rdf_id->{$id}->{'pages'};
207	push(@$pages,$about)
208	}
209	else {
210	# New entry
211
212	$rdf_id->{$id} = $about;
213	$rdf_id->{$id}->{'ex:type'} = "simple";
214	$rdf_id->{$id}->{'ex:filename'} = $about_key;
215	}
216	}
217
218	}
219
220	return $rdf_id;
221	}
222
223
224	sub metadata_table_txt_file
225	{
226	my $self = shift (@_);
227	my ($output_root,$page_num) = @_;
228
229	my $txt_filename = $output_root."_page_$page_num.txt";
230
231	my ($tailname, $dirname, $suffix)
232	= &File::Basename::fileparse($txt_filename, "\\.[^\\.]+\$");
233
234	my $txt_file = "$tailname.$suffix";
235
236	return $txt_file;
237	}
238
239
240	sub output_metadata_table
241	{
242	my $self = shift (@_);
243	my ($page,$page_num,$tmp_dirname,$txt_file) = @_;
244
245	my $txt_filename = &util::filename_cat($tmp_dirname,$txt_file);
246
247	open(TOUT,">$txt_filename")
248	\|\| die "Error: unable to write metadata data out as txt file $txt_filename: $!\n";
249
250	print TOUT $page->{'MetadataTable'};
251	delete $page->{'MetadataTable'};
252
253	close (TOUT);
254	}
255
256
257	sub rdf_id_to_item_file
258	{
259	my $self = shift (@_);
260	my ($rdf_id,$tmp_dirname,$output_root) = @_;
261
262	my $item_file_list = [];
263
264	foreach my $id (keys %{$rdf_id}) {
265
266	my $id_safe = $id;
267	$id_safe =~ s/ /-/g;
268
269	my $output_filename = $output_root."_$id_safe.item";
270	open(FOUT,">$output_filename")
271	\|\| die "Unable to open $output_filename: $!\n";
272
273
274	print FOUT "<PagedDocument>\n";
275
276	my $rdf_doc = $rdf_id->{$id};
277	foreach my $metadata_name (keys %$rdf_doc) {
278
279
280	next if ($metadata_name eq "pages");
281
282	my $metadata_value = $rdf_doc->{$metadata_name};
283
284	# convert ns:name to ns.Name
285	$metadata_name =~ s/^(.?):(.)/$1\.\u$2/;
286
287	print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
288	}
289
290	if ($rdf_doc->{'ex:type'} eq "complex") {
291	my $pages = $rdf_doc->{'pages'};
292	my $page_num = 1;
293
294	foreach my $page (@$pages) {
295
296	my $imgfile = $page->{'ex:filename'};
297	if ($imgfile =~ m/(http\|ftp):/) {
298	$imgfile = "empty.jpg";
299	}
300	else {
301	$imgfile = &util::filename_cat("..","import",$imgfile);
302	}
303
304	my $txt_file
305	= $self->metadata_table_txt_file($output_root,$page_num);
306
307	$self->output_metadata_table($page,$page_num,
308	$tmp_dirname,$txt_file);
309
310
311	print FOUT " <Page pagenum=\"$page_num\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
312
313	foreach my $metadata_name (keys %$page) {
314
315	my $metadata_value = $rdf_doc->{$metadata_name};
316	# convert ns:name to ns.Name
317	$metadata_name =~ s/^(.?):(.)/$1\.\u$2/;
318
319	print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
320	}
321
322
323	$page_num++;
324
325
326	print FOUT " </Page>\n";
327	}
328	}
329	else {
330	# simple
331	# duplicate top-level metadata for now plus image to bind to
332
333	my $imgfile = $rdf_doc->{'ex:filename'};
334	if ($imgfile =~ m/(http\|ftp):/) {
335	$imgfile = "empty.jpg";
336	}
337	else {
338	$imgfile = &util::filename_cat("..","import",$imgfile);
339	$imgfile = "../import/".$imgfile;
340	}
341
342
343	my $txt_file = $self->metadata_table_txt_file($output_root,1);
344	$self->output_metadata_table($rdf_doc,1,$tmp_dirname,$txt_file);
345
346	print FOUT " <Page pagenum=\"1\" imgfile=\"$imgfile\" txtfile=\"$txt_file\">\n";
347	foreach my $metadata_name (keys %$rdf_doc) {
348
349	my $metadata_value = $rdf_doc->{$metadata_name};
350
351	# convert ns:name to ns.Name
352	$metadata_name =~ s/^(.?):(.)/$1\.\u$2/;
353
354	print FOUT " <Metadata name=\"$metadata_name\">$metadata_value</Metadata>\n";
355	}
356	print FOUT " </Page>\n";
357
358	}
359
360	print FOUT "</PagedDocument>\n";
361	close(FOUT);
362
363	push(@$item_file_list,$output_filename);
364
365	}
366
367
368	return $item_file_list;
369	}
370
371
372
373	sub xml_area_convert_file
374	{
375	my $self = shift (@_);
376	my ($input_filename, $tmp_dirname, $output_root) = @_;
377
378	eval {
379	# Build up hash table/tree of all records
380
381	my $xslt = $self->{'xslt'};
382	if (defined $xslt && ($xslt ne "")) {
383	# perform xslt
384	my $transformed_xml = $self->apply_xslt($xslt,$input_filename);
385
386	# feed transformed file (now in memory as string) into XML parser
387	$self->{'parser'}->parse($transformed_xml);
388	}
389	else {
390	$self->{'parser'}->parsefile($input_filename);
391	}
392	};
393
394	if ($@) {
395
396	# parsefile may either croak somewhere in XML::Parser (e.g. because
397	# the document is not well formed) or die somewhere in XMLPlug or a
398	# derived plugin (e.g. because we're attempting to process a
399	# document whose DOCTYPE is not meant for this plugin). For the
400	# first case we'll print a warning and continue, for the second
401	# we'll just continue quietly
402
403	print STDERR "**** Error is: $@\n";
404
405	my $file = $self->{'file'};
406
407	my ($msg) = $@ =~ /Carp::croak\(\'(.*?)\'\)/;
408	if (defined $msg) {
409	my $outhandle = $self->{'outhandle'};
410	my $plugin_name = ref ($self);
411	print $outhandle "$plugin_name failed to process $file ($msg)\n";
412	}
413
414	my $gli = $self->{'gli'};
415
416	# reset ourself for the next document
417	$self->{'section_level'}=0;
418	print STDERR "<ProcessingError n='$file'>\n" if ($gli);
419	return ("fail",undef); # error during processing
420	}
421
422	my $rdf_desc = $self->{'rdf_desc'};
423
424	# foreach my $about_key (keys %{$rdf_desc}) {
425	# my $about = $rdf_desc->{$about_key};
426	# foreach my $metadata_name (keys %{$about}) {
427	#
428	# my $metadata_value = $about->{$metadata_name};
429	## print STDERR " $metadata_name: $metadata_value\n";
430	# }
431	# }
432
433
434	# Merge entries with same name
435
436
437	my $merged_rdf_id = $self->rdf_desc_to_id($rdf_desc);
438
439	# foreach my $about_key (keys %{$merged_rdf_id}) {
440	# my $about = $merged_rdf_id->{$about_key};
441	# foreach my $metadata_name (keys %{$about}) {
442	#
443	# my $metadata_value = $about->{$metadata_name};
444	## print STDERR " $metadata_name: $metadata_value\n";
445	# }
446	# }
447
448
449
450	my $item_files = $self->rdf_id_to_item_file($merged_rdf_id,$tmp_dirname,
451	$output_root);
452
453	return ("item",$item_files);
454	}
455
456
457	# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
458	# to CONTENTdm
459	#
460	# A better (i.e. in the future) solution would be to see if this can be
461	# shifted into gsConvert.pl so there is no need to override the
462	# default tmp_area_convert_file()
463
464
465	sub tmp_area_convert_file {
466	my $self = shift (@_);
467	my ($output_ext, $input_filename, $textref) = @_;
468
469	# is textref ever used?!?
470
471	my $outhandle = $self->{'outhandle'};
472	my $convert_to = $self->{'convert_to'};
473	my $failhandle = $self->{'failhandle'};
474	my $convert_to_ext = $self->{'convert_to_ext'};
475
476	# softlink to collection tmp dir
477	my $tmp_dirname
478	= &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
479	&util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
480
481	# derive tmp filename from input filename
482	my ($tailname, $dirname, $suffix)
483	= &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
484
485	# Remove any white space from filename -- no risk of name collision, and
486	# makes later conversion by utils simpler. Leave spaces in path...
487	# tidy up the filename with space, dot, hyphen between
488	$tailname =~ s/\s+//g;
489	$tailname =~ s/\.+//g;
490	$tailname =~ s/\-+//g;
491
492	$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
493	$suffix = lc($suffix);
494	my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
495
496	&util::soft_link($input_filename, $tmp_filename);
497	my $verbosity = $self->{'verbosity'};
498	if ($verbosity > 0) {
499	print $outhandle "Converting $tailname$suffix to $convert_to format\n";
500	}
501
502	my $errlog = &util::filename_cat($tmp_dirname, "err.log");
503
504	# call xml_area_convert_file rather than gsConvert.pl
505
506	my $output_root = &util::filename_cat($tmp_dirname, "$tailname");
507
508	my ($output_type,$item_files)
509	= $self->xml_area_convert_file($tmp_filename,$tmp_dirname,$output_root);
510
511
512	my $fakeimg_filename = &util::filename_cat($dirname, "empty.jpg");
513	my $fakeimg_tmp_filename = &util::filename_cat($tmp_dirname, "empty.jpg");
514
515	print STDERR "***** Setting up fake filename $fakeimg_filename -> $fakeimg_tmp_filename\n";
516
517	&util::soft_link($fakeimg_filename, $fakeimg_tmp_filename);
518
519	# continue as before ...
520
521	# remove symbolic link to original file
522	&util::rm($tmp_filename);
523
524	# Check STDERR here
525	chomp $output_type;
526	if ($output_type eq "fail") {
527	print $outhandle "Could not convert $tailname$suffix to $convert_to format\n";
528	print $failhandle "$tailname$suffix: " . ref($self) . " failed to convert to $convert_to\n";
529	$self->{'num_not_processed'} ++;
530	if (-s "$errlog") {
531	open(ERRLOG, "$errlog");
532	while (<ERRLOG>) {
533	print $outhandle "$_";
534	}
535	print $outhandle "\n";
536	close ERRLOG;
537	}
538	&util::rm("$errlog") if (-e "$errlog");
539	return [];
540	}
541
542	# store the actual output type and return the output filename
543	# it's possible we requested conversion to html, but only to text succeeded
544	#$self->{'convert_to_ext'} = $output_type;
545	if ($output_type =~ /html/i) {
546	$self->{'converted_to'} = "HTML";
547	} elsif ($output_type =~ /te?xt/i) {
548	$self->{'converted_to'} = "Text";
549	} elsif ($output_type =~ /item/i){
550	$self->{'converted_to'} = "PagedImage";
551	}
552
553
554	return $item_files;
555	}
556
557
558
559
560	# Override ConvertBinaryFile read
561	# Needed so multiple .item files generated are sent down secondary plugin
562
563	sub read {
564	my $self = shift (@_);
565	my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
566
567	$self->{'gli'} = $gli;
568	$self->{'file'} = $file;
569
570	my $successful_rv = -1;
571
572	my $outhandle = $self->{'outhandle'};
573
574	my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
575	return undef unless $self->can_process_this_file($filename_full_path);
576
577	$file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
578
579	# read() deviates at this point from ConvertBinaryFile
580	# Need to work with list of filename returned
581
582	my $output_ext = $self->{'convert_to_ext'};
583	my $conv_filename_list = [];
584
585	$conv_filename_list = $self->tmp_area_convert_file($output_ext, $filename_full_path);
586
587	if (scalar(@$conv_filename_list)==0) {
588	return -1;
589	} # had an error, will be passed down pipeline
590
591	foreach my $conv_filename ( @$conv_filename_list ) {
592	if (! -e "$conv_filename") {return -1;}
593	$self->{'conv_filename'} = $conv_filename; # is this used anywhere?
594	$self->convert_post_process($conv_filename);
595
596	my $secondary_plugins = $self->{'secondary_plugins'};
597	my $num_secondary_plugins = scalar(keys %$secondary_plugins);
598
599	if ($num_secondary_plugins == 0) {
600	print $outhandle "Warning: No secondary plugin to use in conversion. Skipping $file\n";
601	return 0; # effectively block it
602	}
603
604	my @plugin_names = keys %$secondary_plugins;
605	my $plugin_name = shift @plugin_names;
606
607	if ($num_secondary_plugins > 1) {
608	print $outhandle "Warning: Multiple secondary plugins not supported yet! Choosing $plugin_name\n.";
609	}
610
611	my $secondary_plugin = $secondary_plugins->{$plugin_name};
612
613	# note: metadata is not carried on to the next level
614	my ($rv,$doc_obj)
615	= $secondary_plugin->read_into_doc_obj ($pluginfo,"", $conv_filename,
616	$block_hash, $metadata, $processor, $maxdocs, $total_count,
617	$gli);
618
619	if ((defined $rv) && ($rv>=0)) {
620	$successful_rv = 1;
621	}
622
623	# Override previous gsdlsourcefilename set by secondary plugin
624	my $collect_file = &util::filename_within_collection($filename_full_path);
625	my $collect_conv_file = &util::filename_within_collection($conv_filename);
626	$doc_obj->set_source_filename ($collect_file, $self->{'file_rename_method'});
627	$doc_obj->set_converted_filename($collect_conv_file);
628
629	my ($filemeta) = $file =~ /([^\\\/]+)$/;
630	$self->set_Source_metadata($doc_obj, $filemeta);
631	$doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
632	$doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
633
634	if ($self->{'cover_image'}) {
635	$self->associate_cover_image($doc_obj, $filename_full_path);
636	}
637
638	# do plugin specific processing of doc_obj
639	unless (defined ($self->process(undef, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
640	print STDERR "<ProcessingError n='$file'>\n" if ($gli);
641	return -1;
642	}
643	# do any automatic metadata extraction
644	$self->auto_extract_metadata ($doc_obj);
645
646	# have we found a Title??
647	$self->title_fallback($doc_obj,$doc_obj->get_top_section(),$filemeta);
648
649	# add an OID
650	$self->add_OID($doc_obj);
651	# process the document
652	$processor->process($doc_obj);
653
654	$self->{'num_processed'} ++;
655	}
656
657	return $successful_rv;
658	}
659
660	sub process {
661
662	}
663	# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
664	sub process_old {
665	my $self = shift (@_);
666	my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
667
668
669	my $secondary_plugins = $self->{'secondary_plugins'};
670	my @plugin_names = keys %$secondary_plugins;
671	my $plugin_name = shift @plugin_names; # already checked there is only one
672
673	my $secondary_plugin = $secondary_plugins->{$plugin_name};
674
675	my $result = $secondary_plugin->process(@_);
676
677	return $result;
678	}
679
680
681	# Called at the beginning of the XML document.
682	sub xml_start_document {
683	my $self = shift(@_);
684	my ($expat) = @_;
685
686	$self->{'rdf_desc'} = {};
687	}
688
689
690	# Called for DOCTYPE declarations - use die to bail out if this doctype
691	# is not meant for this plugin
692	sub xml_doctype {
693	my $self = shift(@_);
694	my ($expat, $name, $sysid, $pubid, $internal) = @_;
695
696	die "" if ($name !~ /^rdf:RDF$/);
697
698	my $outhandle = $self->{'outhandle'};
699	print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
700
701	}
702
703	# Called for every start tag. The $_ variable will contain a copy of the
704	# tag and the %_ variable will contain the element's attributes.
705	sub xml_start_tag {
706	my $self = shift(@_);
707	my ($expat, $element) = @_;
708
709	if ($element eq "rdf:Description") {
710
711	my $about_key = $_{'about'};
712
713	my $rdf_desc = $self->{'rdf_desc'};
714	$rdf_desc->{$about_key} = {};
715
716	$self->{'about_key'} = $about_key;
717	$self->{'index_text'} = "";
718	$self->{'pp_text'} = "<table width=\"100%\">\n";
719
720
721	}
722	elsif (defined $self->{'about_key'}) {
723	$self->{'metadata_name'} = $element;
724	$self->{'metadata_value'} = "";
725	}
726
727	}
728
729	# Called for every end tag. The $_ variable will contain a copy of the tag.
730	sub xml_end_tag {
731	my $self = shift(@_);
732	my ($expat, $element) = @_;
733
734	if ($element eq "rdf:Description") {
735	$self->{'pp_text'} .= "</table>\n";
736	## ghtml::htmlsafe($self->{'pp_text'});
737
738
739	my $about_key = $self->{'about_key'};
740	my $about = $self->{'rdf_desc'}->{$about_key};
741	$about->{'IndexText'} = $self->{'index_text'};
742	$about->{'MetadataTable'} = $self->{'pp_text'};
743
744
745	$self->{'about_key'} = undef;
746	$self->{'index_text'} = undef;
747	$self->{'pp_text'} = undef;
748
749	}
750	elsif (defined $self->{'metadata_name'}) {
751	my $metadata_name = $self->{'metadata_name'};
752	if ($element eq $metadata_name) {
753	my $metadata_value = $self->{'metadata_value'};
754
755	my $about_key = $self->{'about_key'};
756	my $about = $self->{'rdf_desc'}->{$about_key};
757	$about->{$metadata_name} = $metadata_value;
758
759	$self->{'index_text'} .= "$metadata_value\n";
760	$self->{'pp_text'} .= " <tr><td>$metadata_name</td><td>$metadata_value</td></tr>\n";
761
762	$self->{'metadata_name'} = undef;
763	$self->{'metadata_value'} = undef;
764	}
765	}
766	}
767
768	# Called just before start or end tags with accumulated non-markup text in
769	# the $_ variable.
770	sub xml_text {
771	my $self = shift(@_);
772	my ($expat) = @_;
773
774	if (defined $self->{'metadata_name'}) {
775	$self->{'metadata_value'} .= $_;
776	}
777	}
778
779	# Called at the end of the XML document.
780	sub xml_end_document {
781	my $self = shift(@_);
782	my ($expat) = @_;
783
784	}
785
786
787	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: