Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: trunk/gsdl/perllib/plugins/BookPlug.pm@ 2484

Last change on this file since 2484 was 2356, checked in by sjboddie, 23 years ago
Renamed HBSPlug BookPlug in the hope that it's a little less cryptic
Property svn:keywords set to `Author Date Id Revision`
File size: 6.3 KB

Rev	Line
[2356]	1	###########################################################################
	2	#
	3	# BookPlug.pm (formerly called HBSPlug) -- plugin for processing simple
	4	# html (or text) books
	5	#
	6	# A component of the Greenstone digital library software
	7	# from the New Zealand Digital Library Project at the
	8	# University of Waikato, New Zealand.
	9	#
	10	# Copyright (C) 1999 New Zealand Digital Library Project
	11	#
	12	# This program is free software; you can redistribute it and/or modify
	13	# it under the terms of the GNU General Public License as published by
	14	# the Free Software Foundation; either version 2 of the License, or
	15	# (at your option) any later version.
	16	#
	17	# This program is distributed in the hope that it will be useful,
	18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	# GNU General Public License for more details.
	21	#
	22	# You should have received a copy of the GNU General Public License
	23	# along with this program; if not, write to the Free Software
	24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	25	#
	26	###########################################################################
	27
	28	# creates multi-level document from document containing
	29	# <<TOC>> level tags. Metadata for each section is taken from any
	30	# other tags on the same line as the <<TOC>>. e.g. <<Title>>xxxx<</Title>>
	31	# sets Title metadata.
	32
	33	# Everything else between TOC tags is treated as simple html (i.e. no
	34	# processing of html links or any other HTMLPlug type stuff is done).
	35
	36	# expects input files to have a .hb file extension by default (this can be
	37	# changed by adding a -process_exp option)
	38
	39	# a file with the same name as the hb file but a .jpg extension is
	40	# taken as the cover image (jpg files are blocked by this plugin)
	41
	42	# BookPlug is a simplification (and extension) of the HBPlug used
	43	# by the Humanity Library collections. BookPlug is faster as it expects
	44	# the input files to be cleaner (The input to the HDL collections
	45	# contains lots of excess html tags around <<TOC>> tags, uses <<I>>
	46	# tags to specify images, and simply takes all text between <<TOC>>
	47	# tags and start of text to be Title metadata). If you're marking up
	48	# documents to be displayed in the same way as the HDL collections,
	49	# use this plugin instead of HBPlug.
	50
	51	package BookPlug;
	52
	53	use BasPlug;
	54	use util;
	55
	# Declare BasPlug as the parent class at compile time.
	BEGIN {
	    @ISA = qw(BasPlug);
	}
	59
	# Constructor: builds the object through BasPlug's constructor and
	# reblesses it into the requested class.  The complete argument list
	# (class name included) is forwarded to BasPlug after the plugin name.
	sub new {
	    my $class = $_[0];

	    my $plugin = BasPlug->new("BookPlug", @_);
	    return bless($plugin, $class);
	}
	66
	# Default pattern for files this plugin blocks: anything ending in .jpg
	# (the header comment of this file explains that jpg files are taken as
	# cover images rather than processed as documents).
	sub get_default_block_exp {
	    my ($self) = @_;

	    my $block_pattern = '\.jpg$';
	    return $block_pattern;
	}
	72
	# Default pattern for files this plugin processes: names ending in .hb,
	# matched case-insensitively.
	sub get_default_process_exp {
	    my ($self) = @_;

	    my $process_pattern = '(?i)\.hb$';
	    return $process_pattern;
	}
	78
	# do plugin specific processing of doc_obj
	#
	# Splits the text behind $textref into a hierarchy of sections, one per
	# <<TOCn>> tag (n being the nesting level), and stores each section's
	# metadata and text in $doc_obj.  A .jpg file with the same base name as
	# the input file is associated with the document as "cover.jpg".
	#
	# Arguments (after $self):
	#   $textref         - reference to the file's text; it is consumed
	#                      (modified in place) as the sections are parsed
	#   $pluginfo        - not used by this plugin
	#   $base_dir, $file - joined with util::filename_cat to get the input path
	#   $metadata        - not used by this plugin
	#   $doc_obj         - document object receiving sections, metadata, text
	#
	# Returns 1.
	sub process {
	    my $self = shift (@_);
	    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
	    my $outhandle = $self->{'outhandle'};

	    print $outhandle "BookPlug: processing $file\n"
	        if $self->{'verbosity'} > 1;

	    my $cursection = $doc_obj->get_top_section();

	    my $filename = util::filename_cat($base_dir, $file);

	    # directory part of the input path (everything up to the last / or \)
	    my $absdir = $filename;
	    $absdir =~ s/[^\/\\]*$//;

	    # add the cover image
	    my $coverimage = $filename;
	    $coverimage =~ s/\.[^\.]*$/\.jpg/i;
	    $doc_obj->associate_file($coverimage, "cover.jpg", "image/jpeg");

	    # remove any leading rubbish
	    $$textref =~ s/^.*?(<<TOC)/$1/ios;

	    my $curtoclevel = 1;
	    my $firstsection = 1;
	    while ($$textref =~ /\w/) {
	        # Peel one section off the front of the text: the <<TOCn>> line
	        # (level plus any metadata tags on that line), then everything up
	        # to the next <<TOC tag or the end of the text.
	        my $matched =
	            ($$textref =~ s/^<<TOC(\d+)>>([^\n]*)\n(.*?)(<<TOC|\Z)/$4/ios);
	        if (!$matched) {
	            # Without this check the text would never shrink and the
	            # loop would spin forever on stale capture values.
	            print $outhandle "BookPlug: WARNING - no well-formed <<TOC>> " .
	                "tag found in $filename, remaining text ignored\n";
	            last;
	        }
	        my ($toclevel, $tagline, $sectiontext) = ($1, $2, $3);

	        # close any sections below the current level and
	        # create a new section (special case for the firstsection)
	        while (($curtoclevel > $toclevel) ||
	               (!$firstsection && $curtoclevel == $toclevel)) {
	            $cursection = $doc_obj->get_parent_section ($cursection);
	            $curtoclevel--;
	        }
	        if ($curtoclevel+1 < $toclevel) {
	            print $outhandle "WARNING - jump in toc levels in $filename " .
	                "from $curtoclevel to $toclevel\n";
	        }
	        while ($curtoclevel < $toclevel) {
	            $curtoclevel++;
	            $cursection =
	                $doc_obj->insert_section($doc_obj->get_end_child($cursection));
	        }

	        # sort out metadata: each <<Key>>value<</Key>> pair on the TOC
	        # line is removed from $tagline as it is consumed
	        while ($tagline =~ s/^.*?<<([^>]*)>>(.*?)<<[^>]*>>//) {
	            my $metakey = $1;
	            my $metavalue = $2;

	            if ($metavalue ne "" && $metakey ne "") {
	                # make sure key fits in with gsdl naming scheme
	                $metakey =~ tr/A-Z/a-z/;
	                $metakey = ucfirst ($metakey);
	                $doc_obj->add_utf8_metadata ($cursection, $metakey, $metavalue);
	            }
	        }

	        # remove header rubbish
	        $sectiontext =~ s/^.*?<body[^>]*>//ios;

	        # and any other unwanted tags
	        $sectiontext =~ s/<(\/p|\/html|\/body)>//isg;

	        # fix up the image links; the output handle is passed along so
	        # that replace_image_links can report missing images
	        $sectiontext =~ s{(<img[^>]*?src\s*=\s*"?)([^">]+)("?[^>]*>)}
	                         {replace_image_links($absdir, $doc_obj, $1, $2, $3, $outhandle)}isge;

	        # add the text
	        $doc_obj->add_utf8_text($cursection, $sectiontext);

	        $firstsection = 0;

	        $$textref =~ s/^\s+//s;
	    }

	    return 1;
	}
	167
	# Resolve the target of one <img> tag and register the image file with
	# the document.
	#
	# Arguments:
	#   $dir       - directory of the document being processed
	#   $doc_obj   - document object; found images are passed to its
	#                associate_file method
	#   $front     - text of the tag up to the start of the link
	#   $link      - the link itself (value of the src attribute)
	#   $back      - remainder of the tag after the link
	#   $outhandle - optional handle for warnings; STDERR is used if omitted
	#
	# The image is looked for first at the link's path relative to $dir and
	# then, by bare file name, directly in $dir.
	#
	# Returns the tag rewritten to point at _httpdocimg_/<image file name>
	# when the image was found, or "" (which drops the tag) when it was not.
	sub replace_image_links {
	    my ($dir, $doc_obj, $front, $link, $back, $outhandle) = @_;

	    # This is called as a plain function, so there is no $self to take
	    # the plugin's output handle from; printing to an undefined handle
	    # would be fatal, so fall back to STDERR when none was supplied.
	    $outhandle = \*STDERR unless defined $outhandle;

	    my $error;
	    my $foundimage = 0;

	    # drop the first "//" in the link
	    $link =~ s/\/\///;

	    # the image type is whatever follows the last dot, lower-cased
	    my ($imagetype) = $link =~ /([^\.]*)$/;
	    $imagetype =~ tr/A-Z/a-z/;
	    $imagetype = "jpeg" if $imagetype eq "jpg";
	    if ($imagetype !~ /^(jpeg|gif|png)$/) {
	        print $outhandle "BookPlug: Warning - unknown image type ($imagetype)\n";
	    }

	    # file name = everything after the last slash;
	    # path = everything from the first slash onwards
	    my ($imagefile) = $link =~ /([^\/]*)$/;
	    my ($imagepath) = $link =~ /^[^\/]*(.*)$/;

	    if (defined $imagepath && $imagepath =~ /\w/) {
	        # relative link
	        my $candidate = util::filename_cat ($dir, $imagepath);
	        if (-e $candidate) {
	            $doc_obj->associate_file ($candidate, $imagefile, "image/$imagetype");
	            $foundimage = 1;
	        } else {
	            # remembered so that a single message can name both places tried
	            $error = "BookPlug: Warning - couldn't find image file $imagefile in either $candidate or";
	        }
	    }

	    if (!$foundimage) {
	        my $candidate = util::filename_cat ($dir, $imagefile);
	        if (-e $candidate) {
	            $doc_obj->associate_file ($candidate, $imagefile, "image/$imagetype");
	            $foundimage = 1;
	        } elsif (defined $error) {
	            print $outhandle "$error $candidate\n";
	        } else {
	            print $outhandle "BookPlug: Warning - couldn't find image file $imagefile in $candidate\n";
	        }
	    }

	    return "" unless $foundimage;
	    return "${front}_httpdocimg_/${imagefile}${back}";
	}
	214
	215	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: