Context Navigation

source: trunk/gsdl/perllib/plugins/HBSPlug.pm@ 1244

Last change on this file since 1244 was 1244, checked in by sjboddie, 24 years ago
Caught up most general plugins (that's the ones in gsdlhome/perllib/plugins) with changes to BasPlug so that they can all now use the new general plugin options. Those I didn't do were FoxPlug (as it's not actually used anywhere and I don't know what it does) and WebPlug (as it's kind of a work in progress and doesn't really work anyway). All plugins will still work (including all the collection specific ones that are laying around), some of them just won't have access to the general options. I also wrote a short perl script (pluginfo.pl) that prints out all the options available to a given plugin.
Property svn:keywords set to `Author Date Id Revision`
File size: 6.2 KB

Line
1	###########################################################################
2	#
3	# HBSPlug.pm -- plugin for processing simple html (or text) books
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25
26	# creates multi-level document from document containing
27	# <<TOC>> level tags. Metadata for each section is taken from any
28	# other tags on the same line as the <<TOC>>. e.g. <<Title>>xxxx<</Title>>
29	# sets Title metadata.
30
31	# Everything else between TOC tags is treated as simple html (i.e. no
32	# processing of html links or any other HTMLPlug type stuff is done).
33
34	# expects input files to have a .hb file extension by default (this can be
35	# changed by adding a -process_exp option
36
37	# a file with the same name as the hb file but a .jpg extension is
38	# taken as the cover image (jpg files are blocked by this plugin)
39
40	# HBSPlug is a simplification (and extension of) the HBPlug used
41	# by the Humanity Library collections. HBSPlug is faster as it expects
42	# the input files to be cleaner (The input to the HDL collections
43	# contains lots of excess html tags around <<TOC>> tags, uses <<I>>
44	# tags to specify images, and simply takes all text between <<TOC>>
45	# tags and start of text to be Title metadata). If you're marking up
46	# documents to be displayed in the same way as the HDL collections,
47	# use this plugin instead of HBPlug.
48
49	package HBSPlug;
50
51	use BasPlug;
52	use util;
53
54	sub BEGIN {
55	@ISA = ('BasPlug');
56	}
57
58	use strict;
59
60	sub new {
61	my ($class) = @_;
62	my $self = new BasPlug ("HBSPlug", @_);
63
64	return bless $self, $class;
65	}
66
67	sub get_default_block_exp {
68	my $self = shift (@_);
69
70	return q^\.jpg$^;
71	}
72
73	sub get_default_process_exp {
74	my $self = shift (@_);
75
76	return q^(?i)\.hb$^;
77	}
78
79	# do plugin specific processing of doc_obj
80	sub process {
81	my $self = shift (@_);
82	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
83
84	print STDERR "HBSPlug: processing $file\n"
85	if $self->{'verbosity'} > 1;
86
87	my $cursection = $doc_obj->get_top_section();
88
89	my $filename = &util::filename_cat($base_dir, $file);
90	my $absdir = $filename;
91	$absdir =~ s/[^\/\\]*$//;
92
93	# add the cover image
94	my $coverimage = $filename;
95	$coverimage =~ s/\.[^\.]*$/\.jpg/i;
96	$doc_obj->associate_file($coverimage, "cover.jpg", "image/jpeg");
97
98	my $title = "";
99
100	# remove any leading rubbish
101	$$textref =~ s/^.*?(<<TOC)/$1/ios;
102
103	my $curtoclevel = 1;
104	my $firstsection = 1;
105	my $toccount = 0;
106	while ($$textref =~ /\w/) {
107	$$textref =~ s/^<<TOC(\d+)>>([^\n])\n(.?)(<<TOC\|\Z)/$4/ios;
108	my $toclevel = $1;
109	my $metadata = $2;
110	my $sectiontext = $3;
111
112	if ($toclevel == 2) {
113	$toccount ++;
114	}
115
116	# close any sections below the current level and
117	# create a new section (special case for the firstsection)
118	while (($curtoclevel > $toclevel) \|\|
119	(!$firstsection && $curtoclevel == $toclevel)) {
120	$cursection = $doc_obj->get_parent_section ($cursection);
121	$curtoclevel--;
122	}
123	if ($curtoclevel+1 < $toclevel) {
124	print STDERR "WARNING - jump in toc levels in $filename " .
125	"from $curtoclevel to $toclevel\n";
126	}
127	while ($curtoclevel < $toclevel) {
128	$curtoclevel++;
129	$cursection =
130	$doc_obj->insert_section($doc_obj->get_end_child($cursection));
131	}
132
133	# sort out metadata
134	while ($metadata =~ s/^.?<<([^>])>>(.?)<<[^>]>>//) {
135	my $metakey = $1;
136	my $metavalue = $2;
137
138	if ($metavalue ne "" && $metakey ne "") {
139	# make sure key fits in with gsdl naming scheme
140	$metakey =~ tr/[A-Z]/[a-z]/;
141	$metakey = ucfirst ($metakey);
142	$doc_obj->add_utf8_metadata ($cursection, $metakey, $metavalue);
143	}
144	}
145
146	# remove header rubbish
147	$sectiontext =~ s/^.?<body[^>]>//ios;
148
149	# and any other unwanted tags
150	$sectiontext =~ s/<(\/p\|\/html\|\/body)>//isg;
151
152	# fix up the image links
153	$sectiontext =~ s/(<img[^>]?src\s=\s\"?)([^\">]+)(\"?[^>]>)/
154	&replace_image_links($absdir, $doc_obj, $1, $2, $3)/isge;
155
156	# add the text
157	$doc_obj->add_utf8_text($cursection, $sectiontext);
158
159	$firstsection = 0;
160
161	$$textref =~ s/^\s+//s;
162	}
163
164	return 1;
165	}
166
167	sub replace_image_links {
168
169	my ($dir, $doc_obj, $front, $link, $back) = @_;
170
171	my ($filename, $error);
172	my $foundimage = 0;
173
174	$link =~ s/\/\///;
175	my ($imagetype) = $link =~ /([^\.]*)$/;
176	$imagetype =~ tr/[A-Z]/[a-z]/;
177	if ($imagetype eq "jpg") {$imagetype = "jpeg";}
178	if ($imagetype !~ /^(jpeg\|gif\|png)$/) {
179	print STDERR "HBSPlug: Warning - unknown image type ($imagetype)\n";
180	}
181	my ($imagefile) = $link =~ /([^\/]*)$/;
182	my ($imagepath) = $link =~ /^[^\/](.)$/;
183
184	if (defined $imagepath && $imagepath =~ /\w/) {
185	# relative link
186	$filename = &util::filename_cat ($dir, $imagepath);
187	if (-e $filename) {
188	$doc_obj->associate_file ($filename, $imagefile, "image/$imagetype");
189	$foundimage = 1;
190	} else {
191	$error = "HBSPlug: Warning - couldn't find image file $imagefile in either $filename or";
192	}
193	}
194
195	if (!$foundimage) {
196	$filename = &util::filename_cat ($dir, $imagefile);
197	if (-e $filename) {
198	$doc_obj->associate_file ($filename, $imagefile, "image/$imagetype");
199	$foundimage = 1;
200	} elsif (defined $error) {
201	print STDERR "$error $filename\n";
202	} else {
203	print STDERR "HBSPlug: Warning - couldn't find image file $imagefile in $filename\n";
204	}
205	}
206
207	if ($foundimage) {
208	return "${front}_httpdocimg_/${imagefile}${back}";
209	} else {
210	return "";
211	}
212	}
213
214	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: