source: main/trunk/greenstone2/perllib/manifest.pm@ 32594

Last change on this file since 32594 was 30853, checked in by ak19, 8 years ago

Commenting out debug statements.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1###########################################################################
2#
3# manifest.pm --
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2006-2010 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package manifest;
28
29use strict;
30no strict 'refs'; # allow filehandles to be variables and viceversa
31
32use XMLParser;
33use dbutil;
34
35our $self;
36
37sub new {
38 my ($class) = shift (@_);
39 my ($infodbtype,$archivedir) = @_;
40
41 $self = {} ;
42 # we can now mark a manifest with a version number
43 $self->{'version'} = 1;
44 $self->{'index'} = {};
45 $self->{'reindex'} = {};
46 $self->{'delete'} = {};
47
48 my $arcinfo_doc_filename
49 = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
50
51 if (-e $arcinfo_doc_filename) {
52 # Only store the infodb-doc filename if it exists
53 # If it doesn't exist then this means the collection has not been
54 # built yet (or else the archives folder has been deleted).
55 # Either way we have no way to look up which files
56 # are associated with an OID. If we we encounter an OID
57 # tag later on, we will use the fact that this field is
58 # not defined to issue a warning
59
60 $self->{'_arcinfo-doc-filename'} = $arcinfo_doc_filename;
61 $self->{'_infodbtype'} = $infodbtype;
62 }
63
64 return bless $self, $class;
65}
66
67# /** @function get_version()
68# */
69sub get_version
70{
71 my $self = shift(@_);
72 return $self->{'version'};
73}
74# /** get_version() **/
75
76sub parse
77{
78 my ($self) = shift (@_);
79 my ($filename) = @_;
80
81 my $parser = new XML::Parser('Style' => 'Stream',
82 'Handlers' => {'Char' => \&Char,
83 'XMLDecl' => \&XMLDecl,
84 'Entity' => \&Entity,
85 'Doctype' => \&Doctype,
86 'Default' => \&Default
87 });
88
89 $parser->parsefile($filename);
90}
91
92sub StartDocument {$self->xml_start_document(@_);}
93sub XMLDecl {$self->xml_xmldecl(@_);}
94sub Entity {$self->xml_entity(@_);}
95sub Doctype {$self->xml_doctype(@_);}
96sub StartTag {$self->xml_start_tag(@_);}
97sub EndTag {$self->xml_end_tag(@_);}
98sub Text {$self->xml_text(@_);}
99sub PI {$self->xml_pi(@_);}
100sub EndDocument {$self->xml_end_document(@_);}
101sub Default {$self->xml_default(@_);}
102
103# This Char function overrides the one in XML::Parser::Stream to overcome a
104# problem where $expat->{Text} is treated as the return value, slowing
105# things down significantly in some cases.
106sub Char {
107 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+
108 $_[0]->{'Text'} .= $_[1];
109 return undef;
110}
111
112# Called at the beginning of the XML document.
113sub xml_start_document {
114 my $self = shift(@_);
115 my ($expat) = @_;
116
117}
118
119# Called for XML declarations
120sub xml_xmldecl {
121 my $self = shift(@_);
122 my ($expat, $version, $encoding, $standalone) = @_;
123}
124
125# Called for XML entities
126sub xml_entity {
127 my $self = shift(@_);
128 my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_;
129}
130
131# Called for DOCTYPE declarations - use die to bail out if this doctype
132# is not meant for this plugin
133sub xml_doctype {
134 my $self = shift(@_);
135 my ($expat, $name, $sysid, $pubid, $internal) = @_;
136 die "Manifest Cannot process XML document with DOCTYPE of $name";
137}
138
139# Called for every start tag. The $_ variable will contain a copy of the
140# tag and the %_ variable will contain the element's attributes.
141sub xml_start_tag
142{
143 my $self = shift(@_);
144 my ($expat, $element) = @_;
145 my $attributes = \%_;
146
147 if (($element eq "Filename") || ($element eq "OID"))
148 {
149 $self->{'item-val'} = "";
150 }
151 elsif ($element eq "Manifest")
152 {
153 if (defined $attributes->{'version'})
154 {
155 $self->{'version'} = $attributes->{'version'};
156 }
157 }
158 else
159 {
160 if (defined($self->{'file-type'}))
161 {
162 print STDERR "Warning: Malformed XML manifest\n";
163 print STDERR " Unrecognized element $element nested inside " . $self->{'file-type'} . ".\n";
164 }
165 else
166 {
167 my $filetype = lc($element);
168 $self->{'file-type'} = $filetype;
169 if (!defined $self->{$filetype})
170 {
171 print STDERR "Warning: <$element> is not one of the registered tags for manifest format.\n";
172 }
173 }
174
175 }
176}
177
178# Called for every end tag. The $_ variable will contain a copy of the tag.
179sub xml_end_tag
180{
181 my $self = shift(@_);
182 my ($expat, $element) = @_;
183
184 #print STDERR "@@@@ element: $element\n";
185
186 if ($element eq "Filename")
187 {
188 my $filetype = $self->{'file-type'};
189 my $filename = $self->{'item-val'};
190
191 #print STDERR "@@@@ filename: $filename\n";
192
193 $self->{$filetype}->{$filename} = 1;
194 $self->{'item-val'} = undef;
195 }
196 elsif ($element eq "OID") {
197 # look up src and assoc filenames used by this doc oid
198
199 my $filetype = $self->{'file-type'};
200 my $oid = $self->{'item-val'};
201
202 if (defined $self->{'_infodbtype'}) {
203
204 my $infodbtype = $self->{'_infodbtype'};
205 my $arcinfo_doc_filename = $self->{'_arcinfo-doc-filename'};
206
207 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $oid);
208
209 my $doc_source_file = $doc_rec->{'src-file'}->[0];
210
211 if(!$doc_source_file) {
212 $self->{'item-val'} = undef;
213 }
214 else {
215 my $assoc_files = $doc_rec->{'assoc-file'};
216 my @all_files = ($doc_source_file);
217 push(@all_files,@$assoc_files) if defined $assoc_files;
218
219 foreach my $filename (@all_files) {
220
221 $filename = &util::placeholders_to_abspath($filename);
222
223 if (!&FileUtils::isFilenameAbsolute($filename)) {
224 $filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$filename);
225 }
226
227 $self->{$filetype}->{$filename} = 1;
228 }
229 }
230 }
231 else {
232 print STDERR "Warning: No archiveinf-doc database in archives directory.\n";
233 print STDERR " Unable to look up source files that constitute document $oid.\n";
234 }
235
236 $self->{'item-val'} = undef;
237 }
238 else
239 {
240 $self->{'file-type'} = undef;
241 }
242}
243
244# Called just before start or end tags with accumulated non-markup text in
245# the $_ variable.
246sub xml_text {
247 my $self = shift(@_);
248 my ($expat) = @_;
249
250 if (defined $self->{'item-val'}) {
251 my $text = $_;
252 chomp($text);
253
254 $text =~ s/^\s+//;
255 $text =~ s/\s+$//;
256
257 $self->{'item-val'} .= $text if ($text !~ m/^\s*$/);
258 }
259}
260
261# Called for processing instructions. The $_ variable will contain a copy
262# of the pi.
263sub xml_pi {
264 my $self = shift(@_);
265 my ($expat, $target, $data) = @_;
266}
267
268# Called at the end of the XML document.
269sub xml_end_document {
270 my $self = shift(@_);
271 my ($expat) = @_;
272
273 if (defined $self->{'import'}) {
274 print STDERR "Warning: <Import> tag is deprecated.\n";
275 print STDERR " Processing data as if it were tagged as <Index>\n";
276 $self->{'index'} = $self->{'import'};
277 }
278
279}
280
281# Called for any characters not handled by the above functions.
282sub xml_default {
283 my $self = shift(@_);
284 my ($expat, $text) = @_;
285}
286
287
2881;
Note: See TracBrowser for help on using the repository browser.