source: trunk/gsdl/perllib/plugins/METSPlug.pm@ 11090

Last change on this file since 11090 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
Line 
1###########################################################################
2#
3# METSPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Processes GreenstoneArchive METS documents. Note that this plugin does no
27# syntax checking (though the XML::Parser module tests for
28# well-formedness). It's assumed that the GreenstoneArchive files conform
29# to their DTD.
30
31# 28/07/04 Modified to METSPlug - CHI-YU HUANG
32
33package METSPlug;
34
35use ghtml;
36
37use strict;
38no strict 'refs'; # allow filehandles to be variables and viceversa
39
40use XMLPlug;
41use XML::XPath;
42use XML::XPath::XMLParser;
43
# Register XMLPlug as the parent class at compile time so that method
# lookups on METSPlug objects fall through to it.
BEGIN {
    @METSPlug::ISA = qw(XMLPlug);
}
47
# This plugin declares no arguments of its own.
my $arguments = [];

# Descriptor pushed onto the shared "OptList" by new().
my $options = {
    'name'     => "METSPlug",
    'desc'     => "{METSPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
54
55
# Returns the regular expression (as a string) naming the files this
# plugin handles by default: anything ending in "docmets.xml", matched
# case-insensitively.
sub get_default_process_exp {
    my ($self) = @_;

    my $process_exp = q^(?i)docmets\.xml$^;
    return $process_exp;
}
61
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the XMLPlug constructor
#   $hashArgOptLists - hash ref collecting "ArgList" / "OptList" entries
#
# Builds the object via the XMLPlug constructor, initialises the parsing
# state used by the xml_* handlers, and reblesses into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # Contribute this plugin's argument and option descriptors.
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}) if defined $arguments;
    push(@{$hashArgOptLists->{"OptList"}}, $options) if defined $options;

    # Hand the descriptor lists to the parent only when they exist.
    my @parent_args = ($pluginlist, $inputargs);
    push(@parent_args, $hashArgOptLists) if defined $hashArgOptLists;
    my $self = XMLPlug->new(@parent_args);

    # Initial parsing state.
    @{$self}{'section', 'metadata_name', 'metadata_value', 'content'} = ("") x 4;
    $self->{'section_level'} = 0;

    return bless $self, $class;
}
80
# Handler called at the start of each docmets.xml document.
#
# Resets the per-document parsing state, then loads the companion
# doctxt.xml (same path, with "docmets.xml" replaced by "doctxt.xml")
# into an XML::XPath object stored in $self->{'parsed_xml'}.
#
# If doctxt.xml cannot be opened a warning is printed, $self->{'xmltxt'}
# is set to "" and $self->{'parsed_xml'} is left undefined, so that a
# tree belonging to a previously processed document is never reused.
sub xml_start_document {
    my $self = shift(@_);
    my ($expat, $element) = @_;

    $self->{'section'}        = "";
    $self->{'section_level'}  = 0;
    $self->{'metadata_name'}  = "";
    $self->{'metadata_value'} = "";
    $self->{'content'}        = "";

    # Tables filled while reading the dmdSec and fileSec parts; they are
    # consumed when the structMap divs are reached.
    $self->{'dmdSec_table'}  = {};
    $self->{'fileSec_table'} = {};

    # Forget any text tree left over from an earlier document.
    $self->{'parsed_xml'} = undef;

    # Locate doctxt.xml next to the METS file.
    my $filename = $self->{'filename'};
    $filename =~ s/docmets\.xml$/doctxt.xml/;

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as a mode string, and the handle is closed explicitly.
    if (open(my $filein, '<', $filename)) {
        my $xml_text = "";
        while (defined(my $line = <$filein>)) {
            # Skip the DOCTYPE declaration line.
            next if ($line =~ m/^<!DOCTYPE.*>/);
            $xml_text .= $line;
        }
        close($filein);

        $self->{'parsed_xml'} = XML::XPath->new(xml => $xml_text);
    }
    else {
        print STDERR "Warning: unable to open the $filename\n";
        $self->{'xmltxt'} = "";
    }

    my $outhandle = $self->{'outhandle'};
    print $outhandle "METSPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    print STDERR "<Processing n='$self->{'file'}' p='METSPlug'>\n" if ($self->{'gli'});
}
124
# End-of-document handler: intentionally does nothing.
sub xml_end_document {
    return;
}
127
# DOCTYPE handler: intentionally does nothing.
sub xml_doctype {
    return;
}
130
# Start-tag handler: records the current element name and dispatches on
# it.
#
#   mets:dmdSec, gsdl3:Metadata -> xml_dmd_start_tag
#   mets:file                   -> remember the file id in 'file_Id'
#   mets:FLocat                 -> xml_fileloc_start_tag
#   mets:div                    -> xml_strucMap_start_tag
#
# Attribute values are read from the global %_ hash (presumably filled
# in by the XML::Parser Stream-style machinery behind XMLPlug -- confirm
# there).
sub xml_start_tag {
    my $self = shift(@_);
    my ($expat, $element) = @_;

    $self->{'element'} = $element;

    if ($element eq "mets:dmdSec" || $element eq "gsdl3:Metadata") {
        # Descriptive metadata section.
        $self->xml_dmd_start_tag(@_);
    }
    elsif ($element eq "mets:file") {
        # Take the capture from the match's own return list rather than
        # from $1: after a failed match $1 still holds whatever an
        # earlier successful match captured.  A missing or non-matching
        # ID leaves 'file_Id' undefined.
        my $id = $_{'ID'};
        my ($file_id) = (defined $id) ? ($id =~ m/FILE(.*)/) : ();
        $self->{'file_Id'} = $file_id;
    }
    elsif ($element eq "mets:FLocat") {
        # File section.
        $self->xml_fileloc_start_tag(@_);
    }
    elsif ($element eq "mets:div") {
        # Structure map section.
        $self->xml_strucMap_start_tag(@_);
    }
}
151
# Start-tag handling for the descriptive metadata section.
#
#   mets:dmdSec    - starts an empty metadata list keyed by the text
#                    following "DM" in the ID attribute, and remembers
#                    that key under 'section_num' in the same table
#   gsdl3:Metadata - remembers the 'name' attribute as the current
#                    metadata name
#
# Attributes are read from the global %_ hash.
sub xml_dmd_start_tag {
    my ($self, $expat, $element) = @_;

    if ($element eq "gsdl3:Metadata") {
        $self->{'metadata_name'} = $_{'name'};
        return;
    }

    return unless ($element eq "mets:dmdSec");

    my ($dmd_key) = ($_{'ID'} =~ m/DM(.*)/);
    $self->{'dmdSec_table'}->{"$dmd_key"}     = [];
    $self->{'dmdSec_table'}->{'section_num'} = $dmd_key;
}
164
# Start-tag handling for mets:FLocat.
#
# The xlink:href attribute (read from the global %_ hash) is expected to
# look like "file:<name>#xpointer(<xpath>)".  The XPath is evaluated
# against the text tree in $self->{'parsed_xml'} and each matching node,
# serialised and HTML-unescaped, is appended to the content list for the
# current file id in $self->{'fileSec_table'}.
#
# Does nothing when no file id is set.  When the href cannot be parsed,
# or no text tree is available, a warning is printed and the content
# list is left empty instead of calling a method on an undefined value.
sub xml_fileloc_start_tag {
    my $self = shift(@_);
    my ($expat, $element) = @_;

    my $section_num = $self->{'file_Id'};
    return if (!defined $section_num);

    $self->{'fileSec_table'}->{"$section_num"} = [];
    $self->{'fileSec_table'}->{'section_num'} = $section_num;

    # Pull the XPath expression out of the href.
    my $xlink = $_{'xlink:href'};
    my $xpath_expr;
    if (defined $xlink) {
        ($xpath_expr) = ($xlink =~ m/^file:.*\#xpointer\((.*)\)$/);
    }
    if (!defined $xpath_expr) {
        my $shown = defined $xlink ? $xlink : "";
        print STDERR "Warning: unable to find an xpointer expression in xlink:href '$shown'\n";
        return;
    }

    my $parsed_xml = $self->{'parsed_xml'};
    if (!defined $parsed_xml) {
        print STDERR "Warning: no parsed text available for XPATH $xpath_expr\n";
        return;
    }

    my $nodeset = $parsed_xml->findnodes($xpath_expr);
    if ($nodeset->size == 0) {
        print STDERR "Warning: no text associated with XPATH $xpath_expr\n";
        return;
    }

    my $content_list = $self->{'fileSec_table'}->{"$section_num"};
    foreach my $node ($nodeset->get_nodelist) {
        my $xml_content = XML::XPath::XMLParser::as_string($node);
        my $unescaped_xml_content = ghtml::unescape_html($xml_content);
        push(@$content_list, { 'section_content' => $unescaped_xml_content });
    }
}
199
# Start-tag handling for mets:div in the structure map.
#
# The div's ID attribute (read from the global %_ hash) has the form
# "DS<key>"; <key> selects the metadata and content collected earlier in
# the dmdSec and fileSec tables.  A div whose ID is "DSAll" is ignored.
#
# The outermost div opens a new document; nested divs insert a new
# section after the last child of the current section.  The collected
# metadata and text for <key> are then added to that section.
sub xml_strucMap_start_tag {
    my ($self, $expat, $element) = @_;

    my ($section_num) = ($_{'ID'} =~ m/DS(.*)/);

    return if ($_{'ID'} eq "DSAll");

    if ($self->{'section_level'} != 0) {
        my $doc_obj    = $self->{'doc_obj'};
        my $last_child = $doc_obj->get_end_child($self->{'section'});
        $self->{'section'} = $doc_obj->insert_section($last_child);
    }
    else {
        $self->open_document();
    }
    $self->{'section_level'} += 1;

    # Metadata gathered from the matching dmdSec.
    my $md_pairs = $self->{'dmdSec_table'}->{"$section_num"};
    foreach my $pair (@$md_pairs) {
        $self->{'doc_obj'}->add_utf8_metadata(
            $self->{'section'},
            $pair->{'metadata_name'},
            $pair->{'metadata_value'}
        );
    }

    # Text gathered from the matching fileSec entry.
    my $text_items = $self->{'fileSec_table'}->{"$section_num"};
    foreach my $item (@$text_items) {
        $self->{'doc_obj'}->add_utf8_text($self->{'section'}, $item->{'section_content'});
    }
}
235
# End-tag handler.
#
#   gsdl3:Metadata - appends the accumulated name/value pair to the
#                    metadata list of the current dmdSec and clears the
#                    accumulators
#   mets:file      - forgets the current file id
#   mets:div       - steps one level out of the section hierarchy and
#                    closes the document when the outermost div ends
#
# Always clears $self->{'element'} on the way out.
sub xml_end_tag {
    my $self = shift(@_);
    my ($expat, $element) = @_;

    if ($element eq "gsdl3:Metadata") {
        my $section_num = $self->{'dmdSec_table'}->{'section_num'};

        my $md_pair = {
            'metadata_name'  => $self->{'metadata_name'},
            'metadata_value' => $self->{'metadata_value'},
        };

        my $md_list = $self->{'dmdSec_table'}->{"$section_num"};
        push(@$md_list, $md_pair);

        $self->{'metadata_name'}  = "";
        $self->{'metadata_value'} = "";
    }
    elsif ($element eq "mets:file") {
        # The id is stored under 'file_Id' (capital I) by xml_start_tag
        # and tested with defined() by xml_fileloc_start_tag, so reset
        # that same key, and reset it to undef.
        $self->{'file_Id'} = undef;
    }

    # Structure map section.  Only unwind a level that was actually
    # entered: xml_strucMap_start_tag does not count the "DSAll" div, so
    # its end tag arrives with section_level already at 0.
    if ($element eq "mets:div" && $self->{'section_level'} > 0) {
        $self->{'section_level'}--;
        $self->{'section'} = $self->{'doc_obj'}->get_parent_section($self->{'section'});
        $self->close_document() if $self->{'section_level'} == 0;
    }

    $self->{'element'} = "";
}
267
# Character-data handler.  The text chunk arrives in $_; it is appended
# to the pending metadata value while inside a gsdl3:Metadata element
# and ignored otherwise.
sub xml_text {
    my ($self, $expat) = @_;

    return unless ($self->{'element'} eq "gsdl3:Metadata");

    $self->{'metadata_value'} .= $_;
}
276
# Starts a fresh document object and makes its top level (the empty
# section name) the current section.
sub open_document {
    my ($self) = @_;

    $self->{'section'} = "";
    $self->{'doc_obj'} = doc->new();
}
284
# Finishes the current document: attaches its associated files and
# passes it to the processor.
#
# Each "gsdlassocfile" metadata value on the top section is a
# colon-separated triple "file:mime_type:dir".  The files are looked up
# under <...archives>/<assocfilepath>/ when the top section carries
# "assocfilepath" metadata and the document's path contains an
# "archives" directory; otherwise they are looked up in the directory
# holding the METS file itself.  The "gsdlassocfile" metadata is removed
# once the files have been associated.
sub close_document {
    my $self = shift(@_);

    my $doc_obj     = $self->{'doc_obj'};
    my $top_section = $doc_obj->get_top_section();

    # add the associated files
    my $assoc_files = $doc_obj->get_metadata($top_section, "gsdlassocfile");

    # for when "assocfilepath" isn't the same directory that doc.xml is in...
    my $assoc_filepath_list = $doc_obj->get_metadata($top_section, "assocfilepath");
    my $assoc_filepath = shift(@$assoc_filepath_list);

    # Only use $1 when the match really succeeded; after a failed match
    # it would still hold the capture of some earlier, unrelated match.
    if (defined($assoc_filepath)
        && $self->{'filename'} =~ m@^(.*[\\/]archives)@) {
        # make absolute rather than relative...
        $assoc_filepath = "$1/$assoc_filepath/";
    }
    else {
        # Fall back to the directory containing the METS file.
        $assoc_filepath = $self->{'filename'};
        $assoc_filepath =~ s/[^\\\/]*$//;
    }

    foreach my $assoc_file_info (@$assoc_files) {
        my ($assoc_file, $mime_type, $dir) = split(":", $assoc_file_info);
        my $real_dir = util::filename_cat($assoc_filepath, $assoc_file);
        my $assoc_dir = (defined $dir && $dir ne "")
            ? util::filename_cat($dir, $assoc_file)
            : $assoc_file;
        $doc_obj->associate_file($real_dir, $assoc_dir, $mime_type);
    }
    $doc_obj->delete_metadata($top_section, "gsdlassocfile");

    # process the document
    $self->{'processor'}->process($doc_obj, $self->{'file'});
}
317
318
3191;
320
Note: See TracBrowser for help on using the repository browser.