source: gsdl/trunk/perllib/plugouts/METSPlugout.pm@ 15173

Last change on this file since 15173 was 15013, checked in by davidb, 16 years ago

Adjustment to Fedora and METS plugouts so they can handle Fedora v2.x or Fedora v3.x

  • Property svn:keywords set to Author Date Id Revision
File size: 11.8 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use util;
36use BasPlugout;
37use docprint; # for escape_text
38
39sub BEGIN {
40 @METSPlugout::ISA = ('BasPlugout');
41}
42
43my $arguments = [
44 { 'name' => "xslt_txt",
45 'desc' => "{METSPlugout.xslt_txt}",
46 'type' => "string",
47 'reqd' => "no",
48 'hiddengli' => "no"},
49 { 'name' => "xslt_mets",
50 'desc' => "{METSPlugout.xslt_mets}",
51 'type' => "string",
52 'reqd' => "no",
53 'hiddengli' => "no"}
54 ];
55
56my $options = { 'name' => "METSPlugout",
57 'desc' => "{METSPlugout.desc}",
58 'abstract' => "yes",
59 'inherits' => "yes",
60 'args' => $arguments
61 };
62
63sub new {
64 my ($class) = shift (@_);
65 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
66 push(@$plugoutlist, $class);
67
68
69 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
70 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
71
72 my $self = (defined $hashArgOptLists)? new BasPlugout($plugoutlist,$inputargs,$hashArgOptLists): new BasPlugout($plugoutlist,$inputargs);
73
74
75 return bless $self, $class;
76}
77
78
79sub saveas_doctxt
80{
81 my $self = shift (@_);
82 my ($doc_obj,$working_dir) = @_;
83
84 my $is_recursive = 1;
85
86 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
87
88 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
89
90 my $outhandler;
91
92 if (defined $self->{'xslt_writer'}){
93 $outhandler = $self->{'xslt_writer'};
94 }
95 else{
96 $outhandler = $self->get_output_handler($doc_txt_file);
97 }
98
99 $self->output_xml_header($outhandler);
100 my $section = $doc_obj->get_top_section();
101 $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive);
102 $self->output_xml_footer($outhandler);
103
104
105 if (defined $self->{'xslt_writer'}){
106 $self->close_xslt_pipe();
107 }
108 else{
109 close($outhandler);
110 }
111
112}
113
114sub saveas_docmets
115{
116 my $self = shift (@_);
117 my ($doc_obj,$working_dir) = @_;
118
119 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
120
121 my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
122 if (!defined $doc_title) {
123 $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
124 }
125
126 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
127
128 my $outhandler;
129
130 if (defined $self->{'xslt_writer'}){
131 $outhandler = $self->{'xslt_writer'};
132 }
133 else{
134 $outhandler = $self->get_output_handler($doc_mets_file);
135 }
136
137
138 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
139 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir);
140 $self->output_mets_xml_footer($outhandler);
141
142 if (defined $self->{'xslt_writer'}){
143 $self->close_xslt_pipe();
144 }
145 else{
146 close($outhandler);
147 }
148
149
150}
151
152sub saveas
153{
154 my $self = shift (@_);
155 my ($doc_obj,$doc_dir) = @_;
156
157 $self->process_assoc_files ($doc_obj, $doc_dir, '');
158
159 my $output_dir = $self->get_output_dir();
160 &util::mk_all_dir ($output_dir) unless -e $output_dir;
161
162 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
163
164 &util::mk_all_dir ($working_dir) unless -e $working_dir;
165
166 ###
167 # Save the text as a filefile
168 ###
169 $self->saveas_doctxt($doc_obj,$working_dir);
170
171 ###
172 # Save the structure and metadata as a METS file
173 ###
174 $self->saveas_docmets($doc_obj,$working_dir);
175
176 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");
177
178 $self->store_output_info_reference($doc_obj);
179
180}
181
182
183sub output_mets_xml_header
184{
185 my $self = shift(@_);
186 my ($handle, $OID, $doc_title) = @_;
187
188 gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n";
189}
190
191sub output_mets_xml_header_extra_attribute
192{
193 my $self = shift(@_);
194 my ($handle, $extra_attr, $extra_schema) = @_;
195
196 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
197 print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
198 print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
199 print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
200 if ($ENV{'FEDORA2_HOME'}) {
201 print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
202 }
203 else {
204 print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' ."\n";
205 }
206 print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
207 print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
208 print $handle " $extra_schema\n" if (defined $extra_schema);
209 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
210 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
211
212 print $handle " $extra_attr>\n";
213
214}
215
216sub output_mets_xml_footer
217{
218 my $self = shift(@_);
219 my ($handle) = @_;
220 print $handle '</mets:mets>' . "\n";
221}
222
223# print out doctxt.xml file
224sub output_txt_section {
225 my $self = shift (@_);
226 my ($handle, $doc_obj, $section, $is_recursive) = @_;
227
228 print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
229}
230
231sub buffer_txt_section_xml {
232 my $self = shift(@_);
233 my ($doc_obj, $section, $is_recursive) = @_;
234
235 my $section_ptr = $doc_obj->_lookup_section ($section);
236
237 return "" unless defined $section_ptr;
238
239 my $all_text = "<Section>\n";
240 $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
241
242 if (defined $is_recursive && $is_recursive)
243 {
244 # Output all the subsections
245 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
246 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection");
247 }
248 }
249
250 $all_text .= "</Section>\n";
251
252
253 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
254 return $all_text;
255}
256
257#
258# Print out docmets.xml file
259#
260sub output_mets_section
261{
262 my $self = shift(@_);
263 my ($handle, $doc_obj, $section, $working_dir) = @_;
264
265 gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n";
266
267}
268
269
270sub buffer_mets_dmdSection_section_xml
271{
272 my $self = shift(@_);
273 my ($doc_obj,$section) = @_;
274
275 gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n";
276}
277
278sub buffer_mets_StructMapSection_section_xml
279{
280 my $self = shift(@_);
281 my ($doc_obj,$section, $order_numref, $fileid_base) = @_;
282
283 $section="" unless defined $section;
284
285
286 my $section_ptr=$doc_obj->_lookup_section($section);
287 return "" unless defined $section_ptr;
288
289 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
290
291 # output fileSection by Sections
292 my $section_num ="1". $section;
293 my $dmd_num = $section_num;
294
295 #**output the StructMap details
296
297 my $dmdid_attr = "DM$dmd_num";
298
299 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
300 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
301 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
302
303 $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n";
304
305
306 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
307 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base);
308 }
309
310 $all_text .= " </mets:div>\n";
311
312 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
313
314 return $all_text;
315}
316
317
318sub buffer_mets_StructMapWhole_section_xml
319{
320 my $self = shift(@_);
321 my ($doc_obj,$section) = @_;
322
323 my $section_ptr = $doc_obj->_lookup_section($section);
324 return "" unless defined $section_ptr;
325
326 my $all_text="";
327 my $fileID=0;
328 my $order_num = 0;
329
330 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
331
332 #** output the StructMapSection for the whole section
333 # get the sourcefile and associative file
334
335 foreach my $data (@{$section_ptr->{'metadata'}}){
336 my $escaped_value = &docprint::escape_text($data->[1]);
337
338 if ($data->[0] eq "gsdlsourcefilename") {
339 ++$fileID;
340 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
341 }
342
343 if ($data->[0] eq "gsdlassocfile"){
344 ++$fileID;
345 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
346 }
347 }
348 $all_text .= " </mets:div>\n";
349
350 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
351
352 return $all_text;
353}
354
355
356
357sub doctxt_to_xlink
358{
359 my $self = shift @_;
360 my ($fname,$working_dir) = @_;
361
362 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
363}
364
365sub buffer_mets_fileSection_section_xml
366{
367 my $self = shift(@_);
368 my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_;
369
370 #$section="" unless defined $section;
371
372 my $section_ptr=$doc_obj->_lookup_section($section);
373 return "" unless defined $section_ptr;
374
375 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
376
377 # output fileSection by sections
378 my $section_num ="1". $section;
379
380 $opt_attr = "" unless defined $opt_attr;
381
382 # output the fileSection details
383 my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n";
384 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
385
386 my $xlink;
387 if (defined $is_txt_split && $is_txt_split)
388 {
389 my $section_fnum ="1". $section;
390 $section_fnum =~ s/\./_/g;
391
392 $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir);
393 }
394 else
395 {
396 $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir);
397
398 $xlink .= '#xpointer(/Section[';
399
400 my $xpath = "1".$section;
401 $xpath =~ s/\./\]\/Section\[/g;
402
403 $xlink .= $xpath;
404
405 $xlink .= ']/text())';
406 }
407
408
409
410 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
411
412 $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
413 $all_text .= " </mets:file>\n";
414 $all_text .= " </mets:fileGrp>\n";
415
416
417 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
418 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base);
419 }
420
421 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
422
423 return $all_text;
424}
425
426sub buffer_mets_fileWhole_section_xml
427{
428 my $self = shift(@_);
429 my ($doc_obj,$section,$working_dir) = @_;
430
431 gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n";
432
433}
434
4351;
Note: See TracBrowser for help on using the repository browser.