source: main/trunk/greenstone2/perllib/plugouts/METSPlugout.pm@ 32530

Last change on this file since 32530 was 32511, checked in by ak19, 6 years ago

Running plugoutinfo.pl with the describeall or listall flag would break on FedoraMETSPlugout when either FEDORA_HOME or FEDORA_VERSION isn't set (as is often the case), because there's a die statement in the BEGIN block of FedoraMETSPlugout. The die needed to run, when either FEDORA env var is not set, only if the plugout is NOT in info_only mode in the plugout constructor. However, info_only mode was never set in any of the plugouts, so I had to set up the infrastructure for it in plugoutinfo.pl and plugout.pm. I then added the info_only test to all the plugouts, even though it's redundant in most of them, to make sure future changes to any plugout's constructor do not break plugoutinfo.pl.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.9 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use FileUtils;
36use BasePlugout;
37use docprint; # for escape_text
38
# Set up inheritance at compile time: METSPlugout derives from BasePlugout.
BEGIN {
    @METSPlugout::ISA = qw(BasePlugout);
}
42
# Command-line arguments this plugout adds on top of those it inherits.
# Both are optional strings naming an XSLT file; new() resolves them with
# util::locate_config_file().
my $arguments = [
    {
        'name'      => 'xslt_txt',
        'desc'      => '{METSPlugout.xslt_txt}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'no',
    },
    {
        'name'      => 'xslt_mets',
        'desc'      => '{METSPlugout.xslt_mets}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'no',
    },
];

# Description of the plugout itself; marked abstract, and 'args' points at
# the argument table above.
my $options = {
    'name'     => 'METSPlugout',
    'desc'     => '{METSPlugout.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
62
# Constructor.
#
# Arguments (after the class name):
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - handed unchanged to the BasePlugout constructor
#   $hashArgOptLists - hash ref; its "ArgList" and "OptList" arrays receive
#                      this plugout's argument table and option description
#
# Returns the object built by BasePlugout, re-blessed into $class.
# Dies (after printing a message to STDERR) when -xslt_txt or -xslt_mets is
# given but util::locate_config_file() cannot find the file.
sub new {
    my ($class) = shift (@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;

    push(@$plugoutlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit Class->method form: the indirect-object form
    # "new BasePlugout(...)" is parsed heuristically and is fragile in a
    # package that defines its own new().
    my $self = BasePlugout->new($plugoutlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # Only describing the plugout: don't worry about any options etc
        return bless $self, $class;
    }

    # Replace each supplied XSLT file name with its located full path.
    foreach my $xslt_option ('xslt_txt', 'xslt_mets') {
        my $xslt_file = $self->{$xslt_option};
        next unless (defined $xslt_file && $xslt_file ne "");

        my $full_file_path = &util::locate_config_file($xslt_file);
        if (!defined $full_file_path) {
            print STDERR "Can not find $xslt_file, please make sure you have supplied the correct file path or put the file into collection or greenstone etc folder\n";
            die "\n";
        }
        $self->{$xslt_option} = $full_file_path;
    }

    return bless $self, $class;
}
100
101
# Write the text of a document, section by section, to "doctxt.xml" inside
# $working_dir.
#
#   $doc_obj     - document object; get_top_section() is called on it
#   $working_dir - directory the file is created in
#
# After open_xslt_pipe() has been called, output goes to
# $self->{'xslt_writer'} if that is defined (presumably set by
# open_xslt_pipe when an XSLT file is configured - confirm in BasePlugout);
# otherwise to the handle returned by get_output_handler().
sub saveas_doctxt
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $txt_path = &FileUtils::filenameConcatenate($working_dir, "doctxt.xml");

    $self->open_xslt_pipe($txt_path, $self->{'xslt_txt'});

    my $out = defined($self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($txt_path);

    binmode($out, ":utf8");

    # Header, then the whole section tree (recursive flag = 1), then footer.
    $self->output_xml_header($out);
    $self->output_txt_section($out, $doc_obj, $doc_obj->get_top_section(), 1);
    $self->output_xml_footer($out);

    # Finish with whichever sink was used above.
    if (!defined $self->{'xslt_writer'}) {
        close($out);
    }
    else {
        $self->close_xslt_pipe();
    }
}
138
# Write the structure and metadata of a document to "docmets.xml" inside
# $working_dir.
#
#   $doc_obj     - document object (get_top_section, get_metadata_element
#                  and get_OID are called on it)
#   $working_dir - directory the file is created in
#
# The title handed to output_mets_xml_header() is the top section's
# "dc.Title", falling back to "Title" when that is undefined.
sub saveas_docmets
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $mets_path = &FileUtils::filenameConcatenate($working_dir, "docmets.xml");

    my $top_section = $doc_obj->get_top_section();
    my $doc_title = $doc_obj->get_metadata_element($top_section, "dc.Title");
    unless (defined $doc_title) {
        $doc_title = $doc_obj->get_metadata_element($top_section, "Title");
    }

    $self->open_xslt_pipe($mets_path, $self->{'xslt_mets'});

    # Use the XSLT writer when one is defined, a plain output handle otherwise.
    my $out = defined($self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($mets_path);

    binmode($out, ":utf8");

    $self->output_mets_xml_header($out, $doc_obj->get_OID(), $doc_title);
    $self->output_mets_section($out, $doc_obj, $top_section, $working_dir);
    $self->output_mets_xml_footer($out);

    # Finish with whichever sink was used above.
    if (!defined $self->{'xslt_writer'}) {
        close($out);
    }
    else {
        $self->close_xslt_pipe();
    }
}
177
# Save one document as a METS archive entry.
#
#   $doc_obj - the document object to save
#   $doc_dir - the document's directory, relative to the output directory
#
# Processes associated files and metafile metadata, makes sure the output
# and working directories exist, writes doctxt.xml and docmets.xml, records
# the relative path of docmets.xml in $self->{'short_doc_file'} and finally
# calls store_output_info_reference().
sub saveas
{
    my ($self, $doc_obj, $doc_dir) = @_;

    $self->process_assoc_files($doc_obj, $doc_dir, '');
    $self->process_metafiles_metadata($doc_obj);

    my $output_dir = $self->get_output_dir();
    if (!-e $output_dir) {
        &FileUtils::makeAllDirectories($output_dir);
    }

    my $working_dir = &FileUtils::filenameConcatenate($output_dir, $doc_dir);
    if (!-e $working_dir) {
        &FileUtils::makeAllDirectories($working_dir);
    }

    # Save the text as a file
    $self->saveas_doctxt($doc_obj, $working_dir);

    # Save the structure and metadata as a METS file
    $self->saveas_docmets($doc_obj, $working_dir);

    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, "docmets.xml");

    $self->store_output_info_reference($doc_obj);
}
209
210
# Abstract method: write the opening of the METS document.
#
#   $handle    - output file handle
#   $OID       - document identifier
#   $doc_title - document title
#
# This base version never returns normally: it reports a
# "{common.must_be_implemented}" message through gsprintf and dies, so a
# subclass has to provide the real implementation.
sub output_mets_xml_header
{
    my $self = shift(@_);
    my ($handle, $OID, $doc_title) = @_;

    gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n");

    # Die unconditionally; chaining with "&&" would skip the die whenever
    # gsprintf returned a false value.
    die "\n";
}
218
# Write the XML declaration and the opening <mets:mets ...> tag.
#
#   $handle       - output file handle
#   $extra_attr   - text placed just before the closing ">" of the tag
#   $extra_schema - optional extra schemaLocation entry; written only when
#                   defined
#
# The xlink namespace URI depends on the FEDORA_VERSION environment
# variable: a value starting with "2" selects the TR/xlink URI, anything
# else (including unset) selects the 1999/xlink URI.
sub output_mets_xml_header_extra_attribute
{
    my ($self, $handle, $extra_attr, $extra_schema) = @_;

    my $fedora_version = $ENV{'FEDORA_VERSION'};
    my $xlink_ns;
    if (defined($fedora_version) && $fedora_version =~ m/^2/) { # major version is 2
        $xlink_ns = 'http://www.w3.org/TR/xlink';
    }
    else {
        $xlink_ns = 'http://www.w3.org/1999/xlink';
    }

    my @tag_lines = (
        '<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
        '<mets:mets xmlns:mets="http://www.loc.gov/METS/"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"',
        ' xmlns:xlink="' . $xlink_ns . '"',
        ' xsi:schemaLocation="http://www.loc.gov/METS/',
        ' http://www.loc.gov/standards/mets/mets.xsd',
    );
    push(@tag_lines, " $extra_schema") if (defined $extra_schema);
    push(@tag_lines,
        ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/',
        ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"',
    );

    # One print per line, as before.
    foreach my $tag_line (@tag_lines) {
        print $handle $tag_line . "\n";
    }

    # The caller-supplied attributes close the opening tag.
    print $handle " $extra_attr>\n";
}
243
# Write the closing </mets:mets> tag, followed by a newline, to $handle.
sub output_mets_xml_footer
{
    my ($self, $handle) = @_;

    print $handle "</mets:mets>\n";
}
250
# Print the doctxt.xml content for a section to $handle.
#
#   $handle       - output file handle
#   $doc_obj      - document object
#   $section      - section identifier to start from
#   $is_recursive - true to include all subsections as well
#
# The XML itself is built by buffer_txt_section_xml().
sub output_txt_section {
    my ($self, $handle, $doc_obj, $section, $is_recursive) = @_;

    my $section_xml = $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
    print $handle $section_xml;
}
258
# Build the <Section>...</Section> XML for one section of a document.
#
#   $doc_obj      - document object; _lookup_section() is called on it
#   $section      - section identifier
#   $is_recursive - true to nest the XML of every subsection (in
#                   'subsection_order') inside this section's element
#
# Returns the XML string, or "" when the section cannot be found.
# The section text is passed through docprint::escape_text(), and control
# characters below 0x20 other than LF and CR (so TAB is removed too) are
# stripped from the result.
sub buffer_txt_section_xml {
    my ($self, $doc_obj, $section, $is_recursive) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return "";
    }

    my $escaped_text = &docprint::escape_text("$section_ptr->{'text'}");
    my $xml = "<Section>\n" . $escaped_text;

    if ($is_recursive) {
        # Nest each subsection, in document order, inside this element.
        for my $child (@{$section_ptr->{'subsection_order'}}) {
            my $child_xml = $self->buffer_txt_section_xml($doc_obj, "$section.$child", $is_recursive);
            $xml .= $child_xml;
        }
    }

    $xml .= "</Section>\n";

    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
284
#
# Print out docmets.xml file
#
# Abstract method taking ($handle, $doc_obj, $section, $working_dir).
# This base version never returns normally: it reports a
# "{common.must_be_implemented}" message through gsprintf and dies, so a
# subclass has to provide the real implementation.
#
sub output_mets_section
{
    my $self = shift(@_);
    my ($handle, $doc_obj, $section, $working_dir) = @_;

    gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n");

    # Die unconditionally; chaining with "&&" would skip the die whenever
    # gsprintf returned a false value.
    die "\n";
}
296
297
# Abstract method taking ($doc_obj, $section); judging by its name it is
# meant to build the METS dmdSec XML for a section (confirm in subclasses).
#
# This base version never returns normally: it reports a
# "{common.must_be_implemented}" message through gsprintf and dies, so a
# subclass has to provide the real implementation.
sub buffer_mets_dmdSection_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n");

    # Die unconditionally; chaining with "&&" would skip the die whenever
    # gsprintf returned a false value.
    die "\n";
}
305
# Build the <mets:div> StructMap XML for a section and, nested inside it,
# for all of its subsections.
#
#   $doc_obj      - document object; _lookup_section() is called on it
#   $section      - section identifier; undef is treated as ""
#   $order_numref - reference to a running counter; its current value is
#                   used for this div's ORDER attribute and it is then
#                   incremented, so it advances once per div emitted
#   $fileid_base  - prefix of the FILEID attribute; defaults to
#                   "FILEGROUP_PRELUDE"
#
# Returns the XML string, or "" when the section cannot be found.
# Control characters below 0x20 other than LF and CR are stripped.
sub buffer_mets_StructMapSection_section_xml
{
    my ($self, $doc_obj, $section, $order_numref, $fileid_base) = @_;

    if (!defined $section) {
        $section = "";
    }

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return "";
    }

    if (!defined $fileid_base) {
        $fileid_base = "FILEGROUP_PRELUDE";
    }

    # Sections are numbered "1", "1.1", "1.2", ...; the same number is
    # used for the div ID, its label, the DMDID and the file pointer.
    my $section_num = "1" . $section;

    # Take this div's position, then advance the shared counter.
    my $order = $$order_numref++;

    my $xml = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n"
            . " ORDER=\"$order\" ORDERLABEL=\"$section_num\" \n"
            . " LABEL=\"$section_num\" DMDID=\"DM$section_num\">\n"
            . " <mets:fptr FILEID=\"$fileid_base$section_num\" />\n";

    # Child divs go inside this div, in document order.
    for my $child (@{$section_ptr->{'subsection_order'}}) {
        my $child_xml = $self->buffer_mets_StructMapSection_section_xml($doc_obj, "$section.$child", $order_numref, $fileid_base);
        $xml .= $child_xml;
    }

    $xml .= " </mets:div>\n";

    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
344
345
# Build the StructMap <mets:div> that stands for the document as a whole.
#
#   $doc_obj - document object; _lookup_section() is called on it
#   $section - section whose 'metadata' list is scanned
#
# One <mets:fptr FILEID="default.N"/> is emitted for every
# "gsdlsourcefilename" or "gsdlassocfile" metadata entry, with N counting
# up from 1 in metadata order.
#
# Returns the XML string, or "" when the section cannot be found.
# Control characters below 0x20 other than LF and CR are stripped.
sub buffer_mets_StructMapWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $fileID = 0;
    my $order_num = 0;

    # NOTE(review): "Documemt" looks like a typo for "Document" but is kept
    # verbatim because it is part of the emitted METS; confirm that no
    # consumer matches on this label before correcting it.
    my $all_text = ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";

    # Only the metadata names matter here; the values are not used, so
    # they are no longer escaped needlessly.
    foreach my $data (@{$section_ptr->{'metadata'}}) {
        my $meta_name = $data->[0];
        next unless ($meta_name eq "gsdlsourcefilename" || $meta_name eq "gsdlassocfile");

        ++$fileID;
        $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
    }

    $all_text .= " </mets:div>\n";

    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
382
383
384
# Abstract method taking ($fname, $working_dir). Callers in this file pass
# a doctxt file name and use the return value as the xlink:href of a
# <mets:FLocat> element.
#
# This base version never returns normally: it reports a
# "{common.must_be_implemented}" message through gsprintf and dies, so a
# subclass has to provide the real implementation.
sub doctxt_to_xlink
{
    my $self = shift @_;
    my ($fname, $working_dir) = @_;

    # The message names this sub correctly (it used to say "doxtxt_to_xlink").
    gsprintf(STDERR, "METSPlugout::doctxt_to_xlink {common.must_be_implemented}\n");

    # Die unconditionally; chaining with "&&" would skip the die whenever
    # gsprintf returned a false value.
    die "\n";
}
392
# Build the <mets:fileGrp> XML for a section, followed by the file groups
# of all its subsections.
#
#   $doc_obj      - document object; _lookup_section() is called on it
#   $section      - section identifier
#   $working_dir  - passed on to doctxt_to_xlink()
#   $is_txt_split - true: link to a per-section file "doctxt<N>.xml", where
#                   <N> is the section number with "." replaced by "_";
#                   false: link to "doctxt.xml" plus an xpointer that
#                   selects this section's text
#   $opt_attr     - extra attribute text for <mets:file>; defaults to ""
#   $fileid_base  - prefix of the fileGrp ID; defaults to
#                   "FILEGROUP_PRELUDE"
#
# Returns the XML string, or "" when the section cannot be found.
# Control characters below 0x20 other than LF and CR are stripped.
sub buffer_mets_fileSection_section_xml
{
    my ($self, $doc_obj, $section, $working_dir, $is_txt_split, $opt_attr, $fileid_base) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return "";
    }

    $fileid_base = "FILEGROUP_PRELUDE" if !defined $fileid_base;
    $opt_attr    = ""                  if !defined $opt_attr;

    # Sections are numbered "1", "1.1", "1.2", ...
    my $section_num = "1" . $section;

    # Work out where the text of this section lives.
    my $xlink;
    if ($is_txt_split) {
        # One text file per section: "1.2" -> doctxt1_2.xml
        (my $file_suffix = $section_num) =~ s/\./_/g;
        $xlink = $self->doctxt_to_xlink("doctxt$file_suffix.xml", $working_dir);
    }
    else {
        # Single text file: "1.2" -> /Section[1]/Section[2]/text()
        (my $xpath = $section_num) =~ s/\./\]\/Section\[/g;
        $xlink = $self->doctxt_to_xlink("doctxt.xml", $working_dir);
        $xlink .= '#xpointer(/Section[' . $xpath . ']/text())';
    }

    my $xml = ' <mets:fileGrp ID="' . $fileid_base . $section_num . '">' . "\n"
            . ' <mets:file MIMETYPE="text/xml" ID="FILE' . $section_num . '" ' . $opt_attr . " >\n"
            . ' <mets:FLocat LOCTYPE="URL" xlink:href="' . $xlink . '"'
            . ' xlink:title="Hierarchical Document Structure"/>' . "\n"
            . " </mets:file>\n"
            . " </mets:fileGrp>\n";

    # File groups of the subsections follow this one (they are not nested).
    for my $child (@{$section_ptr->{'subsection_order'}}) {
        my $child_xml = $self->buffer_mets_fileSection_section_xml($doc_obj, "$section.$child", $working_dir, $is_txt_split, $opt_attr, $fileid_base);
        $xml .= $child_xml;
    }

    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
453
# Abstract method taking ($doc_obj, $section, $working_dir); judging by
# its name it is meant to build the METS file section for the document as
# a whole (confirm in subclasses).
#
# This base version never returns normally: it reports a
# "{common.must_be_implemented}" message through gsprintf and dies, so a
# subclass has to provide the real implementation.
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section, $working_dir) = @_;

    gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n");

    # Die unconditionally; chaining with "&&" would skip the die whenever
    # gsprintf returned a false value.
    die "\n";
}
462
4631;
Note: See TracBrowser for help on using the repository browser.