source: main/trunk/greenstone2/perllib/plugouts/METSPlugout.pm@ 32130

Last change on this file since 32130 was 28708, checked in by kjdon, 10 years ago

check for xslt_mets and xslt_txt files

  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use FileUtils;
36use BasePlugout;
37use docprint; # for escape_text
38
# Register BasePlugout as the parent class at compile time, so that
# inheritance is in place before any other code in this package runs.
BEGIN {
    @METSPlugout::ISA = qw(BasePlugout);
}
42
# Arguments contributed by this plugout. Both are optional strings; new()
# resolves any non-empty value to a config file path.
my $arguments = [
    {
        name      => 'xslt_txt',
        desc      => '{METSPlugout.xslt_txt}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'no',
    },
    {
        name      => 'xslt_mets',
        desc      => '{METSPlugout.xslt_mets}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'no',
    },
];

# Option description for this plugout; it is flagged abstract and carries
# the argument list declared above.
my $options = {
    name     => 'METSPlugout',
    desc     => '{METSPlugout.desc}',
    abstract => 'yes',
    inherits => 'yes',
    args     => $arguments,
};
62
# Constructor.
#
# Arguments (after the class name):
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - handed unchanged to the BasePlugout constructor
#   $hashArgOptLists - hash ref; this plugout's argument descriptions are
#                      appended to its "ArgList" array and its option
#                      description to its "OptList" array
#
# For each of the 'xslt_txt' and 'xslt_mets' options that is defined and
# non-empty after base construction, the value is replaced by the path
# returned by util::locate_config_file(). If that returns undef, a message
# is printed to STDERR and the constructor dies with "\n".
#
# Returns the object blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call: the indirect-object form
    # "new BasePlugout(...)" relies on parser heuristics to be read this way.
    my $self = BasePlugout->new($plugoutlist, $inputargs, $hashArgOptLists);

    # Both XSLT options are resolved in exactly the same way.
    foreach my $xslt_option ('xslt_txt', 'xslt_mets') {
        my $configured_file = $self->{$xslt_option};
        next unless defined $configured_file && $configured_file ne "";

        my $full_file_path = &util::locate_config_file($configured_file);
        if (!defined $full_file_path) {
            print STDERR "Can not find $configured_file, please make sure you have supplied the correct file path or put the file into collection or greenstone etc folder\n";
            die "\n";
        }
        $self->{$xslt_option} = $full_file_path;
    }

    return bless $self, $class;
}
95
96
# Write the text of $doc_obj to "doctxt.xml" inside $working_dir.
#
# open_xslt_pipe() is called first with the 'xslt_txt' option. If
# $self->{'xslt_writer'} is defined afterwards (presumably set by
# open_xslt_pipe -- confirm in BasePlugout), output goes to that handle and
# is finished with close_xslt_pipe(); otherwise it goes to the handle from
# get_output_handler(), which is closed directly.
sub saveas_doctxt {
    my ($self, $doc_obj, $working_dir) = @_;

    my $txt_path = &FileUtils::filenameConcatenate($working_dir, "doctxt.xml");

    $self->open_xslt_pipe($txt_path, $self->{'xslt_txt'});

    my $out = defined $self->{'xslt_writer'}
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($txt_path);

    binmode($out, ":utf8");

    # The whole section tree is written, starting from the top section.
    my $recurse_into_subsections = 1;

    $self->output_xml_header($out);
    $self->output_txt_section($out, $doc_obj, $doc_obj->get_top_section(), $recurse_into_subsections);
    $self->output_xml_footer($out);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($out);
    }
}
133
# Write the structure and metadata of $doc_obj to "docmets.xml" inside
# $working_dir.
#
# The title passed to output_mets_xml_header() is the top section's
# "dc.Title" metadata, falling back to "Title" when that is undefined.
# Output handle selection and closing follow the same XSLT-pipe-or-plain-file
# logic as saveas_doctxt(), using the 'xslt_mets' option.
sub saveas_docmets {
    my ($self, $doc_obj, $working_dir) = @_;

    my $mets_path = &FileUtils::filenameConcatenate($working_dir, "docmets.xml");

    my $title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "dc.Title");
    $title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "Title")
        unless defined $title;

    $self->open_xslt_pipe($mets_path, $self->{'xslt_mets'});

    my $out = defined $self->{'xslt_writer'}
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($mets_path);

    binmode($out, ":utf8");

    $self->output_mets_xml_header($out, $doc_obj->get_OID(), $title);
    $self->output_mets_section($out, $doc_obj, $doc_obj->get_top_section(), $working_dir);
    $self->output_mets_xml_footer($out);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($out);
    }
}
172
# Save one document as a METS archive entry.
#
# In order: processes associated files and metafile metadata, creates the
# output directory and the per-document working directory
# (<output_dir>/<doc_dir>) if they do not exist, writes doctxt.xml and
# docmets.xml into the working directory, records "<doc_dir>/docmets.xml"
# in $self->{'short_doc_file'}, and finally calls
# store_output_info_reference().
sub saveas {
    my ($self, $doc_obj, $doc_dir) = @_;

    $self->process_assoc_files($doc_obj, $doc_dir, '');
    $self->process_metafiles_metadata($doc_obj);

    my $output_dir = $self->get_output_dir();
    if (!-e $output_dir) {
        &FileUtils::makeAllDirectories($output_dir);
    }

    my $working_dir = &FileUtils::filenameConcatenate($output_dir, $doc_dir);
    if (!-e $working_dir) {
        &FileUtils::makeAllDirectories($working_dir);
    }

    # Save the text as a file.
    $self->saveas_doctxt($doc_obj, $working_dir);

    # Save the structure and metadata as a METS file.
    $self->saveas_docmets($doc_obj, $working_dir);

    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, "docmets.xml");

    $self->store_output_info_reference($doc_obj);
}
204
205
# Abstract hook: saveas_docmets() calls this with the output handle, the
# document OID and the document title before writing the METS sections.
#
# This base version only reports "must be implemented" on STDERR and dies
# with "\n".
sub output_mets_xml_header
{
    my $self = shift(@_);
    my ($handle, $OID, $doc_title) = @_;

    # The die must not hinge on gsprintf's return value: with "&& die" a
    # false return would let this unimplemented method return silently.
    gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n");
    die "\n";
}
213
# Print the XML declaration and the opening <mets:mets ...> tag to $handle.
#
#   $extra_attr   - text printed last, immediately before the closing ">"
#   $extra_schema - optional; when defined it is printed as an additional
#                   line inside the xsi:schemaLocation value
#
# The xlink namespace is "http://www.w3.org/TR/xlink" when the
# FEDORA_VERSION environment variable starts with "2", and
# "http://www.w3.org/1999/xlink" otherwise.
sub output_mets_xml_header_extra_attribute {
    my ($self, $handle, $extra_attr, $extra_schema) = @_;

    my $fedora_version = $ENV{'FEDORA_VERSION'};
    my $xlink_decl = (defined($fedora_version) && $fedora_version =~ m/^2/) # major version 2
        ? ' xmlns:xlink="http://www.w3.org/TR/xlink"'
        : ' xmlns:xlink="http://www.w3.org/1999/xlink"';

    my @header_lines = (
        '<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
        '<mets:mets xmlns:mets="http://www.loc.gov/METS/"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"',
        $xlink_decl,
        ' xsi:schemaLocation="http://www.loc.gov/METS/',
        ' http://www.loc.gov/standards/mets/mets.xsd',
    );
    push(@header_lines, " $extra_schema") if defined $extra_schema;
    push(@header_lines,
        ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/',
        ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"',
    );

    # One print per line, exactly as many print calls as before.
    foreach my $header_line (@header_lines) {
        print $handle $header_line . "\n";
    }

    print $handle " $extra_attr>\n";
}
238
# Print the closing </mets:mets> tag, followed by a newline, to $handle.
sub output_mets_xml_footer {
    my ($self, $handle) = @_;

    print {$handle} "</mets:mets>\n";
}
245
# Print the doctxt.xml content for $section to $handle.
#
# The XML itself is produced by buffer_txt_section_xml(); $is_recursive is
# passed through to it unchanged.
sub output_txt_section {
    my ($self, $handle, $doc_obj, $section, $is_recursive) = @_;

    my @section_xml = $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
    print {$handle} @section_xml;
}
253
# Build the doctxt.xml fragment for $section and return it as a string.
#
# The fragment is "<Section>\n", the section text escaped with
# docprint::escape_text(), the fragments of all subsections (only when
# $is_recursive is true), and "</Section>\n". Returns "" when $doc_obj has
# no such section.
sub buffer_txt_section_xml {
    my ($self, $doc_obj, $section, $is_recursive) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $xml = "<Section>\n" . &docprint::escape_text("$section_ptr->{'text'}");

    if ($is_recursive) {
        # Subsections are emitted nested inside this <Section> element.
        foreach my $child (@{$section_ptr->{'subsection_order'}}) {
            $xml .= $self->buffer_txt_section_xml($doc_obj, "$section.$child", $is_recursive);
        }
    }

    $xml .= "</Section>\n";

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    # Note that TAB (\x09) is removed as well.
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
279
#
# Print out docmets.xml file
#
# Abstract hook: saveas_docmets() calls this with the output handle, the
# document object, its top section and the working directory. This base
# version only reports "must be implemented" on STDERR and dies with "\n".
sub output_mets_section
{
    my $self = shift(@_);
    my ($handle, $doc_obj, $section, $working_dir) = @_;

    # The die must not hinge on gsprintf's return value: with "&& die" a
    # false return would let this unimplemented method return silently.
    gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n");
    die "\n";
}
291
292
# Abstract hook taking a document object and a section.
#
# This base version only reports "must be implemented" on STDERR and dies
# with "\n".
sub buffer_mets_dmdSection_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section) = @_;

    # The die must not hinge on gsprintf's return value: with "&& die" a
    # false return would let this unimplemented method return silently.
    gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n");
    die "\n";
}
300
# Build the structMap <mets:div> for $section, with the divs of all its
# subsections nested inside, and return it as a string.
#
#   $section      - section identifier; undef is treated as ""
#   $order_numref - reference to a counter; its current value becomes this
#                   div's ORDER attribute and it is then incremented, so
#                   the same reference numbers the whole tree
#   $fileid_base  - prefix for the fptr FILEID; defaults to
#                   "FILEGROUP_PRELUDE"
#
# Returns "" when $doc_obj has no such section.
sub buffer_mets_StructMapSection_section_xml {
    my ($self, $doc_obj, $section, $order_numref, $fileid_base) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;

    # Section numbers are the section identifier prefixed with "1".
    my $section_num = "1" . $section;

    # Take this div's order number, then advance the shared counter.
    my $order = $$order_numref++;

    my $xml = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n"
            . " ORDER=\"$order\" ORDERLABEL=\"$section_num\" \n"
            . " LABEL=\"$section_num\" DMDID=\"DM$section_num\">\n"
            . " <mets:fptr FILEID=\"${fileid_base}${section_num}\" />\n";

    foreach my $child (@{$section_ptr->{'subsection_order'}}) {
        $xml .= $self->buffer_mets_StructMapSection_section_xml($doc_obj, "$section.$child", $order_numref, $fileid_base);
    }

    $xml .= " </mets:div>\n";

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
339
340
# Build the structMap <mets:div> describing the whole document and return
# it as a string.
#
# One <mets:fptr FILEID="default.N"/> is emitted for every
# "gsdlsourcefilename" or "gsdlassocfile" entry in the section's metadata
# list, with N counting from 1 in list order. Metadata values are not used.
# Returns "" when $doc_obj has no such section.
sub buffer_mets_StructMapWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $order_num = 0;

    # NOTE(review): "Documemt" looks like a typo for "Document", but it is
    # part of the emitted LABEL, so it is kept verbatim here -- confirm
    # nothing matches on it before correcting.
    my $all_text = ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";

    # Each metadata entry is a [name, value] pair; only the name matters
    # here, so the value is no longer escaped needlessly per entry.
    my $fileID = 0;
    foreach my $data (@{$section_ptr->{'metadata'}}) {
        my $meta_name = $data->[0];
        next unless ($meta_name eq "gsdlsourcefilename" || $meta_name eq "gsdlassocfile");

        ++$fileID;
        $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
    }

    $all_text .= " </mets:div>\n";

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
377
378
379
380sub doctxt_to_xlink
381{
382 my $self = shift @_;
383 my ($fname,$working_dir) = @_;
384
385 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
386}
387
# Build the <mets:fileGrp> for $section, followed by the file groups of all
# its subsections, and return them as one string.
#
#   $is_txt_split - when true, the section links to its own file
#                   "doctxt<num>.xml" (dots in the section number replaced
#                   by underscores); otherwise it links into "doctxt.xml"
#                   with an xpointer selecting the matching nested Section
#   $opt_attr     - extra text placed inside the <mets:file> tag;
#                   defaults to ""
#   $fileid_base  - prefix for the fileGrp ID; defaults to
#                   "FILEGROUP_PRELUDE"
#
# The link target itself comes from doctxt_to_xlink(). Returns "" when
# $doc_obj has no such section.
sub buffer_mets_fileSection_section_xml {
    my ($self, $doc_obj, $section, $working_dir, $is_txt_split, $opt_attr, $fileid_base) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
    $opt_attr    = ""                  unless defined $opt_attr;

    # Section numbers are the section identifier prefixed with "1".
    my $section_num = "1" . $section;

    my $xlink;
    if ($is_txt_split) {
        # e.g. section number "1.2.3" -> file "doctxt1_2_3.xml"
        (my $file_suffix = $section_num) =~ s/\./_/g;
        $xlink = $self->doctxt_to_xlink("doctxt$file_suffix.xml", $working_dir);
    }
    else {
        # e.g. section number "1.2.3" -> /Section[1]/Section[2]/Section[3]
        (my $xpath = $section_num) =~ s/\./\]\/Section\[/g;
        $xlink = $self->doctxt_to_xlink("doctxt.xml", $working_dir)
               . '#xpointer(/Section[' . $xpath . ']/text())';
    }

    my $xml = " <mets:fileGrp ID=\"${fileid_base}${section_num}\">\n"
            . " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n"
            . ' <mets:FLocat LOCTYPE="URL" xlink:href="' . $xlink . '"'
            . ' xlink:title="Hierarchical Document Structure"/>' . "\n"
            . " </mets:file>\n"
            . " </mets:fileGrp>\n";

    # Subsection file groups follow this one as siblings (they are not nested).
    foreach my $child (@{$section_ptr->{'subsection_order'}}) {
        $xml .= $self->buffer_mets_fileSection_section_xml($doc_obj, "$section.$child", $working_dir, $is_txt_split, $opt_attr, $fileid_base);
    }

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
448
# Abstract hook taking a document object, a section and the working
# directory.
#
# This base version only reports "must be implemented" on STDERR and dies
# with "\n".
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section,$working_dir) = @_;

    # The die must not hinge on gsprintf's return value: with "&& die" a
    # false return would let this unimplemented method return silently.
    gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n");
    die "\n";
}
457
4581;
Note: See TracBrowser for help on using the repository browser.