source: main/trunk/greenstone2/perllib/plugouts/METSPlugout.pm@ 32012

Last change on this file since 32012 was 28708, checked in by kjdon, 11 years ago

check for xslt_mets and xslt_txt files

  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
RevLine 
[12330]1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
[14927]29no strict 'subs';
[12330]30no strict 'refs';
31
[14927]32use gsprintf 'gsprintf';
33
[12330]34eval {require bytes};
[28562]35use FileUtils;
[17203]36use BasePlugout;
[13172]37use docprint; # for escape_text
[12330]38
# Set up inheritance at compile time: METSPlugout derives from BasePlugout.
BEGIN {
    @METSPlugout::ISA = qw(BasePlugout);
}
42
# Arguments specific to this plugout.  Both are optional strings naming an
# XSLT file: 'xslt_txt' is used when writing doctxt.xml and 'xslt_mets' when
# writing docmets.xml (see saveas_doctxt / saveas_docmets).
my $arguments = [
    {
        'name'      => 'xslt_txt',
        'desc'      => '{METSPlugout.xslt_txt}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'no',
    },
    {
        'name'      => 'xslt_mets',
        'desc'      => '{METSPlugout.xslt_mets}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'no',
    },
];

# Option block describing this plugout; it is marked abstract and carries
# the argument list declared above.
my $options = {
    'name'     => 'METSPlugout',
    'desc'     => '{METSPlugout.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
62
# Constructor.
#
# Parameters:
#   $class           - class name being constructed
#   $plugoutlist     - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to BasePlugout->new
#   $hashArgOptLists - hash ref; this plugout's arguments and options are
#                      appended to its "ArgList" and "OptList" entries
#
# After the base object is built, any non-empty 'xslt_txt' / 'xslt_mets'
# value is resolved through util::locate_config_file and replaced by the
# path that call returns.  If the file cannot be located, a message is
# printed to STDERR and construction dies.
#
# Returns the object blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit method-call syntax: the indirect-object form
    # "new BasePlugout(...)" is ambiguous inside a package that itself
    # defines a sub called "new".
    my $self = BasePlugout->new($plugoutlist, $inputargs, $hashArgOptLists);

    # Both XSLT options are validated and resolved in exactly the same way.
    foreach my $xslt_arg ('xslt_txt', 'xslt_mets') {
        my $xslt_file = $self->{$xslt_arg};
        next unless defined $xslt_file && $xslt_file ne "";

        my $full_file_path = &util::locate_config_file($xslt_file);
        if (!defined $full_file_path) {
            print STDERR "Can not find $xslt_file, please make sure you have supplied the correct file path or put the file into collection or greenstone etc folder\n";
            die "\n";
        }
        $self->{$xslt_arg} = $full_file_path;
    }

    return bless $self, $class;
}
95
[14927]96
# Write the document text to "doctxt.xml" inside $working_dir.
#
# Parameters:
#   $doc_obj     - document object; its top section is written recursively
#   $working_dir - directory that receives doctxt.xml
#
# The output goes through the XSLT pipe when open_xslt_pipe() leaves an
# 'xslt_writer' handle on $self, otherwise through a handle obtained from
# get_output_handler().
sub saveas_doctxt
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $doc_txt_file = &FileUtils::filenameConcatenate($working_dir, "doctxt.xml");

    $self->open_xslt_pipe($doc_txt_file, $self->{'xslt_txt'});

    # Prefer the XSLT writer if one was set up, else write the file directly.
    my $outhandler = defined $self->{'xslt_writer'}
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_txt_file);

    binmode($outhandler, ":utf8");

    $self->output_xml_header($outhandler);

    # Always descend into subsections (recursive flag = 1).
    my $top_section = $doc_obj->get_top_section();
    $self->output_txt_section($outhandler, $doc_obj, $top_section, 1);

    $self->output_xml_footer($outhandler);

    # Shut down whichever sink was used.
    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($outhandler);
    }
}
133
# Write the document structure and metadata to "docmets.xml" inside
# $working_dir.
#
# Parameters:
#   $doc_obj     - document object to serialise
#   $working_dir - directory that receives docmets.xml
#
# The title passed to the METS header is the top section's "dc.Title"
# metadata, falling back to "Title" when that is undefined.
sub saveas_docmets
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $doc_mets_file = &FileUtils::filenameConcatenate($working_dir, "docmets.xml");

    # First defined of dc.Title, Title (may remain undefined).
    my $doc_title;
    foreach my $title_field ("dc.Title", "Title") {
        $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $title_field);
        last if defined $doc_title;
    }

    $self->open_xslt_pipe($doc_mets_file, $self->{'xslt_mets'});

    # Prefer the XSLT writer if one was set up, else write the file directly.
    my $outhandler = defined $self->{'xslt_writer'}
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_mets_file);

    binmode($outhandler, ":utf8");

    $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
    $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(), $working_dir);
    $self->output_mets_xml_footer($outhandler);

    # Shut down whichever sink was used.
    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($outhandler);
    }
}
172
# Save one document as a METS archive entry.
#
# Parameters:
#   $doc_obj - document object to save
#   $doc_dir - document directory, relative to the plugout's output dir
#
# Steps: process associated files and metafile metadata, make sure the
# output and working directories exist, write doctxt.xml and docmets.xml,
# record the relative path of docmets.xml in $self->{'short_doc_file'},
# then store the output info reference for the document.
sub saveas
{
    my ($self, $doc_obj, $doc_dir) = @_;

    $self->process_assoc_files($doc_obj, $doc_dir, '');
    $self->process_metafiles_metadata($doc_obj);

    my $output_dir = $self->get_output_dir();
    if (!-e $output_dir) {
        &FileUtils::makeAllDirectories($output_dir);
    }

    my $working_dir = &FileUtils::filenameConcatenate($output_dir, $doc_dir);
    if (!-e $working_dir) {
        &FileUtils::makeAllDirectories($working_dir);
    }

    # Save the text as a file.
    $self->saveas_doctxt($doc_obj, $working_dir);

    # Save the structure and metadata as a METS file.
    $self->saveas_docmets($doc_obj, $working_dir);

    $self->{'short_doc_file'} = &FileUtils::filenameConcatenate($doc_dir, "docmets.xml");

    $self->store_output_info_reference($doc_obj);
}
204
205
# Abstract method: write the opening of the METS document.
#
# Parameters:
#   $handle    - output file handle
#   $OID       - document identifier
#   $doc_title - document title
#
# This base implementation only reports that the method must be
# implemented and then dies.
sub output_mets_xml_header
{
    my $self = shift(@_);
    my ($handle, $OID, $doc_title) = @_;

    gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n");

    # Abort unconditionally: the die must not depend on whether the
    # message above could be printed.
    die "\n";
}
[12330]213
# Print the XML declaration and the opening <mets:mets ...> tag.
#
# Parameters:
#   $handle       - output file handle
#   $extra_attr   - text placed just before the closing '>' of the tag;
#                   optional, treated as empty when undefined
#   $extra_schema - optional extra entry for xsi:schemaLocation; omitted
#                   when undefined
#
# The xlink namespace URI depends on the FEDORA_VERSION environment
# variable: a value starting with "2" selects the TR/xlink URI, anything
# else (or no value) selects the 1999/xlink URI.
sub output_mets_xml_header_extra_attribute
{
    my $self = shift(@_);
    my ($handle, $extra_attr, $extra_schema) = @_;

    # Treat a missing attribute string like a missing schema: emit nothing
    # for it rather than interpolating undef (which warns under -w).
    $extra_attr = "" unless defined $extra_attr;

    # checking if major version is 2
    my $is_fedora2 = defined($ENV{'FEDORA_VERSION'}) && $ENV{'FEDORA_VERSION'} =~ m/^2/;

    print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
    print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
    print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
    print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
    if ($is_fedora2) {
        print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' . "\n";
    }
    else {
        print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' . "\n";
    }
    print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
    print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
    print $handle " $extra_schema\n" if (defined $extra_schema);
    print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
    print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";

    print $handle " $extra_attr>\n";
}
238
# Print the closing </mets:mets> tag, followed by a newline, to $handle.
sub output_mets_xml_footer
{
    my ($self, $handle) = @_;

    print {$handle} "</mets:mets>\n";
}
245
# Print the doctxt.xml content for $section to $handle.
#
# Parameters:
#   $handle       - output file handle
#   $doc_obj      - document object
#   $section      - section identifier to print
#   $is_recursive - passed through to buffer_txt_section_xml
sub output_txt_section {
    my ($self, $handle, $doc_obj, $section, $is_recursive) = @_;

    print {$handle} $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
}
253
# Build the <Section> XML text for one section of a document.
#
# Parameters:
#   $doc_obj      - document object (queried with _lookup_section)
#   $section      - section identifier
#   $is_recursive - when true, subsections are nested inside this section
#
# Returns the XML as a string, or "" if the section cannot be found.
sub buffer_txt_section_xml {
    my ($self, $doc_obj, $section, $is_recursive) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" if !defined $section_ptr;

    my $escaped_text = &docprint::escape_text("$section_ptr->{'text'}");
    my @pieces = ("<Section>\n", $escaped_text);

    if ($is_recursive) {
        # Output all the subsections, in their stored order.
        foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
            my $child_xml = $self->buffer_txt_section_xml($doc_obj, "$section.$subsection", $is_recursive);
            push(@pieces, $child_xml);
        }
    }

    push(@pieces, "</Section>\n");

    my $all_text = join('', @pieces);

    # Drop control characters, keeping only LF (\x0A) and CR (\x0D).
    # NOTE(review): the range also removes TAB (\x09), which XML 1.0
    # allows -- confirm that this is intended.
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
279
#
# Print out docmets.xml file
#
# Abstract method.
#
# Parameters:
#   $handle      - output file handle
#   $doc_obj     - document object
#   $section     - section identifier to start from
#   $working_dir - directory the document is being written into
#
# This base implementation only reports that the method must be
# implemented and then dies.
sub output_mets_section
{
    my $self = shift(@_);
    my ($handle, $doc_obj, $section, $working_dir) = @_;

    gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n");

    # Abort unconditionally: the die must not depend on whether the
    # message above could be printed.
    die "\n";
}
[13051]291
292
# Abstract method: build the descriptive-metadata XML for a section.
#
# Parameters:
#   $doc_obj - document object
#   $section - section identifier
#
# This base implementation only reports that the method must be
# implemented and then dies.
sub buffer_mets_dmdSection_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n");

    # Abort unconditionally: the die must not depend on whether the
    # message above could be printed.
    die "\n";
}
300
# Build the structMap <mets:div> for one section and, nested inside it,
# the divs of all of its subsections.
#
# Parameters:
#   $doc_obj      - document object (queried with _lookup_section)
#   $section      - section identifier; undefined is treated as ""
#   $order_numref - reference to a running counter; its current value is
#                   written as ORDER and the counter is then incremented
#   $fileid_base  - prefix for the FILEID attribute; defaults to
#                   "FILEGROUP_PRELUDE"
#
# Returns the XML as a string, or "" if the section cannot be found.
sub buffer_mets_StructMapSection_section_xml
{
    my ($self, $doc_obj, $section, $order_numref, $fileid_base) = @_;

    $section = "" if !defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" if !defined $section_ptr;

    $fileid_base = "FILEGROUP_PRELUDE" if !defined $fileid_base;

    # Section numbers are the section identifier prefixed with "1"; the
    # same number labels the div and names its DMDID.
    my $section_num = "1" . $section;
    my $dmdid_attr  = "DM" . $section_num;

    # Opening tag of the div; ORDER consumes one value from the counter.
    my $all_text = qq{ <mets:div ID="DS$section_num" TYPE="Section" \n};
    $all_text .= ' ORDER="' . $$order_numref++ . '" ORDERLABEL="' . $section_num . '" ' . "\n";
    $all_text .= qq{ LABEL="$section_num" DMDID="$dmdid_attr">\n};

    $all_text .= qq{ <mets:fptr FILEID="$fileid_base$section_num" />\n};

    # Nest every subsection, sharing the same counter and FILEID prefix.
    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        my $child_section = "$section.$subsection";
        $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj, $child_section, $order_numref, $fileid_base);
    }

    $all_text .= " </mets:div>\n";

    # Drop control characters other than LF and CR.
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
339
340
# Build the structMap <mets:div> that represents the whole document.
#
# Parameters:
#   $doc_obj - document object (queried with _lookup_section)
#   $section - section identifier whose metadata is scanned
#
# One <mets:fptr> is emitted for every "gsdlsourcefilename" or
# "gsdlassocfile" metadata entry of the section, numbered default.1,
# default.2, ... in the order the entries appear.
#
# Returns the XML as a string, or "" if the section cannot be found.
sub buffer_mets_StructMapWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $fileID = 0;
    my $order_num = 0;

    # NOTE(review): "Documemt" in the LABEL below looks like a typo for
    # "Document"; it is kept as-is because it is part of the emitted XML.
    my $all_text = ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";

    # Output one file pointer per source file / associated file.  Only the
    # metadata name matters here; the value is not used.
    foreach my $data (@{$section_ptr->{'metadata'}}) {
        my $meta_name = $data->[0];
        next unless $meta_name eq "gsdlsourcefilename" || $meta_name eq "gsdlassocfile";

        ++$fileID;
        $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
    }
    $all_text .= " </mets:div>\n";

    # Drop control characters other than LF and CR.
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
377
[14927]378
379
# Abstract method: turn a doctxt file name into the link target used in
# the METS file section.
#
# Parameters:
#   $fname       - file name (e.g. "doctxt.xml")
#   $working_dir - directory the document is being written into
#
# This base implementation only reports that the method must be
# implemented and then dies.
sub doctxt_to_xlink
{
    my $self = shift @_;
    my ($fname, $working_dir) = @_;

    # The message names this method (it previously read "doxtxt_to_xlink").
    gsprintf(STDERR, "METSPlugout::doctxt_to_xlink {common.must_be_implemented}\n");

    # Abort unconditionally: the die must not depend on whether the
    # message above could be printed.
    die "\n";
}
387
# Build the <mets:fileGrp> entry for one section, followed by the entries
# of all of its subsections.
#
# Parameters:
#   $doc_obj      - document object (queried with _lookup_section)
#   $section      - section identifier
#   $working_dir  - passed through to doctxt_to_xlink
#   $is_txt_split - when true, each section links to its own
#                   "doctxt<num>.xml" file; otherwise every section links
#                   into "doctxt.xml" with an xpointer
#   $opt_attr     - extra attribute text for <mets:file>; defaults to ""
#   $fileid_base  - prefix for the fileGrp ID; defaults to
#                   "FILEGROUP_PRELUDE"
#
# Returns the XML as a string, or "" if the section cannot be found.
sub buffer_mets_fileSection_section_xml
{
    my ($self, $doc_obj, $section, $working_dir, $is_txt_split, $opt_attr, $fileid_base) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" if !defined $section_ptr;

    $fileid_base = "FILEGROUP_PRELUDE" if !defined $fileid_base;
    $opt_attr    = ""                  if !defined $opt_attr;

    # Section numbers are the section identifier prefixed with "1".
    my $section_num = "1" . $section;

    # Work out where the text of this section lives.
    my $xlink;
    if ($is_txt_split) {
        # One file per section: dots in the number become underscores.
        (my $section_fnum = $section_num) =~ s/\./_/g;
        $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml", $working_dir);
    }
    else {
        # Single file: address the section with an xpointer, turning
        # e.g. "1.2" into "/Section[1]/Section[2]/text()".
        (my $xpath = $section_num) =~ s/\./\]\/Section\[/g;
        $xlink = $self->doctxt_to_xlink("doctxt.xml", $working_dir);
        $xlink .= '#xpointer(/Section[';
        $xlink .= $xpath;
        $xlink .= ']/text())';
    }

    # Output the fileSection details.
    my $all_text = ' <mets:fileGrp ID="' . $fileid_base . $section_num . '">' . "\n";
    $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
    $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="' . $xlink . '"';
    $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
    $all_text .= " </mets:file>\n";
    $all_text .= " </mets:fileGrp>\n";

    # Subsection groups follow their parent's group, with the same options.
    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        my $child_section = "$section.$subsection";
        $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj, $child_section, $working_dir, $is_txt_split, $opt_attr, $fileid_base);
    }

    # Drop control characters other than LF and CR.
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
448
# Abstract method: build the file-section XML covering the whole document.
#
# Parameters:
#   $doc_obj     - document object
#   $section     - section identifier
#   $working_dir - directory the document is being written into
#
# This base implementation only reports that the method must be
# implemented and then dies.
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section, $working_dir) = @_;

    gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n");

    # Abort unconditionally: the die must not depend on whether the
    # message above could be printed.
    die "\n";
}
457
[12330]4581;
Note: See TracBrowser for help on using the repository browser.