source: gsdl/trunk/perllib/plugouts/METSPlugout.pm@ 20643

Last change on this file since 20643 was 20643, checked in by davidb, 15 years ago

Updated to include meta-files information in 'saveas'

  • Property svn:keywords set to Author Date Id Revision
File size: 11.9 KB
RevLine 
[12330]1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
[14927]29no strict 'subs';
[12330]30no strict 'refs';
31
[14927]32use gsprintf 'gsprintf';
33
[12330]34eval {require bytes};
35use util;
[17203]36use BasePlugout;
[13172]37use docprint; # for escape_text
[12330]38
39sub BEGIN {
[17203]40 @METSPlugout::ISA = ('BasePlugout');
[12330]41}
42
43my $arguments = [
44 { 'name' => "xslt_txt",
[12693]45 'desc' => "{METSPlugout.xslt_txt}",
[12330]46 'type' => "string",
47 'reqd' => "no",
48 'hiddengli' => "no"},
49 { 'name' => "xslt_mets",
[12693]50 'desc' => "{METSPlugout.xslt_mets}",
[12330]51 'type' => "string",
52 'reqd' => "no",
53 'hiddengli' => "no"}
54 ];
55
56my $options = { 'name' => "METSPlugout",
57 'desc' => "{METSPlugout.desc}",
[14927]58 'abstract' => "yes",
[12330]59 'inherits' => "yes",
60 'args' => $arguments
61 };
62
63sub new {
64 my ($class) = shift (@_);
65 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
66 push(@$plugoutlist, $class);
67
68
[17203]69 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
70 push(@{$hashArgOptLists->{"OptList"}},$options);
[12330]71
[17203]72 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
73 return bless $self, $class;
[12330]74}
75
[14927]76
77sub saveas_doctxt
78{
[12330]79 my $self = shift (@_);
[14927]80 my ($doc_obj,$working_dir) = @_;
[12330]81
[14927]82 my $is_recursive = 1;
[12330]83
84 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
85
86 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
87
88 my $outhandler;
89
90 if (defined $self->{'xslt_writer'}){
91 $outhandler = $self->{'xslt_writer'};
92 }
93 else{
94 $outhandler = $self->get_output_handler($doc_txt_file);
95 }
96
97 $self->output_xml_header($outhandler);
[14927]98 my $section = $doc_obj->get_top_section();
99 $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive);
[12330]100 $self->output_xml_footer($outhandler);
101
102
103 if (defined $self->{'xslt_writer'}){
104 $self->close_xslt_pipe();
105 }
106 else{
107 close($outhandler);
108 }
[14927]109
110}
111
112sub saveas_docmets
113{
114 my $self = shift (@_);
115 my ($doc_obj,$working_dir) = @_;
116
[12330]117 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
118
119 my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
120 if (!defined $doc_title) {
121 $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
122 }
123
124 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
125
[14927]126 my $outhandler;
127
[12330]128 if (defined $self->{'xslt_writer'}){
129 $outhandler = $self->{'xslt_writer'};
130 }
131 else{
132 $outhandler = $self->get_output_handler($doc_mets_file);
133 }
134
135
136 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
[14927]137 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir);
[12330]138 $self->output_mets_xml_footer($outhandler);
139
140 if (defined $self->{'xslt_writer'}){
141 $self->close_xslt_pipe();
142 }
143 else{
144 close($outhandler);
145 }
[14927]146
147
148}
149
150sub saveas
151{
152 my $self = shift (@_);
153 my ($doc_obj,$doc_dir) = @_;
154
155 $self->process_assoc_files ($doc_obj, $doc_dir, '');
156
[20643]157 $self->process_metafiles_metadata ($doc_obj);
158
[14927]159 my $output_dir = $self->get_output_dir();
160 &util::mk_all_dir ($output_dir) unless -e $output_dir;
161
162 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
163
164 &util::mk_all_dir ($working_dir) unless -e $working_dir;
165
166 ###
167 # Save the text as a filefile
168 ###
169 $self->saveas_doctxt($doc_obj,$working_dir);
170
171 ###
172 # Save the structure and metadata as a METS file
173 ###
174 $self->saveas_docmets($doc_obj,$working_dir);
175
[12330]176 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");
177
[14927]178 $self->store_output_info_reference($doc_obj);
179
[12330]180}
181
182
[14927]183sub output_mets_xml_header
184{
[12330]185 my $self = shift(@_);
186 my ($handle, $OID, $doc_title) = @_;
187
[14927]188 gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n";
189}
[12330]190
[14927]191sub output_mets_xml_header_extra_attribute
192{
193 my $self = shift(@_);
[15013]194 my ($handle, $extra_attr, $extra_schema) = @_;
[12330]195
196 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
197 print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
198 print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
199 print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
[17055]200 if (defined ($ENV{'FEDORA_VERSION'}) && $ENV{'FEDORA_VERSION'} =~ m/^2/) { # checking if major version is 2
[16462]201 print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
[15013]202 }
203 else {
204 print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' ."\n";
205 }
[12330]206 print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
207 print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
[15013]208 print $handle " $extra_schema\n" if (defined $extra_schema);
[12330]209 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
210 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
[14927]211
[12330]212 print $handle " $extra_attr>\n";
213
214}
215
[14927]216sub output_mets_xml_footer
217{
[12330]218 my $self = shift(@_);
219 my ($handle) = @_;
220 print $handle '</mets:mets>' . "\n";
221}
222
[13051]223# print out doctxt.xml file
224sub output_txt_section {
225 my $self = shift (@_);
[14927]226 my ($handle, $doc_obj, $section, $is_recursive) = @_;
[13051]227
[14927]228 print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
[13051]229}
230
231sub buffer_txt_section_xml {
232 my $self = shift(@_);
[14927]233 my ($doc_obj, $section, $is_recursive) = @_;
[13051]234
235 my $section_ptr = $doc_obj->_lookup_section ($section);
236
237 return "" unless defined $section_ptr;
238
239 my $all_text = "<Section>\n";
[13172]240 $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
[13051]241
[14927]242 if (defined $is_recursive && $is_recursive)
243 {
244 # Output all the subsections
245 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
[17055]246 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection", $is_recursive);
[14927]247 }
248 }
[13051]249
250 $all_text .= "</Section>\n";
251
252
253 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
254 return $all_text;
255}
256
[14927]257#
258# Print out docmets.xml file
259#
260sub output_mets_section
261{
[13051]262 my $self = shift(@_);
[14927]263 my ($handle, $doc_obj, $section, $working_dir) = @_;
[13051]264
[14927]265 gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n";
[13051]266
[14927]267}
[13051]268
269
[14927]270sub buffer_mets_dmdSection_section_xml
271{
[13051]272 my $self = shift(@_);
[14927]273 my ($doc_obj,$section) = @_;
[13051]274
[14927]275 gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n";
[13051]276}
277
[14927]278sub buffer_mets_StructMapSection_section_xml
279{
[13051]280 my $self = shift(@_);
[14927]281 my ($doc_obj,$section, $order_numref, $fileid_base) = @_;
[13051]282
283 $section="" unless defined $section;
284
285
286 my $section_ptr=$doc_obj->_lookup_section($section);
287 return "" unless defined $section_ptr;
288
[14927]289 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
[13051]290
291 # output fileSection by Sections
292 my $section_num ="1". $section;
293 my $dmd_num = $section_num;
294
295 #**output the StructMap details
296
297 my $dmdid_attr = "DM$dmd_num";
298
299 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
300 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
301 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
302
[14927]303 $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n";
[13051]304
305
306 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
[14927]307 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base);
[13051]308 }
309
310 $all_text .= " </mets:div>\n";
311
312 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
313
314 return $all_text;
315}
316
317
[14927]318sub buffer_mets_StructMapWhole_section_xml
319{
[13051]320 my $self = shift(@_);
321 my ($doc_obj,$section) = @_;
322
323 my $section_ptr = $doc_obj->_lookup_section($section);
324 return "" unless defined $section_ptr;
325
326 my $all_text="";
327 my $fileID=0;
328 my $order_num = 0;
329
330 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
331
332 #** output the StructMapSection for the whole section
333 # get the sourcefile and associative file
334
335 foreach my $data (@{$section_ptr->{'metadata'}}){
[13172]336 my $escaped_value = &docprint::escape_text($data->[1]);
[13051]337
338 if ($data->[0] eq "gsdlsourcefilename") {
339 ++$fileID;
340 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
341 }
342
343 if ($data->[0] eq "gsdlassocfile"){
344 ++$fileID;
345 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
346 }
347 }
348 $all_text .= " </mets:div>\n";
349
350 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
351
352 return $all_text;
353}
354
[14927]355
356
357sub doctxt_to_xlink
358{
359 my $self = shift @_;
360 my ($fname,$working_dir) = @_;
361
362 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
363}
364
365sub buffer_mets_fileSection_section_xml
366{
[13051]367 my $self = shift(@_);
[14927]368 my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_;
[13051]369
370 #$section="" unless defined $section;
371
372 my $section_ptr=$doc_obj->_lookup_section($section);
373 return "" unless defined $section_ptr;
374
[14927]375 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
[13051]376
377 # output fileSection by sections
378 my $section_num ="1". $section;
[14927]379
380 $opt_attr = "" unless defined $opt_attr;
381
382 # output the fileSection details
383 my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n";
384 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
[13051]385
[14927]386 my $xlink;
387 if (defined $is_txt_split && $is_txt_split)
388 {
389 my $section_fnum ="1". $section;
390 $section_fnum =~ s/\./_/g;
391
392 $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir);
[13051]393 }
[14927]394 else
395 {
396 $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir);
[13051]397
[14927]398 $xlink .= '#xpointer(/Section[';
[13051]399
[14927]400 my $xpath = "1".$section;
401 $xpath =~ s/\./\]\/Section\[/g;
402
403 $xlink .= $xpath;
404
405 $xlink .= ']/text())';
406 }
[13051]407
[14927]408
409
410 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
411
412 $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
[13051]413 $all_text .= " </mets:file>\n";
414 $all_text .= " </mets:fileGrp>\n";
415
416
417 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
[14927]418 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base);
[13051]419 }
420
421 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
422
423 return $all_text;
424}
425
[14927]426sub buffer_mets_fileWhole_section_xml
427{
[13051]428 my $self = shift(@_);
[14927]429 my ($doc_obj,$section,$working_dir) = @_;
[13051]430
[14927]431 gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n";
[13051]432
433}
434
[12330]4351;
Note: See TracBrowser for help on using the repository browser.