source: gsdl/trunk/perllib/plugouts/METSPlugout.pm@ 17203

Last change on this file since 17203 was 17203, checked in by kjdon, 16 years ago

BasPlugout renamed to BasePlugout. And tidied up the constructors

  • Property svn:keywords set to Author Date Id Revision
File size: 11.8 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use util;
36use BasePlugout;
37use docprint; # for escape_text
38
39sub BEGIN {
40 @METSPlugout::ISA = ('BasePlugout');
41}
42
43my $arguments = [
44 { 'name' => "xslt_txt",
45 'desc' => "{METSPlugout.xslt_txt}",
46 'type' => "string",
47 'reqd' => "no",
48 'hiddengli' => "no"},
49 { 'name' => "xslt_mets",
50 'desc' => "{METSPlugout.xslt_mets}",
51 'type' => "string",
52 'reqd' => "no",
53 'hiddengli' => "no"}
54 ];
55
56my $options = { 'name' => "METSPlugout",
57 'desc' => "{METSPlugout.desc}",
58 'abstract' => "yes",
59 'inherits' => "yes",
60 'args' => $arguments
61 };
62
63sub new {
64 my ($class) = shift (@_);
65 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
66 push(@$plugoutlist, $class);
67
68
69 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
70 push(@{$hashArgOptLists->{"OptList"}},$options);
71
72 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
73 return bless $self, $class;
74}
75
76
77sub saveas_doctxt
78{
79 my $self = shift (@_);
80 my ($doc_obj,$working_dir) = @_;
81
82 my $is_recursive = 1;
83
84 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
85
86 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
87
88 my $outhandler;
89
90 if (defined $self->{'xslt_writer'}){
91 $outhandler = $self->{'xslt_writer'};
92 }
93 else{
94 $outhandler = $self->get_output_handler($doc_txt_file);
95 }
96
97 $self->output_xml_header($outhandler);
98 my $section = $doc_obj->get_top_section();
99 $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive);
100 $self->output_xml_footer($outhandler);
101
102
103 if (defined $self->{'xslt_writer'}){
104 $self->close_xslt_pipe();
105 }
106 else{
107 close($outhandler);
108 }
109
110}
111
112sub saveas_docmets
113{
114 my $self = shift (@_);
115 my ($doc_obj,$working_dir) = @_;
116
117 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
118
119 my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
120 if (!defined $doc_title) {
121 $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
122 }
123
124 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
125
126 my $outhandler;
127
128 if (defined $self->{'xslt_writer'}){
129 $outhandler = $self->{'xslt_writer'};
130 }
131 else{
132 $outhandler = $self->get_output_handler($doc_mets_file);
133 }
134
135
136 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
137 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir);
138 $self->output_mets_xml_footer($outhandler);
139
140 if (defined $self->{'xslt_writer'}){
141 $self->close_xslt_pipe();
142 }
143 else{
144 close($outhandler);
145 }
146
147
148}
149
150sub saveas
151{
152 my $self = shift (@_);
153 my ($doc_obj,$doc_dir) = @_;
154
155 $self->process_assoc_files ($doc_obj, $doc_dir, '');
156
157 my $output_dir = $self->get_output_dir();
158 &util::mk_all_dir ($output_dir) unless -e $output_dir;
159
160 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
161
162 &util::mk_all_dir ($working_dir) unless -e $working_dir;
163
164 ###
165 # Save the text as a filefile
166 ###
167 $self->saveas_doctxt($doc_obj,$working_dir);
168
169 ###
170 # Save the structure and metadata as a METS file
171 ###
172 $self->saveas_docmets($doc_obj,$working_dir);
173
174 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");
175
176 $self->store_output_info_reference($doc_obj);
177
178}
179
180
181sub output_mets_xml_header
182{
183 my $self = shift(@_);
184 my ($handle, $OID, $doc_title) = @_;
185
186 gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n";
187}
188
189sub output_mets_xml_header_extra_attribute
190{
191 my $self = shift(@_);
192 my ($handle, $extra_attr, $extra_schema) = @_;
193
194 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
195 print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
196 print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
197 print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
198 if (defined ($ENV{'FEDORA_VERSION'}) && $ENV{'FEDORA_VERSION'} =~ m/^2/) { # checking if major version is 2
199 print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
200 }
201 else {
202 print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' ."\n";
203 }
204 print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
205 print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
206 print $handle " $extra_schema\n" if (defined $extra_schema);
207 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
208 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
209
210 print $handle " $extra_attr>\n";
211
212}
213
214sub output_mets_xml_footer
215{
216 my $self = shift(@_);
217 my ($handle) = @_;
218 print $handle '</mets:mets>' . "\n";
219}
220
221# print out doctxt.xml file
222sub output_txt_section {
223 my $self = shift (@_);
224 my ($handle, $doc_obj, $section, $is_recursive) = @_;
225
226 print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
227}
228
229sub buffer_txt_section_xml {
230 my $self = shift(@_);
231 my ($doc_obj, $section, $is_recursive) = @_;
232
233 my $section_ptr = $doc_obj->_lookup_section ($section);
234
235 return "" unless defined $section_ptr;
236
237 my $all_text = "<Section>\n";
238 $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
239
240 if (defined $is_recursive && $is_recursive)
241 {
242 # Output all the subsections
243 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
244 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection", $is_recursive);
245 }
246 }
247
248 $all_text .= "</Section>\n";
249
250
251 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
252 return $all_text;
253}
254
255#
256# Print out docmets.xml file
257#
258sub output_mets_section
259{
260 my $self = shift(@_);
261 my ($handle, $doc_obj, $section, $working_dir) = @_;
262
263 gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n";
264
265}
266
267
268sub buffer_mets_dmdSection_section_xml
269{
270 my $self = shift(@_);
271 my ($doc_obj,$section) = @_;
272
273 gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n";
274}
275
276sub buffer_mets_StructMapSection_section_xml
277{
278 my $self = shift(@_);
279 my ($doc_obj,$section, $order_numref, $fileid_base) = @_;
280
281 $section="" unless defined $section;
282
283
284 my $section_ptr=$doc_obj->_lookup_section($section);
285 return "" unless defined $section_ptr;
286
287 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
288
289 # output fileSection by Sections
290 my $section_num ="1". $section;
291 my $dmd_num = $section_num;
292
293 #**output the StructMap details
294
295 my $dmdid_attr = "DM$dmd_num";
296
297 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
298 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
299 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
300
301 $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n";
302
303
304 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
305 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base);
306 }
307
308 $all_text .= " </mets:div>\n";
309
310 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
311
312 return $all_text;
313}
314
315
316sub buffer_mets_StructMapWhole_section_xml
317{
318 my $self = shift(@_);
319 my ($doc_obj,$section) = @_;
320
321 my $section_ptr = $doc_obj->_lookup_section($section);
322 return "" unless defined $section_ptr;
323
324 my $all_text="";
325 my $fileID=0;
326 my $order_num = 0;
327
328 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
329
330 #** output the StructMapSection for the whole section
331 # get the sourcefile and associative file
332
333 foreach my $data (@{$section_ptr->{'metadata'}}){
334 my $escaped_value = &docprint::escape_text($data->[1]);
335
336 if ($data->[0] eq "gsdlsourcefilename") {
337 ++$fileID;
338 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
339 }
340
341 if ($data->[0] eq "gsdlassocfile"){
342 ++$fileID;
343 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
344 }
345 }
346 $all_text .= " </mets:div>\n";
347
348 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
349
350 return $all_text;
351}
352
353
354
355sub doctxt_to_xlink
356{
357 my $self = shift @_;
358 my ($fname,$working_dir) = @_;
359
360 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
361}
362
363sub buffer_mets_fileSection_section_xml
364{
365 my $self = shift(@_);
366 my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_;
367
368 #$section="" unless defined $section;
369
370 my $section_ptr=$doc_obj->_lookup_section($section);
371 return "" unless defined $section_ptr;
372
373 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
374
375 # output fileSection by sections
376 my $section_num ="1". $section;
377
378 $opt_attr = "" unless defined $opt_attr;
379
380 # output the fileSection details
381 my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n";
382 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
383
384 my $xlink;
385 if (defined $is_txt_split && $is_txt_split)
386 {
387 my $section_fnum ="1". $section;
388 $section_fnum =~ s/\./_/g;
389
390 $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir);
391 }
392 else
393 {
394 $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir);
395
396 $xlink .= '#xpointer(/Section[';
397
398 my $xpath = "1".$section;
399 $xpath =~ s/\./\]\/Section\[/g;
400
401 $xlink .= $xpath;
402
403 $xlink .= ']/text())';
404 }
405
406
407
408 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
409
410 $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
411 $all_text .= " </mets:file>\n";
412 $all_text .= " </mets:fileGrp>\n";
413
414
415 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
416 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base);
417 }
418
419 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
420
421 return $all_text;
422}
423
424sub buffer_mets_fileWhole_section_xml
425{
426 my $self = shift(@_);
427 my ($doc_obj,$section,$working_dir) = @_;
428
429 gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n";
430
431}
432
4331;
Note: See TracBrowser for help on using the repository browser.