source: gsdl/trunk/perllib/plugouts/METSPlugout.pm@ 14927

Last change on this file since 14927 was 14927, checked in by davidb, 16 years ago

Exporting as GreenstoneMETS and FedoraMETS changed to be separate plugouts

  • Property svn:keywords set to Author Date Id Revision
File size: 11.6 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use util;
36use BasPlugout;
37use docprint; # for escape_text
38
# Set up inheritance at compile time: METSPlugout derives from BasPlugout.
BEGIN {
    @METSPlugout::ISA = qw(BasPlugout);
}
42
# Arguments this plugout adds on top of whatever it inherits.  Both are
# optional strings; they are read back in this file as $self->{'xslt_txt'}
# and $self->{'xslt_mets'} and handed to open_xslt_pipe().
# NOTE(review): the "{METSPlugout....}" desc values look like lookup keys for
# translated strings rather than literal text -- confirm against gsprintf.
my $arguments = [
    {
        name      => 'xslt_txt',
        desc      => '{METSPlugout.xslt_txt}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'no',
    },
    {
        name      => 'xslt_mets',
        desc      => '{METSPlugout.xslt_mets}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'no',
    },
];

# Description of the plugout itself; flagged abstract, and several methods
# below only report "must be implemented" and die.
my $options = {
    name     => 'METSPlugout',
    desc     => '{METSPlugout.desc}',
    abstract => 'yes',
    inherits => 'yes',
    args     => $arguments,
};
62
# Constructor.
#
# Arguments (after the class name):
#   $plugoutlist     - array ref; the class name is appended to it
#   $inputargs       - handed unchanged to the BasPlugout constructor
#   $hashArgOptLists - hash ref; this module's argument table is appended to
#                      its "ArgList" entry and its option table to "OptList"
#
# Returns the object produced by the BasPlugout constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Use an explicit Class->method call.  The indirect-object form
    # ("new BasPlugout(...)") relies on parser heuristics and is ambiguous in
    # a package that defines its own new().
    my $self = (defined $hashArgOptLists)
        ? BasPlugout->new($plugoutlist, $inputargs, $hashArgOptLists)
        : BasPlugout->new($plugoutlist, $inputargs);

    return bless $self, $class;
}
77
78
# Write the text of every section of a document to "doctxt.xml" inside
# $working_dir.
#
#   $doc_obj     - document object; must provide get_top_section()
#   $working_dir - directory that receives doctxt.xml
#
# Output goes to $self->{'xslt_writer'} when that is defined after
# open_xslt_pipe() has been called (presumably the XSLT pipe it opened --
# confirm in BasPlugout), otherwise to a plain handle from
# get_output_handler().
sub saveas_doctxt
{
    my $self = shift (@_);
    my ($doc_obj, $working_dir) = @_;

    my $is_recursive = 1;

    my $doc_txt_file = &util::filename_cat ($working_dir, "doctxt.xml");

    $self->open_xslt_pipe($doc_txt_file, $self->{'xslt_txt'});

    my $outhandler = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_txt_file);

    $self->output_xml_header($outhandler);
    my $section = $doc_obj->get_top_section();
    $self->output_txt_section($outhandler, $doc_obj, $section, $is_recursive);
    $self->output_xml_footer($outhandler);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        # Buffered write errors (e.g. a full disk) only surface at close(),
        # so report a failure instead of dropping it silently.
        close($outhandler)
            or print STDERR "METSPlugout::saveas_doctxt: error closing $doc_txt_file: $!\n";
    }
}
113
# Write the structure and metadata of a document to "docmets.xml" inside
# $working_dir.
#
#   $doc_obj     - document object; must provide get_top_section(),
#                  get_metadata_element() and get_OID()
#   $working_dir - directory that receives docmets.xml
#
# The title passed to the METS header is the top section's "dc.Title",
# falling back to "Title" (and may still be undefined if neither is set).
# Output goes to $self->{'xslt_writer'} when that is defined after
# open_xslt_pipe() has been called, otherwise to a plain handle from
# get_output_handler().
sub saveas_docmets
{
    my $self = shift (@_);
    my ($doc_obj, $working_dir) = @_;

    my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");

    my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "dc.Title");
    if (!defined $doc_title) {
        $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "Title");
    }

    $self->open_xslt_pipe($doc_mets_file, $self->{'xslt_mets'});

    my $outhandler = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_mets_file);

    $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
    $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(), $working_dir);
    $self->output_mets_xml_footer($outhandler);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        # Buffered write errors (e.g. a full disk) only surface at close(),
        # so report a failure instead of dropping it silently.
        close($outhandler)
            or print STDERR "METSPlugout::saveas_docmets: error closing $doc_mets_file: $!\n";
    }
}
151
# Export one document: process its associated files, then write doctxt.xml
# and docmets.xml into <output dir>/<$doc_dir>, creating directories as
# needed.  Afterwards $self->{'short_doc_file'} holds the docmets.xml path
# relative to the output directory and store_output_info_reference() is
# called for the document.
#
#   $doc_obj - the document object to export
#   $doc_dir - the document's directory, relative to the output directory
sub saveas
{
    my ($self, $doc_obj, $doc_dir) = @_;

    $self->process_assoc_files ($doc_obj, $doc_dir, '');

    my $output_dir = $self->get_output_dir();
    if (!-e $output_dir) {
        &util::mk_all_dir ($output_dir);
    }

    my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
    if (!-e $working_dir) {
        &util::mk_all_dir ($working_dir);
    }

    # The text of the document goes into its own file ...
    $self->saveas_doctxt($doc_obj, $working_dir);

    # ... and the structure and metadata go into the METS file.
    $self->saveas_docmets($doc_obj, $working_dir);

    $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");

    $self->store_output_info_reference($doc_obj);
}
181
182
# Placeholder for writing the opening of the METS document.  This version
# only reports that the method must be implemented and dies; the module
# declares itself abstract, so a subclass is expected to supply the real one.
#
#   $handle    - output handle
#   $OID       - document identifier
#   $doc_title - document title
sub output_mets_xml_header
{
    my $self = shift(@_);
    my ($handle, $OID, $doc_title) = @_;

    # Die unconditionally: chaining with "&&" would let execution continue
    # whenever gsprintf returns a false value (e.g. STDERR cannot be written).
    gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n");
    die "\n";
}
190
# Print the XML declaration and the opening <mets:mets> tag, with its
# namespace and schema-location attributes, to $handle.  $extra_attr is
# inserted verbatim as the final attribute text just before the closing ">".
#
# NOTE(review): the xlink namespace emitted here is "http://www.w3.org/TR/xlink";
# the W3C XLink namespace is "http://www.w3.org/1999/xlink" -- confirm that
# consumers of these files really expect the former before changing it.
sub output_mets_xml_header_extra_attribute
{
    my ($self, $handle, $extra_attr) = @_;

    my @header_lines = (
        '<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
        '<mets:mets xmlns:mets="http://www.loc.gov/METS/"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"',
        ' xmlns:xlink="http://www.w3.org/TR/xlink"',
        ' xsi:schemaLocation="http://www.loc.gov/METS/',
        ' http://www.loc.gov/standards/mets/mets.xsd',
        ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/',
        ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"',
        " $extra_attr>",
    );

    print {$handle} join("\n", @header_lines) . "\n";
}
209
# Print the closing </mets:mets> tag, followed by a newline, to $handle.
sub output_mets_xml_footer
{
    my ($self, $handle) = @_;

    print {$handle} "</mets:mets>\n";
}
216
217# print out doctxt.xml file
# Print the <Section> XML for $section (built by buffer_txt_section_xml) to
# $handle.  $is_recursive is passed straight through to the builder.
sub output_txt_section {
    my ($self, $handle, $doc_obj, $section, $is_recursive) = @_;

    # Keep the call in list context, exactly as a direct print argument would.
    my @section_xml = $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);

    print {$handle} @section_xml;
}
224
# Build the doctxt.xml fragment for one section: a <Section> element holding
# the section's escaped text and, when $is_recursive is true, one nested
# <Section> element per subsection at every depth.
#
#   $doc_obj      - document object; must provide _lookup_section()
#   $section      - section identifier; subsections are addressed as
#                   "$section.$subsection"
#   $is_recursive - true to include the whole subtree below $section
#
# Returns the XML text, or "" when the section cannot be found.
sub buffer_txt_section_xml {
    my $self = shift(@_);
    my ($doc_obj, $section, $is_recursive) = @_;

    my $section_ptr = $doc_obj->_lookup_section ($section);

    return "" unless defined $section_ptr;

    my $all_text = "<Section>\n";
    $all_text .= &docprint::escape_text("$section_ptr->{'text'}");

    if ($is_recursive)
    {
        # Output all the subsections.  The flag has to be handed down: without
        # it the recursion stops after one level and the text of deeper
        # sections is lost, although the xpointers written by
        # buffer_mets_fileSection_section_xml address arbitrarily deep
        # /Section[..]/Section[..] paths in this file.
        foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
            $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection", $is_recursive);
        }
    }

    $all_text .= "</Section>\n";

    # Drop control characters other than LF and CR.
    # NOTE(review): the range \x00-\x09 also removes TAB -- confirm intended.
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
    return $all_text;
}
250
251#
252# Print out docmets.xml file
253#
# Placeholder for writing the body of docmets.xml.  This version only reports
# that the method must be implemented and dies; the module declares itself
# abstract, so a subclass is expected to supply the real one.
#
#   $handle      - output handle
#   $doc_obj     - document object
#   $section     - section to start from
#   $working_dir - directory the document is being written to
sub output_mets_section
{
    my $self = shift(@_);
    my ($handle, $doc_obj, $section, $working_dir) = @_;

    # Die unconditionally: chaining with "&&" would let execution continue
    # whenever gsprintf returns a false value (e.g. STDERR cannot be written).
    gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n");
    die "\n";
}
262
263
# Placeholder for building the descriptive-metadata XML of a section.  This
# version only reports that the method must be implemented and dies; the
# module declares itself abstract, so a subclass is expected to supply the
# real one.
#
#   $doc_obj - document object
#   $section - section identifier
sub buffer_mets_dmdSection_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    # Die unconditionally: chaining with "&&" would let execution continue
    # whenever gsprintf returns a false value (e.g. STDERR cannot be written).
    gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n");
    die "\n";
}
271
# Build the structMap <mets:div> element for $section, with the <mets:div>
# elements of all its subsections nested inside it.
#
#   $doc_obj      - document object; must provide _lookup_section()
#   $section      - section identifier ("" when undefined)
#   $order_numref - reference to a counter; its current value becomes this
#                   div's ORDER and it is then incremented, so numbering runs
#                   on through the whole subtree
#   $fileid_base  - prefix for the FILEID of the div's file pointer
#                   (default "FILEGROUP_PRELUDE")
#
# Returns the XML text, or "" when the section cannot be found.
sub buffer_mets_StructMapSection_section_xml
{
    my ($self, $doc_obj, $section, $order_numref, $fileid_base) = @_;

    if (!defined $section) {
        $section = "";
    }

    my $node = $doc_obj->_lookup_section($section);
    if (!defined $node) {
        return "";
    }

    if (!defined $fileid_base) {
        $fileid_base = "FILEGROUP_PRELUDE";
    }

    # Section numbers are the section identifier prefixed with "1"; the same
    # value labels the div and names its descriptive-metadata section.
    my $label = "1" . $section;
    my $dmd_id = "DM" . $label;

    # Take this div's position, then advance the shared counter.
    my $order = ${$order_numref}++;

    my $xml = " <mets:div ID=\"DS$label\" TYPE=\"Section\" \n"
            . ' ORDER="' . $order . '" ORDERLABEL="' . $label . '" ' . "\n"
            . " LABEL=\"$label\" DMDID=\"$dmd_id\">\n"
            . ' <mets:fptr FILEID="' . $fileid_base . $label . '" />' . "\n";

    foreach my $child (@{ $node->{'subsection_order'} }) {
        $xml .= $self->buffer_mets_StructMapSection_section_xml(
            $doc_obj, "$section.$child", $order_numref, $fileid_base);
    }

    $xml .= " </mets:div>\n";

    # Drop control characters other than LF and CR.
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
310
311
# Build the structMap <mets:div> that stands for the document as a whole.
# It holds one <mets:fptr> per "gsdlsourcefilename" or "gsdlassocfile"
# metadata entry of $section, with FILEIDs "default.1", "default.2", ... in
# the order the entries appear.
#
#   $doc_obj - document object; must provide _lookup_section()
#   $section - section whose metadata list is scanned
#
# Returns the XML text, or "" when the section cannot be found.
sub buffer_mets_StructMapWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $fileID = 0;
    my $order_num = 0;

    # NOTE(review): "Documemt" in the LABEL looks like a typo for "Document";
    # it is left untouched because it is part of the emitted file.
    my $all_text = ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";

    # Each metadata entry is a [name, value] pair; only the name matters here,
    # so the value is no longer escaped just to be thrown away.
    foreach my $data (@{$section_ptr->{'metadata'}}) {
        my $meta_name = $data->[0];

        if ($meta_name eq "gsdlsourcefilename" || $meta_name eq "gsdlassocfile") {
            ++$fileID;
            $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
        }
    }
    $all_text .= " </mets:div>\n";

    # Drop control characters other than LF and CR.
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
348
349
350
# Placeholder for turning a doctxt file name into the xlink:href value used
# in the METS file section.  This version only reports that the method must
# be implemented and dies; the module declares itself abstract, so a subclass
# is expected to supply the real one.
#
#   $fname       - file name, e.g. "doctxt.xml"
#   $working_dir - directory the document is being written to
sub doctxt_to_xlink
{
    my $self = shift @_;
    my ($fname, $working_dir) = @_;

    # The message now names this method correctly (it used to say
    # "doxtxt_to_xlink"), and the die is unconditional: chaining with "&&"
    # would let execution continue whenever gsprintf returns a false value.
    gsprintf(STDERR, "METSPlugout::doctxt_to_xlink {common.must_be_implemented}\n");
    die "\n";
}
358
# Build the <mets:fileGrp> element for $section, followed by the
# <mets:fileGrp> elements of all its subsections.  Each group holds one
# <mets:file> whose <mets:FLocat> points at the section's text.
#
#   $doc_obj      - document object; must provide _lookup_section()
#   $section      - section identifier
#   $working_dir  - passed on to doctxt_to_xlink()
#   $is_txt_split - true: link to a per-section file "doctxt<num>.xml", where
#                   <num> is the section number with "." replaced by "_";
#                   false: link to "doctxt.xml" plus an xpointer selecting
#                   the section's text
#   $opt_attr     - extra attribute text for <mets:file> (default "")
#   $fileid_base  - prefix for the group ID (default "FILEGROUP_PRELUDE")
#
# Returns the XML text, or "" when the section cannot be found.
sub buffer_mets_fileSection_section_xml
{
    my ($self, $doc_obj, $section, $working_dir, $is_txt_split, $opt_attr, $fileid_base) = @_;

    my $node = $doc_obj->_lookup_section($section);
    if (!defined $node) {
        return "";
    }

    if (!defined $fileid_base) {
        $fileid_base = "FILEGROUP_PRELUDE";
    }
    if (!defined $opt_attr) {
        $opt_attr = "";
    }

    # Section numbers are the section identifier prefixed with "1".
    my $label = "1" . $section;

    my $href;
    if ($is_txt_split)
    {
        # One text file per section, e.g. "1.2.3" -> doctxt1_2_3.xml
        (my $file_suffix = $label) =~ s/\./_/g;

        $href = $self->doctxt_to_xlink("doctxt${file_suffix}.xml", $working_dir);
    }
    else
    {
        # One shared text file; "1.2.3" becomes the path
        # /Section[1]/Section[2]/Section[3] inside it.
        $href = $self->doctxt_to_xlink("doctxt.xml", $working_dir);

        (my $xpath = $label) =~ s/\./\]\/Section\[/g;

        $href .= '#xpointer(/Section[' . $xpath . ']/text())';
    }

    my $xml = ' <mets:fileGrp ID="' . $fileid_base . $label . '">' . "\n"
            . " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$label\" $opt_attr >\n"
            . ' <mets:FLocat LOCTYPE="URL" xlink:href="' . $href . '"'
            . ' xlink:title="Hierarchical Document Structure"/>' . "\n"
            . " </mets:file>\n"
            . " </mets:fileGrp>\n";

    foreach my $child (@{ $node->{'subsection_order'} }) {
        $xml .= $self->buffer_mets_fileSection_section_xml(
            $doc_obj, "$section.$child", $working_dir, $is_txt_split, $opt_attr, $fileid_base);
    }

    # Drop control characters other than LF and CR.
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
419
# Placeholder for building the file-section XML that covers the document as a
# whole.  This version only reports that the method must be implemented and
# dies; the module declares itself abstract, so a subclass is expected to
# supply the real one.
#
#   $doc_obj     - document object
#   $section     - section identifier
#   $working_dir - directory the document is being written to
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj, $section, $working_dir) = @_;

    # Die unconditionally: chaining with "&&" would let execution continue
    # whenever gsprintf returns a false value (e.g. STDERR cannot be written).
    gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n");
    die "\n";
}
428
4291;
Note: See TracBrowser for help on using the repository browser.