source: gsdl/trunk/perllib/plugouts/METSPlugout.pm@ 14970

Last change on this file since 14970 was 14970, checked in by davidb, 15 years ago

Changes made to generated files to support Fedora3 syntax

  • Property svn:keywords set to Author Date Id Revision
File size: 11.7 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use util;
36use BasPlugout;
37use docprint; # for escape_text
38
39sub BEGIN {
40 @METSPlugout::ISA = ('BasPlugout');
41}
42
43my $arguments = [
44 { 'name' => "xslt_txt",
45 'desc' => "{METSPlugout.xslt_txt}",
46 'type' => "string",
47 'reqd' => "no",
48 'hiddengli' => "no"},
49 { 'name' => "xslt_mets",
50 'desc' => "{METSPlugout.xslt_mets}",
51 'type' => "string",
52 'reqd' => "no",
53 'hiddengli' => "no"}
54 ];
55
56my $options = { 'name' => "METSPlugout",
57 'desc' => "{METSPlugout.desc}",
58 'abstract' => "yes",
59 'inherits' => "yes",
60 'args' => $arguments
61 };
62
63sub new {
64 my ($class) = shift (@_);
65 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
66 push(@$plugoutlist, $class);
67
68
69 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
70 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
71
72 my $self = (defined $hashArgOptLists)? new BasPlugout($plugoutlist,$inputargs,$hashArgOptLists): new BasPlugout($plugoutlist,$inputargs);
73
74
75 return bless $self, $class;
76}
77
78
79sub saveas_doctxt
80{
81 my $self = shift (@_);
82 my ($doc_obj,$working_dir) = @_;
83
84 my $is_recursive = 1;
85
86 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
87
88 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
89
90 my $outhandler;
91
92 if (defined $self->{'xslt_writer'}){
93 $outhandler = $self->{'xslt_writer'};
94 }
95 else{
96 $outhandler = $self->get_output_handler($doc_txt_file);
97 }
98
99 $self->output_xml_header($outhandler);
100 my $section = $doc_obj->get_top_section();
101 $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive);
102 $self->output_xml_footer($outhandler);
103
104
105 if (defined $self->{'xslt_writer'}){
106 $self->close_xslt_pipe();
107 }
108 else{
109 close($outhandler);
110 }
111
112}
113
114sub saveas_docmets
115{
116 my $self = shift (@_);
117 my ($doc_obj,$working_dir) = @_;
118
119 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
120
121 my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
122 if (!defined $doc_title) {
123 $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
124 }
125
126 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
127
128 my $outhandler;
129
130 if (defined $self->{'xslt_writer'}){
131 $outhandler = $self->{'xslt_writer'};
132 }
133 else{
134 $outhandler = $self->get_output_handler($doc_mets_file);
135 }
136
137
138 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
139 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir);
140 $self->output_mets_xml_footer($outhandler);
141
142 if (defined $self->{'xslt_writer'}){
143 $self->close_xslt_pipe();
144 }
145 else{
146 close($outhandler);
147 }
148
149
150}
151
152sub saveas
153{
154 my $self = shift (@_);
155 my ($doc_obj,$doc_dir) = @_;
156
157 $self->process_assoc_files ($doc_obj, $doc_dir, '');
158
159 my $output_dir = $self->get_output_dir();
160 &util::mk_all_dir ($output_dir) unless -e $output_dir;
161
162 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
163
164 &util::mk_all_dir ($working_dir) unless -e $working_dir;
165
166 ###
167 # Save the text as a filefile
168 ###
169 $self->saveas_doctxt($doc_obj,$working_dir);
170
171 ###
172 # Save the structure and metadata as a METS file
173 ###
174 $self->saveas_docmets($doc_obj,$working_dir);
175
176 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");
177
178 $self->store_output_info_reference($doc_obj);
179
180}
181
182
183sub output_mets_xml_header
184{
185 my $self = shift(@_);
186 my ($handle, $OID, $doc_title) = @_;
187
188 gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n";
189}
190
191sub output_mets_xml_header_extra_attribute
192{
193 my $self = shift(@_);
194 my ($handle, $extra_attr) = @_;
195
196 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
197 print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
198 print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
199 print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
200## print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
201 print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' ."\n";
202 print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
203 print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
204 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
205 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
206
207 print $handle " $extra_attr>\n";
208
209}
210
211sub output_mets_xml_footer
212{
213 my $self = shift(@_);
214 my ($handle) = @_;
215 print $handle '</mets:mets>' . "\n";
216}
217
218# print out doctxt.xml file
219sub output_txt_section {
220 my $self = shift (@_);
221 my ($handle, $doc_obj, $section, $is_recursive) = @_;
222
223 print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
224}
225
226sub buffer_txt_section_xml {
227 my $self = shift(@_);
228 my ($doc_obj, $section, $is_recursive) = @_;
229
230 my $section_ptr = $doc_obj->_lookup_section ($section);
231
232 return "" unless defined $section_ptr;
233
234 my $all_text = "<Section>\n";
235 $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
236
237 if (defined $is_recursive && $is_recursive)
238 {
239 # Output all the subsections
240 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
241 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection");
242 }
243 }
244
245 $all_text .= "</Section>\n";
246
247
248 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
249 return $all_text;
250}
251
252#
253# Print out docmets.xml file
254#
255sub output_mets_section
256{
257 my $self = shift(@_);
258 my ($handle, $doc_obj, $section, $working_dir) = @_;
259
260 gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n";
261
262}
263
264
265sub buffer_mets_dmdSection_section_xml
266{
267 my $self = shift(@_);
268 my ($doc_obj,$section) = @_;
269
270 gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n";
271}
272
273sub buffer_mets_StructMapSection_section_xml
274{
275 my $self = shift(@_);
276 my ($doc_obj,$section, $order_numref, $fileid_base) = @_;
277
278 $section="" unless defined $section;
279
280
281 my $section_ptr=$doc_obj->_lookup_section($section);
282 return "" unless defined $section_ptr;
283
284 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
285
286 # output fileSection by Sections
287 my $section_num ="1". $section;
288 my $dmd_num = $section_num;
289
290 #**output the StructMap details
291
292 my $dmdid_attr = "DM$dmd_num";
293
294 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
295 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
296 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
297
298 $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n";
299
300
301 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
302 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base);
303 }
304
305 $all_text .= " </mets:div>\n";
306
307 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
308
309 return $all_text;
310}
311
312
313sub buffer_mets_StructMapWhole_section_xml
314{
315 my $self = shift(@_);
316 my ($doc_obj,$section) = @_;
317
318 my $section_ptr = $doc_obj->_lookup_section($section);
319 return "" unless defined $section_ptr;
320
321 my $all_text="";
322 my $fileID=0;
323 my $order_num = 0;
324
325 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
326
327 #** output the StructMapSection for the whole section
328 # get the sourcefile and associative file
329
330 foreach my $data (@{$section_ptr->{'metadata'}}){
331 my $escaped_value = &docprint::escape_text($data->[1]);
332
333 if ($data->[0] eq "gsdlsourcefilename") {
334 ++$fileID;
335 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
336 }
337
338 if ($data->[0] eq "gsdlassocfile"){
339 ++$fileID;
340 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
341 }
342 }
343 $all_text .= " </mets:div>\n";
344
345 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
346
347 return $all_text;
348}
349
350
351
352sub doctxt_to_xlink
353{
354 my $self = shift @_;
355 my ($fname,$working_dir) = @_;
356
357 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
358}
359
360sub buffer_mets_fileSection_section_xml
361{
362 my $self = shift(@_);
363 my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_;
364
365 #$section="" unless defined $section;
366
367 my $section_ptr=$doc_obj->_lookup_section($section);
368 return "" unless defined $section_ptr;
369
370 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
371
372 # output fileSection by sections
373 my $section_num ="1". $section;
374
375 $opt_attr = "" unless defined $opt_attr;
376
377 # output the fileSection details
378 my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n";
379 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
380
381 my $xlink;
382 if (defined $is_txt_split && $is_txt_split)
383 {
384 my $section_fnum ="1". $section;
385 $section_fnum =~ s/\./_/g;
386
387 $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir);
388 }
389 else
390 {
391 $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir);
392
393 $xlink .= '#xpointer(/Section[';
394
395 my $xpath = "1".$section;
396 $xpath =~ s/\./\]\/Section\[/g;
397
398 $xlink .= $xpath;
399
400 $xlink .= ']/text())';
401 }
402
403
404
405 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
406
407 $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
408 $all_text .= " </mets:file>\n";
409 $all_text .= " </mets:fileGrp>\n";
410
411
412 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
413 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base);
414 }
415
416 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
417
418 return $all_text;
419}
420
421sub buffer_mets_fileWhole_section_xml
422{
423 my $self = shift(@_);
424 my ($doc_obj,$section,$working_dir) = @_;
425
426 gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n";
427
428}
429
4301;
Note: See TracBrowser for help on using the repository browser.