source: main/trunk/greenstone2/perllib/plugouts/METSPlugout.pm@ 28562

Last change on this file since 28562 was 28562, checked in by kjdon, 10 years ago

changing some util:: methods to FileUtils:: methods

  • Property svn:keywords set to Author Date Id Revision
File size: 12.0 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'subs';
30no strict 'refs';
31
32use gsprintf 'gsprintf';
33
34eval {require bytes};
35use FileUtils;
36use BasePlugout;
37use docprint; # for escape_text
38
39sub BEGIN {
40 @METSPlugout::ISA = ('BasePlugout');
41}
42
43my $arguments = [
44 { 'name' => "xslt_txt",
45 'desc' => "{METSPlugout.xslt_txt}",
46 'type' => "string",
47 'reqd' => "no",
48 'hiddengli' => "no"},
49 { 'name' => "xslt_mets",
50 'desc' => "{METSPlugout.xslt_mets}",
51 'type' => "string",
52 'reqd' => "no",
53 'hiddengli' => "no"}
54 ];
55
56my $options = { 'name' => "METSPlugout",
57 'desc' => "{METSPlugout.desc}",
58 'abstract' => "yes",
59 'inherits' => "yes",
60 'args' => $arguments
61 };
62
63sub new {
64 my ($class) = shift (@_);
65 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
66 push(@$plugoutlist, $class);
67
68
69 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
70 push(@{$hashArgOptLists->{"OptList"}},$options);
71
72 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
73 return bless $self, $class;
74}
75
76
77sub saveas_doctxt
78{
79 my $self = shift (@_);
80 my ($doc_obj,$working_dir) = @_;
81
82 my $is_recursive = 1;
83
84 my $doc_txt_file = &FileUtils::filenameConcatenate ($working_dir,"doctxt.xml");
85
86 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
87
88 my $outhandler;
89
90 if (defined $self->{'xslt_writer'}){
91 $outhandler = $self->{'xslt_writer'};
92 }
93 else{
94 $outhandler = $self->get_output_handler($doc_txt_file);
95 }
96
97 binmode($outhandler,":utf8");
98
99 $self->output_xml_header($outhandler);
100 my $section = $doc_obj->get_top_section();
101 $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive);
102 $self->output_xml_footer($outhandler);
103
104
105 if (defined $self->{'xslt_writer'}){
106 $self->close_xslt_pipe();
107 }
108 else{
109 close($outhandler);
110 }
111
112}
113
114sub saveas_docmets
115{
116 my $self = shift (@_);
117 my ($doc_obj,$working_dir) = @_;
118
119 my $doc_mets_file = &FileUtils::filenameConcatenate ($working_dir, "docmets.xml");
120
121 my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
122 if (!defined $doc_title) {
123 $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
124 }
125
126 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
127
128 my $outhandler;
129
130 if (defined $self->{'xslt_writer'}){
131 $outhandler = $self->{'xslt_writer'};
132 }
133 else{
134 $outhandler = $self->get_output_handler($doc_mets_file);
135 }
136
137 binmode($outhandler,":utf8");
138
139 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
140 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir);
141 $self->output_mets_xml_footer($outhandler);
142
143 if (defined $self->{'xslt_writer'}){
144 $self->close_xslt_pipe();
145 }
146 else{
147 close($outhandler);
148 }
149
150
151}
152
153sub saveas
154{
155 my $self = shift (@_);
156 my ($doc_obj,$doc_dir) = @_;
157
158 $self->process_assoc_files ($doc_obj, $doc_dir, '');
159
160 $self->process_metafiles_metadata ($doc_obj);
161
162 my $output_dir = $self->get_output_dir();
163 &FileUtils::makeAllDirectories ($output_dir) unless -e $output_dir;
164
165 my $working_dir = &FileUtils::filenameConcatenate ($output_dir, $doc_dir);
166
167 &FileUtils::makeAllDirectories ($working_dir) unless -e $working_dir;
168
169 ###
170 # Save the text as a filefile
171 ###
172 $self->saveas_doctxt($doc_obj,$working_dir);
173
174 ###
175 # Save the structure and metadata as a METS file
176 ###
177 $self->saveas_docmets($doc_obj,$working_dir);
178
179 $self->{'short_doc_file'} = &FileUtils::filenameConcatenate ($doc_dir, "docmets.xml");
180
181 $self->store_output_info_reference($doc_obj);
182
183}
184
185
186sub output_mets_xml_header
187{
188 my $self = shift(@_);
189 my ($handle, $OID, $doc_title) = @_;
190
191 gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n";
192}
193
194sub output_mets_xml_header_extra_attribute
195{
196 my $self = shift(@_);
197 my ($handle, $extra_attr, $extra_schema) = @_;
198
199 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
200 print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
201 print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
202 print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
203 if (defined ($ENV{'FEDORA_VERSION'}) && $ENV{'FEDORA_VERSION'} =~ m/^2/) { # checking if major version is 2
204 print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
205 }
206 else {
207 print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' ."\n";
208 }
209 print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
210 print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
211 print $handle " $extra_schema\n" if (defined $extra_schema);
212 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
213 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
214
215 print $handle " $extra_attr>\n";
216
217}
218
219sub output_mets_xml_footer
220{
221 my $self = shift(@_);
222 my ($handle) = @_;
223 print $handle '</mets:mets>' . "\n";
224}
225
226# print out doctxt.xml file
227sub output_txt_section {
228 my $self = shift (@_);
229 my ($handle, $doc_obj, $section, $is_recursive) = @_;
230
231 print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
232}
233
234sub buffer_txt_section_xml {
235 my $self = shift(@_);
236 my ($doc_obj, $section, $is_recursive) = @_;
237
238 my $section_ptr = $doc_obj->_lookup_section ($section);
239
240 return "" unless defined $section_ptr;
241
242 my $all_text = "<Section>\n";
243 $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
244
245 if (defined $is_recursive && $is_recursive)
246 {
247 # Output all the subsections
248 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
249 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection", $is_recursive);
250 }
251 }
252
253 $all_text .= "</Section>\n";
254
255
256 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
257 return $all_text;
258}
259
260#
261# Print out docmets.xml file
262#
263sub output_mets_section
264{
265 my $self = shift(@_);
266 my ($handle, $doc_obj, $section, $working_dir) = @_;
267
268 gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n";
269
270}
271
272
273sub buffer_mets_dmdSection_section_xml
274{
275 my $self = shift(@_);
276 my ($doc_obj,$section) = @_;
277
278 gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n";
279}
280
281sub buffer_mets_StructMapSection_section_xml
282{
283 my $self = shift(@_);
284 my ($doc_obj,$section, $order_numref, $fileid_base) = @_;
285
286 $section="" unless defined $section;
287
288
289 my $section_ptr=$doc_obj->_lookup_section($section);
290 return "" unless defined $section_ptr;
291
292 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
293
294 # output fileSection by Sections
295 my $section_num ="1". $section;
296 my $dmd_num = $section_num;
297
298 #**output the StructMap details
299
300 my $dmdid_attr = "DM$dmd_num";
301
302 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
303 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
304 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
305
306 $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n";
307
308
309 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
310 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base);
311 }
312
313 $all_text .= " </mets:div>\n";
314
315 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
316
317 return $all_text;
318}
319
320
321sub buffer_mets_StructMapWhole_section_xml
322{
323 my $self = shift(@_);
324 my ($doc_obj,$section) = @_;
325
326 my $section_ptr = $doc_obj->_lookup_section($section);
327 return "" unless defined $section_ptr;
328
329 my $all_text="";
330 my $fileID=0;
331 my $order_num = 0;
332
333 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
334
335 #** output the StructMapSection for the whole section
336 # get the sourcefile and associative file
337
338 foreach my $data (@{$section_ptr->{'metadata'}}){
339 my $escaped_value = &docprint::escape_text($data->[1]);
340
341 if ($data->[0] eq "gsdlsourcefilename") {
342 ++$fileID;
343 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
344 }
345
346 if ($data->[0] eq "gsdlassocfile"){
347 ++$fileID;
348 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
349 }
350 }
351 $all_text .= " </mets:div>\n";
352
353 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
354
355 return $all_text;
356}
357
358
359
360sub doctxt_to_xlink
361{
362 my $self = shift @_;
363 my ($fname,$working_dir) = @_;
364
365 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
366}
367
368sub buffer_mets_fileSection_section_xml
369{
370 my $self = shift(@_);
371 my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_;
372
373 #$section="" unless defined $section;
374
375 my $section_ptr=$doc_obj->_lookup_section($section);
376 return "" unless defined $section_ptr;
377
378 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
379
380 # output fileSection by sections
381 my $section_num ="1". $section;
382
383 $opt_attr = "" unless defined $opt_attr;
384
385 # output the fileSection details
386 my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n";
387 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
388
389 my $xlink;
390 if (defined $is_txt_split && $is_txt_split)
391 {
392 my $section_fnum ="1". $section;
393 $section_fnum =~ s/\./_/g;
394
395 $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir);
396 }
397 else
398 {
399 $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir);
400
401 $xlink .= '#xpointer(/Section[';
402
403 my $xpath = "1".$section;
404 $xpath =~ s/\./\]\/Section\[/g;
405
406 $xlink .= $xpath;
407
408 $xlink .= ']/text())';
409 }
410
411
412
413 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
414
415 $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
416 $all_text .= " </mets:file>\n";
417 $all_text .= " </mets:fileGrp>\n";
418
419
420 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
421 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base);
422 }
423
424 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
425
426 return $all_text;
427}
428
429sub buffer_mets_fileWhole_section_xml
430{
431 my $self = shift(@_);
432 my ($doc_obj,$section,$working_dir) = @_;
433
434 gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n";
435
436}
437
4381;
Note: See TracBrowser for help on using the repository browser.