source: trunk/gsdl/perllib/plugouts/METSPlugout.pm@ 13172

Last change on this file since 13172 was 13172, checked in by kjdon, 18 years ago

Moved all printing stuff out of doc.pm.
docprint now prints a GA representation of a doc obj - use &docprint::get_section_xml instead of $doc_obj->buffer_section_xml or $doc_obj->output_section.
Most of the code has been moved into plugouts, except for the bit thats gone to docprint.pm.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.3 KB
Line 
1###########################################################################
2#
3# METSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package METSPlugout;
27
28use strict;
29no strict 'refs';
30
31eval {require bytes};
32use util;
33use BasPlugout;
34use docprint; # for escape_text
35
36sub BEGIN {
37 @METSPlugout::ISA = ('BasPlugout');
38}
39
40my $arguments = [
41 { 'name' => "saveas_version",
42 'desc' => "{METSPlugout.version}",
43 'type' => "string",
44 'deft' => 'greenstone',
45 'reqd' => "yes",
46 'hiddengli' => "no"},
47 { 'name' => "xslt_txt",
48 'desc' => "{METSPlugout.xslt_txt}",
49 'type' => "string",
50 'reqd' => "no",
51 'hiddengli' => "no"},
52 { 'name' => "xslt_mets",
53 'desc' => "{METSPlugout.xslt_mets}",
54 'type' => "string",
55 'reqd' => "no",
56 'hiddengli' => "no"}
57 ];
58
59my $options = { 'name' => "METSPlugout",
60 'desc' => "{METSPlugout.desc}",
61 'abstract' => "no",
62 'inherits' => "yes",
63 'args' => $arguments
64 };
65
66sub new {
67 my ($class) = shift (@_);
68 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
69 push(@$plugoutlist, $class);
70
71
72 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
73 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
74
75 my $self = (defined $hashArgOptLists)? new BasPlugout($plugoutlist,$inputargs,$hashArgOptLists): new BasPlugout($plugoutlist,$inputargs);
76
77
78
79 return bless $self, $class;
80}
81
82sub saveas {
83 my $self = shift (@_);
84 my ($doc_obj,$doc_dir) = @_;
85 my $version = $self->{'saveas_version'};
86
87 $self->process_assoc_files ($doc_obj, $doc_dir, '');
88
89 my $output_dir = $self->get_output_dir();
90 &util::mk_all_dir ($output_dir) unless -e $output_dir;
91
92 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
93
94 &util::mk_all_dir ($working_dir) unless -e $working_dir;
95
96 #########################
97 # save the text file
98 #########################
99 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
100
101 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
102
103 my $outhandler;
104
105 if (defined $self->{'xslt_writer'}){
106 $outhandler = $self->{'xslt_writer'};
107 }
108 else{
109 $outhandler = $self->get_output_handler($doc_txt_file);
110 }
111
112 $self->output_xml_header($outhandler);
113 $self->output_txt_section($outhandler,$doc_obj, $doc_obj->get_top_section());
114 $self->output_xml_footer($outhandler);
115
116
117 if (defined $self->{'xslt_writer'}){
118 $self->close_xslt_pipe();
119 }
120 else{
121 close($outhandler);
122 }
123
124 #########################
125 # save the mets file
126 #########################
127 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
128
129 my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
130 if (!defined $doc_title) {
131 $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
132 }
133
134 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
135
136 if (defined $self->{'xslt_writer'}){
137 $outhandler = $self->{'xslt_writer'};
138 }
139 else{
140 $outhandler = $self->get_output_handler($doc_mets_file);
141 }
142
143
144 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
145 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$version,$working_dir);
146 $self->output_mets_xml_footer($outhandler);
147
148 if (defined $self->{'xslt_writer'}){
149 $self->close_xslt_pipe();
150 }
151 else{
152 close($outhandler);
153 }
154
155 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");
156
157 $self->store_output_info_reference($doc_obj);
158
159}
160
161
162sub output_mets_xml_header(){
163 my $self = shift(@_);
164 my ($handle, $OID, $doc_title) = @_;
165
166 my $version = $self->{'saveas_version'};
167
168 my $extra_attr = "";
169 if ($version eq "fedora") {
170 my $fnamespace = $ENV{'FEDORA_PID_NAMESPACE'};
171 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
172
173 $extra_attr = "OBJID=\"$oid_namespace:$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
174 }
175 else {
176 # Greenstone METS profile
177 $extra_attr = "OBJID=\"$OID:2\"";
178 }
179
180
181 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
182 print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
183 print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
184 print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
185 print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
186 print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
187 print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
188 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
189 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
190 print $handle " $extra_attr>\n";
191
192 if ($version eq "fedora") {
193 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
194 }
195
196}
197
198sub output_mets_xml_footer() {
199 my $self = shift(@_);
200 my ($handle) = @_;
201 print $handle '</mets:mets>' . "\n";
202}
203
204# print out doctxt.xml file
205sub output_txt_section {
206 my $self = shift (@_);
207 my ($handle, $doc_obj, $section) = @_;
208
209 print $handle $self->buffer_txt_section_xml($doc_obj, $section);
210}
211
212sub buffer_txt_section_xml {
213 my $self = shift(@_);
214 my ($doc_obj, $section) = @_;
215
216 my $section_ptr = $doc_obj->_lookup_section ($section);
217
218 return "" unless defined $section_ptr;
219
220 my $all_text = "<Section>\n";
221 $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
222
223 #output all the subsections
224 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
225 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection");
226 }
227
228 $all_text .= "</Section>\n";
229
230
231 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
232 return $all_text;
233}
234
235# print out docmets.xml file
236sub output_mets_section {
237 my $self = shift(@_);
238 my ($handle, $doc_obj, $section, $version, $working_dir) = @_;
239
240 # print out the dmdSection
241 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section, $version);
242
243 print $handle "<mets:fileSec>\n";
244 if ($version eq "fedora") {
245 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
246 }
247
248 # print out the fileSection by sections
249 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$version);
250
251 # print out the whole fileSection
252 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$version,$working_dir);
253
254 if ($version eq "fedora") {
255 print $handle " </mets:fileGrp>\n";
256 }
257 print $handle "</mets:fileSec>\n";
258
259 # print out the StructMapSection by sections
260
261 my $struct_type;
262 if ($version eq "fedora") {
263 $struct_type = "fedora:dsBindingMap";
264 }
265 else {
266 $struct_type = "Section";
267 }
268
269 if ($version ne "fedora") {
270 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
271 my $order_num=0;
272 print $handle $self->buffer_mets_StructMapSection_section_xml($doc_obj,$section, \$order_num);
273 print $handle "</mets:structMap>\n";
274
275 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
276 print $handle $self->buffer_mets_StructMapWhole_section_xml($doc_obj,$section);
277 print $handle "</mets:structMap>\n";
278 }
279
280}
281
282sub buffer_mets_dmdSection_section_xml(){
283 my $self = shift(@_);
284 my ($doc_obj,$section,$version) = @_;
285
286 $section="" unless defined $section;
287
288 my $section_ptr=$doc_obj->_lookup_section($section);
289 return "" unless defined $section_ptr;
290
291 # convert section number
292 my $section_num ="1". $section;
293 my $dmd_num = $section_num;
294
295 # #**output the dmdSection details
296 # if ($section_num eq "1") {
297 # $dmd_num = "0";
298 # }
299
300
301 my $all_text = "";
302
303 my $label_attr = "";
304 if ($version eq "fedora") {
305 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
306 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
307
308 $label_attr = "LABEL=\"Dublin Core Metadata\"";
309 }
310 else {
311 # TODO::
312 #print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
313 #print STDERR "***** Check to see if <techMD> required\n";
314 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
315
316 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
317 }
318
319 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
320 $all_text .= " <mets:xmlData>\n";
321
322 if ($version eq "fedora") {
323 my $dc_namespace = "";
324 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
325 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
326
327 $all_text .= " <oai_dc:dc $dc_namespace>\n";
328
329 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
330 $all_text .= " </oai_dc:dc>\n";
331 }
332 else {
333 foreach my $data (@{$section_ptr->{'metadata'}}){
334 my $escaped_value = &docprint::escape_text($data->[1]);
335 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
336 if ($data->[0] eq "dc.Title") {
337 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
338 }
339 }
340 }
341
342 $all_text .= " </mets:xmlData>\n";
343 $all_text .= " </mets:mdWrap>\n";
344
345 if ($version eq "fedora") {
346 $all_text .= " </mets:techMD>\n";
347 $all_text .= "</mets:amdSec>\n";
348 }
349 else {
350 $all_text .= "</mets:dmdSec>\n";
351 }
352
353
354 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
355 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection",$version);
356 }
357
358 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
359
360 return $all_text;
361}
362
363sub buffer_mets_StructMapSection_section_xml(){
364 my $self = shift(@_);
365 my ($doc_obj,$section, $order_numref) = @_;
366
367 $section="" unless defined $section;
368
369
370 my $section_ptr=$doc_obj->_lookup_section($section);
371 return "" unless defined $section_ptr;
372
373
374 # output fileSection by Sections
375 my $section_num ="1". $section;
376 my $dmd_num = $section_num;
377
378 ##**output the dmdSection details
379 #if ($section_num eq "1") {
380 # $dmd_num = "0";
381 #}
382
383 #**output the StructMap details
384
385 my $dmdid_attr = "DM$dmd_num";
386
387 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
388 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
389 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
390
391 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
392
393
394 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
395 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref);
396 }
397
398 $all_text .= " </mets:div>\n";
399
400 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
401
402 return $all_text;
403}
404
405
406sub buffer_mets_StructMapWhole_section_xml(){
407 my $self = shift(@_);
408 my ($doc_obj,$section) = @_;
409
410 my $section_ptr = $doc_obj->_lookup_section($section);
411 return "" unless defined $section_ptr;
412
413 my $all_text="";
414 my $fileID=0;
415 my $order_num = 0;
416
417 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
418
419 #** output the StructMapSection for the whole section
420 # get the sourcefile and associative file
421
422 foreach my $data (@{$section_ptr->{'metadata'}}){
423 my $escaped_value = &docprint::escape_text($data->[1]);
424
425 if ($data->[0] eq "gsdlsourcefilename") {
426 ++$fileID;
427 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
428 }
429
430 if ($data->[0] eq "gsdlassocfile"){
431 ++$fileID;
432 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
433 }
434 }
435 $all_text .= " </mets:div>\n";
436
437 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
438
439 return $all_text;
440}
441
442sub buffer_mets_fileSection_section_xml() {
443 my $self = shift(@_);
444 my ($doc_obj,$section,$version) = @_;
445
446 #$section="" unless defined $section;
447
448
449 my $section_ptr=$doc_obj->_lookup_section($section);
450 return "" unless defined $section_ptr;
451
452
453 # output fileSection by sections
454 my $section_num ="1". $section;
455
456 my $filePath = 'doctxt.xml';
457
458 my $opt_owner_id = "";
459 if ($version eq "fedora") {
460 $opt_owner_id = "OWNERID=\"M\"";
461 }
462
463 # output the fileSection details
464 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
465 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
466 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
467
468 my $xpath = "1".$section;
469 $xpath =~ s/\./]\/Section[/g;
470
471 $all_text .= $xpath;
472
473 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
474 $all_text .= " </mets:file>\n";
475 $all_text .= " </mets:fileGrp>\n";
476
477
478 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
479 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$version);
480 }
481
482 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
483
484 return $all_text;
485}
486
487sub buffer_mets_fileWhole_section_xml(){
488 my $self = shift(@_);
489 my ($doc_obj,$section,$version,$working_dir) = @_;
490
491 my $section_ptr = $doc_obj-> _lookup_section($section);
492 return "" unless defined $section_ptr;
493
494 my $all_text="";
495
496 my $fileID=0;
497
498 # Output the fileSection for the whole section
499 # => get the sourcefile and associative file
500
501 my $id_root = "";
502 my $opt_owner_id = "";
503
504 if ($version eq "fedora") {
505 $opt_owner_id = "OWNERID=\"M\"";
506 }
507 else {
508 $id_root = "default";
509 }
510
511 if ($version ne "fedora") {
512 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
513 }
514
515 foreach my $data (@{$section_ptr->{'metadata'}}){
516 my $escaped_value = &docprint::escape_text($data->[1]);
517
518 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
519 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
520
521 ++$fileID;
522 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
523
524 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
525
526 $all_text .= " </mets:file>\n";
527 }
528
529 if ($data->[0] eq "gsdlassocfile"){
530
531 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
532 my $assoc_file = $1;
533 my $mime_type = $2;
534 my $assoc_dir = $3;
535
536 if ($version eq "fedora") {
537 $id_root = $assoc_file;
538 $id_root =~ s/\//_/g;
539 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
540 }
541
542 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
543 ++$fileID;
544
545 my $mime_attr = "MIMETYPE=\"$mime_type\"";
546 my $xlink_title = "xlink:title=\"$assoc_file\"";
547
548 my $id_attr;
549 my $xlink_href;
550
551 if ($version eq "fedora") {
552 $id_attr = "ID=\"$id_root.0\"";
553
554 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
555 if (!defined $fedora_prefix) {
556 $xlink_href = "xlink:href=\"$assfilePath\"";
557 }
558 else
559 {
560 my $gsdlhome = $ENV{'GSDLHOME'};
561 my $gsdl_href = "$working_dir/$assfilePath";
562
563 $gsdl_href =~ s/^$gsdlhome(\/)?//;
564 $gsdl_href = "/gsdl/$gsdl_href";
565
566 my $fserver = $ENV{'FEDORA_HOSTNAME'};
567 my $fport = $ENV{'FEDORA_SERVER_PORT'};
568
569 my $fdomain = "http://$fserver:$fport";
570 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
571 }
572
573 my $top_section = $doc_obj->get_top_section();
574 my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
575 }
576 else {
577 $id_attr = "ID=\"$id_root.$fileID\"";
578 $xlink_href = "xlink:href=\"$assfilePath\"";
579 }
580
581 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
582 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
583
584 $all_text .= " </mets:file>\n";
585
586 if ($version eq "fedora") {
587 $all_text .= " </mets:fileGrp>\n";
588 }
589
590 }
591 }
592
593 if ($version ne "fedora") {
594 $all_text .= " </mets:fileGrp>\n";
595 }
596
597 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
598
599 return $all_text;
600}
601
602
6031;
Note: See TracBrowser for help on using the repository browser.