source: gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm@ 14927

Last change on this file since 14927 was 14927, checked in by davidb, 16 years ago

Exporting as GreenstoneMETS and FedoraMETS changed tobe separate plugout

File size: 14.6 KB
RevLine 
[14927]1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
36sub BEGIN {
37 @FedoraMETSPlugout::ISA = ('METSPlugout');
38}
39
40my $arguments = [
41 { 'name' => "fedora_namespace",
42 'desc' => "{FedoraPlugout.fedora_namespace}",
43 'type' => "string",
44 'deft' => "greenstone",
45 'reqd' => "no",
46 'hiddengli' => "no"}
47 ];
48
49
50
51my $options = { 'name' => "FedoraMETSPlugout",
52 'desc' => "{FedoraMETSPlugout.desc}",
53 'abstract' => "no",
54 'inherits' => "yes",
55 'args' => $arguments
56 };
57
58
59sub new
60{
61 my ($class) = shift (@_);
62 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
63 push(@$plugoutlist, $class);
64
65
66 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
67 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
68
69 my $self = (defined $hashArgOptLists)? new METSPlugout($plugoutlist,$inputargs,$hashArgOptLists): new METSPlugout($plugoutlist,$inputargs);
70
71
72 return bless $self, $class;
73}
74
75
76sub output_mets_xml_header
77{
78 my $self = shift(@_);
79 my ($handle, $OID, $doc_title) = @_;
80
81 my $fnamespace = $self->{'fedora_namespace'};
82 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
83
84 my $collection = $ENV{'GSDLCOLLECTION'};
85
86 my $extra_attr = "OBJID=\"$oid_namespace:$collection-$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
87
88 $self->output_mets_xml_header_extra_attribute($handle,$extra_attr);
89
90 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
91
92}
93
94#
95# Print out "family" of doctxt.xml files
96#
97
98sub saveas_doctxt_section
99{
100 my $self = shift (@_);
101 my ($doc_obj,$working_dir,$section) = @_;
102
103 my $section_ptr=$doc_obj->_lookup_section($section);
104 return unless defined $section_ptr;
105
106 my $section_fnum ="1". $section;
107 $section_fnum =~ s/\./_/g;
108
109 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt$section_fnum.xml");
110
111 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
112
113 my $outhandler;
114
115 if (defined $self->{'xslt_writer'}){
116 $outhandler = $self->{'xslt_writer'};
117 }
118 else{
119 $outhandler = $self->get_output_handler($doc_txt_file);
120 }
121
122 $self->output_xml_header($outhandler);
123 $self->output_txt_section($outhandler,$doc_obj, $section);
124 $self->output_xml_footer($outhandler);
125
126
127 if (defined $self->{'xslt_writer'}){
128 $self->close_xslt_pipe();
129 }
130 else{
131 close($outhandler);
132 }
133
134
135 # Output all the subsections as separate files
136 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
137
138 $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
139 }
140
141
142}
143
144
145sub saveas_doctxt
146{
147 my $self = shift (@_);
148 my ($doc_obj,$working_dir) = @_;
149
150 my $section = $doc_obj->get_top_section();
151
152 $self->saveas_doctxt_section($doc_obj,$working_dir,$section);
153
154 $self->saveas_toc($doc_obj,$working_dir);
155}
156
157sub buffer_toc
158{
159 my $self = shift (@_);
160 my ($doc_obj,$working_dir,$section,$depth) = @_;
161
162 my $section_ptr=$doc_obj->_lookup_section($section);
163 return "" unless defined $section_ptr;
164
165 my $all_text = "";
166
167 my $section_num ="1". $section;
168 my $indent = " " x ($depth*2);
169
170 $all_text .= "$indent<Section id=\"$section_num\">\n";
171
172 # Output all the subsections as separate files
173 foreach my $subsection (@{$section_ptr->{'subsection_order'}})
174 {
175 $all_text
176 .= $self->buffer_toc($doc_obj, $working_dir,
177 "$section.$subsection",$depth+1);
178 }
179
180 $all_text .= "$indent</Section>\n";
181
182 return $all_text;
183}
184
185
186sub saveas_toc
187{
188 my $self = shift (@_);
189 my ($doc_obj,$working_dir) = @_;
190
191 my $section = $doc_obj->get_top_section();
192
193 my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
194
195 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
196
197 my $outhandler;
198
199 if (defined $self->{'xslt_writer'}){
200 $outhandler = $self->{'xslt_writer'};
201 }
202 else{
203 $outhandler = $self->get_output_handler($doc_txt_file);
204 }
205
206 print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
207
208 if (defined $self->{'xslt_writer'}){
209 $self->close_xslt_pipe();
210 }
211 else{
212 close($outhandler);
213 }
214
215}
216
217
218#
219# Print out docmets.xml file
220#
221sub output_mets_section
222{
223 my $self = shift(@_);
224 my ($handle, $doc_obj, $section, $working_dir) = @_;
225
226 # print out the dmdSection
227 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section);
228
229 print $handle "<mets:fileSec>\n";
230 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
231
232 # Generate Filestream for Table of Contents (TOC)
233 print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
234
235 # print out the fileSection by sections
236 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir);
237
238 # print out the whole fileSection
239 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$working_dir);
240
241 print $handle " </mets:fileGrp>\n";
242 print $handle "</mets:fileSec>\n";
243
244 # print out the StructMapSection by sections
245
246 my $struct_type = "fedora:dsBindingMap";
247
248 # If document is going to make use of deminators (BMech and BDef) then
249 # need to code up more output XML here (structMap)and in
250 # METS:behaviorSec (Fedora extension?) sections
251
252}
253
254sub buffer_mets_amdSec_header
255{
256 my $self = shift(@_);
257 my ($section,$id) = @_;
258
259 # convert section number
260 my $section_num ="1". $section;
261
262 my $all_text = "";
263
264 my $label_attr = "";
265
266 $all_text .= "<mets:amdSec ID=\"$id$section\" >\n";
267 $all_text .= " <mets:techMD ID=\"$id$section.0\">\n"; # .0 fedora version number?
268
269 $label_attr = "LABEL=\"Metadata\"";
270
271 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"".$id."gsdl$section_num\">\n";
272 $all_text .= " <mets:xmlData>\n";
273
274 return $all_text;
275
276}
277
278sub buffer_mets_amdSec_footer
279{
280 my $self = shift(@_);
281
282 my $all_text = "";
283
284 $all_text .= " </mets:xmlData>\n";
285 $all_text .= " </mets:mdWrap>\n";
286
287 $all_text .= " </mets:techMD>\n";
288 $all_text .= "</mets:amdSec>\n";
289
290 return $all_text;
291
292}
293
294sub oai_dc_metadata_xml
295{
296 my $self = shift(@_);
297 my ($doc_obj,$section) = @_;
298
299 my $all_text = "";
300
301 my $dc_namespace = "";
302 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
303 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";
304
305 $all_text .= " <oai_dc:dc $dc_namespace>\n";
306
307 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
308 $all_text .= " </oai_dc:dc>\n";
309
310 return $all_text;
311}
312
313
314
315
316
317# Work out the what the metadata set prefixes (dc,dls etc.) are for
318# this document
319
320sub metadata_set_prefixes
321{
322 my $self = shift(@_);
323 my ($doc_obj, $section) = @_;
324
325 $section="" unless defined $section;
326
327 my $section_ptr = $doc_obj->_lookup_section($section);
328 return {} unless defined $section_ptr;
329
330 my $unique_prefix = {};
331
332 foreach my $data (@{$section_ptr->{'metadata'}})
333 {
334 my ($prefix) = ($data->[0]=~ m/^(.*?)\./);
335
336 if (defined $prefix)
337 {
338 next if ($prefix eq "dc"); # skip dublin core as handled separately elsewhere
339
340 $unique_prefix->{$prefix} = 1;
341 }
342 else
343 {
344 $unique_prefix->{"ex"} = 1;
345 }
346
347 }
348
349 return $unique_prefix;
350}
351
352
353sub mds_metadata_xml
354{
355 my $self = shift(@_);
356 my ($doc_obj, $section, $mds_prefix, $namespace) = @_;
357
358 # build up string of metadata with $mds_prefix
359 $section="" unless defined $section;
360
361 my $section_ptr = $doc_obj->_lookup_section($section);
362 return "" unless defined $section_ptr;
363
364 my $all_text="";
365 $all_text .= " <$mds_prefix:$mds_prefix $namespace>\n";
366
367
368 foreach my $data (@{$section_ptr->{'metadata'}})
369 {
370 if ($data->[0]=~ m/^(?:(.*?)\.)?(.*)$/)
371 {
372 my $curr_mds_prefix = $1;
373 my $mds_full_element = $2;
374
375 $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;
376
377 if ($curr_mds_prefix eq $mds_prefix)
378 {
379 # split up full element in the form Title^en into element=Title, attr="en"
380 my ($mds_element,$subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);
381 my $mds_attr = (defined $subelem) ? "qualifier=\"$subelem\"" : "";
382
383 my $escaped_value = &docprint::escape_text($data->[1]);
384
385 $all_text .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
386 }
387 }
388 }
389
390 $all_text .= " </$mds_prefix:$mds_prefix>\n";
391
392
393 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
394
395 return $all_text;
396}
397
398
399
400sub buffer_mets_dmdSection_section_xml
401{
402 my $self = shift(@_);
403 my ($doc_obj,$section) = @_;
404
405 $section="" unless defined $section;
406
407 my $section_ptr=$doc_obj->_lookup_section($section);
408 return "" unless defined $section_ptr;
409
410 my $all_text = "";
411
412 $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
413 $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
414 $all_text .= $self->buffer_mets_amdSec_footer($section);
415
416 # for each metadata set
417 my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);
418
419 foreach my $md_set (keys %$md_sets)
420 {
421 # Greenstone's agnostic approach to metadata sets conflicts with
422 # Fedoras more clinically prescribed one. Fake a namespace for
423 # each $md_set to keep both sides happy
424
425 my $fake_namespace
426 = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";
427 my $id_caps = $md_set;
428 $id_caps =~ tr/[a-z]/[A-Z]/;
429
430 $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
431 $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
432 $all_text .= $self->buffer_mets_amdSec_footer($section);
433 }
434
435
436 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
437 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
438 }
439
440 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
441
442 return $all_text;
443}
444
445
446
447
448sub doctxt_to_xlink
449{
450 my $self = shift @_;
451 my ($fname,$working_dir) = @_;
452
453 my $xlink_href;
454
455 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
456 if (!defined $fedora_prefix) {
457 $xlink_href = "file:$fname";
458 }
459 else
460 {
461 my $gsdlhome = $ENV{'GSDLHOME'};
462 my $gsdl_href = "$working_dir/$fname";
463
464 $gsdl_href =~ s/^$gsdlhome(\/)?//;
465 $gsdl_href = "/gsdl/$gsdl_href";
466
467 my $fserver = $ENV{'FEDORA_HOSTNAME'};
468 my $fport = $ENV{'FEDORA_SERVER_PORT'};
469
470 my $fdomain = "http://$fserver:$fport";
471 $xlink_href = "$fdomain$gsdl_href";
472 }
473
474
475 return $xlink_href;
476
477}
478
479
480sub buffer_mets_fileSection_toc
481{
482 my $self = shift(@_);
483 my ($doc_obj,$section,$working_dir) = @_;
484
485 my $opt_attr = "OWNERID=\"M\"";
486
487 my $all_text = ' <mets:fileGrp ID="TOC">'. "\n";
488 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n";
489 my $xlink = $self->doctxt_to_xlink("doctoc.xml",$working_dir);
490
491 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
492
493 $all_text .= ' xlink:title="Table of Contents"/>' . "\n";
494 $all_text .= " </mets:file>\n";
495 $all_text .= " </mets:fileGrp>\n";
496
497 return $all_text;
498}
499
500
501sub buffer_mets_fileSection_section_xml
502{
503 my $self = shift(@_);
504 my ($doc_obj,$section,$working_dir) = @_;
505
506 my $is_txt_split = 1;
507 my $opt_owner_id = "OWNERID=\"M\"";
508
509 my $all_text
510 = $self->SUPER::buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir,$is_txt_split, $opt_owner_id,"SECTION");
511
512
513 return $all_text;
514}
515
516sub buffer_mets_fileWhole_section_xml
517{
518 my $self = shift(@_);
519 my ($doc_obj,$section,$working_dir) = @_;
520
521 my $section_ptr = $doc_obj-> _lookup_section($section);
522 return "" unless defined $section_ptr;
523
524 my $all_text="";
525
526 my $fileID=0;
527
528 # Output the fileSection for the whole section
529 # => get the sourcefile and associative file
530
531 my $id_root = "";
532 my $opt_owner_id = "OWNERID=\"M\"";
533
534
535 foreach my $data (@{$section_ptr->{'metadata'}}){
536 my $escaped_value = &docprint::escape_text($data->[1]);
537
538 if ($data->[0] eq "gsdlassocfile"){
539
540 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
541 my $assoc_file = $1;
542 my $mime_type = $2;
543 my $assoc_dir = $3;
544
545 $id_root = $assoc_file;
546 $id_root =~ s/\//_/g;
547 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
548
549 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
550 ++$fileID;
551
552 my $mime_attr = "MIMETYPE=\"$mime_type\"";
553 my $xlink_title = "xlink:title=\"$assoc_file\"";
554
555 my $id_attr;
556 my $xlink_href;
557
558 $id_attr = "ID=\"$id_root.0\"";
559
560 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
561 if (!defined $fedora_prefix) {
562 $xlink_href = "xlink:href=\"$assfilePath\"";
563 }
564 else
565 {
566 my $gsdlhome = $ENV{'GSDLHOME'};
567 my $gsdl_href = "$working_dir/$assfilePath";
568
569 $gsdl_href =~ s/^$gsdlhome(\/)?//;
570 $gsdl_href = "/gsdl/$gsdl_href";
571
572 my $fserver = $ENV{'FEDORA_HOSTNAME'};
573 my $fport = $ENV{'FEDORA_SERVER_PORT'};
574
575 my $fdomain = "http://$fserver:$fport";
576 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
577 }
578
579 my $top_section = $doc_obj->get_top_section();
580 my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
581
582 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
583 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
584
585 $all_text .= " </mets:file>\n";
586
587 $all_text .= " </mets:fileGrp>\n";
588 }
589 }
590
591 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
592
593 return $all_text;
594}
595
596
5971;
Note: See TracBrowser for help on using the repository browser.