source: gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm@ 15013

Last change on this file since 15013 was 15013, checked in by davidb, 16 years ago

Adjustment to Fedora and METS plugouts so they can handle Fedora v2.x or Fedora v3.x

File size: 16.6 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
# Set up inheritance at compile time: FedoraMETSPlugout derives from
# METSPlugout.
BEGIN
{
    @FedoraMETSPlugout::ISA = qw(METSPlugout);
}
39
# Argument descriptions contributed by this plugout.  The only one is
# fedora_namespace, an optional string that defaults to "greenstone".
my $arguments = [
    {
        'name'      => "fedora_namespace",
        'desc'      => "{FedoraPlugout.fedora_namespace}",
        'type'      => "string",
        'deft'      => "greenstone",
        'reqd'      => "no",
        'hiddengli' => "no",
    },
];
48
49
50
# Option record describing this plugout; 'args' points at the argument
# list declared above.
my $options = {
    'name'     => "FedoraMETSPlugout",
    'desc'     => "{FedoraMETSPlugout.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
57
58
# Constructor.
#
# Appends this class name to $plugoutlist, adds this plugout's argument
# and option descriptions to $hashArgOptLists, builds the underlying
# METSPlugout object and reblesses it into $class.
#
#   $plugoutlist     - array ref that receives the class name
#   $inputargs       - passed through untouched to METSPlugout
#   $hashArgOptLists - hash ref with "ArgList" / "OptList" array refs
#
# Returns the new object blessed into $class.
sub new
{
    my $class = shift(@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;

    push(@$plugoutlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}) if defined $arguments;
    push(@{$hashArgOptLists->{"OptList"}}, $options)      if defined $options;

    my $self;
    if (defined $hashArgOptLists) {
        $self = METSPlugout->new($plugoutlist, $inputargs, $hashArgOptLists);
    }
    else {
        $self = METSPlugout->new($plugoutlist, $inputargs);
    }

    return bless $self, $class;
}
74
75
# Write the opening of the METS document to $handle.
#
# Builds the Fedora-specific attributes (OBJID, TYPE, LABEL), hands them
# to output_mets_xml_header_extra_attribute() and then prints the
# <mets:metsHdr> element.
#
#   $handle    - output file handle
#   $OID       - document identifier, used as the tail of OBJID
#   $doc_title - used verbatim as the LABEL attribute value
sub output_mets_xml_header
{
    my ($self, $handle, $OID, $doc_title) = @_;

    # Fall back to the "test" namespace when none has been configured
    my $oid_namespace = $self->{'fedora_namespace'};
    $oid_namespace = "test" unless defined $oid_namespace;

    my $collection = $ENV{'GSDLCOLLECTION'};

    # Might need the following in the schemeLocation attribute for Fedora3
    # http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd

    my $extra_attr = qq{OBJID="$oid_namespace:$collection-$OID" TYPE="FedoraObject" LABEL="$doc_title"};
    my $extra_schema;

    if ($ENV{'FEDORA2_HOME'}) {
        # FEDORA2_HOME set: pass the extension schema
        $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
    }
    else {
        # Otherwise add an EXT_VERSION attribute and pass no schema
        $extra_attr .= qq{ EXT_VERSION="1.1"};
    }

    $self->output_mets_xml_header_extra_attribute($handle, $extra_attr, $extra_schema);

    print $handle '<mets:metsHdr RECORDSTATUS="A"/>' . "\n"; # A = active
}
105
106#
107# Print out "family" of doctxt.xml files
108#
109
# Write the text of one section to its own doctxt<N>.xml file inside
# $working_dir, then recurse into every subsection.
#
# The file name is formed from "1" followed by the section number with
# each '.' replaced by '_'.  Output goes through the XSLT writer when
# $self->{'xslt_writer'} is defined, otherwise through the handle
# returned by get_output_handler().
#
# Returns nothing when the section cannot be found.
sub saveas_doctxt_section
{
    my ($self, $doc_obj, $working_dir, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return unless defined $section_ptr;

    (my $section_fnum = "1" . $section) =~ s/\./_/g;

    my $doc_txt_file = &util::filename_cat($working_dir, "doctxt$section_fnum.xml");

    $self->open_xslt_pipe($doc_txt_file, $self->{'xslt_txt'});

    my $outhandler = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_txt_file);

    $self->output_xml_header($outhandler);
    $self->output_txt_section($outhandler, $doc_obj, $section);
    $self->output_xml_footer($outhandler);

    # Close whichever sink was used above
    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($outhandler);
    }

    # Output all the subsections as separate files
    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
    }
}
155
156
# Write the per-section doctxt files for the whole document, starting
# from its top section, followed by the table-of-contents file.
sub saveas_doctxt
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $top_section = $doc_obj->get_top_section();

    $self->saveas_doctxt_section($doc_obj, $working_dir, $top_section);
    $self->saveas_toc($doc_obj, $working_dir);
}
168
# Build the nested <Section> XML describing the section hierarchy rooted
# at $section.
#
# Each level is indented by two spaces per unit of $depth, and its id
# attribute is "1" followed by the section number.  Returns the XML as
# a string, or "" when the section cannot be found.
sub buffer_toc
{
    my ($self, $doc_obj, $working_dir, $section, $depth) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $indent      = " " x ($depth*2);
    my $section_num = "1" . $section;

    my @pieces = ("$indent<Section id=\"$section_num\">\n");

    # Nested entries for every subsection, one level deeper
    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        push(@pieces,
             scalar($self->buffer_toc($doc_obj, $working_dir,
                                      "$section.$subsection", $depth+1)));
    }

    push(@pieces, "$indent</Section>\n");

    return join("", @pieces);
}
196
197
# Write the table of contents (as produced by buffer_toc) to doctoc.xml
# inside $working_dir.
#
# Output goes through the XSLT writer when $self->{'xslt_writer'} is
# defined, otherwise through the handle returned by get_output_handler().
sub saveas_toc
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $top_section = $doc_obj->get_top_section();
    my $toc_file    = &util::filename_cat($working_dir, "doctoc.xml");

    $self->open_xslt_pipe($toc_file, $self->{'xslt_txt'});

    my $outhandler = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($toc_file);

    print $outhandler $self->buffer_toc($doc_obj, $working_dir, $top_section, 0);

    # Close whichever sink was used above
    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($outhandler);
    }
}
228
229
# Build the RELS-EXT <mets:amdSec> block for a document.
#
# The block is only produced when the top section's "Plugin" metadata is
# "ImagePlug"; it declares the content model demo:UVA_STD_IMAGE for the
# Fedora object "<namespace>:<collection>-<OID>".  For every other
# document the empty string is returned.
sub buffer_mets_relsext_xml
{
    my ($self, $doc_obj) = @_;

    my $OID = $doc_obj->get_OID();

    # Fall back to the "test" namespace when none has been configured
    my $oid_namespace = $self->{'fedora_namespace'};
    $oid_namespace = "test" unless defined $oid_namespace;

    my $collection = $ENV{'GSDLCOLLECTION'};
    my $fed_id     = "$oid_namespace:$collection-$OID";

    my $top_section = $doc_obj->get_top_section();
    my $plugin_type = $doc_obj->get_metadata_element($top_section, "Plugin");

    return "" unless (defined $plugin_type) && ($plugin_type eq "ImagePlug");

    my @xml_lines = (
        q{<mets:amdSec ID="RELS-EXT">},
        q{ <mets:techMD ID="RELS-EXT1.0" STATUS="A">},
        q{ <mets:mdWrap LABEL="RELS-EXT - RDF formatted relationship metadata" MDTYPE="OTHER" MIMETYPE="text/xml">},
        q{ <mets:xmlData>},
        q{ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora-model="info:fedora/fedora-system:def/model#">},
        qq{ <rdf:Description rdf:about="info:fedora/$fed_id">},
        q{ <fedora-model:hasContentModel rdf:resource="info:fedora/demo:UVA_STD_IMAGE"/>},
        q{ </rdf:Description>},
        q{ </rdf:RDF>},
        q{ </mets:xmlData>},
        q{ </mets:mdWrap>},
        q{ </mets:techMD>},
        q{</mets:amdSec>},
    );

    return join("", map { "$_\n" } @xml_lines);
}
268
269
270#
271# Print out docmets.xml file
272#
# Print the body of docmets.xml for $section to $handle: the metadata
# sections, the RELS-EXT block and a <mets:fileSec> holding the TOC,
# per-section and whole-document file groups.
sub output_mets_section
{
    my ($self, $handle, $doc_obj, $section, $working_dir) = @_;

    # Metadata (dmdSection) followed by the relationship metadata
    print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj, $section);
    print $handle $self->buffer_mets_relsext_xml($doc_obj);

    print $handle qq{<mets:fileSec>\n};
    print $handle qq{ <mets:fileGrp ID="DATASTREAMS">\n};

    # Filestream for the Table of Contents (TOC)
    print $handle $self->buffer_mets_fileSection_toc($doc_obj, $section, $working_dir);

    # File section, section by section
    print $handle $self->buffer_mets_fileSection_section_xml($doc_obj, $section, $working_dir);

    # File section for the document as a whole
    print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj, $section, $working_dir);

    print $handle qq{ </mets:fileGrp>\n};
    print $handle qq{</mets:fileSec>\n};

    # No StructMap section is printed at present.
    #
    # If a document is going to make use of disseminators (BMech and
    # BDef) then more output XML needs to be coded up here (structMap)
    # and in METS:behaviorSec (Fedora extension?) sections.

    my $struct_type = "fedora:dsBindingMap";
}
307
# Build the opening tags of a <mets:amdSec> metadata wrapper.
#
#   $section - section number as used elsewhere in this file
#              ("" for the top section, ".1", ".1.2", ...)
#   $id      - prefix for the generated ID attributes (e.g. "DC")
#
# Returns the XML up to and including the opening <mets:xmlData> tag;
# buffer_mets_amdSec_footer() supplies the matching close tags.
sub buffer_mets_amdSec_header
{
    my ($self, $section, $id) = @_;

    # convert section number
    my $section_num = "1" . $section;

    my $label_attr = qq{LABEL="Metadata"};

    return join("",
        qq{<mets:amdSec ID="$id$section" >\n},
        qq{ <mets:techMD ID="$id$section.0">\n},   # .0 fedora version number?
        qq{ <mets:mdWrap $label_attr MDTYPE="OTHER" OTHERMDTYPE="gsdl3" ID="} . $id . qq{gsdl$section_num">\n},
        qq{ <mets:xmlData>\n},
    );
}
331
# Build the closing tags matching buffer_mets_amdSec_header():
# xmlData, mdWrap, techMD and amdSec, in that order.  Any arguments
# beyond $self are ignored.
sub buffer_mets_amdSec_footer
{
    my ($self) = @_;

    my @closing_tags = (
        " </mets:xmlData>",
        " </mets:mdWrap>",
        " </mets:techMD>",
        "</mets:amdSec>",
    );

    return join("", map { "$_\n" } @closing_tags);
}
347
# Wrap the Dublin Core metadata of $section in an <oai_dc:dc> element
# that declares the dc and oai_dc namespaces.  The element content
# comes from get_dc_metadata().
sub oai_dc_metadata_xml
{
    my ($self, $doc_obj, $section) = @_;

    my $dc_namespace = join("",
        "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"",
        " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ",
    );

    my $xml = " <oai_dc:dc $dc_namespace>\n";
    $xml   .= $self->get_dc_metadata($doc_obj, $section, "oai_dc");
    $xml   .= " </oai_dc:dc>\n";

    return $xml;
}
366
367
368
369
370
371# Work out what the metadata set prefixes (dc, dls etc.) are for
372# this document
373
# Collect the metadata-set prefixes used by the metadata of $section.
#
# A prefix is the text before the first '.' in a metadata name.  "dc"
# is left out (Dublin Core is handled separately elsewhere) and names
# without any '.' are counted under "ex".
#
# Returns a hash ref whose keys are the prefixes found (values are 1);
# the hash is empty when the section cannot be found.
sub metadata_set_prefixes
{
    my ($self, $doc_obj, $section) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return {} unless defined $section_ptr;

    my %seen_prefix;

    foreach my $data (@{$section_ptr->{'metadata'}}) {
        my ($prefix) = ($data->[0] =~ m/^(.*?)\./);

        if (!defined $prefix) {
            $seen_prefix{"ex"} = 1;
            next;
        }

        # skip dublin core as handled separately elsewhere
        $seen_prefix{$prefix} = 1 unless $prefix eq "dc";
    }

    return \%seen_prefix;
}
405
406
# Build the XML for all metadata of $section belonging to one metadata
# set.
#
#   $mds_prefix - metadata set prefix to select (names without a prefix
#                 are treated as belonging to "ex")
#   $namespace  - namespace declaration placed on the wrapper element
#
# Each selected item becomes a <prefix:metadata name="..."> element; a
# name of the form Title^en is split into name "Title" and
# qualifier "en".  Values are passed through docprint::escape_text and
# control characters other than LF and CR are removed from the result.
#
# Returns "" when the section cannot be found.
sub mds_metadata_xml
{
    my ($self, $doc_obj, $section, $mds_prefix, $namespace) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $xml = " <$mds_prefix:$mds_prefix $namespace>\n";

    foreach my $data (@{$section_ptr->{'metadata'}}) {
        next unless ($data->[0] =~ m/^(?:(.*?)\.)?(.*)$/);

        my ($curr_mds_prefix, $mds_full_element) = ($1, $2);
        $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;

        next unless ($curr_mds_prefix eq $mds_prefix);

        # split up full element in the form Title^en into element=Title, attr="en"
        my ($mds_element, $subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);

        my $mds_attr = "";
        $mds_attr = "qualifier=\"$subelem\"" if defined $subelem;

        my $escaped_value = &docprint::escape_text($data->[1]);

        $xml .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
    }

    $xml .= " </$mds_prefix:$mds_prefix>\n";

    # Remove control characters, keeping only LF (0x0A) and CR (0x0D)
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
451
452
453
# Build the metadata (amdSec) XML for $section and, recursively, for all
# of its subsections.
#
# For each section this emits one amdSec wrapping the Dublin Core
# metadata (ID prefix "DC"), followed by one amdSec per other metadata
# set reported by metadata_set_prefixes().  Metadata sets are processed
# in sorted order so that the generated file is identical from one run
# to the next (hash key order is not stable in Perl).
#
# Returns the XML as a string with control characters other than LF and
# CR removed, or "" when the section cannot be found.
sub buffer_mets_dmdSection_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $all_text = "";

    # Dublin Core first
    $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
    $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
    $all_text .= $self->buffer_mets_amdSec_footer($section);

    # for each metadata set
    my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);

    foreach my $md_set (sort keys %$md_sets)
    {
        # Greenstone's agnostic approach to metadata sets conflicts with
        # Fedora's more clinically prescribed one.  Fake a namespace for
        # each $md_set to keep both sides happy

        my $fake_namespace
            = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";

        # ID prefix is the set name with a-z upper-cased.  Note that tr
        # takes character ranges directly; square brackets would be
        # treated as literal characters.
        (my $id_caps = $md_set) =~ tr/a-z/A-Z/;

        $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
        $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
        $all_text .= $self->buffer_mets_amdSec_footer($section);
    }

    # Subsections follow their parent, in document order
    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
    }

    # Remove control characters, keeping only LF (0x0A) and CR (0x0D)
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
498
499
500
501
# Work out the xlink:href value for a file written to $working_dir.
#
#   $fname       - file name relative to $working_dir
#   $working_dir - directory the file lives in
#
# When the FEDORA_HOME environment variable is not set the result is
# simply "file:$fname".  Otherwise the path "$working_dir/$fname" has a
# leading GSDLHOME (and one following '/') removed, is prefixed with
# "/gsdl/", and is turned into an http URL on
# FEDORA_HOSTNAME:FEDORA_SERVER_PORT.
#
# GSDLHOME is matched literally: it is a file system path, not a
# pattern, so characters such as '+', '(' or '\' in it must not be
# interpreted as regular-expression syntax.
sub doctxt_to_xlink
{
    my $self = shift @_;
    my ($fname,$working_dir) = @_;

    # No Fedora installation configured: plain file reference
    return "file:$fname" unless defined $ENV{'FEDORA_HOME'};

    my $gsdlhome = defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : "";

    my $gsdl_href = "$working_dir/$fname";
    $gsdl_href =~ s/^\Q$gsdlhome\E(\/)?//;
    $gsdl_href = "/gsdl/$gsdl_href";

    my $fserver = $ENV{'FEDORA_HOSTNAME'};
    my $fport   = $ENV{'FEDORA_SERVER_PORT'};

    my $fdomain = "http://$fserver:$fport";

    return "$fdomain$gsdl_href";
}
532
533
# Build the <mets:fileGrp ID="TOC"> block that points at doctoc.xml in
# $working_dir.  The link target is obtained from doctxt_to_xlink().
# $doc_obj and $section are accepted but not used.
sub buffer_mets_fileSection_toc
{
    my ($self, $doc_obj, $section, $working_dir) = @_;

    my $opt_attr = "OWNERID=\"M\"";
    my $xlink    = $self->doctxt_to_xlink("doctoc.xml", $working_dir);

    return join("",
        ' <mets:fileGrp ID="TOC">' . "\n",
        " <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n",
        ' <mets:FLocat LOCTYPE="URL" xlink:href="' . $xlink . '"',
        ' xlink:title="Table of Contents"/>' . "\n",
        " </mets:file>\n",
        " </mets:fileGrp>\n",
    );
}
553
554
# Build the per-section file groups by delegating to the parent class
# implementation with Fedora-specific settings: the text-split flag set
# to 1, the attribute OWNERID="M", and "SECTION" as the final argument.
sub buffer_mets_fileSection_section_xml
{
    my ($self, $doc_obj, $section, $working_dir) = @_;

    my $is_txt_split = 1;
    my $opt_owner_id = "OWNERID=\"M\"";

    my $section_xml = $self->SUPER::buffer_mets_fileSection_section_xml(
        $doc_obj, $section, $working_dir,
        $is_txt_split, $opt_owner_id, "SECTION");

    return $section_xml;
}
569
# Build one <mets:fileGrp> per associated file of $section.
#
# Associated files are the metadata items named "gsdlassocfile", whose
# (escaped) value has the form "<file>:<mime type>:<dir>".  The first
# one gets the group ID "url"; every later one gets "FG<file>" with '/'
# replaced by '_'.
#
# The link target is the file path itself when the FEDORA_HOME
# environment variable is not set.  Otherwise "$working_dir/<path>" has
# a leading GSDLHOME (matched literally, plus one following '/')
# removed, is prefixed with "/gsdl/", and is turned into an http URL on
# FEDORA_HOSTNAME:FEDORA_SERVER_PORT.
#
# Values that do not have the three-field form are reported with a
# warning and skipped, rather than being emitted with missing or stale
# fields.
#
# Returns the XML as a string with control characters other than LF and
# CR removed, or "" when the section cannot be found.
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section,$working_dir) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $all_text = "";

    my $opt_owner_id = "OWNERID=\"M\"";

    # Environment settings do not change while looping, so read them once
    my $use_fedora_url = defined $ENV{'FEDORA_HOME'};
    my $gsdlhome       = defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : "";
    my $fdomain        = "";
    if ($use_fedora_url) {
        my $fserver = $ENV{'FEDORA_HOSTNAME'};
        my $fport   = $ENV{'FEDORA_SERVER_PORT'};
        $fdomain = "http://$fserver:$fport";
    }

    my $first_assocfile = 1;

    foreach my $data (@{$section_ptr->{'metadata'}}) {
        next unless $data->[0] eq "gsdlassocfile";

        my $escaped_value = &docprint::escape_text($data->[1]);

        my ($assoc_file, $mime_type, $assoc_dir)
            = ($escaped_value =~ m/^(.*?):(.*):(.*)$/);

        if (!defined $assoc_file) {
            warn "FedoraMETSPlugout: skipping malformed gsdlassocfile value '$escaped_value'\n";
            next;
        }

        my $id_root;
        if ($first_assocfile) {
            $id_root = "url";
            $first_assocfile = 0;
        }
        else {
            $id_root = "FG$assoc_file";
        }
        $id_root =~ s/\//_/g;

        my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";

        my $mime_attr   = "MIMETYPE=\"$mime_type\"";
        my $xlink_title = "xlink:title=\"$assoc_file\"";
        my $id_attr     = "ID=\"F$id_root.0\"";

        my $xlink_href;
        if (!$use_fedora_url) {
            $xlink_href = "xlink:href=\"$assfilePath\"";
        }
        else {
            my $gsdl_href = "$working_dir/$assfilePath";

            # GSDLHOME is a path, not a pattern: quote it
            $gsdl_href =~ s/^\Q$gsdlhome\E(\/)?//;
            $gsdl_href = "/gsdl/$gsdl_href";

            $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
        }

        $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
        $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
        $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
        $all_text .= " </mets:file>\n";
        $all_text .= " </mets:fileGrp>\n";
    }

    # Remove control characters, keeping only LF (0x0A) and CR (0x0D)
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
658
659
6601;
Note: See TracBrowser for help on using the repository browser.