source: gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm@ 15360

Last change on this file since 15360 was 15360, checked in by ak19, 16 years ago

Extra test for Greenstone3 added so plugout works for both versions of Greenstone

File size: 17.0 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
# Set up inheritance at compile time: this plugout derives from METSPlugout.
BEGIN {
    @FedoraMETSPlugout::ISA = ('METSPlugout');
}
39
# Arguments this plugout adds on top of those it inherits.  The only one is
# the namespace used as the prefix of the object identifiers built in this
# file (see output_mets_xml_header and buffer_mets_relsext_xml).
my $arguments = [
    {
        'name'      => 'fedora_namespace',
        'desc'      => '{FedoraPlugout.fedora_namespace}',
        'type'      => 'string',
        'deft'      => 'greenstone',
        'reqd'      => 'no',
        'hiddengli' => 'no',
    },
];

# Option record describing this plugout; it carries the argument list above.
my $options = {
    'name'     => 'FedoraMETSPlugout',
    'desc'     => '{FedoraMETSPlugout.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
57
58
# Constructor.
#
# Parameters:
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - passed straight through to METSPlugout's constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugout's argument and option descriptions
#
# Returns the object built by METSPlugout, re-blessed into $class.
sub new {
    my $class = shift;
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;

    push @{$plugoutlist}, $class;

    # Note: these pushes autovivify $hashArgOptLists if it arrived undefined.
    if (defined $arguments) {
        push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    }
    if (defined $options) {
        push @{ $hashArgOptLists->{"OptList"} }, $options;
    }

    my $self;
    if (defined $hashArgOptLists) {
        $self = METSPlugout->new($plugoutlist, $inputargs, $hashArgOptLists);
    }
    else {
        $self = METSPlugout->new($plugoutlist, $inputargs);
    }

    return bless $self, $class;
}
74
75
# Write the opening of the METS document for one Fedora object.
#
# Parameters:
#   $handle    - output filehandle
#   $OID       - document identifier
#   $doc_title - document title, used verbatim as the LABEL attribute
#
# The object ID has the form "<namespace>:<GSDLCOLLECTION>-<OID>", where the
# namespace is $self->{'fedora_namespace'} or "test" when that is undefined.
sub output_mets_xml_header {
    my $self = shift;
    my ($handle, $OID, $doc_title) = @_;

    my $oid_namespace = $self->{'fedora_namespace'};
    $oid_namespace = "test" unless defined $oid_namespace;

    my $collection = $ENV{'GSDLCOLLECTION'};

    # Might need the following in the schemaLocation attribute for Fedora3
    #   http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd

    my @attributes = (
        "OBJID=\"$oid_namespace:$collection-$OID\"",
        "TYPE=\"FedoraObject\"",
        "LABEL=\"$doc_title\"",
    );

    my $extra_schema = undef;

    if ($ENV{'FEDORA2_HOME'}) {
        # FEDORA2_HOME set: pass the extension schema instead of EXT_VERSION.
        $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
    }
    else {
        push @attributes, "EXT_VERSION=\"1.1\"";
    }

    my $extra_attr = join(" ", @attributes);

    $self->output_mets_xml_header_extra_attribute($handle, $extra_attr, $extra_schema);

    print {$handle} '<mets:metsHdr RECORDSTATUS="A"/>' . "\n";   # A = active
}
105
106#
107# Print out "family" of doctxt.xml files
108#
109
# Write the text of one section to its own doctxt<N>.xml file in
# $working_dir, then recurse into every subsection.
#
# The file suffix is "1" followed by the section identifier with every "."
# replaced by "_".  Nothing is written when the section cannot be looked up.
sub saveas_doctxt_section {
    my $self = shift;
    my ($doc_obj, $working_dir, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return unless defined $section_ptr;

    (my $file_suffix = "1" . $section) =~ tr/./_/;

    my $doc_txt_file = util::filename_cat($working_dir, "doctxt$file_suffix.xml");

    $self->open_xslt_pipe($doc_txt_file, $self->{'xslt_txt'});

    # Write through the XSLT writer when one is set, otherwise straight to
    # the file.
    my $out = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_txt_file);

    $self->output_xml_header($out);
    $self->output_txt_section($out, $doc_obj, $section);
    $self->output_xml_footer($out);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($out);
    }

    # Each subsection goes to its own file.
    foreach my $child (@{ $section_ptr->{'subsection_order'} }) {
        $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$child");
    }
}
155
156
# Write the per-section text files for the whole document, starting at its
# top section, followed by the table-of-contents file.
sub saveas_doctxt {
    my ($self, $doc_obj, $working_dir) = @_;

    my $top_section = $doc_obj->get_top_section();

    $self->saveas_doctxt_section($doc_obj, $working_dir, $top_section);
    $self->saveas_toc($doc_obj, $working_dir);
}
168
# Build the nested <Section> XML for $section and everything beneath it.
#
# Parameters:
#   $doc_obj     - document object
#   $working_dir - passed through to the recursive calls (not used here)
#   $section     - section identifier; the emitted id is "1" . $section
#   $depth       - nesting depth, which controls the indentation
#
# Returns the XML as a string, or "" when the section cannot be looked up.
sub buffer_toc {
    my $self = shift;
    my ($doc_obj, $working_dir, $section, $depth) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $pad = " " x ($depth * 2);

    my @pieces = ("$pad<Section id=\"1$section\">\n");

    # Nest each subsection's element inside this one.
    foreach my $child (@{ $section_ptr->{'subsection_order'} }) {
        push @pieces,
            scalar $self->buffer_toc($doc_obj, $working_dir, "$section.$child", $depth + 1);
    }

    push @pieces, "$pad</Section>\n";

    return join("", @pieces);
}
196
197
# Write the document's table of contents (the output of buffer_toc for the
# top section) to doctoc.xml in $working_dir.
sub saveas_toc {
    my ($self, $doc_obj, $working_dir) = @_;

    my $top_section = $doc_obj->get_top_section();

    my $toc_file = util::filename_cat($working_dir, "doctoc.xml");

    $self->open_xslt_pipe($toc_file, $self->{'xslt_txt'});

    # Write through the XSLT writer when one is set, otherwise straight to
    # the file.
    my $out = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($toc_file);

    print {$out} $self->buffer_toc($doc_obj, $working_dir, $top_section, 0);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($out);
    }
}
228
229
# Build the RELS-EXT amdSec for a document.
#
# Output is produced only when the top section's "Plugin" metadata equals
# "ImagePlug"; otherwise "" is returned.  The RDF description is about
# "info:fedora/<namespace>:<GSDLCOLLECTION>-<OID>", where the namespace is
# $self->{'fedora_namespace'} or "test" when that is undefined.
sub buffer_mets_relsext_xml {
    my $self = shift;
    my ($doc_obj) = @_;

    my $OID = $doc_obj->get_OID();

    my $oid_namespace = $self->{'fedora_namespace'};
    $oid_namespace = "test" unless defined $oid_namespace;

    my $collection = $ENV{'GSDLCOLLECTION'};
    my $fed_id     = "$oid_namespace:$collection-$OID";

    my $top_section = $doc_obj->get_top_section();
    my $plugin_type = $doc_obj->get_metadata_element($top_section, "Plugin");

    return "" unless (defined $plugin_type) && ($plugin_type eq "ImagePlug");

    return join("",
        qq{<mets:amdSec ID="RELS-EXT">\n},
        qq{ <mets:techMD ID="RELS-EXT1.0" STATUS="A">\n},
        qq{ <mets:mdWrap LABEL="RELS-EXT - RDF formatted relationship metadata" MDTYPE="OTHER" MIMETYPE="text/xml">\n},
        qq{ <mets:xmlData>\n},
        qq{ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora-model="info:fedora/fedora-system:def/model#">\n},
        qq{ <rdf:Description rdf:about="info:fedora/$fed_id">\n},
        qq{ <fedora-model:hasContentModel rdf:resource="info:fedora/demo:UVA_STD_IMAGE"/>\n},
        qq{ </rdf:Description>\n},
        qq{ </rdf:RDF>\n},
        qq{ </mets:xmlData>\n},
        qq{ </mets:mdWrap>\n},
        qq{ </mets:techMD>\n},
        qq{</mets:amdSec>\n},
    );
}
268
269
270#
271# Print out docmets.xml file
272#
# Write the body of docmets.xml to $handle: the metadata sections, the
# RELS-EXT section, and a <mets:fileSec> whose DATASTREAMS group holds the
# table-of-contents, per-section and whole-document file entries.
sub output_mets_section {
    my $self = shift;
    my ($handle, $doc_obj, $section, $working_dir) = @_;

    # Metadata sections for this section and everything beneath it
    print {$handle} $self->buffer_mets_dmdSection_section_xml($doc_obj, $section);

    # Relationship metadata (RELS-EXT)
    print {$handle} $self->buffer_mets_relsext_xml($doc_obj);

    print {$handle} "<mets:fileSec>\n";
    print {$handle} " <mets:fileGrp ID=\"DATASTREAMS\">\n";

    # File entry for the Table of Contents (TOC)
    print {$handle} $self->buffer_mets_fileSection_toc($doc_obj, $section, $working_dir);

    # File entries section by section
    print {$handle} $self->buffer_mets_fileSection_section_xml($doc_obj, $section, $working_dir);

    # File entries for the document as a whole
    print {$handle} $self->buffer_mets_fileWhole_section_xml($doc_obj, $section, $working_dir);

    print {$handle} " </mets:fileGrp>\n";
    print {$handle} "</mets:fileSec>\n";

    # No structMap is written yet.  If documents are to make use of
    # disseminators (BMech and BDef), more output XML is needed here
    # (structMap) and in METS:behaviorSec (Fedora extension?) sections.
    # The value below is not used for output; it is kept as the final
    # statement so the sub's implicit return value stays the same.
    my $struct_type = "fedora:dsBindingMap";
}
307
# Return the opening tags of one metadata amdSec: amdSec, techMD, mdWrap and
# xmlData.
#
# Parameters:
#   $section - section identifier, appended to $id to form the element IDs
#   $id      - ID prefix for this metadata set (e.g. "DC")
sub buffer_mets_amdSec_header {
    my ($self, $section, $id) = @_;

    # Section number used in the mdWrap ID: "1" followed by the identifier
    my $section_num = "1$section";
    my $label_attr  = 'LABEL="Metadata"';

    return join("",
        qq{<mets:amdSec ID="$id$section" >\n},
        qq{ <mets:techMD ID="$id$section.0">\n},    # .0 fedora version number?
        qq{ <mets:mdWrap $label_attr MDTYPE="OTHER" OTHERMDTYPE="gsdl3" ID="${id}gsdl$section_num">\n},
        qq{ <mets:xmlData>\n},
    );
}
331
# Return the closing tags matching buffer_mets_amdSec_header: xmlData,
# mdWrap, techMD and amdSec.  Any arguments after $self are ignored.
sub buffer_mets_amdSec_footer {
    my $self = shift;

    return join("",
        " </mets:xmlData>\n",
        " </mets:mdWrap>\n",
        " </mets:techMD>\n",
        "</mets:amdSec>\n",
    );
}
347
# Return the section's Dublin Core metadata wrapped in an <oai_dc:dc>
# element that declares the dc and oai_dc namespaces.  The element content
# comes from $self->get_dc_metadata.
sub oai_dc_metadata_xml {
    my ($self, $doc_obj, $section) = @_;

    my $dc_namespace = join("",
        'xmlns:dc="http://purl.org/dc/elements/1.1/"',
        ' xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" ',
    );

    my $xml = " <oai_dc:dc $dc_namespace>\n";
    $xml .= $self->get_dc_metadata($doc_obj, $section, "oai_dc");
    $xml .= " </oai_dc:dc>\n";

    return $xml;
}
366
367
368
369
370
# Work out what the metadata set prefixes (dc, dls etc.) are for
# this document
373
# Collect the metadata set prefixes used by one section's metadata.
#
# The prefix is whatever precedes the first "." in a metadata name.  Names
# without a "." count towards the "ex" set, and the "dc" prefix is left out.
#
# Returns a hash ref mapping each prefix found to 1; the hash is empty when
# the section cannot be looked up.  An undefined $section is treated as "".
sub metadata_set_prefixes {
    my ($self, $doc_obj, $section) = @_;

    $section = "" if !defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return {} unless defined $section_ptr;

    my %seen;

    foreach my $entry (@{ $section_ptr->{'metadata'} }) {
        if ($entry->[0] =~ m/^(.*?)\./) {
            my $prefix = $1;

            # skip dublin core as handled separately elsewhere
            $seen{$prefix} = 1 unless $prefix eq "dc";
        }
        else {
            $seen{"ex"} = 1;
        }
    }

    return \%seen;
}
405
406
# Build the XML for every metadata entry of one section that belongs to the
# metadata set $mds_prefix.
#
# Parameters:
#   $doc_obj, $section - document and section (undefined section means "")
#   $mds_prefix        - metadata set prefix; names without a prefix belong
#                        to the "ex" set
#   $namespace         - text placed inside the wrapper element's start tag
#
# A name of the form Element^Sub is written with name="Element" and
# qualifier="Sub".  Values are passed through docprint::escape_text.
# Returns the XML string, or "" when the section cannot be looked up.
sub mds_metadata_xml {
    my ($self, $doc_obj, $section, $mds_prefix, $namespace) = @_;

    $section = "" if !defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $xml = " <$mds_prefix:$mds_prefix $namespace>\n";

    foreach my $entry (@{ $section_ptr->{'metadata'} }) {
        next unless $entry->[0] =~ m/^(?:(.*?)\.)?(.*)$/;

        my ($entry_prefix, $full_element) = ($1, $2);
        $entry_prefix = "ex" unless defined $entry_prefix;

        next if $entry_prefix ne $mds_prefix;

        # split up full element in the form Title^en into element=Title, attr="en"
        my ($element, $qualifier) = ($full_element =~ m/^(.*?)(?:\^(.*))?$/);
        my $attr = (defined $qualifier) ? "qualifier=\"$qualifier\"" : "";

        my $escaped_value = docprint::escape_text($entry->[1]);

        $xml .= " <$mds_prefix:metadata name=\"$element\" $attr>$escaped_value</$mds_prefix:metadata>\n";
    }

    $xml .= " </$mds_prefix:$mds_prefix>\n";

    # Remove control characters, keeping only LF and CR
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
451
452
453
# Build the metadata amdSec blocks for $section and, recursively, for all of
# its subsections.
#
# For each section this emits one block of Dublin Core metadata (ID prefix
# "DC") followed by one block per other metadata set found by
# metadata_set_prefixes.  Sets are emitted in sorted order so the generated
# XML is the same from run to run; iterating the hash directly would follow
# Perl's per-process hash ordering.
#
# Returns the XML string, or "" when the section cannot be looked up.  An
# undefined $section is treated as "".
sub buffer_mets_dmdSection_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $all_text = "";

    $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
    $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
    $all_text .= $self->buffer_mets_amdSec_footer($section);

    # for each metadata set
    my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);

    foreach my $md_set (sort keys %$md_sets)
    {
        # Greenstone's agnostic approach to metadata sets conflicts with
        # Fedora's more clinically prescribed one.  Fake a namespace for
        # each $md_set to keep both sides happy

        my $fake_namespace
            = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";

        # Upper-case the ASCII letters of the prefix for use in element IDs.
        # (tr takes plain character lists; square brackets in them would be
        # treated as literal characters.)
        my $id_caps = $md_set;
        $id_caps =~ tr/a-z/A-Z/;

        $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
        $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
        $all_text .= $self->buffer_mets_amdSec_footer($section);
    }

    foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
        $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
    }

    # Remove control characters, keeping only LF and CR
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
498
499
500
501
# Work out the xlink:href value for a file written into $working_dir.
#
# Parameters:
#   $fname       - file name relative to $working_dir
#   $working_dir - directory the file was written to
#
# Returns "file:$fname" when FEDORA_HOME is not set.  Otherwise returns
# "http://<FEDORA_HOSTNAME>:<FEDORA_SERVER_PORT>/gsdl/<path>", where <path>
# is "$working_dir/$fname" with the leading collect-parent directory
# removed.  That directory is GSDL3HOME/sites/localsite when GSDL3HOME is
# set, and GSDLHOME otherwise.
sub doctxt_to_xlink
{
    my $self = shift @_;
    my ($fname,$working_dir) = @_;

    return "file:$fname" unless defined $ENV{'FEDORA_HOME'};

    my $collectparent;
    if (defined $ENV{'GSDL3HOME'}) {
        $collectparent = util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
    }
    else {
        # greenstone 2
        $collectparent = $ENV{'GSDLHOME'};
    }
    $collectparent = "" unless defined $collectparent;

    my $gsdl_href = "$working_dir/$fname";

    # \Q...\E: the directory is a literal path, not a pattern.  Without the
    # quoting, characters such as "(", "+" or "." in the install path would
    # be interpreted as regex syntax and the prefix would not be stripped.
    $gsdl_href =~ s/^\Q$collectparent\E(\/)?//;
    $gsdl_href = "/gsdl/$gsdl_href";

    my $fserver = $ENV{'FEDORA_HOSTNAME'};
    my $fport   = $ENV{'FEDORA_SERVER_PORT'};

    return "http://$fserver:$fport$gsdl_href";
}
540
541
# Return the "TOC" fileGrp: a single text/xml file entry whose FLocat points
# at doctoc.xml (link computed by doctxt_to_xlink from $working_dir).
sub buffer_mets_fileSection_toc {
    my ($self, $doc_obj, $section, $working_dir) = @_;

    my $opt_attr = 'OWNERID="M"';
    my $xlink    = $self->doctxt_to_xlink("doctoc.xml", $working_dir);

    return join("",
        qq{ <mets:fileGrp ID="TOC">\n},
        qq{ <mets:file MIMETYPE="text/xml" ID="FILETOC" $opt_attr >\n},
        qq{ <mets:FLocat LOCTYPE="URL" xlink:href="$xlink"},
        qq{ xlink:title="Table of Contents"/>\n},
        qq{ </mets:file>\n},
        qq{ </mets:fileGrp>\n},
    );
}
561
562
# Delegate to the parent class's buffer_mets_fileSection_section_xml with
# this plugout's fixed extra arguments: the text-split flag switched on, an
# OWNERID="M" attribute string, and the literal "SECTION".
sub buffer_mets_fileSection_section_xml {
    my ($self, $doc_obj, $section, $working_dir) = @_;

    my @fedora_args = (
        1,                  # is_txt_split
        "OWNERID=\"M\"",    # opt_owner_id
        "SECTION",
    );

    return scalar $self->SUPER::buffer_mets_fileSection_section_xml(
        $doc_obj, $section, $working_dir, @fedora_args);
}
577
# Build one <mets:fileGrp> per associated file of $section.
#
# Associated files are the section's "gsdlassocfile" metadata entries, whose
# (escaped) value has the form "<file>:<mime type>:<directory>".  The first
# one gets the group ID "url"; later ones get "FG<file>" with "/" replaced
# by "_".  A value that does not have this three-part form is skipped with
# a warning on STDERR rather than being emitted with fields left over from
# the previous entry.
#
# The xlink:href is the plain relative path when FEDORA_HOME is not set.
# Otherwise it is "http://<FEDORA_HOSTNAME>:<FEDORA_SERVER_PORT>/gsdl/<path>"
# with the collect-parent directory (GSDL3HOME/sites/localsite, or GSDLHOME)
# stripped from the front of "$working_dir/<path>".
#
# Returns the XML string, or "" when the section cannot be looked up.
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section,$working_dir) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $all_text = "";

    my $opt_owner_id    = "OWNERID=\"M\"";
    my $first_assocfile = 1;

    foreach my $data (@{$section_ptr->{'metadata'}}) {
        next unless $data->[0] eq "gsdlassocfile";

        my $escaped_value = docprint::escape_text($data->[1]);

        # Capture in list context so a failed match yields undef values
        # instead of silently reusing $1..$3 from an earlier match.
        my ($assoc_file, $mime_type, $assoc_dir)
            = ($escaped_value =~ m/^(.*?):(.*):(.*)$/);

        if (!defined $assoc_file) {
            print STDERR "FedoraMETSPlugout: skipping malformed gsdlassocfile value '$escaped_value'\n";
            next;
        }

        my $id_root;
        if ($first_assocfile) {
            $id_root = "url";
            $first_assocfile = 0;
        }
        else {
            $id_root = "FG$assoc_file";
        }
        $id_root =~ s/\//_/g;

        my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";

        my $mime_attr   = "MIMETYPE=\"$mime_type\"";
        my $xlink_title = "xlink:title=\"$assoc_file\"";
        my $id_attr     = "ID=\"F$id_root.0\"";

        my $xlink_href;
        if (!defined $ENV{'FEDORA_HOME'}) {
            $xlink_href = "xlink:href=\"$assfilePath\"";
        }
        else {
            my $collectparent;
            if (defined $ENV{'GSDL3HOME'}) {
                $collectparent = util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
            }
            else {
                # greenstone 2
                $collectparent = $ENV{'GSDLHOME'};
            }
            $collectparent = "" unless defined $collectparent;

            my $gsdl_href = "$working_dir/$assfilePath";

            # \Q...\E: treat the directory as a literal path so regex
            # metacharacters in the install location cannot break the match.
            $gsdl_href =~ s/^\Q$collectparent\E(\/)?//;
            $gsdl_href = "/gsdl/$gsdl_href";

            my $fserver = $ENV{'FEDORA_HOSTNAME'};
            my $fport   = $ENV{'FEDORA_SERVER_PORT'};

            $xlink_href = "xlink:href=\"http://$fserver:$fport$gsdl_href\"";
        }

        $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
        $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
        $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
        $all_text .= " </mets:file>\n";
        $all_text .= " </mets:fileGrp>\n";
    }

    # Remove control characters, keeping only LF and CR
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
674
675
6761;
Note: See TracBrowser for help on using the repository browser.