source: gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm@ 16414

Last change on this file since 16414 was 16414, checked in by ak19, 13 years ago

Slightly better way of dealing with GSDL3HOME not being set in the remote case, yet where it can still be declared as being in a separate location as GSDL3SRCHOME in the local GS3 case.

File size: 18.6 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
36sub BEGIN {
37 @FedoraMETSPlugout::ISA = ('METSPlugout');
38}
39
40my $arguments = [
41 { 'name' => "fedora_namespace",
42 'desc' => "{FedoraPlugout.fedora_namespace}",
43 'type' => "string",
44 'deft' => "greenstone",
45 'reqd' => "no",
46 'hiddengli' => "no"}
47 ];
48
49
50
51my $options = { 'name' => "FedoraMETSPlugout",
52 'desc' => "{FedoraMETSPlugout.desc}",
53 'abstract' => "no",
54 'inherits' => "yes",
55 'args' => $arguments
56 };
57
58
59sub new
60{
61 my ($class) = shift (@_);
62 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
63 push(@$plugoutlist, $class);
64
65
66 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
67 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
68
69 my $self = (defined $hashArgOptLists)? new METSPlugout($plugoutlist,$inputargs,$hashArgOptLists): new METSPlugout($plugoutlist,$inputargs);
70
71
72 return bless $self, $class;
73}
74
75
76sub output_mets_xml_header
77{
78 my $self = shift(@_);
79 my ($handle, $OID, $doc_title) = @_;
80
81 my $fnamespace = $self->{'fedora_namespace'};
82 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
83
84 my $collection = $ENV{'GSDLCOLLECTION'};
85
86 # Might need the following in the schemeLocation attribute for Fedora3
87 # http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd
88 my $extra_attr = "OBJID=\"$oid_namespace:$collection-$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
89
90 my $extra_schema = undef;
91
92 if ($ENV{'FEDORA2_HOME'}) {
93 $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
94 }
95 else {
96 $extra_attr .= " EXT_VERSION=\"1.1\"";
97 }
98
99 $self->output_mets_xml_header_extra_attribute($handle,$extra_attr,$extra_schema);
100
101 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
102
103}
104
105#
106# Print out "family" of doctxt.xml files
107#
108
109sub saveas_doctxt_section
110{
111 my $self = shift (@_);
112 my ($doc_obj,$working_dir,$section) = @_;
113
114 my $section_ptr=$doc_obj->_lookup_section($section);
115 return unless defined $section_ptr;
116
117 my $section_fnum ="1". $section;
118 $section_fnum =~ s/\./_/g;
119
120 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt$section_fnum.xml");
121
122 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
123
124 my $outhandler;
125
126 if (defined $self->{'xslt_writer'}){
127 $outhandler = $self->{'xslt_writer'};
128 }
129 else{
130 $outhandler = $self->get_output_handler($doc_txt_file);
131 }
132
133 $self->output_xml_header($outhandler);
134 $self->output_txt_section($outhandler,$doc_obj, $section);
135 $self->output_xml_footer($outhandler);
136
137
138 if (defined $self->{'xslt_writer'}){
139 $self->close_xslt_pipe();
140 }
141 else{
142 close($outhandler);
143 }
144
145
146 # Output all the subsections as separate files
147 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
148
149 $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
150 }
151
152
153}
154
155
156sub saveas_doctxt
157{
158 my $self = shift (@_);
159 my ($doc_obj,$working_dir) = @_;
160
161 my $section = $doc_obj->get_top_section();
162
163 $self->saveas_doctxt_section($doc_obj,$working_dir,$section);
164
165 $self->saveas_toc($doc_obj,$working_dir);
166}
167
168sub buffer_toc
169{
170 my $self = shift (@_);
171 my ($doc_obj,$working_dir,$section,$depth) = @_;
172
173 my $section_ptr=$doc_obj->_lookup_section($section);
174 return "" unless defined $section_ptr;
175
176 my $all_text = "";
177
178 my $section_num ="1". $section;
179 my $indent = " " x ($depth*2);
180
181 $all_text .= "$indent<Section id=\"$section_num\">\n";
182
183 # Output all the subsections as separate files
184 foreach my $subsection (@{$section_ptr->{'subsection_order'}})
185 {
186 $all_text
187 .= $self->buffer_toc($doc_obj, $working_dir,
188 "$section.$subsection",$depth+1);
189 }
190
191 $all_text .= "$indent</Section>\n";
192
193 return $all_text;
194}
195
196
197sub saveas_toc
198{
199 my $self = shift (@_);
200 my ($doc_obj,$working_dir) = @_;
201
202 my $section = $doc_obj->get_top_section();
203 my $section_ptr=$doc_obj->_lookup_section($section);
204 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
205
206 # If num_subsections is 0, then there is no nested TOC
207
208 if ($num_subsections>0) {
209
210 my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
211
212 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
213
214 my $outhandler;
215
216 if (defined $self->{'xslt_writer'}){
217 $outhandler = $self->{'xslt_writer'};
218 }
219 else{
220 $outhandler = $self->get_output_handler($doc_txt_file);
221 }
222 print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
223
224 if (defined $self->{'xslt_writer'}){
225 $self->close_xslt_pipe();
226 }
227 else{
228 close($outhandler);
229 }
230 }
231
232}
233
234
235sub buffer_mets_relsext_xml
236{
237 my $self = shift(@_);
238 my ($doc_obj) = @_;
239
240 my $OID = $doc_obj->get_OID();
241
242 my $fnamespace = $self->{'fedora_namespace'};
243 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
244 my $collection = $ENV{'GSDLCOLLECTION'};
245
246 my $fed_id = "$oid_namespace:$collection-$OID";
247
248 my $all_text = "";
249
250 my $top_section = $doc_obj->get_top_section();
251 my $plugin_type = $doc_obj->get_metadata_element($top_section,"Plugin");
252
253 if ((defined $plugin_type) && ($plugin_type eq "ImagePlugin"))
254 {
255
256 $all_text .= "<mets:amdSec ID=\"RELS-EXT\">\n";
257 $all_text .= " <mets:techMD ID=\"RELS-EXT1.0\" STATUS=\"A\">\n";
258 $all_text .= " <mets:mdWrap LABEL=\"RELS-EXT - RDF formatted relationship metadata\" MDTYPE=\"OTHER\" MIMETYPE=\"text/xml\">\n";
259 $all_text .= " <mets:xmlData>\n";
260 $all_text .= " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:fedora-model=\"info:fedora/fedora-system:def/model#\">\n";
261 $all_text .= " <rdf:Description rdf:about=\"info:fedora/$fed_id\">\n";
262 $all_text .= " <fedora-model:hasContentModel rdf:resource=\"info:fedora/demo:UVA_STD_IMAGE\"/>\n";
263 $all_text .= " </rdf:Description>\n";
264 $all_text .= " </rdf:RDF>\n";
265 $all_text .= " </mets:xmlData>\n";
266 $all_text .= " </mets:mdWrap>\n";
267 $all_text .= " </mets:techMD>\n";
268 $all_text .= "</mets:amdSec>\n";
269 }
270
271 return $all_text;
272}
273
274
275#
276# Print out docmets.xml file
277#
278sub output_mets_section
279{
280 my $self = shift(@_);
281 my ($handle, $doc_obj, $section, $working_dir) = @_;
282
283 # print out the dmdSection
284 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section);
285
286 print $handle $self->buffer_mets_relsext_xml($doc_obj);
287
288 print $handle "<mets:fileSec>\n";
289 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
290
291 # Generate Filestream for Table of Contents (TOC)
292 my $section_ptr=$doc_obj->_lookup_section($section);
293 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
294
295 # If num_subsections is 0, then there is no nested TOC
296
297 if ($num_subsections>0) {
298 print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
299 }
300
301 # print out the fileSection by sections
302 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir);
303
304 # print out the whole fileSection
305 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$working_dir);
306
307 print $handle " </mets:fileGrp>\n";
308 print $handle "</mets:fileSec>\n";
309
310 # print out the StructMapSection by sections
311
312 my $struct_type = "fedora:dsBindingMap";
313
314 # If document is going to make use of deminators (BMech and BDef) then
315 # need to code up more output XML here (structMap)and in
316 # METS:behaviorSec (Fedora extension?) sections
317
318}
319
320sub buffer_mets_amdSec_header
321{
322 my $self = shift(@_);
323 my ($section,$id) = @_;
324
325 # convert section number
326 my $section_num ="1". $section;
327
328 my $all_text = "";
329
330 my $label_attr = "";
331
332 $all_text .= "<mets:amdSec ID=\"$id$section\" >\n";
333 $all_text .= " <mets:techMD ID=\"$id$section.0\">\n"; # .0 fedora version number?
334
335 $label_attr = "LABEL=\"Metadata\"";
336
337 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"".$id."gsdl$section_num\">\n";
338 $all_text .= " <mets:xmlData>\n";
339
340 return $all_text;
341
342}
343
344sub buffer_mets_amdSec_footer
345{
346 my $self = shift(@_);
347
348 my $all_text = "";
349
350 $all_text .= " </mets:xmlData>\n";
351 $all_text .= " </mets:mdWrap>\n";
352
353 $all_text .= " </mets:techMD>\n";
354 $all_text .= "</mets:amdSec>\n";
355
356 return $all_text;
357
358}
359
360sub oai_dc_metadata_xml
361{
362 my $self = shift(@_);
363 my ($doc_obj,$section) = @_;
364
365 my $all_text = "";
366
367 my $dc_namespace = "";
368 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
369 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";
370
371 $all_text .= " <oai_dc:dc $dc_namespace>\n";
372
373 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
374 $all_text .= " </oai_dc:dc>\n";
375
376 return $all_text;
377}
378
379
380
381
382
383# Work out the what the metadata set prefixes (dc,dls etc.) are for
384# this document
385
386sub metadata_set_prefixes
387{
388 my $self = shift(@_);
389 my ($doc_obj, $section) = @_;
390
391 $section="" unless defined $section;
392
393 my $section_ptr = $doc_obj->_lookup_section($section);
394 return {} unless defined $section_ptr;
395
396 my $unique_prefix = {};
397
398 foreach my $data (@{$section_ptr->{'metadata'}})
399 {
400 my ($prefix) = ($data->[0]=~ m/^(.*?)\./);
401
402 if (defined $prefix)
403 {
404 next if ($prefix eq "dc"); # skip dublin core as handled separately elsewhere
405
406 $unique_prefix->{$prefix} = 1;
407 }
408 else
409 {
410 $unique_prefix->{"ex"} = 1;
411 }
412
413 }
414
415 return $unique_prefix;
416}
417
418
419sub mds_metadata_xml
420{
421 my $self = shift(@_);
422 my ($doc_obj, $section, $mds_prefix, $namespace) = @_;
423
424 # build up string of metadata with $mds_prefix
425 $section="" unless defined $section;
426
427 my $section_ptr = $doc_obj->_lookup_section($section);
428 return "" unless defined $section_ptr;
429
430 my $all_text="";
431 $all_text .= " <$mds_prefix:$mds_prefix $namespace>\n";
432
433
434 foreach my $data (@{$section_ptr->{'metadata'}})
435 {
436 if ($data->[0]=~ m/^(?:(.*?)\.)?(.*)$/)
437 {
438 my $curr_mds_prefix = $1;
439 my $mds_full_element = $2;
440
441 $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;
442
443 if ($curr_mds_prefix eq $mds_prefix)
444 {
445 # split up full element in the form Title^en into element=Title, attr="en"
446 my ($mds_element,$subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);
447 my $mds_attr = (defined $subelem) ? "qualifier=\"$subelem\"" : "";
448
449 my $escaped_value = &docprint::escape_text($data->[1]);
450
451 $all_text .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
452 }
453 }
454 }
455
456 $all_text .= " </$mds_prefix:$mds_prefix>\n";
457
458
459 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
460
461 return $all_text;
462}
463
464
465
466sub buffer_mets_dmdSection_section_xml
467{
468 my $self = shift(@_);
469 my ($doc_obj,$section) = @_;
470
471 $section="" unless defined $section;
472
473 my $section_ptr=$doc_obj->_lookup_section($section);
474 return "" unless defined $section_ptr;
475
476 my $all_text = "";
477
478 $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
479 $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
480 $all_text .= $self->buffer_mets_amdSec_footer($section);
481
482 # for each metadata set
483 my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);
484
485 foreach my $md_set (keys %$md_sets)
486 {
487 # Greenstone's agnostic approach to metadata sets conflicts with
488 # Fedoras more clinically prescribed one. Fake a namespace for
489 # each $md_set to keep both sides happy
490
491 my $fake_namespace
492 = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";
493 my $id_caps = $md_set;
494 $id_caps =~ tr/[a-z]/[A-Z]/;
495
496 $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
497 $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
498 $all_text .= $self->buffer_mets_amdSec_footer($section);
499 }
500
501
502 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
503 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
504 }
505
506 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
507
508 return $all_text;
509}
510
511
512sub doctxt_to_xlink
513{
514 my $self = shift @_;
515 my ($fname,$working_dir) = @_;
516
517 my $xlink_href;
518
519 my $fedora_prefix = $ENV{'FEDORA_HOME'};
520 if (!defined $fedora_prefix) {
521 $xlink_href = "file:$fname";
522 }
523 else
524 {
525 my $collectparent;
526 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
527 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
528 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
529 }
530 else { # try the default location for the web directory
531 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
532 }
533 }
534 else {
535 # greenstone 2
536 $collectparent = $ENV{'GSDLHOME'};
537 }
538
539 my $gsdl_href = "$working_dir/$fname";
540 $collectparent =~ s/\\/\\\\/g; # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
541 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
542 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
543 $gsdl_href = "/gsdl/$gsdl_href"; # prepend gsdl
544
545 my $fserver = $ENV{'FEDORA_HOSTNAME'};
546 my $fport = $ENV{'FEDORA_SERVER_PORT'};
547
548 my $fdomain = "http://$fserver:$fport";
549 $xlink_href = "$fdomain$gsdl_href";
550 }
551
552
553 return $xlink_href;
554
555}
556
557
558sub buffer_mets_fileSection_toc
559{
560 my $self = shift(@_);
561 my ($doc_obj,$section,$working_dir) = @_;
562
563 my $opt_attr = "OWNERID=\"M\"";
564
565 my $all_text = ' <mets:fileGrp ID="TOC">'. "\n";
566 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n";
567 my $xlink = $self->doctxt_to_xlink("doctoc.xml",$working_dir);
568
569 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
570
571 $all_text .= ' xlink:title="Table of Contents"/>' . "\n";
572 $all_text .= " </mets:file>\n";
573 $all_text .= " </mets:fileGrp>\n";
574
575 return $all_text;
576}
577
578
579sub buffer_mets_fileSection_section_xml
580{
581 my $self = shift(@_);
582 my ($doc_obj,$section,$working_dir) = @_;
583
584 my $is_txt_split = 1;
585 my $opt_owner_id = "OWNERID=\"M\"";
586
587 my $all_text
588 = $self->SUPER::buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir,$is_txt_split, $opt_owner_id,"SECTION");
589
590
591 return $all_text;
592}
593
594sub buffer_mets_fileWhole_section_xml
595{
596 my $self = shift(@_);
597 my ($doc_obj,$section,$working_dir) = @_;
598
599 my $section_ptr = $doc_obj-> _lookup_section($section);
600 return "" unless defined $section_ptr;
601
602 my $all_text="";
603
604 my $fileID=0;
605
606 # Output the fileSection for the whole section
607 # => get the sourcefile and associative file
608
609 my $id_root = "";
610 my $opt_owner_id = "OWNERID=\"M\"";
611
612
613 my $first_assocfile = 1;
614
615 foreach my $data (@{$section_ptr->{'metadata'}}){
616 my $escaped_value = &docprint::escape_text($data->[1]);
617
618 if ($data->[0] eq "gsdlassocfile"){
619
620 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
621 my $assoc_file = $1;
622 my $mime_type = $2;
623 my $assoc_dir = $3;
624
625 if ($first_assocfile) {
626 $id_root = "url";
627 $first_assocfile = 0;
628 }
629 else {
630 $id_root = "FG$assoc_file";
631 }
632
633 $id_root =~ s/\//_/g;
634 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
635
636 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
637 ++$fileID;
638
639 my $mime_attr = "MIMETYPE=\"$mime_type\"";
640 my $xlink_title = "xlink:title=\"$assoc_file\"";
641
642 my $id_attr;
643 my $xlink_href;
644
645 $id_attr = "ID=\"F$id_root.0\"";
646
647 my $fedora_prefix = $ENV{'FEDORA_HOME'};
648 if (!defined $fedora_prefix) {
649 $xlink_href = "xlink:href=\"$assfilePath\"";
650 }
651 else
652 {
653 my $collectparent;
654 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
655 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
656 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
657 }
658 else { # try the default location for the web directory
659 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
660 }
661 }
662 else {
663 # greenstone 2
664 $collectparent = $ENV{'GSDLHOME'};
665 }
666
667 my $gsdl_href = "$working_dir/$assfilePath";
668 $collectparent =~ s/\\/\\\\/g; # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
669 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
670 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
671 $gsdl_href = "/gsdl/$gsdl_href"; # prepend gsdl
672
673 my $fserver = $ENV{'FEDORA_HOSTNAME'};
674 my $fport = $ENV{'FEDORA_SERVER_PORT'};
675
676 my $fdomain = "http://$fserver:$fport";
677 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
678 }
679
680 my $top_section = $doc_obj->get_top_section();
681 my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
682
683 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
684 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
685
686 $all_text .= " </mets:file>\n";
687
688 $all_text .= " </mets:fileGrp>\n";
689 }
690 }
691
692 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
693
694 return $all_text;
695}
696
697
6981;
Note: See TracBrowser for help on using the repository browser.