source: gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm@ 16426

Last change on this file since 16426 was 16426, checked in by ak19, 16 years ago

Minor changes. Although it may not be necessary, using filename_cat to ensure that a filename that is only going to be replaced by a regular expression is still properly created for Windows

File size: 18.7 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
36sub BEGIN {
37 @FedoraMETSPlugout::ISA = ('METSPlugout');
38}
39
40my $arguments = [
41 { 'name' => "fedora_namespace",
42 'desc' => "{FedoraPlugout.fedora_namespace}",
43 'type' => "string",
44 'deft' => "greenstone",
45 'reqd' => "no",
46 'hiddengli' => "no"}
47 ];
48
49
50
51my $options = { 'name' => "FedoraMETSPlugout",
52 'desc' => "{FedoraMETSPlugout.desc}",
53 'abstract' => "no",
54 'inherits' => "yes",
55 'args' => $arguments
56 };
57
58
59sub new
60{
61 my ($class) = shift (@_);
62 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
63 push(@$plugoutlist, $class);
64
65
66 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
67 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
68
69 my $self = (defined $hashArgOptLists)? new METSPlugout($plugoutlist,$inputargs,$hashArgOptLists): new METSPlugout($plugoutlist,$inputargs);
70
71
72 return bless $self, $class;
73}
74
75
76sub output_mets_xml_header
77{
78 my $self = shift(@_);
79 my ($handle, $OID, $doc_title) = @_;
80
81 my $fnamespace = $self->{'fedora_namespace'};
82 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
83
84 my $collection = $ENV{'GSDLCOLLECTION'};
85
86 # Might need the following in the schemeLocation attribute for Fedora3
87 # http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd
88 my $extra_attr = "OBJID=\"$oid_namespace:$collection-$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
89
90 my $extra_schema = undef;
91
92 if ($ENV{'FEDORA2_HOME'}) {
93 $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
94 }
95 else {
96 $extra_attr .= " EXT_VERSION=\"1.1\"";
97 }
98
99 $self->output_mets_xml_header_extra_attribute($handle,$extra_attr,$extra_schema);
100
101 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
102
103}
104
105#
106# Print out "family" of doctxt.xml files
107#
108
109sub saveas_doctxt_section
110{
111 my $self = shift (@_);
112 my ($doc_obj,$working_dir,$section) = @_;
113
114 my $section_ptr=$doc_obj->_lookup_section($section);
115 return unless defined $section_ptr;
116
117 my $section_fnum ="1". $section;
118 $section_fnum =~ s/\./_/g;
119
120 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt$section_fnum.xml");
121
122 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
123
124 my $outhandler;
125
126 if (defined $self->{'xslt_writer'}){
127 $outhandler = $self->{'xslt_writer'};
128 }
129 else{
130 $outhandler = $self->get_output_handler($doc_txt_file);
131 }
132
133 $self->output_xml_header($outhandler);
134 $self->output_txt_section($outhandler,$doc_obj, $section);
135 $self->output_xml_footer($outhandler);
136
137
138 if (defined $self->{'xslt_writer'}){
139 $self->close_xslt_pipe();
140 }
141 else{
142 close($outhandler);
143 }
144
145
146 # Output all the subsections as separate files
147 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
148
149 $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
150 }
151
152
153}
154
155
156sub saveas_doctxt
157{
158 my $self = shift (@_);
159 my ($doc_obj,$working_dir) = @_;
160
161 my $section = $doc_obj->get_top_section();
162
163 $self->saveas_doctxt_section($doc_obj,$working_dir,$section);
164
165 $self->saveas_toc($doc_obj,$working_dir);
166}
167
168sub buffer_toc
169{
170 my $self = shift (@_);
171 my ($doc_obj,$working_dir,$section,$depth) = @_;
172
173 my $section_ptr=$doc_obj->_lookup_section($section);
174 return "" unless defined $section_ptr;
175
176 my $all_text = "";
177
178 my $section_num ="1". $section;
179 my $indent = " " x ($depth*2);
180
181 $all_text .= "$indent<Section id=\"$section_num\">\n";
182
183 # Output all the subsections as separate files
184 foreach my $subsection (@{$section_ptr->{'subsection_order'}})
185 {
186 $all_text
187 .= $self->buffer_toc($doc_obj, $working_dir,
188 "$section.$subsection",$depth+1);
189 }
190
191 $all_text .= "$indent</Section>\n";
192
193 return $all_text;
194}
195
196
197sub saveas_toc
198{
199 my $self = shift (@_);
200 my ($doc_obj,$working_dir) = @_;
201
202 my $section = $doc_obj->get_top_section();
203 my $section_ptr=$doc_obj->_lookup_section($section);
204 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
205
206 # If num_subsections is 0, then there is no nested TOC
207
208 if ($num_subsections>0) {
209
210 my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
211
212 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
213
214 my $outhandler;
215
216 if (defined $self->{'xslt_writer'}){
217 $outhandler = $self->{'xslt_writer'};
218 }
219 else{
220 $outhandler = $self->get_output_handler($doc_txt_file);
221 }
222 print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
223
224 if (defined $self->{'xslt_writer'}){
225 $self->close_xslt_pipe();
226 }
227 else{
228 close($outhandler);
229 }
230 }
231
232}
233
234
235sub buffer_mets_relsext_xml
236{
237 my $self = shift(@_);
238 my ($doc_obj) = @_;
239
240 my $OID = $doc_obj->get_OID();
241
242 my $fnamespace = $self->{'fedora_namespace'};
243 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
244 my $collection = $ENV{'GSDLCOLLECTION'};
245
246 my $fed_id = "$oid_namespace:$collection-$OID";
247
248 my $all_text = "";
249
250 my $top_section = $doc_obj->get_top_section();
251 my $plugin_type = $doc_obj->get_metadata_element($top_section,"Plugin");
252
253 if ((defined $plugin_type) && ($plugin_type eq "ImagePlugin"))
254 {
255
256 $all_text .= "<mets:amdSec ID=\"RELS-EXT\">\n";
257 $all_text .= " <mets:techMD ID=\"RELS-EXT1.0\" STATUS=\"A\">\n";
258 $all_text .= " <mets:mdWrap LABEL=\"RELS-EXT - RDF formatted relationship metadata\" MDTYPE=\"OTHER\" MIMETYPE=\"text/xml\">\n";
259 $all_text .= " <mets:xmlData>\n";
260 $all_text .= " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:fedora-model=\"info:fedora/fedora-system:def/model#\">\n";
261 $all_text .= " <rdf:Description rdf:about=\"info:fedora/$fed_id\">\n";
262 $all_text .= " <fedora-model:hasContentModel rdf:resource=\"info:fedora/demo:UVA_STD_IMAGE\"/>\n";
263 $all_text .= " </rdf:Description>\n";
264 $all_text .= " </rdf:RDF>\n";
265 $all_text .= " </mets:xmlData>\n";
266 $all_text .= " </mets:mdWrap>\n";
267 $all_text .= " </mets:techMD>\n";
268 $all_text .= "</mets:amdSec>\n";
269 }
270
271 return $all_text;
272}
273
274
275#
276# Print out docmets.xml file
277#
278sub output_mets_section
279{
280 my $self = shift(@_);
281 my ($handle, $doc_obj, $section, $working_dir) = @_;
282
283 # print out the dmdSection
284 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section);
285
286 print $handle $self->buffer_mets_relsext_xml($doc_obj);
287
288 print $handle "<mets:fileSec>\n";
289 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
290
291 # Generate Filestream for Table of Contents (TOC)
292 my $section_ptr=$doc_obj->_lookup_section($section);
293 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
294
295 # If num_subsections is 0, then there is no nested TOC
296
297 if ($num_subsections>0) {
298 print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
299 }
300
301 # print out the fileSection by sections
302 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir);
303
304 # print out the whole fileSection
305 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$working_dir);
306
307 print $handle " </mets:fileGrp>\n";
308 print $handle "</mets:fileSec>\n";
309
310 # print out the StructMapSection by sections
311
312 my $struct_type = "fedora:dsBindingMap";
313
314 # If document is going to make use of deminators (BMech and BDef) then
315 # need to code up more output XML here (structMap)and in
316 # METS:behaviorSec (Fedora extension?) sections
317
318}
319
320sub buffer_mets_amdSec_header
321{
322 my $self = shift(@_);
323 my ($section,$id) = @_;
324
325 # convert section number
326 my $section_num ="1". $section;
327
328 my $all_text = "";
329
330 my $label_attr = "";
331
332 $all_text .= "<mets:amdSec ID=\"$id$section\" >\n";
333 $all_text .= " <mets:techMD ID=\"$id$section.0\">\n"; # .0 fedora version number?
334
335 $label_attr = "LABEL=\"Metadata\"";
336
337 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"".$id."gsdl$section_num\">\n";
338 $all_text .= " <mets:xmlData>\n";
339
340 return $all_text;
341
342}
343
344sub buffer_mets_amdSec_footer
345{
346 my $self = shift(@_);
347
348 my $all_text = "";
349
350 $all_text .= " </mets:xmlData>\n";
351 $all_text .= " </mets:mdWrap>\n";
352
353 $all_text .= " </mets:techMD>\n";
354 $all_text .= "</mets:amdSec>\n";
355
356 return $all_text;
357
358}
359
360sub oai_dc_metadata_xml
361{
362 my $self = shift(@_);
363 my ($doc_obj,$section) = @_;
364
365 my $all_text = "";
366
367 my $dc_namespace = "";
368 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
369 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";
370
371 $all_text .= " <oai_dc:dc $dc_namespace>\n";
372
373 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
374 $all_text .= " </oai_dc:dc>\n";
375
376 return $all_text;
377}
378
379
380
381
382
383# Work out the what the metadata set prefixes (dc,dls etc.) are for
384# this document
385
386sub metadata_set_prefixes
387{
388 my $self = shift(@_);
389 my ($doc_obj, $section) = @_;
390
391 $section="" unless defined $section;
392
393 my $section_ptr = $doc_obj->_lookup_section($section);
394 return {} unless defined $section_ptr;
395
396 my $unique_prefix = {};
397
398 foreach my $data (@{$section_ptr->{'metadata'}})
399 {
400 my ($prefix) = ($data->[0]=~ m/^(.*?)\./);
401
402 if (defined $prefix)
403 {
404 next if ($prefix eq "dc"); # skip dublin core as handled separately elsewhere
405
406 $unique_prefix->{$prefix} = 1;
407 }
408 else
409 {
410 $unique_prefix->{"ex"} = 1;
411 }
412
413 }
414
415 return $unique_prefix;
416}
417
418
419sub mds_metadata_xml
420{
421 my $self = shift(@_);
422 my ($doc_obj, $section, $mds_prefix, $namespace) = @_;
423
424 # build up string of metadata with $mds_prefix
425 $section="" unless defined $section;
426
427 my $section_ptr = $doc_obj->_lookup_section($section);
428 return "" unless defined $section_ptr;
429
430 my $all_text="";
431 $all_text .= " <$mds_prefix:$mds_prefix $namespace>\n";
432
433
434 foreach my $data (@{$section_ptr->{'metadata'}})
435 {
436 if ($data->[0]=~ m/^(?:(.*?)\.)?(.*)$/)
437 {
438 my $curr_mds_prefix = $1;
439 my $mds_full_element = $2;
440
441 $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;
442
443 if ($curr_mds_prefix eq $mds_prefix)
444 {
445 # split up full element in the form Title^en into element=Title, attr="en"
446 my ($mds_element,$subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);
447 my $mds_attr = (defined $subelem) ? "qualifier=\"$subelem\"" : "";
448
449 my $escaped_value = &docprint::escape_text($data->[1]);
450
451 $all_text .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
452 }
453 }
454 }
455
456 $all_text .= " </$mds_prefix:$mds_prefix>\n";
457
458
459 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
460
461 return $all_text;
462}
463
464
465
466sub buffer_mets_dmdSection_section_xml
467{
468 my $self = shift(@_);
469 my ($doc_obj,$section) = @_;
470
471 $section="" unless defined $section;
472
473 my $section_ptr=$doc_obj->_lookup_section($section);
474 return "" unless defined $section_ptr;
475
476 my $all_text = "";
477
478 $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
479 $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
480 $all_text .= $self->buffer_mets_amdSec_footer($section);
481
482 # for each metadata set
483 my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);
484
485 foreach my $md_set (keys %$md_sets)
486 {
487 # Greenstone's agnostic approach to metadata sets conflicts with
488 # Fedoras more clinically prescribed one. Fake a namespace for
489 # each $md_set to keep both sides happy
490
491 my $fake_namespace
492 = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";
493 my $id_caps = $md_set;
494 $id_caps =~ tr/[a-z]/[A-Z]/;
495
496 $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
497 $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
498 $all_text .= $self->buffer_mets_amdSec_footer($section);
499 }
500
501
502 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
503 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
504 }
505
506 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
507
508 return $all_text;
509}
510
511
512sub doctxt_to_xlink
513{
514 my $self = shift @_;
515 my ($fname,$working_dir) = @_;
516
517 my $xlink_href;
518
519 my $fedora_prefix = $ENV{'FEDORA_HOME'};
520 if (!defined $fedora_prefix) {
521 $xlink_href = "file:$fname";
522 }
523 else
524 {
525 my $collectparent;
526 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
527 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
528 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
529 }
530 else { # try the default location for the web directory
531 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
532 }
533 }
534 else {
535 # greenstone 2
536 $collectparent = $ENV{'GSDLHOME'};
537 }
538
539 my $gsdl_href = &util::filename_cat($working_dir, $fname);
540 $collectparent =~ s/\\/\\\\/g; # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
541 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
542 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
543 $gsdl_href = "/gsdl/$gsdl_href"; # prepend gsdl
544
545 my $fserver = $ENV{'FEDORA_HOSTNAME'};
546 my $fport = $ENV{'FEDORA_SERVER_PORT'};
547
548 my $fdomain = "http://$fserver:$fport";
549 $xlink_href = "$fdomain$gsdl_href";
550 }
551
552
553 return $xlink_href;
554
555}
556
557
558sub buffer_mets_fileSection_toc
559{
560 my $self = shift(@_);
561 my ($doc_obj,$section,$working_dir) = @_;
562
563 my $opt_attr = "OWNERID=\"M\"";
564
565 my $all_text = ' <mets:fileGrp ID="TOC">'. "\n";
566 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n";
567 my $xlink = $self->doctxt_to_xlink("doctoc.xml",$working_dir);
568
569 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
570
571 $all_text .= ' xlink:title="Table of Contents"/>' . "\n";
572 $all_text .= " </mets:file>\n";
573 $all_text .= " </mets:fileGrp>\n";
574
575 return $all_text;
576}
577
578
579sub buffer_mets_fileSection_section_xml
580{
581 my $self = shift(@_);
582 my ($doc_obj,$section,$working_dir) = @_;
583
584 my $is_txt_split = 1;
585 my $opt_owner_id = "OWNERID=\"M\"";
586
587 my $all_text
588 = $self->SUPER::buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir,$is_txt_split, $opt_owner_id,"SECTION");
589
590
591 return $all_text;
592}
593
594sub buffer_mets_fileWhole_section_xml
595{
596 my $self = shift(@_);
597 my ($doc_obj,$section,$working_dir) = @_;
598
599 my $section_ptr = $doc_obj-> _lookup_section($section);
600 return "" unless defined $section_ptr;
601
602 my $all_text="";
603
604 my $fileID=0;
605
606 # Output the fileSection for the whole section
607 # => get the sourcefile and associative file
608
609 my $id_root = "";
610 my $opt_owner_id = "OWNERID=\"M\"";
611
612
613 my $first_assocfile = 1;
614
615 foreach my $data (@{$section_ptr->{'metadata'}}){
616 my $escaped_value = &docprint::escape_text($data->[1]);
617
618 if ($data->[0] eq "gsdlassocfile"){
619
620 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
621 my $assoc_file = $1;
622 my $mime_type = $2;
623 my $assoc_dir = $3;
624
625 if ($first_assocfile) {
626 $id_root = "url";
627 $first_assocfile = 0;
628 }
629 else {
630 $id_root = "FG$assoc_file";
631 }
632
633 $id_root =~ s/\//_/g;
634 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
635
636 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
637 ++$fileID;
638
639 my $mime_attr = "MIMETYPE=\"$mime_type\"";
640 my $xlink_title = "xlink:title=\"$assoc_file\"";
641
642 my $id_attr;
643 my $xlink_href;
644
645 $id_attr = "ID=\"F$id_root.0\"";
646
647 my $fedora_prefix = $ENV{'FEDORA_HOME'};
648 if (!defined $fedora_prefix) {
649 $xlink_href = "xlink:href=\"$assfilePath\"";
650 }
651 else
652 {
653 my $collectparent;
654 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
655 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
656 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
657 }
658 else { # try the default location for the web directory
659 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
660 }
661 }
662 else {
663 # greenstone 2
664 $collectparent = $ENV{'GSDLHOME'};
665 }
666
667 my $gsdl_href = &util::filename_cat($working_dir,$assfilePath);
668 $collectparent =~ s/\\/\\\\/g; # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
669 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
670 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
671 $gsdl_href = "/gsdl/$gsdl_href"; # prepend gsdl
672
673 my $fserver = $ENV{'FEDORA_HOSTNAME'};
674 my $fport = $ENV{'FEDORA_SERVER_PORT'};
675
676 my $fdomain = "http://$fserver:$fport";
677 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
678 }
679
680 my $top_section = $doc_obj->get_top_section();
681 my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
682
683 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
684 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
685
686 $all_text .= " </mets:file>\n";
687
688 $all_text .= " </mets:fileGrp>\n";
689 }
690 }
691
692 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
693
694 return $all_text;
695}
696
697
6981;
Note: See TracBrowser for help on using the repository browser.