source: gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm@ 15583

Last change on this file since 15583 was 15583, checked in by ak19, 16 years ago

In method doctxt_to_xlink, made sure that backslashes in filepaths (as happens in Windows) don't interfere during the regex substitution

File size: 17.2 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
# Set up inheritance at compile time: FedoraMETSPlugout specialises
# the generic METSPlugout.
BEGIN {
    @FedoraMETSPlugout::ISA = qw(METSPlugout);
}
39
# Arguments specific to this plugout (on top of those it inherits).
my $arguments = [
    {
        name      => "fedora_namespace",
        desc      => "{FedoraPlugout.fedora_namespace}",
        type      => "string",
        deft      => "greenstone",
        reqd      => "no",
        hiddengli => "no",
    },
];

# Option description for this plugout; 'args' points at the argument
# list declared above.
my $options = {
    name     => "FedoraMETSPlugout",
    desc     => "{FedoraMETSPlugout.desc}",
    abstract => "no",
    inherits => "yes",
    args     => $arguments,
};
57
58
# Constructor.
#
# Parameters:
#   $plugoutlist     - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to METSPlugout->new
#   $hashArgOptLists - hash ref whose "ArgList"/"OptList" arrays receive
#                      this plugout's argument and option descriptions
#
# Returns the object built by METSPlugout->new, re-blessed into $class.
sub new
{
    my ($class) = shift (@_);
    my ($plugoutlist, $inputargs, $hashArgOptLists) = @_;
    push(@$plugoutlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Explicit Class->method call instead of the indirect object form
    # ("new METSPlugout(...)"), which Perl parses heuristically.
    my $self = (defined $hashArgOptLists)
        ? METSPlugout->new($plugoutlist, $inputargs, $hashArgOptLists)
        : METSPlugout->new($plugoutlist, $inputargs);

    return bless $self, $class;
}
74
75
# Write the opening of the METS document for one Fedora object.
#
# Parameters:
#   $handle    - output file handle
#   $OID       - document identifier, used to build the OBJID attribute
#   $doc_title - document title, used as the LABEL attribute
#
# The root element attributes are passed to
# output_mets_xml_header_extra_attribute(); a <mets:metsHdr> marking the
# record as active is then printed to $handle.
sub output_mets_xml_header
{
    my ($self, $handle, $OID, $doc_title) = @_;

    # Use "test" as the namespace when none has been configured
    my $oid_namespace = $self->{'fedora_namespace'};
    $oid_namespace = "test" unless defined $oid_namespace;

    my $collection = $ENV{'GSDLCOLLECTION'};

    # Might need the following in the schemaLocation attribute for Fedora3
    # http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd
    my @attributes = (
        "OBJID=\"$oid_namespace:$collection-$OID\"",
        "TYPE=\"FedoraObject\"",
        "LABEL=\"$doc_title\"",
    );

    my $extra_schema = undef;

    if ($ENV{'FEDORA2_HOME'}) {
        $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
    }
    else {
        push(@attributes, "EXT_VERSION=\"1.1\"");
    }

    my $extra_attr = join(" ", @attributes);
    $self->output_mets_xml_header_extra_attribute($handle, $extra_attr, $extra_schema);

    print {$handle} '<mets:metsHdr RECORDSTATUS="A"/>' . "\n"; # A = active
}
104
105#
106# Print out "family" of doctxt.xml files
107#
108
# Write the text of one section to its own doctxt<N>.xml file inside
# $working_dir, then recurse into each of its subsections.
#
# Parameters:
#   $doc_obj     - document object providing _lookup_section()
#   $working_dir - directory the file is written to
#   $section     - section identifier ("" or ".1.2" style)
#
# Returns immediately if the section cannot be found.
sub saveas_doctxt_section
{
    my ($self, $doc_obj, $working_dir, $section) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return;
    }

    # File name suffix: "1" followed by the section id, dots as underscores
    (my $section_fnum = "1" . $section) =~ s/\./_/g;

    my $doc_txt_file
        = &util::filename_cat($working_dir, "doctxt" . $section_fnum . ".xml");

    $self->open_xslt_pipe($doc_txt_file, $self->{'xslt_txt'});

    # Write through the XSLT writer when one exists, else straight to file
    my $outhandler = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($doc_txt_file);

    $self->output_xml_header($outhandler);
    $self->output_txt_section($outhandler, $doc_obj, $section);
    $self->output_xml_footer($outhandler);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($outhandler);
    }

    # Output all the subsections as separate files
    foreach my $child (@{$section_ptr->{'subsection_order'}}) {
        $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$child");
    }
}
154
155
# Save the whole document as a family of doctxt files, starting from the
# top section, followed by the table of contents file.
#
# Parameters:
#   $doc_obj     - document object providing get_top_section()
#   $working_dir - directory the files are written to
sub saveas_doctxt
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $top_section = $doc_obj->get_top_section();

    $self->saveas_doctxt_section($doc_obj, $working_dir, $top_section);
    $self->saveas_toc($doc_obj, $working_dir);
}
167
# Build the nested <Section> table-of-contents markup for $section and
# all of its subsections.
#
# Parameters:
#   $doc_obj     - document object providing _lookup_section()
#   $working_dir - passed through to the recursive calls
#   $section     - section identifier ("" or ".1.2" style)
#   $depth       - nesting depth; controls the indentation
#
# Returns the markup as a string, or "" if the section cannot be found.
sub buffer_toc
{
    my ($self, $doc_obj, $working_dir, $section, $depth) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return "";
    }

    my $section_num = "1" . $section;
    my $indent      = " " x ($depth*2);

    # Markup for every subsection, in document order
    my $nested = join("",
        map { scalar $self->buffer_toc($doc_obj, $working_dir,
                                       "$section.$_", $depth+1) }
            @{$section_ptr->{'subsection_order'}});

    return "$indent<Section id=\"$section_num\">\n"
         . $nested
         . "$indent</Section>\n";
}
195
196
# Write the table of contents (as built by buffer_toc) to doctoc.xml in
# $working_dir.
#
# Parameters:
#   $doc_obj     - document object providing get_top_section()
#   $working_dir - directory the file is written to
sub saveas_toc
{
    my ($self, $doc_obj, $working_dir) = @_;

    my $top_section = $doc_obj->get_top_section();
    my $toc_file    = &util::filename_cat($working_dir, "doctoc.xml");

    $self->open_xslt_pipe($toc_file, $self->{'xslt_txt'});

    # Write through the XSLT writer when one exists, else straight to file
    my $outhandler = (defined $self->{'xslt_writer'})
        ? $self->{'xslt_writer'}
        : $self->get_output_handler($toc_file);

    print {$outhandler} $self->buffer_toc($doc_obj, $working_dir, $top_section, 0);

    if (defined $self->{'xslt_writer'}) {
        $self->close_xslt_pipe();
    }
    else {
        close($outhandler);
    }
}
227
228
# Build the RELS-EXT administrative metadata section for a document.
#
# Parameters:
#   $doc_obj - document object providing get_OID(), get_top_section()
#              and get_metadata_element()
#
# Returns the <mets:amdSec ID="RELS-EXT"> markup when the top section's
# "Plugin" metadata is "ImagePlug"; otherwise returns "".
sub buffer_mets_relsext_xml
{
    my ($self, $doc_obj) = @_;

    my $OID = $doc_obj->get_OID();

    # Use "test" as the namespace when none has been configured
    my $oid_namespace = $self->{'fedora_namespace'};
    $oid_namespace = "test" unless defined $oid_namespace;

    my $collection = $ENV{'GSDLCOLLECTION'};
    my $fed_id     = "$oid_namespace:$collection-$OID";

    my $top_section = $doc_obj->get_top_section();
    my $plugin_type = $doc_obj->get_metadata_element($top_section, "Plugin");

    # Only image documents get a content-model relationship
    unless ((defined $plugin_type) && ($plugin_type eq "ImagePlug")) {
        return "";
    }

    return join("",
        qq{<mets:amdSec ID="RELS-EXT">\n},
        qq{ <mets:techMD ID="RELS-EXT1.0" STATUS="A">\n},
        qq{ <mets:mdWrap LABEL="RELS-EXT - RDF formatted relationship metadata" MDTYPE="OTHER" MIMETYPE="text/xml">\n},
        qq{ <mets:xmlData>\n},
        qq{ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora-model="info:fedora/fedora-system:def/model#">\n},
        qq{ <rdf:Description rdf:about="info:fedora/$fed_id">\n},
        qq{ <fedora-model:hasContentModel rdf:resource="info:fedora/demo:UVA_STD_IMAGE"/>\n},
        qq{ </rdf:Description>\n},
        qq{ </rdf:RDF>\n},
        qq{ </mets:xmlData>\n},
        qq{ </mets:mdWrap>\n},
        qq{ </mets:techMD>\n},
        qq{</mets:amdSec>\n},
    );
}
267
268
269#
270# Print out docmets.xml file
271#
# Print the body of docmets.xml for a document: the descriptive metadata
# sections, the RELS-EXT section and the file section.
#
# Parameters:
#   $handle      - output file handle
#   $doc_obj     - document object
#   $section     - section to start from
#   $working_dir - directory holding the document's files
sub output_mets_section
{
    my ($self, $handle, $doc_obj, $section, $working_dir) = @_;

    # print out the dmdSection
    print {$handle} $self->buffer_mets_dmdSection_section_xml($doc_obj, $section);

    print {$handle} $self->buffer_mets_relsext_xml($doc_obj);

    print {$handle} "<mets:fileSec>\n";
    print {$handle} " <mets:fileGrp ID=\"DATASTREAMS\">\n";

    # In order: the filestream for the Table of Contents (TOC), the
    # fileSection by sections, then the whole fileSection
    foreach my $builder (qw(buffer_mets_fileSection_toc
                            buffer_mets_fileSection_section_xml
                            buffer_mets_fileWhole_section_xml))
    {
        print {$handle} $self->$builder($doc_obj, $section, $working_dir);
    }

    print {$handle} " </mets:fileGrp>\n";
    print {$handle} "</mets:fileSec>\n";

    # print out the StructMapSection by sections

    # If document is going to make use of disseminators (BMech and BDef)
    # then need to code up more output XML here (structMap) and in
    # METS:behaviorSec (Fedora extension?) sections

    my $struct_type = "fedora:dsBindingMap";
}
306
# Build the opening markup of an administrative metadata section.
#
# Parameters:
#   $section - section identifier ("" or ".1.2" style)
#   $id      - metadata set identifier used as the ID prefix
#
# Returns the opening <mets:amdSec>, <mets:techMD>, <mets:mdWrap> and
# <mets:xmlData> tags as one string.
sub buffer_mets_amdSec_header
{
    my ($self, $section, $id) = @_;

    # convert section number
    my $section_num = "1" . $section;

    my $label_attr = "LABEL=\"Metadata\"";

    return join("",
        qq{<mets:amdSec ID="$id$section" >\n},
        qq{ <mets:techMD ID="$id$section.0">\n}, # .0 fedora version number?
        qq{ <mets:mdWrap $label_attr MDTYPE="OTHER" OTHERMDTYPE="gsdl3" ID="${id}gsdl$section_num">\n},
        qq{ <mets:xmlData>\n},
    );
}
330
# Build the closing markup matching buffer_mets_amdSec_header: the
# closing xmlData, mdWrap, techMD and amdSec tags.
#
# Returns the closing tags as one string.
sub buffer_mets_amdSec_footer
{
    my $self = shift(@_);

    my @closing_tags = (
        " </mets:xmlData>\n",
        " </mets:mdWrap>\n",
        " </mets:techMD>\n",
        "</mets:amdSec>\n",
    );

    return join("", @closing_tags);
}
346
# Wrap a section's Dublin Core metadata in an <oai_dc:dc> element.
#
# Parameters:
#   $doc_obj - document object, passed on to get_dc_metadata()
#   $section - section identifier, passed on to get_dc_metadata()
#
# Returns the element, with the dc and oai_dc namespace declarations, as
# a string.
sub oai_dc_metadata_xml
{
    my ($self, $doc_obj, $section) = @_;

    my $dc_namespace
        = "xmlns:dc=\"http://purl.org/dc/elements/1.1/\""
        . " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";

    my $dc_body = $self->get_dc_metadata($doc_obj, $section, "oai_dc");

    return " <oai_dc:dc $dc_namespace>\n" . $dc_body . " </oai_dc:dc>\n";
}
365
366
367
368
369
370# Work out what the metadata set prefixes (dc, dls etc.) are for
371# this document
372
# Collect the metadata set prefixes used by a section's metadata.
#
# Parameters:
#   $doc_obj - document object providing _lookup_section()
#   $section - section identifier; undef is treated as ""
#
# Returns a hash ref whose keys are the prefixes found (the text before
# the first "." of each metadata name). Names without a prefix are
# counted under "ex"; "dc" is left out. Returns an empty hash ref if the
# section cannot be found.
sub metadata_set_prefixes
{
    my ($self, $doc_obj, $section) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return {};
    }

    my %seen_prefix;

    foreach my $entry (@{$section_ptr->{'metadata'}})
    {
        if ($entry->[0] =~ m/^(.*?)\./)
        {
            my $prefix = $1;

            # skip dublin core as handled separately elsewhere
            $seen_prefix{$prefix} = 1 unless ($prefix eq "dc");
        }
        else
        {
            $seen_prefix{"ex"} = 1;
        }
    }

    return \%seen_prefix;
}
404
405
# Build the XML for all of a section's metadata belonging to one
# metadata set.
#
# Parameters:
#   $doc_obj    - document object providing _lookup_section()
#   $section    - section identifier; undef is treated as ""
#   $mds_prefix - metadata set prefix to select (names with no prefix
#                 count as "ex")
#   $namespace  - namespace declaration placed on the wrapper element
#
# Returns a <prefix:prefix> element containing one <prefix:metadata>
# element per matching value, with control characters stripped; returns
# "" if the section cannot be found.
sub mds_metadata_xml
{
    my ($self, $doc_obj, $section, $mds_prefix, $namespace) = @_;

    # build up string of metadata with $mds_prefix
    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return "";
    }

    my $xml = " <$mds_prefix:$mds_prefix $namespace>\n";

    foreach my $entry (@{$section_ptr->{'metadata'}})
    {
        next unless ($entry->[0] =~ m/^(?:(.*?)\.)?(.*)$/);

        my ($entry_prefix, $full_element) = ($1, $2);
        $entry_prefix = "ex" unless defined $entry_prefix;

        next unless ($entry_prefix eq $mds_prefix);

        # split up full element in the form Title^en into element=Title, attr="en"
        my ($mds_element, $subelem) = ($full_element =~ m/^(.*?)(?:\^(.*))?$/);

        my $mds_attr = "";
        if (defined $subelem) {
            $mds_attr = "qualifier=\"$subelem\"";
        }

        my $escaped_value = &docprint::escape_text($entry->[1]);

        $xml .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
    }

    $xml .= " </$mds_prefix:$mds_prefix>\n";

    # remove control characters (tab, newline and carriage return are kept)
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
450
451
452
# Build the administrative metadata sections for $section and, by
# recursion, all of its subsections: one section holding the Dublin Core
# metadata, then one per other metadata set found in the section.
#
# Parameters:
#   $doc_obj - document object providing _lookup_section()
#   $section - section identifier; undef is treated as ""
#
# Returns the markup with control characters stripped, or "" if the
# section cannot be found.
sub buffer_mets_dmdSection_section_xml
{
    my ($self, $doc_obj, $section) = @_;

    $section = "" unless defined $section;

    my $section_ptr = $doc_obj->_lookup_section($section);
    if (!defined $section_ptr) {
        return "";
    }

    my $xml = "";

    $xml .= $self->buffer_mets_amdSec_header($section, "DC");
    $xml .= $self->oai_dc_metadata_xml($doc_obj, $section);
    $xml .= $self->buffer_mets_amdSec_footer($section);

    # for each metadata set
    my $md_sets = $self->metadata_set_prefixes($doc_obj, $section);

    foreach my $md_set (keys %$md_sets)
    {
        # Greenstone's agnostic approach to metadata sets conflicts with
        # Fedora's more clinically prescribed one. Fake a namespace for
        # each $md_set to keep both sides happy
        my $fake_namespace
            = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";

        # upper-case (ASCII letters only) form of the set name for the ID
        (my $id_caps = $md_set) =~ tr/a-z/A-Z/;

        $xml .= $self->buffer_mets_amdSec_header($section, $id_caps);
        $xml .= $self->mds_metadata_xml($doc_obj, $section, $md_set, $fake_namespace);
        $xml .= $self->buffer_mets_amdSec_footer($section);
    }

    foreach my $child (@{$section_ptr->{'subsection_order'}}) {
        $xml .= $self->buffer_mets_dmdSection_section_xml($doc_obj, "$section.$child");
    }

    # remove control characters (tab, newline and carriage return are kept)
    $xml =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $xml;
}
497
498
499
500
# Work out the xlink:href value for a file in the working directory.
#
# Parameters:
#   $fname       - file name (e.g. "doctoc.xml")
#   $working_dir - directory containing the file
#
# Returns "file:$fname" when the FEDORA_HOME environment variable is not
# set. Otherwise returns an http URL built from FEDORA_HOSTNAME and
# FEDORA_SERVER_PORT, with a path of "/gsdl/" followed by the file's
# location relative to the Greenstone home (GSDL3HOME/sites/localsite
# for Greenstone 3, else GSDLHOME).
sub doctxt_to_xlink
{
    my $self = shift @_;
    my ($fname,$working_dir) = @_;

    my $fedora_prefix = $ENV{'FEDORA_HOME'};
    if (!defined $fedora_prefix) {
        return "file:$fname";
    }

    my $collectparent;
    if (defined $ENV{'GSDL3HOME'}) {
        $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
    }
    else {
        # greenstone 2
        $collectparent = $ENV{'GSDLHOME'};
    }
    $collectparent = "" unless defined $collectparent;

    my $gsdl_href = "$working_dir/$fname";

    # Strip the Greenstone home from the front of the path. \Q...\E makes
    # the prefix match literally, so backslashes (Windows) and characters
    # such as '.', '+' or '(' in the path are not read as regex syntax.
    # The separator after the prefix may be either kind of slash.
    $gsdl_href =~ s/^\Q$collectparent\E[\/\\]?//;
    $gsdl_href = "/gsdl/$gsdl_href";

    my $fserver = $ENV{'FEDORA_HOSTNAME'};
    my $fport = $ENV{'FEDORA_SERVER_PORT'};

    my $fdomain = "http://$fserver:$fport";

    return "$fdomain$gsdl_href";
}
541
542
# Build the file group that points at the table of contents file
# (doctoc.xml).
#
# Parameters:
#   $doc_obj     - document object (not used here)
#   $section     - section identifier (not used here)
#   $working_dir - directory holding doctoc.xml; used to build the link
#
# Returns the <mets:fileGrp ID="TOC"> markup as a string.
sub buffer_mets_fileSection_toc
{
    my ($self, $doc_obj, $section, $working_dir) = @_;

    my $opt_attr = "OWNERID=\"M\"";
    my $xlink    = $self->doctxt_to_xlink("doctoc.xml", $working_dir);

    return join("",
        qq{ <mets:fileGrp ID="TOC">\n},
        qq{ <mets:file MIMETYPE="text/xml" ID="FILETOC" $opt_attr >\n},
        qq{ <mets:FLocat LOCTYPE="URL" xlink:href="$xlink"},
        qq{ xlink:title="Table of Contents"/>\n},
        qq{ </mets:file>\n},
        qq{ </mets:fileGrp>\n},
    );
}
562
563
# Build the per-section file section by delegating to the parent class
# with the Fedora-specific options: text split by section, an
# OWNERID="M" attribute and the "SECTION" label.
#
# Parameters:
#   $doc_obj     - document object
#   $section     - section identifier
#   $working_dir - directory holding the document's files
#
# Returns whatever the parent implementation returns.
sub buffer_mets_fileSection_section_xml
{
    my ($self, $doc_obj, $section, $working_dir) = @_;

    my @fedora_options = (1, "OWNERID=\"M\"", "SECTION");

    my $section_xml
        = $self->SUPER::buffer_mets_fileSection_section_xml(
              $doc_obj, $section, $working_dir, @fedora_options);

    return $section_xml;
}
578
# Build one file group per associated file ("gsdlassocfile" metadata) of
# a section.
#
# Parameters:
#   $doc_obj     - document object providing _lookup_section()
#   $section     - section identifier
#   $working_dir - directory holding the associated files
#
# Each gsdlassocfile value is expected in the form "file:mimetype:dir";
# values that do not match that form are skipped. The first associated
# file gets the group ID "url", later ones "FG<file>" with "/" replaced
# by "_".
#
# The link is the bare relative path when the FEDORA_HOME environment
# variable is not set; otherwise it is an http URL built from
# FEDORA_HOSTNAME and FEDORA_SERVER_PORT, with a path of "/gsdl/"
# followed by the file's location relative to the Greenstone home.
#
# Returns the markup with control characters stripped, or "" if the
# section cannot be found.
sub buffer_mets_fileWhole_section_xml
{
    my $self = shift(@_);
    my ($doc_obj,$section,$working_dir) = @_;

    my $section_ptr = $doc_obj->_lookup_section($section);
    return "" unless defined $section_ptr;

    my $all_text = "";

    # Output the fileSection for the whole section
    # => get the sourcefile and associative file

    my $opt_owner_id = "OWNERID=\"M\"";
    my $first_assocfile = 1;

    foreach my $data (@{$section_ptr->{'metadata'}}) {
        next unless ($data->[0] eq "gsdlassocfile");

        my $escaped_value = &docprint::escape_text($data->[1]);

        # Take the captures from the match's return value so that a
        # malformed entry is skipped instead of reusing stale $1..$3.
        my ($assoc_file, $mime_type, $assoc_dir)
            = ($escaped_value =~ m/^(.*?):(.*):(.*)$/);
        next unless defined $assoc_file;

        my $id_root;
        if ($first_assocfile) {
            $id_root = "url";
            $first_assocfile = 0;
        }
        else {
            $id_root = "FG$assoc_file";
        }

        $id_root =~ s/\//_/g;
        $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";

        my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";

        my $mime_attr = "MIMETYPE=\"$mime_type\"";
        my $xlink_title = "xlink:title=\"$assoc_file\"";
        my $id_attr = "ID=\"F$id_root.0\"";

        my $xlink_href;

        my $fedora_prefix = $ENV{'FEDORA_HOME'};
        if (!defined $fedora_prefix) {
            $xlink_href = "xlink:href=\"$assfilePath\"";
        }
        else
        {
            my $collectparent;
            if (defined $ENV{'GSDL3HOME'}) {
                $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
            }
            else {
                # greenstone 2
                $collectparent = $ENV{'GSDLHOME'};
            }
            $collectparent = "" unless defined $collectparent;

            my $gsdl_href = "$working_dir/$assfilePath";

            # Strip the Greenstone home from the front of the path.
            # \Q...\E makes the prefix match literally, so backslashes
            # (Windows) and characters such as '.', '+' or '(' are not
            # read as regex syntax.
            $gsdl_href =~ s/^\Q$collectparent\E[\/\\]?//;
            $gsdl_href = "/gsdl/$gsdl_href";

            my $fserver = $ENV{'FEDORA_HOSTNAME'};
            my $fport = $ENV{'FEDORA_SERVER_PORT'};

            my $fdomain = "http://$fserver:$fport";
            $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
        }

        $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
        $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
        $all_text .= " </mets:file>\n";
        $all_text .= " </mets:fileGrp>\n";
    }

    # remove control characters (tab, newline and carriage return are kept)
    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;

    return $all_text;
}
675
676
6771;
Note: See TracBrowser for help on using the repository browser.