source: main/trunk/greenstone2/perllib/plugouts/FedoraMETSPlugout.pm@ 24829

Last change on this file since 24829 was 24829, checked in by ak19, 12 years ago

Changes to bat files and perl code to deal with brackets in (Windows) filepath. Also checked winmake.bat files to see if changes were needed there. These changes go together with the commits 24826 to 24828 for gems.bat, and commit 24820 on makegs2.bat.

File size: 22.9 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
36sub BEGIN {
37 @FedoraMETSPlugout::ISA = ('METSPlugout');
38
39 if ((!defined $ENV{'FEDORA_HOME'}) || (!defined $ENV{'FEDORA_VERSION'})) {
40 if (defined $ENV{'FEDORA_HOME'}) {
41 print STDERR "FEDORA_HOME = $ENV{'FEDORA_HOME'}";
42 }
43
44 if (defined $ENV{'FEDORA_VERSION'}) {
45 print STDERR "FEDORA_VERSION = $ENV{'FEDORA_VERSION'}";
46 }
47
48 die "Need both environment variables FEDORA_HOME and FEDORA_VERSION to be set\n";
49 }
50
51 $ENV{'FEDORA_HOSTNAME'} = "localhost" if (!defined $ENV{'FEDORA_HOSTNAME'});
52 $ENV{'FEDORA_SERVER_PORT'} = "8080" if (!defined $ENV{'FEDORA_SERVER_PORT'});
53 $ENV{'FEDORA_USER'} = "fedoraAdmin" if (!defined $ENV{'FEDORA_USER'});
54 $ENV{'FEDORA_PASS'} = "fedoraAdmin" if (!defined $ENV{'FEDORA_PASS'});
55 $ENV{'FEDORA_PROTOCOL'} = "http" if (!defined $ENV{'FEDORA_PROTOCOL'});
56 $ENV{'FEDORA_PID_NAMESPACE'} = "greenstone" if (!defined $ENV{'FEDORA_PID_NAMESPACE'});
57 $ENV{'FEDORA_PREFIX'} = "/fedora" if (!defined $ENV{'FEDORA_PREFIX'});
58}
59
60my $arguments = [
61 { 'name' => "fedora_namespace",
62 'desc' => "{FedoraMETSPlugout.fedora_namespace}",
63 'type' => "string",
64 'deft' => "greenstone",
65 'reqd' => "no",
66 'hiddengli' => "no"}
67 ];
68
69
70
71my $options = { 'name' => "FedoraMETSPlugout",
72 'desc' => "{FedoraMETSPlugout.desc}",
73 'abstract' => "no",
74 'inherits' => "yes",
75 'args' => $arguments
76 };
77
78
79sub new
80{
81 my ($class) = shift (@_);
82 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
83 push(@$plugoutlist, $class);
84
85
86 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
87 push(@{$hashArgOptLists->{"OptList"}},$options);
88
89 my $self = new METSPlugout($plugoutlist,$inputargs,$hashArgOptLists);
90
91 return bless $self, $class;
92}
93
94
95sub output_mets_xml_header
96{
97 my $self = shift(@_);
98 my ($handle, $OID, $doc_title) = @_;
99
100 my $fnamespace = $self->{'fedora_namespace'};
101 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
102
103 my $collection = $ENV{'GSDLCOLLECTION'};
104
105 # Might need the following in the schemeLocation attribute for Fedora3
106 # http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd
107 my $extra_attr = "OBJID=\"$oid_namespace:$collection-$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
108
109 my $extra_schema = undef;
110
111 if (defined $ENV{'FEDORA_VERSION'} && $ENV{'FEDORA_VERSION'} =~ m/^2/) { # checking if major version is 2
112 $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
113 }
114 else {
115 $extra_attr .= " EXT_VERSION=\"1.1\"";
116 }
117
118 $self->output_mets_xml_header_extra_attribute($handle,$extra_attr,$extra_schema);
119
120 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
121
122}
123
124#
125# Print out "family" of doctxt.xml files
126#
127
128sub saveas_doctxt_section
129{
130 my $self = shift (@_);
131 my ($doc_obj,$working_dir,$section) = @_;
132
133 my $section_ptr=$doc_obj->_lookup_section($section);
134 return unless defined $section_ptr;
135
136 my $section_fnum ="1". $section;
137 $section_fnum =~ s/\./_/g;
138
139 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt$section_fnum.xml");
140
141 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
142
143 my $outhandler;
144
145 if (defined $self->{'xslt_writer'}){
146 $outhandler = $self->{'xslt_writer'};
147 }
148 else{
149 $outhandler = $self->get_output_handler($doc_txt_file);
150 }
151
152 $self->output_xml_header($outhandler);
153
154 ## change links to be Fedora cognant:
155 my $txt = $section_ptr->{'text'};
156 $section_ptr->{'text'} = $self->adjust_links($doc_obj, \$txt);
157
158 $self->output_txt_section($outhandler,$doc_obj, $section);
159 $self->output_xml_footer($outhandler);
160
161
162 if (defined $self->{'xslt_writer'}){
163 $self->close_xslt_pipe();
164 }
165 else{
166 close($outhandler);
167 }
168
169
170 # Output all the subsections as separate files
171 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
172
173 $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
174 }
175}
176
177sub adjust_links
178{
179 my $self = shift(@_);
180 my ($doc_obj, $textref) = @_;
181
182 ## change links to be Fedora cognant:
183 # 1. retrieve txt of section - $$textref
184 # 2. change it:
185 # /$ENV{'FEDORA_PREFIX'}/objects/$greenstone-docobj-hash-xxx/datastreams/FG<orig-img-name>/content
186 # 3. only replace it back in doc_obj if we didn't get a ref in the first place
187
188 my $OID = $doc_obj->get_OID();
189 my $fnamespace = $self->{'fedora_namespace'};
190 if($OID ne "collection" && defined $fnamespace) {
191 my $fed_id = "$fnamespace:".$ENV{'GSDLCOLLECTION'}."-$OID"; #oid_namespace:collection-OID
192 my $fedora_url_prefix = $ENV{'FEDORA_PREFIX'}."/objects/$fed_id/datastreams/FG";
193 my $fedora_url_suffix = "/content";
194
195 $$textref =~ s/(<(?:img|embed|table|tr|td|link|script)[^>]*?(?:src|background|href)\s*=\s*)((?:[\"][^\"]+[\"])|(?:[\'][^\']+[\'])|(?:[^\s\/>]+))([^>]*>)/$self->replace_rel_link($1, $2, $3, $fedora_url_prefix, $fedora_url_suffix)/isge;
196# print STDERR "*** all text after: $$textref\n\n";
197 }
198
199 return $$textref;
200}
201
202# replace relative link with the prefix and suffix given
203sub replace_rel_link
204{
205 my $self = shift (@_);
206 my ($front, $link, $back, $url_prefix, $url_suffix) = @_;
207
208 # only process relative links. Return if absolute link
209 if($link =~ m/^http/) {
210 return "$front$link$back";
211 }
212
213 # remove quotes from link at start and end if necessary
214 if ($link=~/^[\"\']/) {
215 $link=~s/^[\"\']//;
216 $link=~s/[\"\']$//;
217 $front.='"';
218 $back="\"$back";
219 }
220
221 # remove any _httpdocimg_/ that greenstone may have prefixed to the image
222 $link =~ s/^_httpdocimg_(?:\/|\\)//;
223
224 return "$front$url_prefix$link$url_suffix$back";
225}
226
227
228sub saveas_doctxt
229{
230 my $self = shift (@_);
231 my ($doc_obj,$working_dir) = @_;
232
233 my $section = $doc_obj->get_top_section();
234
235 $self->saveas_doctxt_section($doc_obj,$working_dir,$section);
236
237 $self->saveas_toc($doc_obj,$working_dir);
238}
239
240sub buffer_toc
241{
242 my $self = shift (@_);
243 my ($doc_obj,$working_dir,$section,$depth) = @_;
244
245 my $section_ptr=$doc_obj->_lookup_section($section);
246 return "" unless defined $section_ptr;
247
248 my $all_text = "";
249
250 my $section_num ="1". $section;
251 my $indent = " " x ($depth*2);
252
253 $all_text .= "$indent<Section id=\"$section_num\">\n";
254
255 # Output all the subsections as separate files
256 foreach my $subsection (@{$section_ptr->{'subsection_order'}})
257 {
258 $all_text
259 .= $self->buffer_toc($doc_obj, $working_dir,
260 "$section.$subsection",$depth+1);
261 }
262
263 $all_text .= "$indent</Section>\n";
264
265 return $all_text;
266}
267
268
269sub saveas_toc
270{
271 my $self = shift (@_);
272 my ($doc_obj,$working_dir) = @_;
273
274 my $section = $doc_obj->get_top_section();
275 my $section_ptr=$doc_obj->_lookup_section($section);
276 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
277
278 # If num_subsections is 0, then there is no nested TOC
279
280 if ($num_subsections>0) {
281
282 my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
283
284 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
285
286 my $outhandler;
287
288 if (defined $self->{'xslt_writer'}){
289 $outhandler = $self->{'xslt_writer'};
290 }
291 else{
292 $outhandler = $self->get_output_handler($doc_txt_file);
293 }
294 print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
295
296 if (defined $self->{'xslt_writer'}){
297 $self->close_xslt_pipe();
298 }
299 else{
300 close($outhandler);
301 }
302 }
303
304}
305
306
307sub buffer_mets_relsext_xml
308{
309 my $self = shift(@_);
310 my ($doc_obj) = @_;
311
312 my $OID = $doc_obj->get_OID();
313
314 my $fnamespace = $self->{'fedora_namespace'};
315 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
316 my $collection = $ENV{'GSDLCOLLECTION'};
317
318 my $fed_id = "$oid_namespace:$collection-$OID";
319
320 my $all_text = "";
321
322 my $top_section = $doc_obj->get_top_section();
323 my $plugin_type = $doc_obj->get_metadata_element($top_section,"Plugin");
324
325# Images do not get ingested into Fedora when on Linux if the following is included
326# Needs more investigation, since we'd like a working version of the following
327# in order to get thumbnails working and other stuff.
328# if ((defined $plugin_type) && ($plugin_type eq "ImagePlugin"))
329# {
330#
331# $all_text .= "<mets:amdSec ID=\"RELS-EXT\">\n";
332# $all_text .= " <mets:techMD ID=\"RELS-EXT1.0\" STATUS=\"A\">\n";
333# $all_text .= " <mets:mdWrap LABEL=\"RELS-EXT - RDF formatted relationship metadata\" MDTYPE=\"OTHER\" MIMETYPE=\"text/xml\">\n";
334# $all_text .= " <mets:xmlData>\n";
335# $all_text .= " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:fedora-model=\"info:fedora/fedora-system:def/model#\">\n";
336# $all_text .= " <rdf:Description rdf:about=\"info:fedora/$fed_id\">\n";
337# $all_text .= " <fedora-model:hasContentModel rdf:resource=\"info:fedora/demo:UVA_STD_IMAGE\"/>\n";
338# $all_text .= " </rdf:Description>\n";
339# $all_text .= " </rdf:RDF>\n";
340# $all_text .= " </mets:xmlData>\n";
341# $all_text .= " </mets:mdWrap>\n";
342# $all_text .= " </mets:techMD>\n";
343# $all_text .= "</mets:amdSec>\n";
344# }
345
346 return $all_text;
347}
348
349
350#
351# Print out docmets.xml file
352#
353sub output_mets_section
354{
355 my $self = shift(@_);
356 my ($handle, $doc_obj, $section, $working_dir) = @_;
357
358 # print out the dmdSection
359 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section);
360
361 print $handle $self->buffer_mets_relsext_xml($doc_obj);
362
363 print $handle "<mets:fileSec>\n";
364 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
365
366 # Generate Filestream for Table of Contents (TOC)
367 my $section_ptr=$doc_obj->_lookup_section($section);
368 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
369
370 # If num_subsections is 0, then there is no nested TOC
371
372 if ($num_subsections>0) {
373 print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
374 }
375
376 # print out the fileSection by sections
377 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir);
378
379 # print out the whole fileSection
380 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$working_dir);
381
382 print $handle " </mets:fileGrp>\n";
383 print $handle "</mets:fileSec>\n";
384
385 # print out the StructMapSection by sections
386
387 my $struct_type = "fedora:dsBindingMap";
388
389 # If document is going to make use of deminators (BMech and BDef) then
390 # need to code up more output XML here (structMap)and in
391 # METS:behaviorSec (Fedora extension?) sections
392
393}
394
395sub buffer_mets_amdSec_header
396{
397 my $self = shift(@_);
398 my ($section,$id) = @_;
399
400 # convert section number
401 my $section_num ="1". $section;
402
403 my $all_text = "";
404
405 my $label_attr = "";
406
407 $all_text .= "<mets:amdSec ID=\"$id$section\" >\n";
408 $all_text .= " <mets:techMD ID=\"$id$section.0\">\n"; # .0 fedora version number?
409
410 $label_attr = "LABEL=\"Metadata\"";
411
412 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"".$id."gsdl$section_num\">\n";
413 $all_text .= " <mets:xmlData>\n";
414
415 return $all_text;
416
417}
418
419sub buffer_mets_amdSec_footer
420{
421 my $self = shift(@_);
422
423 my $all_text = "";
424
425 $all_text .= " </mets:xmlData>\n";
426 $all_text .= " </mets:mdWrap>\n";
427
428 $all_text .= " </mets:techMD>\n";
429 $all_text .= "</mets:amdSec>\n";
430
431 return $all_text;
432
433}
434
435sub oai_dc_metadata_xml
436{
437 my $self = shift(@_);
438 my ($doc_obj,$section) = @_;
439
440 my $all_text = "";
441
442 my $dc_namespace = "";
443 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
444 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";
445
446 $all_text .= " <oai_dc:dc $dc_namespace>\n";
447
448 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
449 $all_text .= " </oai_dc:dc>\n";
450
451 return $all_text;
452}
453
454
455
456
457
458# Work out the what the metadata set prefixes (dc,dls etc.) are for
459# this document
460
461sub metadata_set_prefixes
462{
463 my $self = shift(@_);
464 my ($doc_obj, $section) = @_;
465
466 $section="" unless defined $section;
467
468 my $section_ptr = $doc_obj->_lookup_section($section);
469 return {} unless defined $section_ptr;
470
471 my $unique_prefix = {};
472
473 foreach my $data (@{$section_ptr->{'metadata'}})
474 {
475 my ($prefix) = ($data->[0]=~ m/^(.*?)\./);
476
477 if (defined $prefix)
478 {
479 next if ($prefix eq "dc"); # skip dublin core as handled separately elsewhere
480
481 $unique_prefix->{$prefix} = 1;
482 }
483 else
484 {
485 $unique_prefix->{"ex"} = 1;
486 }
487
488 }
489
490 return $unique_prefix;
491}
492
493
494sub mds_metadata_xml
495{
496 my $self = shift(@_);
497 my ($doc_obj, $section, $mds_prefix, $namespace) = @_;
498
499 # build up string of metadata with $mds_prefix
500 $section="" unless defined $section;
501
502 my $section_ptr = $doc_obj->_lookup_section($section);
503 return "" unless defined $section_ptr;
504
505 my $all_text="";
506 $all_text .= " <$mds_prefix:$mds_prefix $namespace>\n";
507
508
509 foreach my $data (@{$section_ptr->{'metadata'}})
510 {
511 if ($data->[0]=~ m/^(?:(.*?)\.)?(.*)$/)
512 {
513 my $curr_mds_prefix = $1;
514 my $mds_full_element = $2;
515
516 $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;
517
518 if ($curr_mds_prefix eq $mds_prefix)
519 {
520 # split up full element in the form Title^en into element=Title, attr="en"
521 my ($mds_element,$subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);
522 my $mds_attr = (defined $subelem) ? "qualifier=\"$subelem\"" : "";
523
524 my $escaped_value = &docprint::escape_text($data->[1]);
525
526 $all_text .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
527 }
528 }
529 }
530
531 $all_text .= " </$mds_prefix:$mds_prefix>\n";
532
533
534 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
535
536 return $all_text;
537}
538
539
540
541sub buffer_mets_dmdSection_section_xml
542{
543 my $self = shift(@_);
544 my ($doc_obj,$section) = @_;
545
546 $section="" unless defined $section;
547
548 my $section_ptr=$doc_obj->_lookup_section($section);
549 return "" unless defined $section_ptr;
550
551 my $all_text = "";
552
553 $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
554 $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
555 $all_text .= $self->buffer_mets_amdSec_footer($section);
556
557 # for each metadata set
558 my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);
559
560 foreach my $md_set (keys %$md_sets)
561 {
562 # Greenstone's agnostic approach to metadata sets conflicts with
563 # Fedoras more clinically prescribed one. Fake a namespace for
564 # each $md_set to keep both sides happy
565
566 my $fake_namespace
567 = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";
568 my $id_caps = $md_set;
569 $id_caps =~ tr/[a-z]/[A-Z]/;
570
571 $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
572 $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
573 $all_text .= $self->buffer_mets_amdSec_footer($section);
574 }
575
576
577 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
578 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
579 }
580
581 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
582
583 return $all_text;
584}
585
586
587sub doctxt_to_xlink
588{
589 my $self = shift @_;
590 my ($fname,$working_dir) = @_;
591
592 my $xlink_href;
593
594 my $fedora_prefix = $ENV{'FEDORA_HOME'};
595 if (!defined $fedora_prefix) {
596 $xlink_href = "file:$fname";
597 }
598 else
599 {
600 my $collectparent;
601 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
602 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
603 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
604 }
605 else { # try the default location for the web directory
606 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
607 }
608 }
609 else {
610 # greenstone 2
611 $collectparent = $ENV{'GSDLHOME'};
612 }
613
614 my $gsdl_href = &util::filename_cat($working_dir, $fname);
615 $collectparent = &util::filename_to_regex($collectparent); # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
616 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
617 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
618 my $localfedora = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
619
620 my $greenstone_url_prefix = &util::get_greenstone_url_prefix();
621 # prepend url_prefix (which will contain the forward slash upfront)
622 if($ENV{'GSDL3SRCHOME'} && -e $localfedora) { # Fedora uses Greenstone's tomcat.
623 $gsdl_href = "$greenstone_url_prefix/sites/localsite/$gsdl_href"; # Default: /greenstone3/sites/localsite/$gsdl_href
624 } else {
625 $gsdl_href = "$greenstone_url_prefix/$gsdl_href"; # By default: "/greenstone/$gsdl_href";
626 }
627
628 my $fserver = $ENV{'FEDORA_HOSTNAME'};
629 my $fport = $ENV{'FEDORA_SERVER_PORT'};
630
631 my $fdomain = "http://$fserver:$fport";
632 $xlink_href = "$fdomain$gsdl_href";
633#ERROR: $xlink_href = "$fname";
634 }
635
636 return $xlink_href;
637
638}
639
640
641sub buffer_mets_fileSection_toc
642{
643 my $self = shift(@_);
644 my ($doc_obj,$section,$working_dir) = @_;
645
646 my $opt_attr = "OWNERID=\"M\"";
647
648 my $all_text = ' <mets:fileGrp ID="TOC">'. "\n";
649 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n";
650 my $xlink = $self->doctxt_to_xlink("doctoc.xml",$working_dir);
651
652 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
653
654 $all_text .= ' xlink:title="Table of Contents"/>' . "\n";
655 $all_text .= " </mets:file>\n";
656 $all_text .= " </mets:fileGrp>\n";
657
658 return $all_text;
659}
660
661
662sub buffer_mets_fileSection_section_xml
663{
664 my $self = shift(@_);
665 my ($doc_obj,$section,$working_dir) = @_;
666
667 my $is_txt_split = 1;
668 my $opt_owner_id = "OWNERID=\"M\"";
669
670 my $all_text
671 = $self->SUPER::buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir,$is_txt_split, $opt_owner_id,"SECTION");
672
673
674 return $all_text;
675}
676
677sub buffer_mets_fileWhole_section_xml
678{
679 my $self = shift(@_);
680 my ($doc_obj,$section,$working_dir) = @_;
681
682 my $section_ptr = $doc_obj-> _lookup_section($section);
683 return "" unless defined $section_ptr;
684
685 my $all_text="";
686
687 my $fileID=0;
688
689 # Output the fileSection for the whole section
690 # => get the sourcefile and associative file
691
692 my $id_root = "";
693 my $opt_owner_id = "OWNERID=\"M\"";
694
695
696 foreach my $data (@{$section_ptr->{'metadata'}}){
697 my $escaped_value = &docprint::escape_text($data->[1]);
698
699 if ($data->[0] eq "gsdlassocfile"){
700
701 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
702 my $assoc_file = $1;
703 my $mime_type = $2;
704 my $assoc_dir = $3;
705
706 $id_root = "FG$assoc_file";
707
708 $id_root =~ s/\//_/g;
709 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
710
711 # The assoc_file's name may be url-encoded, so the xlink_href in the <mets:FLocat>
712 # element must be the url to this (possibly url-encoded) filename
713 my $assocfile_url = &unicode::filename_to_url($assoc_file);
714 my $assfilePath = ($assoc_dir eq "") ? $assocfile_url : "$assoc_dir/$assocfile_url";
715 ++$fileID;
716
717 my $mime_attr = "MIMETYPE=\"$mime_type\"";
718 my $xlink_title = "xlink:title=\"$assoc_file\"";
719
720 my $id_attr;
721 my $xlink_href;
722
723 $id_attr = "ID=\"F$id_root.0\"";
724
725 my $fedora_prefix = $ENV{'FEDORA_HOME'};
726 if (!defined $fedora_prefix) {
727 $xlink_href = "xlink:href=\"$assfilePath\"";
728 }
729 else
730 {
731 my $collectparent;
732 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
733 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
734 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
735 }
736 else { # try the default location for the web directory
737 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
738 }
739 }
740 else {
741 # greenstone 2
742 $collectparent = $ENV{'GSDLHOME'};
743 }
744
745 my $gsdl_href = &util::filename_cat($working_dir,$assfilePath);
746 $collectparent = &util::filename_to_regex($collectparent); # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
747 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
748 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
749 my $localfedora = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
750
751 my $greenstone_url_prefix = &util::get_greenstone_url_prefix();
752 # prepend url_prefix (which will contain the forward slash upfront)
753 if($ENV{'GSDL3SRCHOME'} && -e $localfedora) { # Fedora uses Greenstone's tomcat.
754 $gsdl_href = "$greenstone_url_prefix/sites/localsite/$gsdl_href"; # Default: /greenstone3/sites/localsite/$gsdl_href
755 } else {
756 $gsdl_href = "$greenstone_url_prefix/$gsdl_href"; # By default: "/greenstone/$gsdl_href";
757 }
758
759 my $fserver = $ENV{'FEDORA_HOSTNAME'};
760 my $fport = $ENV{'FEDORA_SERVER_PORT'};
761
762 my $fdomain = "http://$fserver:$fport";
763 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
764#ERROR: $xlink_href = "xlink:href=\"$assfilePath\"";
765 }
766
767 my $top_section = $doc_obj->get_top_section();
768 my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
769
770### print STDERR "**** mime-type: $mime_attr\n";
771
772 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
773 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
774
775 $all_text .= " </mets:file>\n";
776
777 $all_text .= " </mets:fileGrp>\n";
778 }
779 }
780
781 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
782
783 return $all_text;
784}
785
786
7871;
Note: See TracBrowser for help on using the repository browser.