source: main/trunk/greenstone2/perllib/plugouts/FedoraMETSPlugout.pm@ 22372

Last change on this file since 22372 was 22372, checked in by ak19, 14 years ago

None of Fedora, the GS3 democlient and Greenstone seem to require the first associated file to have a datastream named url, so the special handling for this has been removed.

File size: 22.0 KB
Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
36sub BEGIN {
37 @FedoraMETSPlugout::ISA = ('METSPlugout');
38}
39
40my $arguments = [
41 { 'name' => "fedora_namespace",
42 'desc' => "{FedoraMETSPlugout.fedora_namespace}",
43 'type' => "string",
44 'deft' => "greenstone",
45 'reqd' => "no",
46 'hiddengli' => "no"}
47 ];
48
49
50
51my $options = { 'name' => "FedoraMETSPlugout",
52 'desc' => "{FedoraMETSPlugout.desc}",
53 'abstract' => "no",
54 'inherits' => "yes",
55 'args' => $arguments
56 };
57
58
59sub new
60{
61 my ($class) = shift (@_);
62 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
63 push(@$plugoutlist, $class);
64
65
66 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
67 push(@{$hashArgOptLists->{"OptList"}},$options);
68
69 my $self = new METSPlugout($plugoutlist,$inputargs,$hashArgOptLists);
70
71 return bless $self, $class;
72}
73
74
75sub output_mets_xml_header
76{
77 my $self = shift(@_);
78 my ($handle, $OID, $doc_title) = @_;
79
80 my $fnamespace = $self->{'fedora_namespace'};
81 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
82
83 my $collection = $ENV{'GSDLCOLLECTION'};
84
85 # Might need the following in the schemeLocation attribute for Fedora3
86 # http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd
87 my $extra_attr = "OBJID=\"$oid_namespace:$collection-$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
88
89 my $extra_schema = undef;
90
91 if (defined $ENV{'FEDORA_VERSION'} && $ENV{'FEDORA_VERSION'} =~ m/^2/) { # checking if major version is 2
92 $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
93 }
94 else {
95 $extra_attr .= " EXT_VERSION=\"1.1\"";
96 }
97
98 $self->output_mets_xml_header_extra_attribute($handle,$extra_attr,$extra_schema);
99
100 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
101
102}
103
104#
105# Print out "family" of doctxt.xml files
106#
107
108sub saveas_doctxt_section
109{
110 my $self = shift (@_);
111 my ($doc_obj,$working_dir,$section) = @_;
112
113 my $section_ptr=$doc_obj->_lookup_section($section);
114 return unless defined $section_ptr;
115
116 my $section_fnum ="1". $section;
117 $section_fnum =~ s/\./_/g;
118
119 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt$section_fnum.xml");
120
121 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
122
123 my $outhandler;
124
125 if (defined $self->{'xslt_writer'}){
126 $outhandler = $self->{'xslt_writer'};
127 }
128 else{
129 $outhandler = $self->get_output_handler($doc_txt_file);
130 }
131
132 $self->output_xml_header($outhandler);
133
134 ## change links to be Fedora cognant:
135 my $txt = $section_ptr->{'text'};
136 $section_ptr->{'text'} = $self->adjust_links($doc_obj, \$txt);
137
138 $self->output_txt_section($outhandler,$doc_obj, $section);
139 $self->output_xml_footer($outhandler);
140
141
142 if (defined $self->{'xslt_writer'}){
143 $self->close_xslt_pipe();
144 }
145 else{
146 close($outhandler);
147 }
148
149
150 # Output all the subsections as separate files
151 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
152
153 $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
154 }
155}
156
157sub adjust_links
158{
159 my $self = shift(@_);
160 my ($doc_obj, $textref) = @_;
161
162 ## change links to be Fedora cognant:
163 # 1. retrieve txt of section - $$textref
164 # 2. change it:
165 # /$ENV{'FEDORA_PREFIX'}/objects/$greenstone-docobj-hash-xxx/datastreams/FG<orig-img-name>/content
166 # 3. only replace it back in doc_obj if we didn't get a ref in the first place
167
168 my $OID = $doc_obj->get_OID();
169 my $fnamespace = $self->{'fedora_namespace'};
170 if($OID ne "collection" && defined $fnamespace) {
171 my $fed_id = "$fnamespace:".$ENV{'GSDLCOLLECTION'}."-$OID"; #oid_namespace:collection-OID
172 my $fedora_url_prefix = $ENV{'FEDORA_PREFIX'}."/objects/$fed_id/datastreams/FG";
173 my $fedora_url_suffix = "/content";
174
175 $$textref =~ s/(<(?:img|embed|table|tr|td|link|script)[^>]*?(?:src|background|href)\s*=\s*)((?:[\"][^\"]+[\"])|(?:[\'][^\']+[\'])|(?:[^\s\/>]+))([^>]*>)/$self->replace_rel_link($1, $2, $3, $fedora_url_prefix, $fedora_url_suffix)/isge;
176# print STDERR "*** all text after: $$textref\n\n";
177 }
178
179 return $$textref;
180}
181
182# replace relative link with the prefix and suffix given
183sub replace_rel_link
184{
185 my $self = shift (@_);
186 my ($front, $link, $back, $url_prefix, $url_suffix) = @_;
187
188 # only process relative links. Return if absolute link
189 if($link =~ m/^http/) {
190 return "$front$link$back";
191 }
192
193 # remove quotes from link at start and end if necessary
194 if ($link=~/^[\"\']/) {
195 $link=~s/^[\"\']//;
196 $link=~s/[\"\']$//;
197 $front.='"';
198 $back="\"$back";
199 }
200
201 # remove any _httpdocimg_/ that greenstone may have prefixed to the image
202 $link =~ s/^_httpdocimg_(?:\/|\\)//;
203
204 return "$front$url_prefix$link$url_suffix$back";
205}
206
207
208sub saveas_doctxt
209{
210 my $self = shift (@_);
211 my ($doc_obj,$working_dir) = @_;
212
213 my $section = $doc_obj->get_top_section();
214
215 $self->saveas_doctxt_section($doc_obj,$working_dir,$section);
216
217 $self->saveas_toc($doc_obj,$working_dir);
218}
219
220sub buffer_toc
221{
222 my $self = shift (@_);
223 my ($doc_obj,$working_dir,$section,$depth) = @_;
224
225 my $section_ptr=$doc_obj->_lookup_section($section);
226 return "" unless defined $section_ptr;
227
228 my $all_text = "";
229
230 my $section_num ="1". $section;
231 my $indent = " " x ($depth*2);
232
233 $all_text .= "$indent<Section id=\"$section_num\">\n";
234
235 # Output all the subsections as separate files
236 foreach my $subsection (@{$section_ptr->{'subsection_order'}})
237 {
238 $all_text
239 .= $self->buffer_toc($doc_obj, $working_dir,
240 "$section.$subsection",$depth+1);
241 }
242
243 $all_text .= "$indent</Section>\n";
244
245 return $all_text;
246}
247
248
249sub saveas_toc
250{
251 my $self = shift (@_);
252 my ($doc_obj,$working_dir) = @_;
253
254 my $section = $doc_obj->get_top_section();
255 my $section_ptr=$doc_obj->_lookup_section($section);
256 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
257
258 # If num_subsections is 0, then there is no nested TOC
259
260 if ($num_subsections>0) {
261
262 my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
263
264 $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
265
266 my $outhandler;
267
268 if (defined $self->{'xslt_writer'}){
269 $outhandler = $self->{'xslt_writer'};
270 }
271 else{
272 $outhandler = $self->get_output_handler($doc_txt_file);
273 }
274 print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
275
276 if (defined $self->{'xslt_writer'}){
277 $self->close_xslt_pipe();
278 }
279 else{
280 close($outhandler);
281 }
282 }
283
284}
285
286
287sub buffer_mets_relsext_xml
288{
289 my $self = shift(@_);
290 my ($doc_obj) = @_;
291
292 my $OID = $doc_obj->get_OID();
293
294 my $fnamespace = $self->{'fedora_namespace'};
295 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
296 my $collection = $ENV{'GSDLCOLLECTION'};
297
298 my $fed_id = "$oid_namespace:$collection-$OID";
299
300 my $all_text = "";
301
302 my $top_section = $doc_obj->get_top_section();
303 my $plugin_type = $doc_obj->get_metadata_element($top_section,"Plugin");
304
305# Images do not get ingested into Fedora when on Linux if the following is included
306# Needs more investigation, since we'd like a working version of the following
307# in order to get thumbnails working and other stuff.
308# if ((defined $plugin_type) && ($plugin_type eq "ImagePlugin"))
309# {
310#
311# $all_text .= "<mets:amdSec ID=\"RELS-EXT\">\n";
312# $all_text .= " <mets:techMD ID=\"RELS-EXT1.0\" STATUS=\"A\">\n";
313# $all_text .= " <mets:mdWrap LABEL=\"RELS-EXT - RDF formatted relationship metadata\" MDTYPE=\"OTHER\" MIMETYPE=\"text/xml\">\n";
314# $all_text .= " <mets:xmlData>\n";
315# $all_text .= " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:fedora-model=\"info:fedora/fedora-system:def/model#\">\n";
316# $all_text .= " <rdf:Description rdf:about=\"info:fedora/$fed_id\">\n";
317# $all_text .= " <fedora-model:hasContentModel rdf:resource=\"info:fedora/demo:UVA_STD_IMAGE\"/>\n";
318# $all_text .= " </rdf:Description>\n";
319# $all_text .= " </rdf:RDF>\n";
320# $all_text .= " </mets:xmlData>\n";
321# $all_text .= " </mets:mdWrap>\n";
322# $all_text .= " </mets:techMD>\n";
323# $all_text .= "</mets:amdSec>\n";
324# }
325
326 return $all_text;
327}
328
329
330#
331# Print out docmets.xml file
332#
333sub output_mets_section
334{
335 my $self = shift(@_);
336 my ($handle, $doc_obj, $section, $working_dir) = @_;
337
338 # print out the dmdSection
339 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section);
340
341 print $handle $self->buffer_mets_relsext_xml($doc_obj);
342
343 print $handle "<mets:fileSec>\n";
344 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
345
346 # Generate Filestream for Table of Contents (TOC)
347 my $section_ptr=$doc_obj->_lookup_section($section);
348 my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
349
350 # If num_subsections is 0, then there is no nested TOC
351
352 if ($num_subsections>0) {
353 print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
354 }
355
356 # print out the fileSection by sections
357 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir);
358
359 # print out the whole fileSection
360 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$working_dir);
361
362 print $handle " </mets:fileGrp>\n";
363 print $handle "</mets:fileSec>\n";
364
365 # print out the StructMapSection by sections
366
367 my $struct_type = "fedora:dsBindingMap";
368
369 # If document is going to make use of deminators (BMech and BDef) then
370 # need to code up more output XML here (structMap)and in
371 # METS:behaviorSec (Fedora extension?) sections
372
373}
374
375sub buffer_mets_amdSec_header
376{
377 my $self = shift(@_);
378 my ($section,$id) = @_;
379
380 # convert section number
381 my $section_num ="1". $section;
382
383 my $all_text = "";
384
385 my $label_attr = "";
386
387 $all_text .= "<mets:amdSec ID=\"$id$section\" >\n";
388 $all_text .= " <mets:techMD ID=\"$id$section.0\">\n"; # .0 fedora version number?
389
390 $label_attr = "LABEL=\"Metadata\"";
391
392 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"".$id."gsdl$section_num\">\n";
393 $all_text .= " <mets:xmlData>\n";
394
395 return $all_text;
396
397}
398
399sub buffer_mets_amdSec_footer
400{
401 my $self = shift(@_);
402
403 my $all_text = "";
404
405 $all_text .= " </mets:xmlData>\n";
406 $all_text .= " </mets:mdWrap>\n";
407
408 $all_text .= " </mets:techMD>\n";
409 $all_text .= "</mets:amdSec>\n";
410
411 return $all_text;
412
413}
414
415sub oai_dc_metadata_xml
416{
417 my $self = shift(@_);
418 my ($doc_obj,$section) = @_;
419
420 my $all_text = "";
421
422 my $dc_namespace = "";
423 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
424 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";
425
426 $all_text .= " <oai_dc:dc $dc_namespace>\n";
427
428 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
429 $all_text .= " </oai_dc:dc>\n";
430
431 return $all_text;
432}
433
434
435
436
437
438# Work out the what the metadata set prefixes (dc,dls etc.) are for
439# this document
440
441sub metadata_set_prefixes
442{
443 my $self = shift(@_);
444 my ($doc_obj, $section) = @_;
445
446 $section="" unless defined $section;
447
448 my $section_ptr = $doc_obj->_lookup_section($section);
449 return {} unless defined $section_ptr;
450
451 my $unique_prefix = {};
452
453 foreach my $data (@{$section_ptr->{'metadata'}})
454 {
455 my ($prefix) = ($data->[0]=~ m/^(.*?)\./);
456
457 if (defined $prefix)
458 {
459 next if ($prefix eq "dc"); # skip dublin core as handled separately elsewhere
460
461 $unique_prefix->{$prefix} = 1;
462 }
463 else
464 {
465 $unique_prefix->{"ex"} = 1;
466 }
467
468 }
469
470 return $unique_prefix;
471}
472
473
474sub mds_metadata_xml
475{
476 my $self = shift(@_);
477 my ($doc_obj, $section, $mds_prefix, $namespace) = @_;
478
479 # build up string of metadata with $mds_prefix
480 $section="" unless defined $section;
481
482 my $section_ptr = $doc_obj->_lookup_section($section);
483 return "" unless defined $section_ptr;
484
485 my $all_text="";
486 $all_text .= " <$mds_prefix:$mds_prefix $namespace>\n";
487
488
489 foreach my $data (@{$section_ptr->{'metadata'}})
490 {
491 if ($data->[0]=~ m/^(?:(.*?)\.)?(.*)$/)
492 {
493 my $curr_mds_prefix = $1;
494 my $mds_full_element = $2;
495
496 $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;
497
498 if ($curr_mds_prefix eq $mds_prefix)
499 {
500 # split up full element in the form Title^en into element=Title, attr="en"
501 my ($mds_element,$subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);
502 my $mds_attr = (defined $subelem) ? "qualifier=\"$subelem\"" : "";
503
504 my $escaped_value = &docprint::escape_text($data->[1]);
505
506 $all_text .= " <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
507 }
508 }
509 }
510
511 $all_text .= " </$mds_prefix:$mds_prefix>\n";
512
513
514 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
515
516 return $all_text;
517}
518
519
520
521sub buffer_mets_dmdSection_section_xml
522{
523 my $self = shift(@_);
524 my ($doc_obj,$section) = @_;
525
526 $section="" unless defined $section;
527
528 my $section_ptr=$doc_obj->_lookup_section($section);
529 return "" unless defined $section_ptr;
530
531 my $all_text = "";
532
533 $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
534 $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
535 $all_text .= $self->buffer_mets_amdSec_footer($section);
536
537 # for each metadata set
538 my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);
539
540 foreach my $md_set (keys %$md_sets)
541 {
542 # Greenstone's agnostic approach to metadata sets conflicts with
543 # Fedoras more clinically prescribed one. Fake a namespace for
544 # each $md_set to keep both sides happy
545
546 my $fake_namespace
547 = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";
548 my $id_caps = $md_set;
549 $id_caps =~ tr/[a-z]/[A-Z]/;
550
551 $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
552 $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
553 $all_text .= $self->buffer_mets_amdSec_footer($section);
554 }
555
556
557 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
558 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
559 }
560
561 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
562
563 return $all_text;
564}
565
566
567sub doctxt_to_xlink
568{
569 my $self = shift @_;
570 my ($fname,$working_dir) = @_;
571
572 my $xlink_href;
573
574 my $fedora_prefix = $ENV{'FEDORA_HOME'};
575 if (!defined $fedora_prefix) {
576 $xlink_href = "file:$fname";
577 }
578 else
579 {
580 my $collectparent;
581 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
582 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
583 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
584 }
585 else { # try the default location for the web directory
586 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
587 }
588 }
589 else {
590 # greenstone 2
591 $collectparent = $ENV{'GSDLHOME'};
592 }
593
594 my $gsdl_href = &util::filename_cat($working_dir, $fname);
595 $collectparent =~ s/\\/\\\\/g; # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
596 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
597 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
598 my $localfedora = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
599
600 my $greenstone_url_prefix = &util::get_greenstone_url_prefix();
601 # prepend url_prefix (which will contain the forward slash upfront)
602 if($ENV{'GSDL3SRCHOME'} && -e $localfedora) { # Fedora uses Greenstone's tomcat.
603 $gsdl_href = "$greenstone_url_prefix/sites/localsite/$gsdl_href"; # Default: /greenstone3/sites/localsite/$gsdl_href
604 } else {
605 $gsdl_href = "$greenstone_url_prefix/$gsdl_href"; # By default: "/greenstone/$gsdl_href";
606 }
607
608 my $fserver = $ENV{'FEDORA_HOSTNAME'};
609 my $fport = $ENV{'FEDORA_SERVER_PORT'};
610
611 my $fdomain = "http://$fserver:$fport";
612 $xlink_href = "$fdomain$gsdl_href";
613#ERROR: $xlink_href = "$fname";
614 }
615
616 return $xlink_href;
617
618}
619
620
621sub buffer_mets_fileSection_toc
622{
623 my $self = shift(@_);
624 my ($doc_obj,$section,$working_dir) = @_;
625
626 my $opt_attr = "OWNERID=\"M\"";
627
628 my $all_text = ' <mets:fileGrp ID="TOC">'. "\n";
629 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n";
630 my $xlink = $self->doctxt_to_xlink("doctoc.xml",$working_dir);
631
632 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
633
634 $all_text .= ' xlink:title="Table of Contents"/>' . "\n";
635 $all_text .= " </mets:file>\n";
636 $all_text .= " </mets:fileGrp>\n";
637
638 return $all_text;
639}
640
641
642sub buffer_mets_fileSection_section_xml
643{
644 my $self = shift(@_);
645 my ($doc_obj,$section,$working_dir) = @_;
646
647 my $is_txt_split = 1;
648 my $opt_owner_id = "OWNERID=\"M\"";
649
650 my $all_text
651 = $self->SUPER::buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir,$is_txt_split, $opt_owner_id,"SECTION");
652
653
654 return $all_text;
655}
656
657sub buffer_mets_fileWhole_section_xml
658{
659 my $self = shift(@_);
660 my ($doc_obj,$section,$working_dir) = @_;
661
662 my $section_ptr = $doc_obj-> _lookup_section($section);
663 return "" unless defined $section_ptr;
664
665 my $all_text="";
666
667 my $fileID=0;
668
669 # Output the fileSection for the whole section
670 # => get the sourcefile and associative file
671
672 my $id_root = "";
673 my $opt_owner_id = "OWNERID=\"M\"";
674
675
676 foreach my $data (@{$section_ptr->{'metadata'}}){
677 my $escaped_value = &docprint::escape_text($data->[1]);
678
679 if ($data->[0] eq "gsdlassocfile"){
680
681 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
682 my $assoc_file = $1;
683 my $mime_type = $2;
684 my $assoc_dir = $3;
685
686 $id_root = "FG$assoc_file";
687
688 $id_root =~ s/\//_/g;
689 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
690
691 # The assoc_file's name may be url-encoded, so the xlink_href in the <mets:FLocat>
692 # element must be the url to this (possibly url-encoded) filename
693 my $assocfile_url = &unicode::filename_to_url($assoc_file);
694 my $assfilePath = ($assoc_dir eq "") ? $assocfile_url : "$assoc_dir/$assocfile_url";
695 ++$fileID;
696
697 my $mime_attr = "MIMETYPE=\"$mime_type\"";
698 my $xlink_title = "xlink:title=\"$assoc_file\"";
699
700 my $id_attr;
701 my $xlink_href;
702
703 $id_attr = "ID=\"F$id_root.0\"";
704
705 my $fedora_prefix = $ENV{'FEDORA_HOME'};
706 if (!defined $fedora_prefix) {
707 $xlink_href = "xlink:href=\"$assfilePath\"";
708 }
709 else
710 {
711 my $collectparent;
712 if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
713 if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
714 $collectparent = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite");
715 }
716 else { # try the default location for the web directory
717 $collectparent = &util::filename_cat($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
718 }
719 }
720 else {
721 # greenstone 2
722 $collectparent = $ENV{'GSDLHOME'};
723 }
724
725 my $gsdl_href = &util::filename_cat($working_dir,$assfilePath);
726 $collectparent =~ s/\\/\\\\/g; # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
727 $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
728 $gsdl_href =~ s/\\/\//g; # make sure we have url paths (which only use / not \)
729 my $localfedora = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
730
731 my $greenstone_url_prefix = &util::get_greenstone_url_prefix();
732 # prepend url_prefix (which will contain the forward slash upfront)
733 if($ENV{'GSDL3SRCHOME'} && -e $localfedora) { # Fedora uses Greenstone's tomcat.
734 $gsdl_href = "$greenstone_url_prefix/sites/localsite/$gsdl_href"; # Default: /greenstone3/sites/localsite/$gsdl_href
735 } else {
736 $gsdl_href = "$greenstone_url_prefix/$gsdl_href"; # By default: "/greenstone/$gsdl_href";
737 }
738
739 my $fserver = $ENV{'FEDORA_HOSTNAME'};
740 my $fport = $ENV{'FEDORA_SERVER_PORT'};
741
742 my $fdomain = "http://$fserver:$fport";
743 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
744#ERROR: $xlink_href = "xlink:href=\"$assfilePath\"";
745 }
746
747 my $top_section = $doc_obj->get_top_section();
748 my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
749
750### print STDERR "**** mime-type: $mime_attr\n";
751
752 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
753 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
754
755 $all_text .= " </mets:file>\n";
756
757 $all_text .= " </mets:fileGrp>\n";
758 }
759 }
760
761 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
762
763 return $all_text;
764}
765
766
7671;
Note: See TracBrowser for help on using the repository browser.