root/main/trunk/greenstone2/perllib/plugouts/FedoraMETSPlugout.pm @ 32530

Revision 32511, 23.6 KB (checked in by ak19, 13 months ago)

Running plugoutinfo.pl with describeall or listall flag would break on FedoraMETSPlugout when either FEDORA_HOME or FEDORA_VERSION aren't set (as is often the case), as there's a die statement in the BEGIN of FedoraMETSPlugout. Needed to run die if either FEDORA env var is not set only if the plugout is NOT in info_only mode in plugout constructor. However, info_only mode was never set in any of the plugouts, so had to add set up the infrastructure for it in plugoutinfo.pl and plugout.pm. Then added the info_only test to all teh plugouts, even though it's redundant in most of them for making sure future changes to any plugout's constructors does not break plugoutinfo.pl.

Line 
1###########################################################################
2#
3# FedoraMETSPlugout.pm -- the plugout module for METS archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# But WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package FedoraMETSPlugout;
27
28use strict;
29no strict 'refs';
30
31#eval {require bytes};
32#use util;
33use METSPlugout;
34#use docprint; # for escape_text
35
36sub BEGIN {
37    @FedoraMETSPlugout::ISA = ('METSPlugout');
38
39    # Need both environment variables FEDORA_HOME and FEDORA_VERSION to be set to use this plugout!
40    # But we only test these vars are set in the constructor, so as not to break plugoutinfo.pl by
41    # a 'die' statement here nor break the xml that plugoutinfo.pl produces with print stmts here
42
43
44    $ENV{'FEDORA_HOSTNAME'} = "localhost" if (!defined $ENV{'FEDORA_HOSTNAME'});
45    $ENV{'FEDORA_SERVER_PORT'} = "8080" if (!defined $ENV{'FEDORA_SERVER_PORT'});
46    $ENV{'FEDORA_USER'}     = "fedoraAdmin" if (!defined $ENV{'FEDORA_USER'});
47    $ENV{'FEDORA_PASS'}     = "fedoraAdmin" if (!defined $ENV{'FEDORA_PASS'});
48    $ENV{'FEDORA_PROTOCOL'} = "http" if (!defined $ENV{'FEDORA_PROTOCOL'});
49    $ENV{'FEDORA_PID_NAMESPACE'} = "greenstone" if (!defined $ENV{'FEDORA_PID_NAMESPACE'});
50    $ENV{'FEDORA_PREFIX'} = "/fedora" if (!defined $ENV{'FEDORA_PREFIX'});
51}
52
53my $arguments = [
54      { 'name' => "fedora_namespace",
55    'desc' => "{FedoraMETSPlugout.fedora_namespace}",
56    'type' => "string",
57        'deft' =>  "greenstone",
58    'reqd' => "no",   
59    'hiddengli' => "no"}
60          ];
61
62
63
64my $options = { 'name'     => "FedoraMETSPlugout",
65        'desc'     => "{FedoraMETSPlugout.desc}",
66        'abstract' => "no",
67        'inherits' => "yes",
68            'args'     => $arguments
69                };
70
71
72sub new
73{
74    my ($class) = shift (@_);
75    my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
76    push(@$plugoutlist, $class);
77   
78   
79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
80    push(@{$hashArgOptLists->{"OptList"}},$options);
81   
82    my $self = new METSPlugout($plugoutlist,$inputargs,$hashArgOptLists);
83   
84    if ($self->{'info_only'}) {
85        # don't worry about any options etc
86        return bless $self, $class;
87    }
88   
89    # else (if we're not producing information for plugoutinfo.pl) make sure
90    # BOTH env vars FEDORA_HOME and FEDORA_VERSION are set. If either not set, die.
91    if ((!defined $ENV{'FEDORA_HOME'}) || (!defined $ENV{'FEDORA_VERSION'})) {
92    if (defined $ENV{'FEDORA_HOME'}) {
93        print STDERR "FEDORA_HOME = $ENV{'FEDORA_HOME'}";
94    }
95   
96    if (defined $ENV{'FEDORA_VERSION'}) {
97        print STDERR "FEDORA_VERSION = $ENV{'FEDORA_VERSION'}";
98    }
99   
100    die "ERROR: Need both environment variables FEDORA_HOME and FEDORA_VERSION to be set\n";
101    }
102   
103    return bless $self, $class;
104}
105
106
107sub output_mets_xml_header
108{
109    my $self = shift(@_);
110    my ($handle, $OID, $doc_title) = @_;
111
112    my $fnamespace = $self->{'fedora_namespace'};
113    my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
114
115    my $collection = $ENV{'GSDLCOLLECTION'};
116
117    # Might need the following in the schemeLocation attribute for Fedora3
118    #   http://www.fedora.info/definitions/1/0/mets-fedora-ext1-1.xsd
119    my $extra_attr = "OBJID=\"$oid_namespace:$collection-$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\"";
120
121    my $extra_schema = undef;
122
123    if (defined $ENV{'FEDORA_VERSION'} && $ENV{'FEDORA_VERSION'} =~ m/^2/) { # checking if major version is 2
124        $extra_schema = "http://www.fedora.info/definitions/1/0/mets-fedora-ext.xsd";
125    }
126    else {
127    $extra_attr .= " EXT_VERSION=\"1.1\"";
128    }
129   
130    $self->output_mets_xml_header_extra_attribute($handle,$extra_attr,$extra_schema);
131
132    print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active
133
134}
135
136#
137#  Print out "family" of doctxt.xml files
138#
139
140sub saveas_doctxt_section
141{
142    my $self = shift (@_);
143    my ($doc_obj,$working_dir,$section) = @_;
144
145    my $section_ptr=$doc_obj->_lookup_section($section);
146    return unless defined $section_ptr;
147
148    my $section_fnum ="1". $section;
149    $section_fnum =~ s/\./_/g;
150
151    my $doc_txt_file = &FileUtils::filenameConcatenate ($working_dir,"doctxt$section_fnum.xml");   
152   
153    $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
154
155    my $outhandler;
156
157    if (defined $self->{'xslt_writer'}){
158    $outhandler = $self->{'xslt_writer'};
159    }
160    else{
161    $outhandler = $self->get_output_handler($doc_txt_file);
162    }
163
164    $self->output_xml_header($outhandler);
165
166    ## change links to be Fedora cognant:
167    my $txt = $section_ptr->{'text'};
168    $section_ptr->{'text'} = $self->adjust_links($doc_obj, \$txt);
169
170    $self->output_txt_section($outhandler,$doc_obj, $section);
171    $self->output_xml_footer($outhandler);
172   
173
174    if (defined $self->{'xslt_writer'}){     
175    $self->close_xslt_pipe();
176    }
177    else{
178    close($outhandler);
179    }
180
181
182    # Output all the subsections as separate files
183    foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
184   
185    $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
186    }
187}
188
189sub adjust_links
190{
191    my $self = shift(@_);
192    my ($doc_obj, $textref) = @_;
193
194    ## change links to be Fedora cognant:
195    # 1. retrieve txt of section - $$textref
196    # 2. change it:
197    # /$ENV{'FEDORA_PREFIX'}/objects/$greenstone-docobj-hash-xxx/datastreams/FG<orig-img-name>/content
198    # 3. only replace it back in doc_obj if we didn't get a ref in the first place
199
200    my $OID = $doc_obj->get_OID();
201    my $fnamespace = $self->{'fedora_namespace'};
202    if($OID ne "collection" && defined $fnamespace) {
203    my $fed_id = "$fnamespace:".$ENV{'GSDLCOLLECTION'}."-$OID"; #oid_namespace:collection-OID
204    my $fedora_url_prefix = $ENV{'FEDORA_PREFIX'}."/objects/$fed_id/datastreams/FG";
205    my $fedora_url_suffix = "/content";
206
207    $$textref =~ s/(<(?:img|embed|table|tr|td|link|script)[^>]*?(?:src|background|href)\s*=\s*)((?:[\"][^\"]+[\"])|(?:[\'][^\']+[\'])|(?:[^\s\/>]+))([^>]*>)/$self->replace_rel_link($1, $2, $3, $fedora_url_prefix, $fedora_url_suffix)/isge;
208#   print STDERR "*** all text after: $$textref\n\n";
209    }
210
211    return $$textref;
212}
213
214# replace relative link with the prefix and suffix given
215sub replace_rel_link
216{
217    my $self = shift (@_);
218    my ($front, $link, $back, $url_prefix, $url_suffix) = @_;
219
220    # only process relative links. Return if absolute link
221    if($link =~ m/^http/) {
222    return "$front$link$back";
223    }
224
225    # remove quotes from link at start and end if necessary
226    if ($link=~/^[\"\']/) {
227    $link=~s/^[\"\']//;
228    $link=~s/[\"\']$//;
229    $front.='"';
230    $back="\"$back";
231    }
232
233    # remove any _httpdocimg_/ that greenstone may have prefixed to the image
234    $link =~ s/^_httpdocimg_(?:\/|\\)//;
235   
236    return "$front$url_prefix$link$url_suffix$back";
237}
238
239
240sub saveas_doctxt
241{
242    my $self = shift (@_);
243    my ($doc_obj,$working_dir) = @_;
244
245    my $section = $doc_obj->get_top_section();
246
247    $self->saveas_doctxt_section($doc_obj,$working_dir,$section);
248
249    $self->saveas_toc($doc_obj,$working_dir);
250}
251
252sub buffer_toc
253{
254    my $self = shift (@_);
255    my ($doc_obj,$working_dir,$section,$depth) = @_;
256   
257    my $section_ptr=$doc_obj->_lookup_section($section);
258    return "" unless defined $section_ptr;
259
260    my $all_text = "";
261
262    my $section_num ="1". $section;
263    my $indent = " " x ($depth*2);
264
265    $all_text .= "$indent<Section id=\"$section_num\">\n";
266
267    # Output all the subsections as separate files
268    foreach my $subsection (@{$section_ptr->{'subsection_order'}})
269    {
270    $all_text
271        .= $self->buffer_toc($doc_obj, $working_dir,
272                 "$section.$subsection",$depth+1);
273    }
274
275    $all_text .= "$indent</Section>\n";
276
277    return $all_text;
278}
279
280
281sub saveas_toc
282{
283    my $self = shift (@_);
284    my ($doc_obj,$working_dir) = @_;
285
286    my $section = $doc_obj->get_top_section();
287    my $section_ptr=$doc_obj->_lookup_section($section);
288    my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
289
290    # If num_subsections is 0, then there is no nested TOC
291
292    if ($num_subsections>0) {
293
294    my $doc_txt_file = &FileUtils::filenameConcatenate ($working_dir,"doctoc.xml");
295   
296    $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
297   
298    my $outhandler;
299   
300    if (defined $self->{'xslt_writer'}){
301        $outhandler = $self->{'xslt_writer'};
302    }
303    else{
304        $outhandler = $self->get_output_handler($doc_txt_file);
305    }
306    print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
307   
308    if (defined $self->{'xslt_writer'}){     
309        $self->close_xslt_pipe();
310    }
311    else{
312        close($outhandler);
313    }
314    }
315
316}
317
318
319sub buffer_mets_relsext_xml
320{
321    my $self = shift(@_);
322    my ($doc_obj) = @_;
323
324    my $OID = $doc_obj->get_OID();
325
326    my $fnamespace = $self->{'fedora_namespace'};
327    my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test";
328    my $collection = $ENV{'GSDLCOLLECTION'};
329
330    my $fed_id = "$oid_namespace:$collection-$OID";
331
332    my $all_text = "";
333
334    my $top_section = $doc_obj->get_top_section();
335    my $plugin_type = $doc_obj->get_metadata_element($top_section,"Plugin");
336   
337# Images do not get ingested into Fedora when on Linux if the following is included
338# Needs more investigation, since we'd like a working version of the following
339# in order to get thumbnails working and other stuff.
340#    if ((defined $plugin_type) && ($plugin_type eq "ImagePlugin"))
341#    {
342#
343#   $all_text .= "<mets:amdSec ID=\"RELS-EXT\">\n";
344#   $all_text .= "  <mets:techMD ID=\"RELS-EXT1.0\" STATUS=\"A\">\n";
345#   $all_text .= "    <mets:mdWrap LABEL=\"RELS-EXT - RDF formatted relationship metadata\" MDTYPE=\"OTHER\" MIMETYPE=\"text/xml\">\n";
346#   $all_text .= "      <mets:xmlData>\n";
347#   $all_text .= "        <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:fedora-model=\"info:fedora/fedora-system:def/model#\">\n";
348#   $all_text .= "          <rdf:Description rdf:about=\"info:fedora/$fed_id\">\n";
349#   $all_text .= "            <fedora-model:hasContentModel rdf:resource=\"info:fedora/demo:UVA_STD_IMAGE\"/>\n";
350#   $all_text .= "          </rdf:Description>\n";
351#   $all_text .= "        </rdf:RDF>\n";
352#   $all_text .= "      </mets:xmlData>\n";
353#   $all_text .= "    </mets:mdWrap>\n";
354#   $all_text .= "  </mets:techMD>\n";
355#   $all_text .= "</mets:amdSec>\n";
356#    }
357
358    return $all_text;
359}
360
361
362#
363#  Print out docmets.xml file
364#
365sub output_mets_section
366{
367    my $self = shift(@_);
368    my ($handle, $doc_obj, $section, $working_dir) = @_;
369
370    # print out the dmdSection
371    print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section);
372
373    print $handle $self->buffer_mets_relsext_xml($doc_obj);
374
375    print $handle "<mets:fileSec>\n";
376    print $handle "  <mets:fileGrp ID=\"DATASTREAMS\">\n";
377
378    # Generate Filestream for Table of Contents (TOC)
379    my $section_ptr=$doc_obj->_lookup_section($section);
380    my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
381
382    # If num_subsections is 0, then there is no nested TOC
383
384    if ($num_subsections>0) {
385    print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
386    }
387
388    # print out the fileSection by sections
389    print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir);
390
391    # print out the whole fileSection
392    print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$working_dir);
393
394    print $handle "  </mets:fileGrp>\n";
395    print $handle "</mets:fileSec>\n";
396 
397    # print out the StructMapSection by sections
398
399    my $struct_type = "fedora:dsBindingMap";
400
401    # If document is going to make use of deminators (BMech and BDef) then
402    # need to code up more output XML here (structMap)and in 
403    # METS:behaviorSec (Fedora extension?) sections
404 
405}
406
407sub buffer_mets_amdSec_header
408{
409    my $self = shift(@_);
410    my ($section,$id) = @_;
411
412    # convert section number
413    my $section_num ="1". $section;
414
415    my $all_text = "";
416
417    my $label_attr = "";
418
419    $all_text .= "<mets:amdSec ID=\"$id$section\" >\n";
420    $all_text .= "  <mets:techMD ID=\"$id$section.0\">\n"; # .0 fedora version number?
421   
422    $label_attr = "LABEL=\"Metadata\"";
423
424    $all_text .= "  <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"".$id."gsdl$section_num\">\n";
425    $all_text .= "    <mets:xmlData>\n";
426
427    return $all_text;
428
429}
430
431sub buffer_mets_amdSec_footer
432{
433    my $self = shift(@_);
434
435    my $all_text = "";
436   
437    $all_text .= "    </mets:xmlData>\n";
438    $all_text .= "  </mets:mdWrap>\n";
439   
440    $all_text .= "  </mets:techMD>\n";
441    $all_text .= "</mets:amdSec>\n";
442
443    return $all_text;
444
445}
446
447sub oai_dc_metadata_xml
448{
449    my $self = shift(@_);
450    my ($doc_obj,$section) = @_;
451
452    my $all_text = "";
453
454    my $dc_namespace = "";
455    $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
456    $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ";
457   
458    $all_text .= "  <oai_dc:dc $dc_namespace>\n";
459   
460    $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc");
461    $all_text .= "  </oai_dc:dc>\n";
462
463    return $all_text;
464}
465
466
467
468
469
470# Work out the what the metadata set prefixes (dc,dls etc.) are for
471# this document
472
473sub metadata_set_prefixes
474{
475    my $self = shift(@_);
476    my ($doc_obj, $section) = @_;
477   
478    $section="" unless defined $section;
479   
480    my $section_ptr = $doc_obj->_lookup_section($section);
481    return {} unless defined $section_ptr;
482
483    my $unique_prefix = {};
484
485    foreach my $data (@{$section_ptr->{'metadata'}})
486    {
487    my ($prefix) = ($data->[0]=~ m/^(.*?)\./);
488   
489    if (defined $prefix)
490    {
491        next if ($prefix eq "dc"); # skip dublin core as handled separately elsewhere
492
493        $unique_prefix->{$prefix} = 1;
494    }
495    else
496    {
497        $unique_prefix->{"ex"} = 1;
498    }
499
500    }
501
502    return $unique_prefix;
503}
504
505
506sub mds_metadata_xml
507{
508    my $self = shift(@_);
509    my ($doc_obj, $section, $mds_prefix, $namespace) = @_;
510   
511    # build up string of metadata with $mds_prefix
512    $section="" unless defined $section;
513   
514    my $section_ptr = $doc_obj->_lookup_section($section);
515    return "" unless defined $section_ptr;
516
517    my $all_text="";
518    $all_text .= "  <$mds_prefix:$mds_prefix $namespace>\n";
519
520
521    foreach my $data (@{$section_ptr->{'metadata'}})
522    {
523    if ($data->[0]=~ m/^(?:(.*?)\.)?(.*)$/)
524    {
525        my $curr_mds_prefix = $1;
526        my $mds_full_element = $2;
527
528        $curr_mds_prefix = "ex" unless defined $curr_mds_prefix;
529
530        if ($curr_mds_prefix eq $mds_prefix)
531        {
532        # split up full element in the form Title^en into element=Title, attr="en"
533        my ($mds_element,$subelem) = ($mds_full_element =~ m/^(.*?)(?:\^(.*))?$/);
534        my $mds_attr = (defined $subelem) ? "qualifier=\"$subelem\"" : "";
535       
536        my $escaped_value = &docprint::escape_text($data->[1]);
537       
538        $all_text .= "   <$mds_prefix:metadata name=\"$mds_element\" $mds_attr>$escaped_value</$mds_prefix:metadata>\n";
539        }
540    }
541    }
542
543    $all_text .= "  </$mds_prefix:$mds_prefix>\n";
544
545
546    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
547
548    return $all_text;
549}
550
551
552
553sub buffer_mets_dmdSection_section_xml
554{
555    my $self = shift(@_);
556    my ($doc_obj,$section) = @_;
557   
558    $section="" unless defined $section;
559   
560    my $section_ptr=$doc_obj->_lookup_section($section);
561    return "" unless defined $section_ptr;
562
563    my $all_text = "";
564
565    $all_text .= $self->buffer_mets_amdSec_header($section,"DC");
566    $all_text .= $self->oai_dc_metadata_xml($doc_obj,$section);
567    $all_text .= $self->buffer_mets_amdSec_footer($section);
568
569    # for each metadata set
570    my $md_sets = $self->metadata_set_prefixes($doc_obj,$section);
571
572    foreach my $md_set (keys %$md_sets)
573    {
574    # Greenstone's agnostic approach to metadata sets conflicts with
575    # Fedoras more clinically prescribed one.  Fake a namespace for
576    # each $md_set to keep both sides happy
577
578    my $fake_namespace
579        = "xmlns:$md_set=\"http://www.greenstone.org/namespace/fake/$md_set\"";
580    my $id_caps = $md_set;
581    $id_caps =~ tr/[a-z]/[A-Z]/;
582
583    $all_text .= $self->buffer_mets_amdSec_header($section,$id_caps);
584    $all_text .= $self->mds_metadata_xml($doc_obj,$section,$md_set,$fake_namespace);
585    $all_text .= $self->buffer_mets_amdSec_footer($section);
586    }
587
588
589    foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
590       $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection");
591    }
592   
593    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
594
595    return $all_text;
596}
597
598
599sub doctxt_to_xlink
600{
601    my $self = shift @_;
602    my ($fname,$working_dir) = @_;
603
604    my $xlink_href;
605
606    my $fedora_prefix = $ENV{'FEDORA_HOME'};
607    if (!defined $fedora_prefix) {
608    $xlink_href  = "file:$fname";
609    }
610    else
611    {
612    my $collectparent;
613    if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
614        if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
615        $collectparent = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites","localsite");
616        }
617        else { # try the default location for the web directory
618        $collectparent = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
619        }
620    }
621    else {
622        # greenstone 2
623        $collectparent = $ENV{'GSDLHOME'};
624    }
625   
626    my $gsdl_href = &FileUtils::filenameConcatenate($working_dir, $fname);
627    $collectparent = &util::filename_to_regex($collectparent); # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
628    $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
629    $gsdl_href =~ s/\\/\//g;                   # make sure we have url paths (which only use / not \)
630    my $localfedora = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
631   
632    my $greenstone_url_prefix = &util::get_greenstone_url_prefix();
633    # prepend url_prefix (which will contain the forward slash upfront)
634    if($ENV{'GSDL3SRCHOME'} && -e $localfedora) {               # Fedora uses Greenstone's tomcat.
635        $gsdl_href = "$greenstone_url_prefix/sites/localsite/$gsdl_href";     # Default: /greenstone3/sites/localsite/$gsdl_href
636    } else {
637        $gsdl_href = "$greenstone_url_prefix/$gsdl_href";       # By default: "/greenstone/$gsdl_href";
638    }
639
640    my $fserver = $ENV{'FEDORA_HOSTNAME'};
641    my $fport = $ENV{'FEDORA_SERVER_PORT'};
642   
643    my $fdomain = "http://$fserver:$fport";
644    $xlink_href  = "$fdomain$gsdl_href";
645#ERROR: $xlink_href  = "$fname";
646    }
647
648    return $xlink_href;
649
650}
651
652
653sub buffer_mets_fileSection_toc
654{
655    my $self = shift(@_);
656    my ($doc_obj,$section,$working_dir) = @_;
657
658    my $opt_attr = "OWNERID=\"M\"";
659
660    my $all_text = '  <mets:fileGrp ID="TOC">'. "\n";
661    $all_text .= "    <mets:file MIMETYPE=\"text/xml\" ID=\"FILETOC\" $opt_attr >\n";   
662    my $xlink = $self->doctxt_to_xlink("doctoc.xml",$working_dir);
663
664    $all_text .= '      <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
665
666    $all_text .= ' xlink:title="Table of Contents"/>' . "\n";
667    $all_text .= "    </mets:file>\n";
668    $all_text .= "  </mets:fileGrp>\n";
669
670    return $all_text;
671}
672
673
674sub buffer_mets_fileSection_section_xml
675{
676    my $self = shift(@_);
677    my ($doc_obj,$section,$working_dir) = @_;
678
679    my $is_txt_split = 1;
680    my $opt_owner_id = "OWNERID=\"M\"";
681
682    my $all_text
683    = $self->SUPER::buffer_mets_fileSection_section_xml($doc_obj,$section,$working_dir,$is_txt_split, $opt_owner_id,"SECTION");
684
685
686    return $all_text;
687}
688
689sub buffer_mets_fileWhole_section_xml
690{
691    my $self = shift(@_);
692    my ($doc_obj,$section,$working_dir) = @_;
693
694    my $section_ptr = $doc_obj-> _lookup_section($section);
695    return "" unless defined $section_ptr;
696   
697    my $all_text="";
698
699    my $fileID=0;
700
701    # Output the fileSection for the whole section
702    #  => get the sourcefile and associative file
703
704    my $id_root = "";
705    my $opt_owner_id = "OWNERID=\"M\"";
706
707
708    foreach my $data (@{$section_ptr->{'metadata'}}){
709       my $escaped_value = &docprint::escape_text($data->[1]);
710
711       if ($data->[0] eq "gsdlassocfile"){
712       
713       $escaped_value =~ m/^(.*?):(.*):(.*)$/;
714       my $assoc_file = $1;
715       my $mime_type  = $2;
716       my $assoc_dir  = $3;
717
718       $id_root = "FG$assoc_file";
719
720       $id_root =~ s/\//_/g;
721       $all_text .= "  <mets:fileGrp ID=\"$id_root\">\n";
722       
723       # The assoc_file's name may be url-encoded, so the xlink_href in the <mets:FLocat>
724       # element must be the url to this (possibly url-encoded) filename
725       my $assocfile_url = &unicode::filename_to_url($assoc_file);
726       my $assfilePath = ($assoc_dir eq "") ? $assocfile_url : "$assoc_dir/$assocfile_url";
727       ++$fileID;
728       
729       my $mime_attr   = "MIMETYPE=\"$mime_type\"";
730       my $xlink_title = "xlink:title=\"$assoc_file\"";
731
732       my $id_attr;
733       my $xlink_href;
734
735       $id_attr = "ID=\"F$id_root.0\"";
736
737       my $fedora_prefix = $ENV{'FEDORA_HOME'};
738       if (!defined $fedora_prefix) {
739           $xlink_href  = "xlink:href=\"$assfilePath\"";
740       }
741       else
742       {
743           my $collectparent;
744           if (defined $ENV{'GSDL3SRCHOME'}) { # we're dealing with a GS3 server
745           if(defined $ENV{'GSDL3HOME'}) { # in case the web directory is located in a separate place
746               $collectparent = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites","localsite");
747           }
748           else { # try the default location for the web directory
749               $collectparent = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite");
750           }
751           }         
752           else {
753           # greenstone 2
754           $collectparent = $ENV{'GSDLHOME'};
755           }
756
757           my $gsdl_href = &FileUtils::filenameConcatenate($working_dir,$assfilePath);
758           $collectparent = &util::filename_to_regex($collectparent); # escape reserved metacharacter \ in path (by replacing it with \\) for substitution
759           $gsdl_href =~ s/^$collectparent(\/|\\)?//; # remove the collectparent path in gsdl_href and any trailing slash
760           $gsdl_href =~ s/\\/\//g;                   # make sure we have url paths (which only use / not \)
761           my $localfedora = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
762
763           my $greenstone_url_prefix = &util::get_greenstone_url_prefix();
764           # prepend url_prefix (which will contain the forward slash upfront)
765           if($ENV{'GSDL3SRCHOME'} && -e $localfedora) {                 # Fedora uses Greenstone's tomcat.
766           $gsdl_href = "$greenstone_url_prefix/sites/localsite/$gsdl_href";    # Default: /greenstone3/sites/localsite/$gsdl_href
767           } else {
768           $gsdl_href = "$greenstone_url_prefix/$gsdl_href";         # By default: "/greenstone/$gsdl_href";
769           }
770
771           my $fserver = $ENV{'FEDORA_HOSTNAME'};
772           my $fport = $ENV{'FEDORA_SERVER_PORT'};
773           
774           my $fdomain = "http://$fserver:$fport";
775           $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
776#ERROR: $xlink_href = "xlink:href=\"$assfilePath\"";
777       }
778       
779       my $top_section = $doc_obj->get_top_section();
780       my $id = $doc_obj->get_metadata_element($top_section,"Identifier");
781       
782###    print STDERR "**** mime-type: $mime_attr\n";
783
784       $all_text .= "    <mets:file $mime_attr $id_attr $opt_owner_id >\n";
785       $all_text .= "      <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
786
787       $all_text .= "    </mets:file>\n";
788       
789       $all_text .= "  </mets:fileGrp>\n";     
790       }
791   }
792   
793    $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
794   
795    return $all_text;
796}
797
798
7991;
Note: See TracBrowser for help on using the browser.