Changeset 22338


Ignore:
Timestamp:
2010-07-02T12:57:33+12:00 (14 years ago)
Author:
ak19
Message:

Image URLs are adjusted to refer to their location in Fedora.

Location:
main/trunk/greenstone2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/g2f-import.pl

    r21687 r22338  
    1515    $ENV{'FEDORA_PROTOCOL'} = "http" if (!defined $ENV{'FEDORA_PROTOCOL'});
    1616    $ENV{'FEDORA_PID_NAMESPACE'} = "greenstone" if (!defined $ENV{'FEDORA_PID_NAMESPACE'});
     17    $ENV{'FEDORA_PREFIX'} = "/fedora" if (!defined $ENV{'FEDORA_PREFIX'});
    1718
    1819    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/");
     
    187188    }
    188189
    189     # if GS3, and if Fedora uses Greenstone's tomcat, then we do not need to write out the file gsdl.xml into Fedora's tomcat
     190    # if GS3, and if Fedora uses Greenstone 3's tomcat, then we do not need to write out the file gsdl.xml into Fedora's tomcat
    190191    my $localfedora = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml");
    191192    unless($ENV{'GSDL3SRCHOME'} && -e $localfedora) {
  • main/trunk/greenstone2/perllib/plugouts/FedoraMETSPlugout.pm

    r21719 r22338  
    131131
    132132    $self->output_xml_header($outhandler);
     133
     134    ## change links to be Fedora cognizant:
     135    my $txt = $section_ptr->{'text'};
     136    $section_ptr->{'text'} = $self->adjust_links($doc_obj, \$txt);
     137
    133138    $self->output_txt_section($outhandler,$doc_obj, $section);
    134139    $self->output_xml_footer($outhandler);
     
    148153    $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection");
    149154    }
    150 
    151 
     155}
     156
     157
     158#sub adjust_text_before_saving()
     159sub adjust_links()
     160{
     161    my $self = shift(@_);
     162    my ($doc_obj, $textref) = @_;
     163
     164    ## change links to be Fedora cognant:
     165    # 1. retrieve txt  $$textref ???
     166    # 2. change it:
     167    # /$ENV{'FEDORA_PREFIX'}/objects/$greenstone-docobj-hash-xxx/datastreams/FG<orig-img-name>/content
     168    # (Note that the first image is always "url", instead of FG<orig-img-name>)
     169    # 3. only replace it back in doc_obj if we didn't get a ref in the first place
     170
     171    my $OID = $doc_obj->get_OID();
     172    my $fnamespace = $self->{'fedora_namespace'};
     173    my $replace_img_name = 1;
     174    if($OID ne "collection" && defined $fnamespace) {
     175    my $fed_id = "$fnamespace:".$ENV{'GSDLCOLLECTION'}."-$OID"; #oid_namespace:collection-OID
     176    my $fedora_url_prefix = $ENV{'FEDORA_PREFIX'}."/objects/$fed_id/datastreams/";
     177    my $fedora_url_suffix = "/content";
     178
     179    $$textref =~ s/(<(?:img|embed|table|tr|td|link|script)[^>]*?(?:src|background|href)\s*=\s*)((?:[\"][^\"]+[\"])|(?:[\'][^\']+[\'])|(?:[^\s\/>]+))([^>]*>)/$self->replace_rel_link($1, $2, $3, $fedora_url_prefix, $fedora_url_suffix, $replace_img_name--)/isge;
     180#   print STDERR "*** all text after: $$textref\n\n";
     181    }
     182
     183    return $$textref;
     184}
     185
     186# replace relative link with the prefix and suffix given
     187sub replace_rel_link
     188{
     189    my $self = shift (@_);
     190    my ($front, $link, $back, $url_prefix, $url_suffix, $replace_img_name) = @_;
     191
     192    # only process relative links. Return if absolute link
     193    if($link =~ m/^http/) {
     194    return "$front$link$back";
     195    }
     196
     197    # remove quotes from link at start and end if necessary
     198    if ($link=~/^[\"\']/) {
     199    $link=~s/^[\"\']//;
     200    $link=~s/[\"\']$//;
     201    $front.='"';
     202    $back="\"$back";
     203    }
     204
     205    # remove any _httpdocimg/ that greenstone may have prefixed to the image
     206    $link =~ s/^_httpdocimg_(?:\/|\\)//;
     207
     208    # the datastream of the first image of each section is always called "url"
     209    if($replace_img_name) {
     210    return "$front$url_prefix"."url"."$url_suffix$back";
     211    }
     212    # else
     213    return "$front$url_prefix"."FG"."$link$url_suffix$back";
    152214}
    153215
Note: See TracChangeset for help on using the changeset viewer.