Changeset 22338

Show
Ignore:
Timestamp:
02.07.2010 12:57:33 (9 years ago)
Author:
ak19
Message:

Image URLs are adjusted to refer to their location in Fedora.

Location:
main/trunk/greenstone2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/g2f-import.pl

    r21687 r22338  
    1515    $ENV{'FEDORA_PROTOCOL'} = "http" if (!defined $ENV{'FEDORA_PROTOCOL'}); 
    1616    $ENV{'FEDORA_PID_NAMESPACE'} = "greenstone" if (!defined $ENV{'FEDORA_PID_NAMESPACE'}); 
     17    $ENV{'FEDORA_PREFIX'} = "/fedora" if (!defined $ENV{'FEDORA_PREFIX'}); 
    1718 
    1819    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/"); 
     
    187188    } 
    188189 
    189     # if GS3, and if Fedora uses Greenstone's tomcat, then we do not need to write out the file gsdl.xml into Fedora's tomcat 
     190    # if GS3, and if Fedora uses Greenstone 3's tomcat, then we do not need to write out the file gsdl.xml into Fedora's tomcat 
    190191    my $localfedora = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "fedora.xml"); 
    191192    unless($ENV{'GSDL3SRCHOME'} && -e $localfedora) { 
  • main/trunk/greenstone2/perllib/plugouts/FedoraMETSPlugout.pm

    r21719 r22338  
    131131 
    132132    $self->output_xml_header($outhandler); 
     133 
     134    ## change links to be Fedora cognizant: 
     135    my $txt = $section_ptr->{'text'}; 
     136    $section_ptr->{'text'} = $self->adjust_links($doc_obj, \$txt); 
     137 
    133138    $self->output_txt_section($outhandler,$doc_obj, $section); 
    134139    $self->output_xml_footer($outhandler); 
     
    148153    $self->saveas_doctxt_section($doc_obj, $working_dir, "$section.$subsection"); 
    149154    } 
    150  
    151  
     155} 
     156 
     157 
     158#sub adjust_text_before_saving() 
     159sub adjust_links() 
     160{ 
     161    my $self = shift(@_); 
     162    my ($doc_obj, $textref) = @_; 
     163 
     164    ## change links to be Fedora cognant: 
     165    # 1. retrieve txt  $$textref ??? 
     166    # 2. change it: 
     167    # /$ENV{'FEDORA_PREFIX'}/objects/$greenstone-docobj-hash-xxx/datastreams/FG<orig-img-name>/content 
     168    # (Note that the first image is always "url", instead of FG<orig-img-name>) 
     169    # 3. only replace it back in doc_obj if we didn't get a ref in the first place 
     170 
     171    my $OID = $doc_obj->get_OID(); 
     172    my $fnamespace = $self->{'fedora_namespace'}; 
     173    my $replace_img_name = 1; 
     174    if($OID ne "collection" && defined $fnamespace) { 
     175    my $fed_id = "$fnamespace:".$ENV{'GSDLCOLLECTION'}."-$OID"; #oid_namespace:collection-OID 
     176    my $fedora_url_prefix = $ENV{'FEDORA_PREFIX'}."/objects/$fed_id/datastreams/"; 
     177    my $fedora_url_suffix = "/content"; 
     178 
     179    $$textref =~ s/(<(?:img|embed|table|tr|td|link|script)[^>]*?(?:src|background|href)\s*=\s*)((?:[\"][^\"]+[\"])|(?:[\'][^\']+[\'])|(?:[^\s\/>]+))([^>]*>)/$self->replace_rel_link($1, $2, $3, $fedora_url_prefix, $fedora_url_suffix, $replace_img_name--)/isge; 
     180#   print STDERR "*** all text after: $$textref\n\n"; 
     181    } 
     182 
     183    return $$textref; 
     184} 
     185 
     186# replace relative link with the prefix and suffix given 
     187sub replace_rel_link 
     188{ 
     189    my $self = shift (@_); 
     190    my ($front, $link, $back, $url_prefix, $url_suffix, $replace_img_name) = @_; 
     191 
     192    # only process relative links. Return if absolute link 
     193    if($link =~ m/^http/) { 
     194    return "$front$link$back"; 
     195    } 
     196 
     197    # remove quotes from link at start and end if necessary 
     198    if ($link=~/^[\"\']/) { 
     199    $link=~s/^[\"\']//; 
     200    $link=~s/[\"\']$//; 
     201    $front.='"'; 
     202    $back="\"$back"; 
     203    } 
     204 
     205    # remove any _httpdocimg/ that greenstone may have prefixed to the image 
     206    $link =~ s/^_httpdocimg_(?:\/|\\)//; 
     207 
     208    # the datastream of the first image of each section is always called "url" 
     209    if($replace_img_name) { 
     210    return "$front$url_prefix"."url"."$url_suffix$back"; 
     211    } 
     212    # else 
     213    return "$front$url_prefix"."FG"."$link$url_suffix$back"; 
    152214} 
    153215