Changeset 721


Ignore:
Timestamp:
1999-10-19T16:21:35+13:00 (25 years ago)
Author:
davidb
Message:

Support functions to help with the generation of webpages from
Perl CGI scripts.

Location:
trunk/gsdl/perllib
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r617 r721  
    4040use BasPlug;
    4141use sorttools;
     42use html;
    4243use util;
     44
     45use File::Basename;
    4346
    4447sub BEGIN {
     
    5053    $self = new BasPlug ();
    5154
     55    $self->{'f2d_mapping'} = undef;
     56    $self->{'f2d_partitioned_dirs'} = [];
     57
    5258    return bless $self, $class;
    5359}
     
    5965}
    6066
     67#--
     68# Convert filename to lookup filename
     69#--
     70sub f2d_lookup_filename
     71{
     72    my ($self,$filename) = @_;
     73
     74    my $coldir = $ENV{'GSDLCOLLECTDIR'};
     75    my $dirsep_re = &util::get_re_dirsep();
     76    my $supress_dirs = "(import|building)";
     77
     78    my @part_dirs = @{$self->{'f2d_partitioned_dirs'}};
     79
     80    if (scalar(@part_dirs)>0)
     81    {
     82    my $joined_part_dirs = join('|',@part_dirs);
     83    $supress_dirs .= "($dirsep_re)($joined_part_dirs)";
     84    }
     85
     86    my $lookup_filename = $filename;
     87    $lookup_filename =~ s/^$coldir($dirsep_re)//;
     88    $lookup_filename =~ s/^$supress_dirs($dirsep_re)//;
     89
     90    return $lookup_filename;
     91}
     92
     93
     94#--
     95# file to document mapping
     96#--
     97
     98sub rec_f2d_mapping
     99{
     100    my ($self,$dirname) = @_;
     101
     102    # read all the files in the directory
     103    if (!opendir(DIR, $dirname))
     104    {
     105    print STDERR "HTMLPlug: WARNING - couldn't read directory $dirname";
     106    print STDERR " during file to doc mapping\n";
     107    return;
     108    }
     109    my @dir = readdir (DIR);
     110    closedir (DIR);
     111
     112    # process each file
     113    my $subfile;
     114    foreach $subfile (@dir)
     115    {
     116    if ($subfile !~ /^\.\.?$/)
     117    {
     118        my $filename = &util::filename_cat($dirname, $subfile);
     119
     120        if (-d $filename)
     121        {
     122        my $dirname = $filename;
     123        $self->rec_f2d_mapping($dirname);
     124        }
     125        else
     126        {
     127        if ($subfile =~ m/\.(html?(\.gz)?)$/i && (-e $filename))
     128        {
     129            # add mapping
     130
     131            print STDERR "HTMLPlug: Precalculating OID for $subfile\n"
     132            if (defined($self->{'verbosity'}));
     133
     134            my $oid = doc::_calc_OID(undef,$filename);
     135            my $lookup_filename
     136            = $self->f2d_lookup_filename($filename);
     137
     138            $self->{'f2d_mapping'}->{$lookup_filename} = $oid;
     139
     140            if ($subfile =~ m/index\.(html?(\.gz)?)$/i)
     141            {
     142            # Cater for links such as "/paper/" mapping to
     143            #  "/paper/index.html"
     144            my $lookup_dirname
     145                = $self->f2d_lookup_filename($dirname);
     146
     147            $self->{'f2d_mapping'}->{$lookup_dirname} = $oid;
     148            }
     149        }
     150        elsif ($subfile =~ /\.(gif|jpg|jpeg|png)$/i)
     151        {
     152            # convert to png ?
     153
     154            # Hard link in build directory         
     155            #--
     156            my $coldir    = $ENV{'GSDLCOLLECTDIR'};
     157            my $dirsep_re = &util::get_re_dirsep();
     158            my $iorb_re   = "import|building";
     159
     160            my ($dirsep,$iorb)
     161            = ($filename =~ m/^$coldir($dirsep_re)($iorb_re)/);
     162
     163            my $copyname = $filename;
     164            $copyname
     165            =~ s/^${coldir}${dirsep}${iorb}
     166                /${coldir}${dirsep}building_images${dirsep}imgsrc/x;
     167
     168                if (!-e $copyname)
     169            {
     170                        print STDERR "HTMLPlug: Hard linking $subfile\n"
     171                            if (defined($self->{'verbosity'}));
     172            &util::hard_link($filename,$copyname);
     173            }
     174
     175        }
     176        }
     177       
     178    }
     179    }
     180}
     181
     182sub build_file_to_doc_mapping
     183{
     184    my ($self,$base_dir) = @_;
     185
     186    $self->{'f2d_mapping'} = {};
     187
     188    my $bimages_dir
     189    = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"building_images","imgsrc");
     190    if (!-e $bimages_dir && !mkdir ($bimages_dir, 0775))
     191    {
     192    print STDERR "HTMLPlug:: Could not create directory $bimages_dir\n";
     193    return;
     194    }
     195
     196    $self->rec_f2d_mapping($base_dir);
     197}
     198
     199#--
     200#--
    61201
    62202# return number of files processed, undef if can't process
     
    67207    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    68208
     209    my $verbosity = $processor->{'verbosity'};
     210    $self->{'verbosity'} = $verbosity;
     211
     212    if (!defined($self->{'f2d_mapping'}))
     213    {
     214    print STDERR "HTMLPlug: building OID mapping\n"
     215        if $processor->{'verbosity'};
     216
     217    $self->build_file_to_doc_mapping($base_dir,$verbosity);
     218    }
    69219    my $filename = &util::filename_cat($base_dir, $file);
    70220    my $absdir = $filename;
    71221    $absdir =~ s/[^\/\\]*$//;
    72222
     223    if ($file =~ /\.(gif|jpg|jpeg|png)$/i)
     224    {
     225    return 0;
     226    }
     227
    73228    return undef unless ($filename =~ /\.(html?(\.gz)?)$/i && (-e $filename));
     229    return undef if (-d $filename);
    74230
    75231    my $gz = 0;
     
    79235    }
    80236
    81     print STDERR "HTMLPlug: processing $filename\n" if $processor->{'verbosity'};
     237    print STDERR "HTMLPlug: processing $file\n" if $processor->{'verbosity'};
    82238
    83239    # create a new document
    84240    my $doc_obj = new doc ($file, "indexed_doc");
    85 
     241   
    86242    if ($gz) {
    87243    open (FILE, "zcat $filename |") || die "HTMLPlug::read - zcat can't open $filename\n";
     
    91247    my $cursection = $doc_obj->get_top_section();
    92248
     249    # look up precalculated OID
     250    my $lookup_filename = $self->f2d_lookup_filename($filename);
     251    my $precalc_oid = $self->{'f2d_mapping'}->{$lookup_filename};
     252
     253    # read in HTML file
    93254    my $text = "";
    94255    my $line = "";
     256    my $donehead = 0;
    95257    my $title = "";
    96258    while (defined ($line = <FILE>)) {
     
    98260    }
    99261
    100     # we'll use the worthless alarm thingy to temporarily replace
    101     # '\n' so we'd better check it doesn't occur naturally
    102     if ($text =~ /\a/) {
    103     print STDERR "HTMLPlug::read - 'WARNING '\a' character occurs in text!!\n";
    104     }
    105 
    106262    # remove line breaks
    107     $text =~ s/\n/\a/g;
     263    $text =~ s/\s+/ /g;
    108264
    109265    # see if there's a <title> tag
     
    129285    $text =~ s/^.*?<body[^>]*>//i;
    130286
    131     # and any other unwanted tags
    132     $text =~ s/<(\/p|\/html|\/body)>//g;
    133 
    134     # fix up the image links
    135     $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">]+)(\"?[^>]*>)/
    136     &replace_image_links($absdir, $doc_obj, $1, $2, $3)/ige;
    137 
    138     # put line breaks back in
    139     $text =~ s/\a/\n/g;
    140 
     287    # usemap="./#index" not handled correctly => change to "#index"
     288    $text =~ s/(<img[^>]*?usemap\s*=\s*\"?)([^\">]+)(\"?[^>]*>)/
     289    &replace_usemap_links($1, $2, $3)/ige;
     290
     291    # fix up the href links
     292    $f2d_mapping = $self->{'f2d_mapping'};
     293
     294    $text =~ s/(<(a|area)\s+[^>]*?href\s*=\s*\"?)([^\">]+)(\"?[^>]*>)/
     295       &replace_href_links($filename,$precalc_oid,$f2d_mapping,$1,$3,$4)/ige;
     296
     297    # add a newline at the beginning of each paragraph
     298    $text =~ s/(.)\s*<p\b/$1\n\n<p/gi;
     299   
     300    # add a newline every 80 characters at a word boundary
     301    # Note: this regular expression puts a line feed before
     302    # the last word in each section, even when it is not
     303    # needed.
     304    $text =~ s/(.{1,80})\s/$1\n/g;
     305
     306
     307    # Store URL for page as metadata
     308    my $web_url = "http://$lookup_filename";
     309    $doc_obj->add_metadata($cursection, "URL", $web_url);
     310
     311    my $import_url = $filename;
     312    $import_url =~ s/^$ENV{'GSDLCOLLECTDIR'}/_httpcollection_/;
     313
     314    # Add base tag so images can find correct location
     315    my $index_url = $import_url;
     316    $index_url =~ s/_httpcollection_\/import/_httpcollection_\/index\/imgsrc/;
     317
     318    my $dirsep_re = &util::get_re_dirsep();
     319    my @base_url_split = split(/$dirsep_re/,$index_url);
     320    pop(@base_url_split);
     321    my $base_url = join('/',@base_url_split,""); # force / at end
     322
     323    $text = "<base href=\"$base_url\">\n$text";
    141324    $doc_obj->add_text ($cursection, $text);
    142325
     326    # Add metadata that has been provided externally
     327    #--
    143328    foreach $field (keys(%$metadata)) {
    144329    # $metadata->{$field} may be an array reference
     
    152337    }
    153338
    154     # add OID
    155     $doc_obj->set_OID ();
     339    # Fix OID so it is the same as the pre-calculated OID
     340    $doc_obj->set_OID($precalc_oid);
     341
     342    my $set_oid = $doc_obj->get_OID();
     343    if ($precalc_oid ne $set_oid) # check (for super safety!)
     344    {
     345    print STDERR "Warning: pre-calculated OID and current OID differnt:";
     346    print STDERR " $filename\n";
     347    }
     348
    156349
    157350    # process the document
     
    161354}
    162355
    163 sub replace_image_links {
    164 
    165     my ($dir, $doc_obj, $front, $link, $back) = @_;
    166 
    167     my ($filename, $error);
    168     my $foundimage = 0;
    169    
    170     $link =~ s/\/\///;
    171     my ($imagetype) = $link =~ /([^\.]*)$/;
    172     $imagetype =~ tr/[A-Z]/[a-z]/;
    173     if ($imagetype eq "jpg") {$imagetype = "jpeg";}
    174     if ($imagetype !~ /^(jpg|gif|png)$/) {
    175     print STDERR "HTMLPlug: Warning - unknown image type ($imagetype)\n";
    176     }
    177     my ($imagefile) = $link =~ /([^\/]*)$/;
    178     my ($imagepath) = $link =~ /^[^\/]*(.*)$/;
    179 
    180     if (defined $imagepath && $imagepath =~ /\w/) {
    181     # relative link
    182     $filename = &util::filename_cat ($dir, $imagepath);
    183     if (-e $filename) {
    184         $doc_obj->associate_file ($filename, $imagefile, "image/$imagetype");
    185         $foundimage = 1;
    186     } else {
    187         $error = "HTMLPlug: Warning - couldn't find image file $imagefile in either $filename or";
    188     }
    189     }
    190 
    191     if (!$foundimage) {
    192     $filename = &util::filename_cat ($dir, $imagefile);
    193     if (-e $filename) {
    194         $doc_obj->associate_file ($filename, $imagefile, "image/$imagetype");   
    195         $foundimage = 1;
    196     } elsif (defined $error) {
    197         print STDERR "$error $filename\n";
    198     } else {
    199         print STDERR "HTMLPlug: Warning - couldn't find image file $imagefile in $filename\n";
    200     }
    201     }
    202 
    203     if ($foundimage) {
    204     return "${front}_httpcollection_/archives/_thisOID_/${imagefile}${back}";
    205     } else {
    206     return "";
    207     }
    208 }
     356# support for fixing up links to work as a GSDL collection
     357#--
     358
     359sub eval_dir_dots
     360{
     361    # evaluate any "../" to next directory up
     362    # evaluate any "./" as here
     363    #--
     364    my ($self,$filename) = @_;
     365
     366    my $dirsep_os = &util::get_os_dirsep();
     367    my @dirsep = split(/$dirsep_os/,$filename);
     368
     369    my @eval_dirs = ();
     370    foreach $d (@dirsep)
     371    {
     372    if ($d eq "..")
     373    {
     374        pop(@eval_dirs);
     375    }
     376    elsif ($d eq ".")
     377    {
     378        # do nothing!
     379    }
     380    else
     381    {
     382        push(@eval_dirs,$d);
     383    }
     384    }
     385
     386    return &util::filename_cat(@eval_dirs);
     387}
     388
     389sub replace_href_links
     390{
     391    my ($this_filename,$this_oid, $f2d_mapping, $front,$link,$back) = @_;
     392
     393    return $front.$link.$back if ($link =~ m/\.(gif|jpg|jpeg|png)$/i);
     394
     395    if ($link =~ m/^(http|ftp|file):/i)
     396    {
     397        # this should really check that the link ends in .htm*
     398    #****
     399    my $http_as_filename = $link;
     400    $http_as_filename =~ s/^(http|ftp|file):\/\///i;
     401
     402    my ($before_hash,$after_hash)
     403        = ($http_as_filename =~ m/^([^\#]*)\#?(.*)$/);
     404           
     405    if ($link =~ m/^(http|ftp):/i)
     406    {
     407        # Turn url (using /) into file name (possibly using \ on windows)
     408        my @http_dir_split = split('/',$before_hash);
     409        $http_as_filename = &util::filename_cat(@http_dir_split);
     410    }
     411
     412    $http_as_filename = $self->eval_dir_dots($http_as_filename);
     413                     
     414    if (defined($f2d_mapping->{$http_as_filename}))
     415    {
     416        # transform link into a local link (and then let it be
     417        #   processed by later code)
     418        $link = $http_as_filename;
     419
     420        $link = "_httpdocument_&cl=_cgiargcl_&d=$oid";
     421        $link .= "#$after_hash" if ($after_hash ne "");
     422    }
     423    else
     424    {
     425        # external link => set it up to pass through off-site page
     426
     427        my $link_safe = $link;
     428        &html::urlsafe($link_safe);
     429
     430        $link = "_httpextlink_&href=$link_safe&d=$this_oid";
     431
     432    }
     433    }   
     434    elsif ($link !~ m/^(mailto|news):/i)
     435    {
     436    my ($before_hash,$after_hash) = ($link =~ m/^([^\#]*)\#?(.*)$/);
     437    my $link_filename;
     438
     439    if ($before_hash =~ m/^\//)
     440    {
     441        my $dirsep_re = &util::get_re_dirsep();
     442        my $lookup = $self->f2d_lookup_filename($this_filename);
     443        my @lookup_split = split(/$dirsep_re/,$lookup);
     444        my $domname = shift(@lookup_split);
     445        $link_filename = &util::filename_cat($domname,$before_hash);
     446    }
     447    else
     448    {
     449        # Turn relative file path into full path
     450        if ($before_hash eq "") # handle links such as <a href="#x">
     451        {
     452        $link_filename = $this_filename;
     453        }
     454        else
     455        {
     456        my $dirname = &File::Basename::dirname($this_filename);
     457        $link_filename = &util::filename_cat($dirname,$before_hash);
     458        }
     459    }
     460
     461    $link_filename = $self->eval_dir_dots($link_filename);
     462    my $lookup_filename = $self->f2d_lookup_filename($link_filename);
     463
     464    my $oid = $f2d_mapping->{$lookup_filename};
     465    if (defined($oid))
     466    {
     467        $link = "_httpdocument_&cl=_cgiargcl_&d=$oid";
     468        $link .= "#$after_hash" if ($after_hash ne "");
     469    }
     470    else
     471    {
     472        print STDERR "HTMLPlug WARNING:";
     473        print STDERR " Could not find link: \"$lookup_filename\". ";
     474        print STDERR " Deactivating link\n";
     475       
     476        $link = "_httpextlink_&d=$this_oid";
     477    }
     478    }
     479    else
     480    {
     481    if ($link !~ m/^(mailto|news):/i)
     482    {
     483        print STDERR "HTMLPlug WARNING: Unhandled type of link, \"$link\"\n";
     484    }
     485    else
     486    {
     487        my $link_safe = $link;
     488        &html::urlsafe($link_safe);
     489
     490        $link = "_httpextlink_&href=$link_safe&d=$this_oid";
     491    }
     492    }
     493
     494    my $fixed_tag = "${front}${link}${back}";
     495
     496    return $fixed_tag;
     497}
     498
     499
     500sub replace_usemap_links
     501{
     502    my ($front, $link, $back) = @_;
     503
     504    $link =~ s/^\.\///;
     505
     506    return "${front}${link}${back}";
     507}
     508
     509
    209510
    2105111;
  • trunk/gsdl/perllib/util.pm

    r619 r721  
    4040    if (!-e $file) {
    4141        print STDERR "util::rm $file does not exist\n";
    42     } elsif (!-f $file) {
    43         print STDERR "util::rm $file is not a regular file\n";
     42    } elsif ((!-f $file) && (!-l $file)) {
     43        print STDERR "util::rm $file is not a regular (or symbolic) file\n";
    4444    } else {
    4545        push (@filefiles, $file);
     
    6868        print STDERR "util::rm_r $file does not exist\n";
    6969
    70     } elsif (-d $file) {
     70    } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
    7171        # get the contents of this directory
    7272        if (!opendir (INDIR, $file)) {
     
    9292}
    9393
     94# moves a file or a group of files
     95sub mv {
     96    my $dest = pop (@_);
     97    my (@srcfiles) = @_;
     98
     99    # remove trailing slashes from source and destination files
     100    $dest =~ s/[\\\/]+$//;
     101    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
     102
     103    # a few sanity checks
     104    if (scalar (@srcfiles) == 0) {
     105    print STDERR "util::mv no destination directory given\n";
     106    return;
     107    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
     108    print STDERR "util::mv if multiple source files are given the ".
     109        "destination must be a directory\n";
     110    return;
     111    }
     112
     113    # move the files
     114    foreach $file (@srcfiles) {
     115    my $tempdest = $dest;
     116    if (-d $tempdest) {
     117        my ($filename) = $file =~ /([^\\\/]+)$/;
     118        $tempdest .= "/$filename";
     119    }
     120    if (!-e $file) {
     121        print STDERR "util::mv $file does not exist\n";
     122    } else {
     123        rename ($file, $tempdest);
     124    }
     125    }
     126}
     127
    94128
    95129# copies a file or a group of files
     
    128162    }
    129163}
     164
    130165
    131166
     
    186221
    187222
     223sub mk_dir {
     224    my ($dir) = @_;
     225
     226    if (!mkdir ($dir, 0775)) {
     227    print STDERR "util::mk_dir could not create directory $dir\n";
     228    return;
     229    }
     230}
     231
    188232sub mk_all_dir {
    189233    my ($dir) = @_;
     
    239283    }
    240284   
     285}
     286
     287# make soft link to file if supported by OS, otherwise return error
     288sub soft_link {
     289    my ($src,$dest) = @_;
     290
     291    # remove trailing slashes from source and destination files
     292    $src =~ s/[\\\/]+$//;
     293    $dest =~ s/[\\\/]+$//;
     294
     295    # a few sanity checks
     296    if (!-e $src) {
     297    print STDERR "util::soft_link source file $src does not exist\n";
     298    return 0;
     299    }
     300
     301    my $dest_dir = &File::Basename::dirname($dest);
     302    mk_all_dir($dest_dir) if (!-e $dest_dir);
     303
     304    if (!symlink($src,$dest))
     305    {
     306    print STDERR "util::soft_link: unable to create soft link.";
     307    return 0;
     308    }
     309
     310    return 1;
    241311}
    242312
Note: See TracChangeset for help on using the changeset viewer.