Changeset 38476 for main


Ignore:
Timestamp:
2023-12-06T21:53:42+13:00 (6 months ago)
Author:
anupama
Message:

Tidying up cached filename paths that were internally duplicated. Thsi is the longer code that I've tested more thoroughly. I've now also added in the subroutine that Dr Bainbridge wanted with a hashmap that cleans up the filepath based on matching regex prefixes and determines the new relative file location and filename inside the cached subdirectory based on which regex in the hashmap matched. I will commit the longer code first (and the new function with the described hashmap, but it doesn't get called in this commit), in case I need to backtrack at any point. I will next commit the call to the function and remove the longer code.

Location:
main/trunk/greenstone2/perllib/plugins
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BaseMediaConverter.pm

    r37049 r38476  
    7676
    7777
     78# given the file location where media or other product files are generated
     79# (such as the pages of a PDF file converted to paged images), this returns
     80# the full path to the document's cache subdirectory they are to be stored
     81# e.g. collect/<collection>/cached>/<fileRootname>
     82sub get_cache_filename_for_location
     83{
     84    my $self = shift @_;
     85    my ($filename, $orig_file_root, $base_dir, $collect_dir) = @_;
     86
     87    my $cached_dir = &FileUtils::filenameConcatenate($collect_dir,"cached");
     88   
     89    # hashmap of regex match to replacement regex
     90    my %prefixre_to_replacere_map;
     91
     92    my $orig_file_root_replacere = "";
     93    if(defined $orig_file_root) {
     94    $orig_file_root_replacere = "/$orig_file_root";
     95    }
     96
     97    # populate hashmap with regex matcher to regex replacement search
     98    # https://stackoverflow.com/questions/41334509/can-i-use-qq-or-q-instead-of-qw-in-the-following-perl-script
     99    # https://perldoc.perl.org/perlop#Regexp-Quote-Like-Operators
     100    $prefixre_to_replacere_map{qr/^$base_dir(.*?)$/} = "";   
     101    $prefixre_to_replacere_map{qr/^$cached_dir(.*?)$/} = qr/[\/\\][^\/\\]+([\/\\][^\/\\]*)$/;
     102    $prefixre_to_replacere_map{qr/^$collect_dir\/(tmp\/.*?)$/} = "";   
     103    $prefixre_to_replacere_map{qr/^$collect_dir(.*?)$/} = "";
     104    $prefixre_to_replacere_map{qr@^$ENV{'GSDLHOME'}/tmp/[^/]*(.*?)$@} = $orig_file_root_replacere; # prefix this
     105    #%prefixre_to_replacere_map{qr//} = qr//;
     106
     107    my $file;
     108   
     109    # https://stackoverflow.com/questions/3033/whats-the-safest-way-to-iterate-through-the-keys-of-a-perl-hash
     110    # https://stackoverflow.com/questions/383528/how-can-i-sort-a-hashs-keys-naturally
     111    # http://www.java2s.com/Code/Perl/Hash/SortHashbyKeysinReverseOrder.htm
     112    foreach my $key (reverse sort keys %prefixre_to_replacere_map) {
     113    # reverse sort as we want to try matching more specific "$coll_dir/$subdir" before $coll_dir
     114   
     115    print STDERR "@@@ Key and value: $key\n\t $prefixre_to_replacere_map{$key}\n";
     116
     117    my $prefixre = $key;
     118    my $replacere = $prefixre_to_replacere_map{$key};
     119   
     120    ($file) = ($filename =~ m/$prefixre/);
     121    if (!defined $file || $file eq $filename) {
     122        print STDERR "\t#### No match\n"; # keep looping looking for the next match
     123    } else {
     124        print STDERR "\t#### Found match. Applying: $replacere\n";
     125        if($replacere eq $orig_file_root_replacere) {
     126        $file = $orig_file_root_replacere.$file if $file;
     127        } else {
     128        $file =~ s/$replacere/$1/;
     129        }
     130        last; # found and processed a match
     131    }
     132    }
     133
     134    # No matches found, reset $file to $filename
     135    if(!defined $file) {
     136    $file = $filename;
     137    }
     138   
     139    $file =~ s/^\/|\\//; # get rid of leading slash from relative filename
     140    $file =~ s@^(\.(\/|\/))*@@; # get rid of any ./ at the start
     141    print STDERR "\tXXXX final file is: $file\n";
     142}
     143
    78144sub init_cache_for_file
    79145{
    80146    my $self = shift @_;
    81     my ($filename) = @_;
     147    my ($filename, $orig_file_root) = @_;
    82148   
    83149    my $verbosity = $self->{'verbosity'};
     
    91157    $filename =~ s/\\/\//g;
    92158    $base_dir =~ s/\\/\//g;
     159    print STDERR "@@@@ col_dir    : $collect_dir\n";
     160    print STDERR "@@@@ filename: $filename\n";
     161
     162#    $self->get_cache_filename_for_location($filename, $orig_file_root, $base_dir, $collect_dir);
     163   
    93164    my ($file) = ($filename =~ m/^$base_dir(.*?)$/);
    94    
    95     if (!defined $file) {
     165
     166    print STDERR "@@@@ orig_file_root : $orig_file_root\n" if $orig_file_root;
     167    print STDERR "@@@@ file    : $file\n";
     168    print STDERR "@@@@ base_dir: $base_dir\n";
     169   
     170    if (!defined $file || $file eq $filename) {
    96171    # Perhaps the filename is taken from within cache_dir area?
    97172    my $cached_dir = &FileUtils::filenameConcatenate($collect_dir,"cached");
     
    110185#   ($file) = ($filename =~ m/^$prev_cached_dir(.*?)$/);
    111186    }
    112     if (!defined $file) {
     187    if (!defined $file || $file eq $filename) {
    113188        # perhaps the filename is in the tmp folder? This can happen if the file was in a zip inside the import folder, e.g. as happens with remote gli
    114189        ($file) = ($filename =~ m/^$collect_dir\/(tmp\/.*?)$/);
    115190
    116191    }
     192   
     193    print STDERR "@@@@ file mid: $file\n" if defined $file;
     194
     195   
     196    if(!defined $file || $file eq $filename) {
     197    ($file) = ($filename =~ m/^$collect_dir(.*?)$/);   
     198    print STDERR "@@@@ file    : $file\n" if $file;
     199    }
     200    if($orig_file_root && !defined $file || $file eq $filename) {
     201        ($file) = ($filename =~ m@^$ENV{'GSDLHOME'}/tmp/[^/]*(.*?)$@);
     202        #$file = "/$orig_file_root/resized".$file if $file;
     203    $file = "/$orig_file_root".$file if $file;
     204        print STDERR "@@@@ file FIN: $file\n" if $file;
     205    }
     206    #if(!defined $file || $file eq $filename) {
     207    #   ($file) = ($filename =~ m/^$ENV{'GSDLHOME'}(.*?)$/);   
     208    #   print STDERR "@@@@ file FIN: $file\n" if $file;
     209    #}
     210    if(!defined $file) {
     211    $file = $filename;
     212    }
     213   
    117214    $file =~ s/^\/|\\//; # get rid of leading slash from relative filename
    118 
    119 
     215    $file =~ s@^(\.(\/|\/))*@@; # get rid of any ./ at the start
     216   
     217    print STDERR "@@@@ FIN file : $file\n";
     218
     219   
    120220    # Setup cached_dir and file_root
    121221
    122222    my ($file_root, $dirname, $suffix)
    123223    = &File::Basename::fileparse($file, "\\.[^\\.]+\$");
     224    #= &File::Basename::fileparse($conv_file, "\\.[^\\.]+\$"); 
    124225
    125226    # if dirname is in collections tmp area, remove collect_dir prefix
     
    140241    &FileUtils::makeAllDirectories($base_output_dir);
    141242    }
     243
     244
     245    print STDERR "@@@@ base_output_dir: $base_output_dir\n";
     246    print STDERR "@@@@ file_root: $file_root\n";
    142247
    143248    my $output_dir = &FileUtils::filenameConcatenate($base_output_dir,$file_root);
  • main/trunk/greenstone2/perllib/plugins/ImageConverter.pm

    r38165 r38476  
    213213    my ($filename_full_path, $filename_encoded_full_path, $doc_obj, $section, $filename_encoding) = @_;
    214214
     215    print STDERR "##### generate_images: filename full path = $filename_full_path\n";
     216   
    215217    my ($unused_fefp,$filename_encoded_no_path)
    216218    = &util::get_full_filenames("",$filename_encoded_full_path);
     
    230232
    231233    if ($self->{'enable_cache'}) {
    232     $self->init_cache_for_file($filename_full_path);
     234
     235    if($self->{'orig_filename_full_path'}) {
     236        my ($orig_file_root, $orig_dirname, $orig_suffix)
     237        #= &File::Basename::fileparse($doc_obj->{'orig_source_filename'}, "\\.[^\\.]+\$");
     238        = &File::Basename::fileparse($self->{'orig_filename_full_path'}, "\\.[^\\.]+\$");
     239   
     240        $self->init_cache_for_file($filename_full_path, $orig_file_root);
     241    } else {
     242        $self->init_cache_for_file($filename_full_path);
     243    }
    233244    }
    234245    if ($self->{'store_file_paths'}) {
  • main/trunk/greenstone2/perllib/plugins/PagedImagePlugin.pm

    r37148 r38476  
    324324    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    325325
     326    $self->{'orig_filename_full_path'} = $filename_full_path;
     327    #$self->{'orig_filename_no_path'} = $filename_no_path;
     328   
    326329    my $toplevel_plugin_classname = ref($self);
    327330    print $outhandle "$toplevel_plugin_classname processing \"$filename_full_path\"\n"
     
    474477    my $url_encoded_full_filename
    475478        = &unicode::raw_filename_to_url_encoded($filename_full_path);
     479    print STDERR "#### process_image filename_full_path: $filename_full_path vs $self->{'orig_filename_full_path'}\n";
    476480    $result = $self->generate_images($filename_full_path, $url_encoded_full_filename, $doc_obj, $section);
    477481    }
     
    612616    }
    613617    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", $final_doc_type);
    614     ### capiatalisation????
     618    ### capitalisation????
    615619#    if ($self->{'documenttype'} eq 'paged') {
    616620    # set the gsdlthistype metadata to Paged - this ensures this document will
Note: See TracChangeset for help on using the changeset viewer.