Changeset 24971 for main/trunk


Ignore:
Timestamp:
2012-01-24T16:19:58+13:00 (12 years ago)
Author:
ak19
Message:
  1. Introduced the util::filepath_to_url_format subroutine, which will be used to convert filenames to URL-style filenames so that they match the slashes used in the filename regexes in the extrameta keys that index into the extrameta data structures. 2. Fixed a bug on Windows where metadata.xml specifies filenames as regexes with a backslash in front of the file extension's period: DirectoryPlugin needed to unregex the filepath before calling fileparse on it, otherwise the escaping backslash would interfere with Perl's fileparse routine (only on Windows, since the backslash also represents a directory separator there). 3. Updated all those Perl plugins where the new util::filepath_to_url_format needs to be called, so that they use URL-style filenames (thereafter regexed) to index into the extrameta data structures.
Location:
main/trunk/greenstone2/perllib
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/DSpacePlugin.pm

    r24951 r24971  
    5050use ReadTextFile;
    5151use plugin;
     52use util;
    5253use XMLParser;
    5354use strict;
     
    249250    my @fname_list = map { "(".$_->{'file'}.")" } @$fnamemime_list;
    250251    my $fname_re = join("|",@fname_list);
     252   
     253    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     254    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     255    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     256    $fname_re = &util::filepath_to_url_format($fname_re); # just in case there are slashes in there
     257   
    251258    $fname_re =~ s/\./\\\./g;
    252259
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r24951 r24971  
    482482    # filter out any extrametakeys that mention subdirectories and store
    483483    # for later use (i.e. when that sub-directory is being processed)
    484     foreach my $ek (@extrametakeys) {
    485     my ($subdir_re,$extrakey_dir) = &File::Basename::fileparse($ek);
    486     $extrakey_dir = &util::unregex_filename($extrakey_dir);
    487 
    488     my $dirsep_re = &util::get_re_dirsep();
    489 
    490     my $ek_non_re = &util::unregex_filename($ek);
     484    foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a regex
     485    my $ek_non_re = &util::unregex_filename($ek); # unregex it. This step is *also* required for the fileparse operation since the file suffix
     486                                                  # will be specified as \.txt and we don't want fileparse to split it there making \.txt the "filename"
     487    my ($subdir_re,$extrakey_dir) = &File::Basename::fileparse($ek_non_re);     
     488    my $dirsep_re = &util::get_re_dirsep();
    491489    if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
    492490        my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
     
    494492
    495493        my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
     494       
     495    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     496    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     497    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     498    $subdir_re = &util::filepath_to_url_format($subdir_re); # Possibly not necessary since subdir_re is the last segment of the filepath $ek ####
     499    $subdir_re = &util::filename_to_regex($subdir_re);      # Escape any special chars like brackets and . in subdir_re
    496500       
    497501        my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };
  • main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm

    r24951 r24971  
    3030use BasePlugin;
    3131use extrametautil;
     32use util;
    3233
    3334use Encode;
     
    325326    }
    326327
    327     # Protect windows directory chars \
    328     $file = &util::filename_to_regex($file); ####
     328    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     329    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     330    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     331    $file = &util::filepath_to_url_format($file);
     332    $file = &util::filename_to_regex($file);
    329333   
    330334    # Associate the metadata now
  • main/trunk/greenstone2/perllib/plugins/LOMPlugin.pm

    r24951 r24971  
    3636use MetadataPass;
    3737use MetadataRead;
     38use util;
    3839use XMLParser;
    3940use Cwd;
     
    166167    $file_re = $tail;
    167168    }
     169   
     170    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     171    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     172    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     173    $file_re = &util::filepath_to_url_format($file_re);
    168174    $file_re = &util::filename_to_regex($file_re);
    169175    $self->{'lom_srcdoc'} = undef; # reset for next file to be processed
  • main/trunk/greenstone2/perllib/plugins/MetadataCSVPlugin.pm

    r24951 r24971  
    3636use extrametautil;
    3737use multiread;
     38use util;
    3839
    3940use Encode;
     
    205206
    206207    # Associate the metadata now
     208    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     209    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     210    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     211    $csv_line_filename = &util::filepath_to_url_format($csv_line_filename);
    207212    $csv_line_filename = &util::filename_to_regex($csv_line_filename);
    208213
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r24951 r24971  
    328328    if ($element eq "FileSet") {
    329329    foreach my $target (@{$self->{'saved_targets'}}) {
     330   
     331        # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL
     332        # format. But if metadata.xml was user-constructed, its FileName fields (stored in the saved_targets
     333        # map) could represent pathnames in Windows format using \ as file separator, instead of pathnames
     334        # being in URL format as Greenstone would've generated. Need to convert such filepaths to URL style
     335        # paths. Then need convert this to a regex to protect special characters like brackets in the
     336        # filepath, such as "C:\Program Files (x86)\Greenstone".
     337       
     338        $target = &util::unregex_filename($target); # filename is a regex, before converting it to URL form, unregex it
     339        $target = &util::filepath_to_url_format($target); # convert to URL form
     340        $target = &util::filename_to_regex($target); # regex it. Now we have regexed URL form of filename
     341   
    330342        my $file_metadata = &extrametautil::getmetadata($self->{'metadataref'}, $target);
    331343        my $saved_metadata = $self->{'saved_metadata'};
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r24951 r24971  
    3838use metadatautil;
    3939use MetadataRead;
     40use util;
    4041
    4142# methods with identical signatures take precedence in the order given in the ISA list.
     
    255256    # return all the metadata we have extracted to the caller.
    256257    # Directory plug will pass it back in at read time, so we don't need to extract it again.
    257     # extrametadata keys should be regular expressions
     258   
     259    # Extrametadata keys should be regular expressions
     260    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
     261    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
     262    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
     263    $filename_for_metadata = &util::filepath_to_url_format($filename_for_metadata);
    258264    $filename_for_metadata = &util::filename_to_regex($filename_for_metadata);
     265   
    259266    &extrametautil::setmetadata($extrametadata, $filename_for_metadata, $new_metadata);
    260267    &extrametautil::addmetakey($extrametakeys, $filename_for_metadata);
  • main/trunk/greenstone2/perllib/util.pm

    r24940 r24971  
    837837    my $filename = shift (@_);
    838838
    839     # need to put single backslash back to double so that regex works
     839    # need to make single backslashes double so that regex works
    840840    $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);   
    841841   
     
    10671067    # convert parameters only to / slashes if Windows
    10681068   
    1069     my $filename_urlformat = $filename;
    1070     my $within_dir_urlformat = $within_dir;
    1071    
    1072     if ($ENV{'GSDLOS'} =~ /^windows$/i) {
    1073         # Only need to worry about Windows, as Unix style directories already in url-format
    1074         # Convert Windows style \ => /
    1075         $filename_urlformat =~ s@\\@/@g;
    1076         $within_dir_urlformat =~ s@\\@/@g;
    1077     }
    1078    
     1069    my $filename_urlformat = &filepath_to_url_format($filename);
     1070    my $within_dir_urlformat = &filepath_to_url_format($within_dir);
     1071
    10791072    #if ($within_dir_urlformat !~ m/\/$/) {
    10801073        # make sure directory ends with a slash
     
    10921085   
    10931086    return $filename_urlformat;
     1087}
     1088
     1089# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
     1090# since on Linux it doesn't represent a file separator but an escape char).
     1091sub filepath_to_url_format
     1092{
     1093    my ($filepath) = @_;
     1094    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
     1095        # Only need to worry about Windows, as Unix style directories already in url-format
     1096        # Convert Windows style \ => /
     1097        $filepath =~ s@\\@/@g;     
     1098    }
     1099    return $filepath;
    10941100}
    10951101
Note: See TracChangeset for help on using the changeset viewer.