Changeset 24971

Show
Ignore:
Timestamp:
24.01.2012 16:19:58 (7 years ago)
Author:
ak19
Message:

1. Introduced the util::filepath_to_url_format subroutine, which converts file paths to URL-style paths (using forward slashes) so that they match the slashes used in the filename regexes that serve as extrameta keys for indexing into the extrameta data structures. 2. Fixed a bug on Windows where metadata.xml specifies filenames as regexes with a backslash in front of the file extension's period: DirectoryPlugin needed to unregex the filepath before calling fileparse on it, otherwise the escaping backslash would interfere with Perl's fileparse routine (only on Windows, since the backslash is also a directory separator there). 3. Updated all the Perl plugins where the new util::filepath_to_url_format needs to be called, so that they use URL-style filenames (subsequently converted to regexes) to index into the extrameta data structures.

Location:
main/trunk/greenstone2/perllib
Files:
8 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/DSpacePlugin.pm

    r24951 r24971  
    5050use ReadTextFile; 
    5151use plugin; 
     52use util; 
    5253use XMLParser; 
    5354use strict; 
     
    249250    my @fname_list = map { "(".$_->{'file'}.")" } @$fnamemime_list; 
    250251    my $fname_re = join("|",@fname_list); 
     252     
     253    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format 
     254    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for 
     255    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone". 
     256    $fname_re = &util::filepath_to_url_format($fname_re); # just in case there are slashes in there  
     257     
    251258    $fname_re =~ s/\./\\\./g; 
    252259 
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r24951 r24971  
    482482    # filter out any extrametakeys that mention subdirectories and store 
    483483    # for later use (i.e. when that sub-directory is being processed) 
    484     foreach my $ek (@extrametakeys) { 
    485     my ($subdir_re,$extrakey_dir) = &File::Basename::fileparse($ek); 
    486     $extrakey_dir = &util::unregex_filename($extrakey_dir); 
    487  
    488     my $dirsep_re = &util::get_re_dirsep(); 
    489  
    490     my $ek_non_re = &util::unregex_filename($ek); 
     484    foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a regex 
     485    my $ek_non_re = &util::unregex_filename($ek); # unregex it. This step is *also* required for the fileparse operation since the file suffix 
     486                                                  # will be specified as \.txt and we don't want fileparse to split it there making \.txt the "filename" 
     487    my ($subdir_re,$extrakey_dir) = &File::Basename::fileparse($ek_non_re);      
     488    my $dirsep_re = &util::get_re_dirsep();  
    491489    if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory 
    492490        my $md = &extrametautil::getmetadata(\%extrametadata, $ek); 
     
    494492 
    495493        my $subdir_extrametakeys = $self->{'subdir_extrametakeys'}; 
     494         
     495    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format 
     496    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for 
     497    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone". 
     498    $subdir_re = &util::filepath_to_url_format($subdir_re); # Possibly not necessary since subdir_re is the last segment of the filepath $ek ####  
     499    $subdir_re = &util::filename_to_regex($subdir_re);      # Escape any special chars like brackets and . in subdir_re 
    496500         
    497501        my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf }; 
  • main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm

    r24951 r24971  
    3030use BasePlugin; 
    3131use extrametautil; 
     32use util; 
    3233 
    3334use Encode; 
     
    325326    } 
    326327 
    327     # Protect windows directory chars \ 
    328     $file = &util::filename_to_regex($file); ####  
     328    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format 
     329    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for 
     330    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone". 
     331    $file = &util::filepath_to_url_format($file); 
     332    $file = &util::filename_to_regex($file);  
    329333     
    330334    # Associate the metadata now 
  • main/trunk/greenstone2/perllib/plugins/LOMPlugin.pm

    r24951 r24971  
    3636use MetadataPass; 
    3737use MetadataRead; 
     38use util; 
    3839use XMLParser; 
    3940use Cwd; 
     
    166167    $file_re = $tail; 
    167168    } 
     169     
     170    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format 
     171    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for 
     172    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone". 
     173    $file_re = &util::filepath_to_url_format($file_re); 
    168174    $file_re = &util::filename_to_regex($file_re); 
    169175    $self->{'lom_srcdoc'} = undef; # reset for next file to be processed 
  • main/trunk/greenstone2/perllib/plugins/MetadataCSVPlugin.pm

    r24951 r24971  
    3636use extrametautil; 
    3737use multiread; 
     38use util; 
    3839 
    3940use Encode; 
     
    205206 
    206207    # Associate the metadata now 
     208    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format 
     209    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for 
     210    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone". 
     211    $csv_line_filename = &util::filepath_to_url_format($csv_line_filename); 
    207212    $csv_line_filename = &util::filename_to_regex($csv_line_filename); 
    208213 
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r24951 r24971  
    328328    if ($element eq "FileSet") { 
    329329    foreach my $target (@{$self->{'saved_targets'}}) { 
     330     
     331        # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL  
     332        # format. But if metadata.xml was user-constructed, its FileName fields (stored in the saved_targets  
     333        # map) could represent pathnames in Windows format using \ as file separator, instead of pathnames  
     334        # being in URL format as Greenstone would've generated. Need to convert such filepaths to URL style  
     335        # paths. Then need convert this to a regex to protect special characters like brackets in the  
     336        # filepath, such as "C:\Program Files (x86)\Greenstone". 
     337         
     338        $target = &util::unregex_filename($target); # filename is a regex, before converting it to URL form, unregex it 
     339        $target = &util::filepath_to_url_format($target); # convert to URL form 
     340        $target = &util::filename_to_regex($target); # regex it. Now we have regexed URL form of filename 
     341     
    330342        my $file_metadata = &extrametautil::getmetadata($self->{'metadataref'}, $target); 
    331343        my $saved_metadata = $self->{'saved_metadata'}; 
  • main/trunk/greenstone2/perllib/plugins/OAIPlugin.pm

    r24951 r24971  
    3838use metadatautil; 
    3939use MetadataRead; 
     40use util; 
    4041 
    4142# methods with identical signatures take precedence in the order given in the ISA list. 
     
    255256    # return all the metadata we have extracted to the caller. 
    256257    # Directory plug will pass it back in at read time, so we don't need to extract it again. 
    257     # extrametadata keys should be regular expressions 
     258     
     259    # Extrametadata keys should be regular expressions 
     260    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format 
     261    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for 
     262    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone". 
     263    $filename_for_metadata = &util::filepath_to_url_format($filename_for_metadata); 
    258264    $filename_for_metadata = &util::filename_to_regex($filename_for_metadata); 
     265     
    259266    &extrametautil::setmetadata($extrametadata, $filename_for_metadata, $new_metadata); 
    260267    &extrametautil::addmetakey($extrametakeys, $filename_for_metadata); 
  • main/trunk/greenstone2/perllib/util.pm

    r24940 r24971  
    837837    my $filename = shift (@_); 
    838838 
    839     # need to put single backslash back to double so that regex works 
     839    # need to make single backslashes double so that regex works 
    840840    $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);     
    841841     
     
    10671067    # convert parameters only to / slashes if Windows 
    10681068     
    1069     my $filename_urlformat = $filename; 
    1070     my $within_dir_urlformat = $within_dir; 
    1071      
    1072     if ($ENV{'GSDLOS'} =~ /^windows$/i) { 
    1073         # Only need to worry about Windows, as Unix style directories already in url-format 
    1074         # Convert Windows style \ => / 
    1075         $filename_urlformat =~ s@\\@/@g; 
    1076         $within_dir_urlformat =~ s@\\@/@g; 
    1077     } 
    1078      
     1069    my $filename_urlformat = &filepath_to_url_format($filename); 
     1070    my $within_dir_urlformat = &filepath_to_url_format($within_dir); 
     1071 
    10791072    #if ($within_dir_urlformat !~ m/\/$/) { 
    10801073        # make sure directory ends with a slash 
     
    10921085     
    10931086    return $filename_urlformat; 
     1087} 
     1088 
     1089# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is, 
     1090# since on Linux it doesn't represent a file separator but an escape char). 
     1091sub filepath_to_url_format 
     1092{ 
     1093    my ($filepath) = @_; 
     1094    if ($ENV{'GSDLOS'} =~ /^windows$/i) { 
     1095        # Only need to worry about Windows, as Unix style directories already in url-format 
     1096        # Convert Windows style \ => / 
     1097        $filepath =~ s@\\@/@g;       
     1098    } 
     1099    return $filepath; 
    10941100} 
    10951101