Ignore:
Timestamp:
2010-12-06T13:15:10+13:00 (13 years ago)
Author:
davidb
Message:

Further changes to deal with documents that use different filename encodings on the file-system. Now sets UTF8URL metadata to perform the cross-document lookup. Files stored in doc.pm as associated files are now always raw filenames (rather than potentially UTF-8 encoded). Storing of filenames seen by HTMLPlug when scanning for files to block on is now done in Unicode-aware strings rather than in UTF-8-encoded but Unicode-unaware strings.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/ConvertBinaryFile.pm

    r23363 r23387  
    226226    my $convert_to_ext = $self->{'convert_to_ext'};
    227227   
     228
     229    my $upgraded_input_filename = &util::upgrade_if_dos_filename($input_filename);
     230
    228231    # derive tmp filename from input filename
    229232    my ($tailname, $dirname, $suffix)
    230     = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
     233    = &File::Basename::fileparse($upgraded_input_filename, "\\.[^\\.]+\$");
    231234
    232235    # softlink to collection tmp dir
     
    238241    }
    239242   
    240     # convert to utf-8 otherwise we have problems with the doc.xml file later on
    241 #    print STDERR "**** filename $tailname$suffix is already UTF8\n" if &unicode::check_is_utf8($tailname);
    242     $tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
     243#    # convert to utf-8 otherwise we have problems with the doc.xml file later on
     244#    my $utf8_tailname = (&unicode::check_is_utf8($tailname)) ? $tailname : $self->filepath_to_utf8($tailname);
     245
     246    # make sure filename to be used can be stored OK in a UTF-8 compliant doc.xml file
     247     my $utf8_tailname = &unicode::raw_filename_to_utf8_url_encoded($tailname);
     248
    243249
    244250    # URLEncode this since htmls with images where the html filename is utf8 don't seem
    245251    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
    246252    # files on the filesystem.
    247     $tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
    248 
    249     $suffix = lc($suffix);
    250     my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
     253    $utf8_tailname = &util::rename_file($utf8_tailname, $self->{'file_rename_method'}, "without_suffix");
     254
     255    my $lc_suffix = lc($suffix);
     256    my $tmp_filename = &util::filename_cat($tmp_dirname, "$utf8_tailname$lc_suffix");
    251257   
    252258    # If gsdl is remote, we're given relative path to input file, of the form import/tailname.suffix
     
    324330    }
    325331    } else {
    326     $output_filename =~ s/$suffix$/.$output_type/;
     332    $output_filename =~ s/$lc_suffix$/.$output_type/;
    327333    }
    328334   
     
    446452    # need to check that not empty
    447453    my ($doc_ext) = $file =~ /\.(\w+)$/;
     454    $doc_ext = lc($doc_ext);
    448455    my $file_type = "unknown";
    449456    $file_type = $self->{'file_type'} if defined $self->{'file_type'};
     
    458465    $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    459466    }
     467
    460468    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);
    461469
     
    476484     my $tmp_dir = $self->{'tmp_dir'};
    477485     if (defined $tmp_dir && -d $tmp_dir) {
     486##   print STDERR "**** Supressing clean up of tmp dir\n";
    478487     &util::rm_r($tmp_dir);
    479488     $self->{'tmp_dir'} = undef;
Note: See TracChangeset for help on using the changeset viewer.