Ignore:
Timestamp:
2010-12-09T22:27:33+13:00 (13 years ago)
Author:
ak19
Message:

GLI now has a gs.filenameEncoding metadata field which appears like all the others in GLI's EnrichPane, but is unique in that this metadata (once set, changed or removed) must be applied to the affected filenames in the Collection Tree. More importantly, these changes allow GLI's Java code to interact with the recent changes to Perl, where strings were made unicode-aware (for proper regex matching), which in turn required other changes elsewhere. To still support filenames with different encodings, Perl now uses URL-encoded versions of filenames, in which the characters' code point values are represented in URL encoding. This required that GLI write out URL-encoded filenames to the metadata.xml files that are associated with each folder level of a collection, so that Perl can read them. In this way, both sides refer to the same filenames. This only works on unicode 16 (such as latin-1), non-UTF8 systems. The latter is a requirement since Java uses the filesystem encoding from startup: if it is UTF-8, unrecognised characters are replaced by the invalid char for UTF-8. Since this process is destructive, we can't get the original filenames' byte values back. The changes made to GLI will work on Windows, which is UTF-16 (windows codepage 1252), presumably also on Macs (some kind of UTF-16), and also on Native Latin-1 Linux systems. UTF-8 Linux systems need to be reconfigured to Native Latin-1; if that locale is not installed, an administrator can install it easily.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r23394 r23433  
    3131import java.util.*;
    3232import org.greenstone.gatherer.DebugStream;
     33import org.greenstone.gatherer.collection.CollectionTreeNode;
    3334import org.greenstone.gatherer.util.XMLTools;
    3435import org.w3c.dom.*;
     
    4546    static final private String METADATA_ELEMENT = "Metadata";
    4647
     48    /** Special metadata field: the filename encoding is a unique sort of metadata in
     49     * that it is not just information stored with a collection file, but also needs to
     50     * be applied in real-time to the collection file (to its filename) for display. */
     51    static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
     52   
    4753    // To speed things up a bit we keep the last accessed metadata.xml file in memory
    4854    static private File loaded_file = null;
     
    5763
    5864
    59     public void addMetadata(File file, ArrayList metadata_values)
     65    public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
    6066    {
    6167    // If this metadata.xml file isn't the one currently loaded, load it now
     
    7682
    7783    // Determine the file's path relative to the location of the metadata.xml file
    78     String metadata_xml_file_directory_path = getParentFile().getAbsolutePath();
    79     String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory_path.length());
    80     if (file_relative_path.startsWith(File.separator)) {
    81         file_relative_path = file_relative_path.substring(File.separator.length());
     84    String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
     85    String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
     86    if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     87        file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
    8288    }
    8389
     
    127133        appropriate_fileset_element.appendChild(new_description_element);
    128134
     135        // add the fileset element for .* at the top: especially important for
     136        // non-accumulating (and override mode) meta. Other type fileset elements can be appended
    129137        if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
    130           loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element, loaded_file_document.getDocumentElement().getFirstChild());
     138            loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
     139                    loaded_file_document.getDocumentElement().getFirstChild());
    131140        } else {
    132141          loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
     
    147156        metadata_value_string = metadata_value_string.replaceAll("\\]", "]");
    148157
     158        // the gs.filenameEncoding metadata is unique in that, when added, removed or
     159        // changed, it must be applied on the file(name) whose metadata has been adjusted
     160        if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
     161            metadata_value_string = processFilenameEncoding(file_path_regexp,
     162                                        file_node, metadata_value_string, false);
     163                              // true only if removing meta
     164        }
     165
    149166        // Check if this piece of metadata has already been assigned to this FileSet element
    150167        boolean metadata_already_assigned = false;
     
    156173        String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
    157174        if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
    158           // if the metadata must not accumulate, then edit teh current value
     175          // if the metadata must not accumulate, then edit the current value
    159176          if (!metadata_value.isAccumulatingMetadata()) {
    160177            XMLTools.setNodeText(current_metadata_element, metadata_value_string);
     
    196213
    197214
    198     public ArrayList getMetadataAssignedToFile(File file)
     215    public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
    199216    {
    200217    // If this metadata.xml file isn't the one currently loaded, load it now
     
    214231    }
    215232
    216     // Determine the file's path relative to the location of the metadata.xml file
     233    // Determine the file's path relative to the location of the metadata.xml file 
     234    String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
    217235    File metadata_xml_file_directory = getParentFile();
    218     String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory.getAbsolutePath().length());
    219     if (file_relative_path.startsWith(File.separator)) {
    220         file_relative_path = file_relative_path.substring(File.separator.length());
     236    String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
     237    file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
     238
     239    if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     240        file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
    221241    }
    222242
     
    255275
    256276        // This fileset specifies metadata for the folder the file is in
    257         if (file_relative_path.startsWith(current_filename_element_value + File.separator)) {
     277        if (file_relative_path.startsWith(current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
    258278            current_fileset_matches = true;
    259279            folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
     
    272292        Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
    273293        String metadata_element_name_full = current_metadata_element.getAttribute("name");
     294        // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
     295        if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
     296            continue;
     297        }       
    274298        String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
    275299
     
    335359
    336360
    337     public void removeMetadata(File file, ArrayList metadata_values)
     361    public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
    338362    {
    339363    // If this metadata.xml file isn't the one currently loaded, load it now
     
    354378
    355379    // Determine the file's path relative to the location of the metadata.xml file
    356     String metadata_xml_file_directory_path = getParentFile().getAbsolutePath();
    357     String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory_path.length());
    358     if (file_relative_path.startsWith(File.separator)) {
    359         file_relative_path = file_relative_path.substring(File.separator.length());
     380    String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
     381    String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
     382    if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     383        file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
    360384    }
    361385
     
    422446            String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
    423447            if (current_metadata_value_string.equals(metadata_value_string)) {
     448           
    424449            // Remove this Metadata element
    425450            current_metadata_element.getParentNode().removeChild(current_metadata_element);
    426 
     451           
     452            // the gs.filenameEncoding metadata is unique in that, when added, removed or
     453            // changed, it must be applied on the file(name) whose metadata has been adjusted
     454            if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
     455           
     456                // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
     457                // metadata (if any), now that the value at this level has been removed
     458                metadata_value_string = processFilenameEncoding(file_path_regexp,
     459                        file_node, "", true); // true only if *removing* this meta 
     460            }
     461           
    427462            // If there are no Metadata elements left now, remove the (empty) FileSet element
    428463            if (metadata_elements_nodelist.getLength() == 0) {
     
    441476
    442477
    443     public void replaceMetadata(File file, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
     478    public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
    444479    {
    445480    // If this metadata.xml file isn't the one currently loaded, load it now
     
    460495
    461496    // Determine the file's path relative to the location of the metadata.xml file
    462     String metadata_xml_file_directory_path = getParentFile().getAbsolutePath();
    463     String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory_path.length());
    464     if (file_relative_path.startsWith(File.separator)) {
    465         file_relative_path = file_relative_path.substring(File.separator.length());
     497    String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
     498    String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
     499    if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     500        file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
    466501    }
    467502
     
    541576        // If the new metadata value already existed, remove the original value
    542577        if (new_metadata_value_already_exists) {
    543         metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
     578            if(metadata_element_to_edit != null) { //?????????
     579                metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
     580            } else {
     581                System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
     582            }
    544583        }
    545584        // Otherwise replace the old value with the new value
    546585        // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
    547586        else if (metadata_element_to_edit != null) {
     587       
     588        // the gs.filenameEncoding metadata is unique in that, when added, removed or
     589        // changed, it must be applied on the file(name) whose metadata has been adjusted
     590        if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
     591            new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
     592            // true only if removing meta       
     593        }       
    548594        XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
    549595        }
     
    649695    }
    650696    }
     697
     698    /**
     699     * The gs.filenameEncoding metadata is unique in that, when added, removed or
     700     * replaced, it must be applied on the file(name) whose metadata has been
     701     * adjusted.
     702     * This method handles all that, given the regular expression or filepath name
     703     * to match on (.* matches subdirectories), the affected fileNode, the new
     704     * encoding value and whether a new encoding value has been added/an existing
     705     * one has been replaced or whether the encoding metadata has been removed.
     706     * The new adjusted value for the encoding metadata is returned.
     707     *
     708     * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
     709     * to allow fast access to previously assigned gs.filenameEncoding metadata (if
     710     * any) for each file. This hashmap also needs to be updated, but this update
     711     * is complicated by the fact that it concerns regular expressions that could
     712     * affect multiple filenames.
     713     */
     714    public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
     715                    String encoding_metadata_value, boolean removingMetadata)
     716    {
     717        if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
     718            return encoding_metadata_value;
     719        }
     720
     721        // Work out this filenode's new encoding and apply it:
     722       
     723        if(removingMetadata) { // encoding_metadata_value = ""
     724            // gs.filenameEncoding metadata being removed, work out
     725            // any inherited metadata to replace it with in the meta-table
     726            encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
     727                        file_node.getURLEncodedFilePath(), file_node.getFile());
     728            // should be canonical encoding already
     729        }
     730        else if(!encoding_metadata_value.equals("")) {
     731            // if adding or replacing filename encoding,
     732            // get the canonical encoding name for this alias
     733            encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
     734        }   
     735        // Reencode the display of this filenode only as any affected
     736        // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
     737        file_node.reencodeDisplayName(encoding_metadata_value);
     738           
     739
     740        // Whether removing or adding/replacing the file's gs.filename encoding meta,
     741        // store this in the file-to-encoding map for fast access, since the map stores
     742        // empty string values when no meta has been assigned at this file level.
     743        // In the case of removingMetadata, the value stored will be the fallback value
     744       
     745        String urlpath = file_node.getURLEncodedFilePath();
     746        if(removingMetadata) {
     747            // remove it from the map instead of inserting "", so that when folders in the collectiontree
     748            // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
     749            // for each affected filenodes does not cause the undesirable effect of multiple "" to be
     750            // entered into the filename-to-encoding map for filepaths that no longer exist .
     751            FilenameEncoding.map.remove(urlpath);
     752        } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
     753            FilenameEncoding.map.put(urlpath, encoding_metadata_value);         
     754        }
     755       
     756        // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
     757        //  assigned, the file_to_encodings map will be cleared for all descendant folders and files,
     758        //  so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
     759        // Mark the state as requiring a refresh of the CollectionTree.
     760        // This next step also serves to prevent the MetadataValueTableModel from trying to update
     761        // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
     762        FilenameEncoding.setRefreshRequired(true);
     763       
     764        return encoding_metadata_value;
     765    }
    651766}
Note: See TracChangeset for help on using the changeset viewer.