Changeset 7293


Ignore:
Timestamp:
2004-05-07T13:10:18+12:00 (20 years ago)
Author:
kjdon
Message:

Messed around with the MetadataXMLFile class - tried to tidy some stuff up a bit. Moved the purge code out to a new method. Changed the checkCache so that hopefully it uses the right keys now, and got rid of the description element stuff. Merged all the MetadataXMLFileManager save methods to avoid duplicate code, some of which was wrong.

Location:
trunk/gli/src/org/greenstone/gatherer/msm
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFile.java

    r7234 r7293  
    8484    public void addMetadata(String filename, Metadata metadata, boolean force_accumulate)
    8585    {
    86     // System.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
     86    ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
    8787    try {
    8888        // Retrieve the document element
     
    9494        // If there is no existing fileset, then create one
    9595        if (fileset_element == null) {
    96         // System.err.println("Creating a new fileset.");
    9796        fileset_element = base_document.createElement(FILESET_ELEMENT);
    9897        Element filename_element = base_document.createElement(FILENAME_ELEMENT);
     
    113112        // Otherwise we append the new fileset to gdm_element's children
    114113        else {
    115             // System.err.println("New fileset for " + filename);
     114            ///ystem.err.println("New fileset for " + filename);
    116115            filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP);
    117             // System.err.println("After transform: " + filename);
     116            ///ystem.err.println("After transform: " + filename);
    118117            filename_text = base_document.createTextNode(filename);
    119118            gdm_element.appendChild(fileset_element);
     
    128127        String name = metadata.getElement().getName();
    129128        // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
     129        String current_metadata_type = StaticStrings.METADATA_ELEMENT;
    130130        if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) {
    131         metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]);
     131        current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
    132132        name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1);
    133133        }
    134         else {
    135         metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]);
    136         }
     134       
     135        metadata_element = base_document.createElement(current_metadata_type);
    137136        metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
    138137
    139138        // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
     139        // also we are checking for the existence of exactly the same metadata cos sometimes we can be asked to add the same metadata twice.
    140140        boolean will_accumulate = false;
    141         NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
    142         for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) {
    143         Element sibling_description_element = (Element) sibling_description_elements.item(k);
    144         // We have to do this for each type of metadata
    145         for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
    146             NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
    147             for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) {
    148             Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
    149                 // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice.
    150             if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
    151                 // Check the values and return if they are the same.
    152                 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) {
    153                 return;
    154                 }
    155                 will_accumulate = true;
    156             }
    157             sibling_metadata_element = null;
    158             }
    159             sibling_metadata_elements = null;
    160         }
    161         sibling_description_element = null;
    162         }
    163         sibling_description_elements = null;
     141        // only look through Metadata or XMetadata depending on which type we are trying to add
     142        NodeList sibling_metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
     143        for (int i=0; i<sibling_metadata_elements.getLength(); i++) {
     144        Element sib_meta = (Element) sibling_metadata_elements.item(i);
     145        if(name.equals(sib_meta.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
     146            // found one with the same name - the new metadata will accumulate
     147            will_accumulate = true;
     148            // check for the same value
     149            // *** TODO this doesn't work for hierarchical metadata
     150            System.err.println("sibling value="+MSMUtils.getValue(sib_meta)+", this value="+metadata.getAbsoluteValue());
     151            if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sib_meta))) {
     152            System.err.println("matches!!!!!");
     153            return;
     154            }
     155           
     156        }
     157        sib_meta = null;
     158        }
     159        sibling_metadata_elements = null;
     160       
    164161        if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
    165162        metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
    166163        }
    167164        // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
    168         GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
    169         String node_value = null;
    170         if(model != null && model.isHierarchy()) {
     165        // getAbsoluteValue now does return the full path
     166        String node_value = metadata.getAbsoluteValue();
     167        //GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
     168        //String node_value = null;
     169        //if(model != null && model.isHierarchy()) {
    171170        //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
    172         node_value = metadata.getValueNode().getFullPath(false);
    173         }
    174         else {
    175         node_value = metadata.getAbsoluteValue();
    176         }
     171        //node_value = metadata.getValueNode().getFullPath(false);
     172        //}
     173        //else {
     174        //node_value = metadata.getAbsoluteValue();
     175        // }
    177176        ///ystem.err.println("Creating node in MetadataXMLFile: '" + node_value + "'");
    178177        metadata_element.appendChild(base_document.createTextNode(node_value));
     
    192191    }
    193192
    194     public int countMetadata() {
    195     int count = 0;
     193    /** this is used to 'purge' the metadata - I've taken the purge code out of getMetadata and put it in to here, cos its only called from one place and we dont want to retrieve the metadata, just update it */
     194    public void cleanUpMetadataRefs() {
     195    //Gatherer.println("clean up metadata refs!");
     196
     197    String file_relative_path = "";
    196198    try {
    197199        // Retrieve the document element.
    198200        Element directorymetadata_element = base_document.getDocumentElement();
    199         // Iterate through the filesets, checking the FileName child element against    the target file's name using regular expression matching.
    200         NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
    201         for(int i = 0; i < fileset_elements.getLength(); i++) {
    202         Element fileset_element = (Element) fileset_elements.item(i);
    203         NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
    204         for(int k = 0; k < description_elements.getLength(); k++) {
    205             Element description_element = (Element) description_elements.item(k);
    206             // We have to do this for each type of metadata
    207             for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
    208             NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
    209             count = count + metadata_elements.getLength();
    210             metadata_elements = null;
    211             }
    212             description_element = null;
    213         }
    214         description_elements = null;
    215         fileset_element = null;
    216         }
    217         fileset_elements = null;
    218         directorymetadata_element = null;
    219     }
    220     catch (Exception error) {
    221         Gatherer.printStackTrace(error);
    222     }
    223     return count;
     201        // Iterate through all the metadata
     202        // We have to do this for each type of metadata - do we need it for extracted metadata - will this only affect hierarchical metadata in which case only need to do normal metadata ?
     203        for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
     204        NodeList metadata_elements = directorymetadata_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
     205        for(int l = 0; l < metadata_elements.getLength(); l++) {
     206            Element metadata_element = (Element) metadata_elements.item(l);
     207            String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
     208            String raw_value = MSMUtils.getValue(metadata_element);
     209            //
     210            // ***** LEGACY SUPPORT *****
     211            // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
     212            if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
     213            Gatherer.println("Detected Legacy Path: " + raw_value);
     214            raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
     215            MSMUtils.setValue(metadata_element, raw_value);
     216            }
     217
     218            ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
     219            if (element != null) {
     220            GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
     221            String current_value = value.getFullPath(false);
     222            if(!raw_value.equals(current_value)) {
     223                // set the new value
     224                MSMUtils.setValue(metadata_element, current_value);
     225            }
     226            value = null;
     227            current_value = null;
     228            }
     229            element = null;
     230            metadata_element = null;
     231            raw_element = null;
     232            raw_value = null;
     233        } // for each metadata element
     234        metadata_elements = null;
     235        } // for each metadata type
     236    } catch (Exception error) {
     237        Gatherer.self.printStackTrace(error);
     238    }
    224239    }
    225240
     
    263278    }
    264279
    265     /** Get all of the metadata, including directory level, associated with this file. */
     280    /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. */
     281    // !! Michael has written a much nicer version of this function !!
     282    // Kath has cleaned up this version a bit
    266283    public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
    267     return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false);
    268     }
    269     /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */
    270     // !! Michael has written a much nicer version of this function !!
    271     public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) {
    272284    Gatherer.println("Get metadata for " + filename);
    273     Gatherer.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level + ", purge = " + purge);
     285    Gatherer.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level);
    274286
    275287    // Determine the file's path relative to the location of the metadata.xml file
     
    294306        for(int i = 0; i < fileset_elements.getLength(); i++) {
    295307        Element fileset_element = (Element) fileset_elements.item(i);
     308        boolean fileset_matches = false;
     309        // look through the filename elements of this and see if we have a match
    296310        NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
     311        String filename_text = "";
    297312        for(int j = 0; j < filename_elements.getLength(); j++) {
    298313            Element filename_element = (Element) filename_elements.item(j);
    299             String filename_text = MSMUtils.getValue(filename_element);
    300             if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME)) || purge) {
    301             // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite).
    302             // Normal metadata
    303             NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
    304             for(int k = 0; k < description_elements.getLength(); k++) {
    305                 Element description_element = (Element) description_elements.item(k);
    306                 // We have to do this for each type of metadata
    307                 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
    308                 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
    309                 for(int l = 0; l < metadata_elements.getLength(); l++) {
    310                     Element metadata_element = (Element) metadata_elements.item(l);
    311                     String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
    312                     //String language = metadata_element.getAttribute("language");
    313                     String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
    314                     String raw_value = MSMUtils.getValue(metadata_element);
    315                     //
    316                     //raw_value = Codec.transform(raw_value, Codec.DOM_TO_);
    317                     ///ystem.err.println("Retrieved raw value: " + raw_value);
    318                     // ***** LEGACY SUPPORT *****
    319                     // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
    320                     if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
    321                     ///ystem.err.println("Blarg");
    322                     Gatherer.println("Detected Legacy Path: " + raw_value);
    323                     raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
    324                     Gatherer.println("Updated Path To: " + raw_value);
    325                     MSMUtils.setValue(metadata_element, raw_value);
     314            filename_text = MSMUtils.getValue(filename_element);
     315            if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME))) {
     316            fileset_matches = true;
     317            filename_element = null;
     318            break;
     319            }
     320            filename_element = null;
     321        }
     322       
     323        if (!fileset_matches) {
     324            // go to teh next fileset
     325            fileset_element = null;
     326            continue;
     327        }
     328        // If this fileset matches add all of the metadata found in the fileset, remembering to abide by desired mode (accumulate vs. overwrite).
     329        // We have to do this for each type of metadata
     330        for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
     331            NodeList metadata_elements = fileset_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
     332            for(int l = 0; l < metadata_elements.getLength(); l++) {
     333            Element metadata_element = (Element) metadata_elements.item(l);
     334            String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
     335            String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
     336            String raw_value = MSMUtils.getValue(metadata_element);
     337            // ***** LEGACY SUPPORT *****
     338            // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
     339            if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
     340                Gatherer.println("Detected Legacy Path: " + raw_value);
     341                raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
     342                Gatherer.println("Updated Path To: " + raw_value);
     343                MSMUtils.setValue(metadata_element, raw_value);
     344            }
     345            // **************************
     346            // Using the element string and value, retrieve a matching Metadata object from the cache
     347            Metadata metadata = null;
     348            // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
     349            // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
     350            if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value)) {
     351                ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
     352                metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value);
     353            }
     354            else {
     355                ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
     356                if (element != null) {
     357                GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
     358                ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
     359                metadata = new Metadata(element, value);
     360                MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata);
     361               
     362                ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
     363                value = null;
     364                element = null;
     365                }
     366            }
     367           
     368            // Determine whether this metadata is file or folder level
     369            if (metadata != null) {
     370                // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text);
     371                // Direct match to regular expression
     372                if (file_relative_path.matches(filename_text)) {
     373                boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals("");
     374                metadata.setFile(file);
     375                metadata.setFileLevel(!is_folder_level);
     376                }
     377                // Indirect match to regular expression (always folder level)
     378                else if (file_relative_path.startsWith(filename_text + File.separator)) {
     379                metadata.setFile(new File(file, filename_text));
     380                metadata.setFileLevel(false);
     381                }
     382               
     383                // If mode is overwrite, then remove any previous values for this metadata element.
     384                if(mode.equals("accumulate")) {
     385                metadata.setAccumulate(true);
     386                }
     387                else {
     388                metadata.setAccumulate(false);
     389                ///ystem.err.println("Metadata overwrites: " + metadata);
     390                for(int m = metadatum.size() - 1; m >= 0; m--) {
     391                    Metadata old_metadata = (Metadata) metadatum.get(m);
     392                    if(old_metadata.getElement().equals(metadata.getElement())) {
     393                    metadatum.remove(m);
     394                    ///ystem.err.println("Removing overridden metadata: " + old_metadata);
    326395                    }
    327                     // **************************
    328                     // Using the element string and value, retrieve a matching Metadata object from the cache
    329                     Metadata metadata = null;
    330                     // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
    331                     // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
    332                     if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value) && !purge) {
    333                     ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
    334                     metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value);
    335                     }
    336                     else {
    337                     ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
    338                     if (element != null) {
    339                         GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
    340                         ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
    341                         metadata = new Metadata(element, value);
    342                         if(!purge) {
    343                         MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata);
    344                         }
    345                         ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
    346                         value = null;
    347                         element = null;
    348                     }
    349                     }
    350 
    351                     // Determine whether this metadata is file or folder level
    352                     if (metadata != null) {
    353                     // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text);
    354                     // Direct match to regular expression
    355                     if (file_relative_path.matches(filename_text)) {
    356                         boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals("");
    357                         metadata.setFile(file);
    358                         metadata.setFileLevel(!is_folder_level);
    359                     }
    360                     // Indirect match to regular expression (always folder level)
    361                     else if (file_relative_path.startsWith(filename_text + File.separator)) {
    362                         metadata.setFile(new File(file, filename_text));
    363                         metadata.setFileLevel(false);
    364                     }
    365 
    366                     // If mode is overwrite, then remove any previous values for this metadata element.
    367                     if(mode.equals("accumulate")) {
    368                         metadata.setAccumulate(true);
    369                     }
    370                     else {
    371                         metadata.setAccumulate(false);
    372                         ///ystem.err.println("Metadata overwrites: " + metadata);
    373                         for(int m = metadatum.size() - 1; m >= 0; m--) {
    374                         Metadata old_metadata = (Metadata) metadatum.get(m);
    375                         if(old_metadata.getElement().equals(metadata.getElement())) {
    376                             metadatum.remove(m);
    377                             ///ystem.err.println("Removing overridden metadata: " + old_metadata);
    378                         }
    379                         old_metadata = null;
    380                         }
    381                     }
    382                     mode = null;
    383                     // Add the completed metadata and clean up
    384                     ///ystem.err.println("Adding metadata: " + metadata);
    385                     metadatum.add(metadata);
    386                     // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
    387                     // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
    388                     ///atherer.println("Have we been asked to remove the metadata: " + metadata);
    389                     ///atherer.println("Given:");
    390                     ///atherer.println("\tremove = " + remove);
    391                     ///atherer.println("\tfilename = " + filename);
    392                     ///atherer.println("\tfilename_text = " + filename_text + "?");
    393                     if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
    394                         ///atherer.println("Yes! Queuing for Removal.");
    395                         queued_for_removal.add(metadata_element);
    396                     }
    397                     else {
    398                         ///atherer.println("No. Updating.");
    399                         String current_value = metadata.getValueNode().getFullPath(false);
    400                         ///ystem.err.println("Checking the current mdv path: " + current_value);
    401                         ///ystem.err.println("Against whats in the metadata file: " + raw_value);
    402                         if(!raw_value.equals(current_value)) {
    403                         // Remove old text
    404                         while(metadata_element.hasChildNodes()) {
    405                             metadata_element.removeChild(metadata_element.getFirstChild());
    406                         }
    407                         // Add new.
    408                         metadata_element.appendChild(base_document.createTextNode(current_value));
    409                         }
    410                     }
    411                     }
    412                     metadata = null;
    413                     raw_value = null;
    414                     raw_element = null;
    415                     metadata_element = null;
     396                    old_metadata = null;
    416397                }
    417                 metadata_elements = null;
    418                 }
    419 
    420                 // Now we remove any elements that have been queued for deletion
    421                 for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) {
    422                 Element metadata_element = (Element) queued_for_removal.get(a);
    423                 description_element.removeChild(metadata_element);
    424                 up_to_date = false;
    425                 }
    426                 queued_for_removal.clear();
    427 
    428                         // If the description_element no longer has any children remove it
    429                 NodeList metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
    430                 NodeList extracted_elements = description_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
    431                 if(metadata_elements.getLength() == 0 && extracted_elements.getLength() == 0) {
    432                 fileset_element.removeChild(description_element);
    433                 up_to_date = false;
    434                 }
    435                 description_element = null;
     398                }
     399                mode = null;
     400                // Add the completed metadata and clean up
     401                ///ystem.err.println("Adding metadata: " + metadata);
     402                metadatum.add(metadata);
     403                // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
     404                // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
     405                ///atherer.println("Have we been asked to remove the metadata: " + metadata);
     406                ///atherer.println("Given:");
     407                ///atherer.println("\tremove = " + remove);
     408                ///atherer.println("\tfilename = " + filename);
     409                ///atherer.println("\tfilename_text = " + filename_text + "?");
     410                if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
     411                ///atherer.println("Yes! Queuing for Removal.");
     412                queued_for_removal.add(metadata_element);
     413                }
     414                else {
     415                ///atherer.println("No. Updating.");
     416                String current_value = metadata.getValueNode().getFullPath(false);
     417                ///ystem.err.println("Checking the current mdv path: " + current_value);
     418                ///ystem.err.println("Against whats in the metadata file: " + raw_value);
     419                if(!raw_value.equals(current_value)) {
     420                    MSMUtils.setValue(metadata_element, current_value);
     421                }
     422                }
    436423            }
    437             description_elements = null;
    438             }
    439             filename_text = null;
    440             filename_element = null;
    441         }
    442         // If the file set no longer has any description entries, remove it entirely
    443         NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
    444         if(description_elements.getLength() == 0) {
    445             directorymetadata_element.removeChild(fileset_element);
    446             up_to_date = false;
    447         }
    448         description_elements = null;
    449         filename_elements = null;
     424            metadata = null;
     425            raw_value = null;
     426            raw_element = null;
     427            metadata_element = null;
     428            } // for all metadata elements
     429            metadata_elements = null;
     430        } // for all metadata types
     431     
     432        // Now we remove any elements that have been queued for deletion
     433        for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) {
     434            Element metadata_element = (Element) queued_for_removal.get(a);
     435            Element parent = (Element) metadata_element.getParentNode();
     436            parent.removeChild(metadata_element);
     437           
     438            up_to_date = false;
     439        }
     440        queued_for_removal.clear();
     441           
     442        // If the fileset no longer has any metadata remove it
     443        NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
     444        if (metadata_elements.getLength()==0) {
     445            metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
     446            if (metadata_elements.getLength()==0) {
     447            directorymetadata_element.removeChild(fileset_element);
     448            up_to_date = false;
     449            }
     450        }
     451        metadata_elements = null;
    450452        fileset_element = null;
    451         }
     453        filename_text = null;
     454        } // for each fileset element
     455       
    452456        fileset_elements = null;
    453457        directorymetadata_element = null;
     
    461465    }
    462466   
     467    /** Returns true if the document has at least one element tagged StaticStrings.METADATA_ELEMENT or StaticStrings.EXTRACTED_METADATA_ELEMENT. Note that has_meta starts out true, so if an exception is thrown during the lookup it is passed to Gatherer.printStackTrace and true is still returned. */
     468    public boolean hasMetadata() {
     469    boolean has_meta = true;
     470    try {
     471        // Retrieve the document element.
     472        Element directory_metadata_element = base_document.getDocumentElement();
     473        NodeList metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
     474        if (metadata_nodes.getLength()==0) {
     475        // No normal metadata elements were found, so try extracted metadata
     476        metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
     477        if (metadata_nodes.getLength()==0) {
     478            has_meta = false;
     479        }
     480        }
     481        directory_metadata_element=null;
     482        metadata_nodes=null;
     483    }
     484    catch (Exception error) {
     485        Gatherer.printStackTrace(error);
     486    }
     487    return has_meta;
     488    }
     489
    463490    /** Determine if this document has been saved recently, and thus xml file version is up to date. */
    464491    public boolean isUpToDate() {
     
    504531    }
    505532
    506     /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
     533    /** Remove the given metadata from this document. If filename is null, then removes directory level metadata, otherwise just removes it from the specified file. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
    507534    public void removeMetadata(String filename, Metadata metadata) {
    508535    Gatherer.println("Remove metadata: " + metadata + "\nFrom filename: " + filename);
     
    512539        boolean make_next_metadata_element_overwrite = false;
    513540        boolean remove_fileset = false;
     541        // is this extracted or normal metadata?
     542        String removing_metadata_name = metadata.getElement().getName();
     543        // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
     544        String current_metadata_type = StaticStrings.METADATA_ELEMENT;
     545        if(removing_metadata_name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) {
     546        current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
     547        removing_metadata_name = removing_metadata_name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1);
     548        }
     549
    514550        // Retrieve the document element.
    515551        Element directorymetadata_element = base_document.getDocumentElement();
    516         // Iterate through the filesets looking for the directory level one.
     552        // Iterate through the filesets looking for the appropriate one.
    517553        NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
    518554        for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
     
    523559            String filename_text = MSMUtils.getValue(filename_element);
    524560            if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
    525             // Retrieve the Metadata Element for this fileset, and iterate through them looking for the one which we are to remove.
    526             NodeList description_elements = fileset_element.getElementsByTagName("Description");
    527             for(int k = 0; !found && k < description_elements.getLength(); k++) {
    528                 Element description_element = (Element) description_elements.item(k);
    529                 // We have to do this for each type of metadata
    530                 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
    531                 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
    532                 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
    533                     Element metadata_element = (Element) metadata_elements.item(l);
    534                     String element = metadata_element.getAttribute("name");
    535                     String value = MSMUtils.getValue(metadata_element);
    536                     // See if this is the metadata we wish to remove
    537                     if(element.equals(metadata.getElement().getName())) {
    538                     if(value.equals(metadata.getValueNode().getFullPath(false))) {
    539                         // Remove it
    540                         ///ystem.err.println("Remove " + element + "-" + value);
    541                         description_element.removeChild(metadata_element);
    542                         found = true;
    543                         // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
    544                         if(first_metadata_element_found && !metadata.accumulates()) {
    545                         ///ystem.err.println("First of this element found!");
    546                         make_next_metadata_element_overwrite = true;
    547                         }
    548                     }
    549                     // If this was the first metadata we've found with the element of the one to be removed set first found to false.
    550                     else if(first_metadata_element_found) {
    551                         ///ystem.err.println("Found a matching element: " + element + "=" + value);
    552                         first_metadata_element_found = false;
    553                     }
    554                     // Otherwise we should make this metadata overwrite as requested.
    555                     else if(make_next_metadata_element_overwrite) {
    556                         ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
    557                         metadata_element.setAttribute(MODE_ATTRIBUTE, "");
    558                     }
     561            // Retrieve the Metadata Elements for this fileset, and iterate through them looking for the one which we are to remove.
     562            NodeList metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
     563            for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
     564                Element metadata_element = (Element) metadata_elements.item(l);
     565                String element = metadata_element.getAttribute("name");
     566                String value = MSMUtils.getValue(metadata_element);
     567                // See if this is the metadata we wish to remove
     568                if(element.equals(removing_metadata_name)) {
     569                if(value.equals(metadata.getValueNode().getFullPath(false))) {
     570                    // Remove it
     571                    ///ystem.err.println("Remove " + element + "-" + value);
     572                    Element parent_elem = (Element)metadata_element.getParentNode();
     573                    parent_elem.removeChild(metadata_element);
     574                   
     575                    //description_element.removeChild(metadata_element);
     576                    found = true;
     577                    // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
     578                    if(first_metadata_element_found && !metadata.accumulates()) {
     579                    ///ystem.err.println("First of this element found!");
     580                    make_next_metadata_element_overwrite = true;
    559581                    }
    560                     value = null;
    561                     element = null;
    562                     metadata_element = null;
    563582                }
    564                 NodeList normal_metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[0]);
    565                 NodeList extracted_metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[1]);
    566                 // If we found it, removed it, and now the description tag has no children, mark the fileset for removal
    567                 if(normal_metadata_elements.getLength() == 0 && extracted_metadata_elements.getLength() == 0) {
    568                     remove_fileset = true;
     583                // If this was the first metadata we've found with the element of the one to be removed set first found to false.
     584                else if(first_metadata_element_found) {
     585                    ///ystem.err.println("Found a matching element: " + element + "=" + value);
     586                    first_metadata_element_found = false;
    569587                }
    570                 extracted_metadata_elements = null;
    571                 normal_metadata_elements = null;
    572                 metadata_elements = null;
    573                 }
    574                 description_element = null;
     588                // Otherwise we should make this metadata overwrite as requested.
     589                else if(make_next_metadata_element_overwrite) {
     590                    ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
     591                    metadata_element.setAttribute(MODE_ATTRIBUTE, "");
     592                }
     593                }
     594                value = null;
     595                element = null;
     596                metadata_element = null;
     597            } // for each metadata
     598            metadata_elements = null;
     599            } // if the filename matches
     600
     601            if (found) {
     602            // if we found an element and removed it, we now want to check whether the fileset is empty or not
     603            NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
     604            if (metadata_elements.getLength() ==0) {
     605                metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
     606                if (metadata_elements.getLength() ==0) {
     607                // remove the fileset
     608                directorymetadata_element.removeChild(fileset_element);
     609                }
    575610            }
    576             description_elements = null;
     611            metadata_elements = null;
    577612            }
    578613            filename_text = null;
    579614            filename_element = null;
    580         }
     615        } // for each filename element
    581616        filename_elements = null;
    582         if(found && remove_fileset) {
    583             directorymetadata_element.removeChild(fileset_element);
    584         }
    585617        fileset_element = null;
    586         }
     618        } // for each fileset element
    587619        fileset_elements = null;
    588620        directorymetadata_element = null;
     
    593625    }
    594626    }
    595    
     627
    596628    /** Change the up to date flag.
    597629     * @param up_to_date true if the document on the filesystem is the same as the one in memory, false otherwise
  • trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFileManager.java

    r7114 r7293  
    370370     */
    371371    public synchronized void metadataChanged(MSMEvent event) {
    372     ///ystem.err.println("Recieved Event: " + event.toString());
    373372    File file = event.getFile();
    374373    if(file == null) {
     
    446445        File file = (File) iterator.next();
    447446        MetadataXMLFile document = (MetadataXMLFile) get(file);
    448         if(!document.isUpToDate()) {
    449         //ystem.err.println("Saving: " + file.getAbsolutePath());
    450         // First purge any old references.
    451         document.getMetadata(null, false, null, null, false, true);
    452         // If there is no metadata in this document then don't write out a file. In fact delete any file that already exists.
    453         int count = document.countMetadata();
    454         if(count > 0) {
    455             // Now write the xml
    456             Utility.export(document.getDocument(), file);
    457             document.setUpToDate(true);
    458         }
    459         else if(file.exists()) {
    460             file.delete();
    461         }
    462         }
    463     }
    464     }
     447        save(file, document);
     448    }
     449    }
     450
    465451    /** Used to cause the document associated with a particular file to write the latest copy of itself to disk. */
    466452    public void save(FileNode node) {
     
    468454    if(file != null && file.isFile()) {
    469455        MetadataXMLFile document = getDocument(file);
    470         File xml_file;
    471         if(file.isFile()) {
    472         xml_file = new File(file.getParentFile(), "metadata.xml");
    473         }
    474         else {
    475         xml_file = new File(file, "metadata.xml");
    476         }
    477         if(document != null && !document.isUpToDate()) {
    478         // First purge any old references.
    479         document.getMetadata(null, false, null, null, true);
    480         // Now write the xml
    481         Utility.export(document.getDocument(), xml_file);
    482         document.setUpToDate(true);
    483         }
    484         xml_file = null;
     456        if (document != null && !document.isUpToDate()) {
     457        File xml_file;
     458        if(file.isFile()) {
     459            xml_file = new File(file.getParentFile(), "metadata.xml");
     460        }
     461        else {
     462            xml_file = new File(file, "metadata.xml");
     463        }
     464        save(xml_file, document);
     465        xml_file = null;
     466        }
    485467        document = null;
    486468    }
    487469    file = null;
    488470    }
     471
    489472
    490473    /** Write out the latest copy of a certain document; nothing is done if the document is already up to date. As of r7293 (the added lines below) old references are purged via cleanUpMetadataRefs(), the XML is exported only when hasMetadata() is true, otherwise any existing file is deleted, and the document is then marked up to date in either case. */
    491474    public void save(File file, MetadataXMLFile document) {
    492         if(!document.isUpToDate()) {
    493             // First purge any old references.
    494             document.getMetadata(null, false, null, null, true);
    495             // Now write the xml
    496             Utility.export(document.getDocument(), file);
    497             document.setUpToDate(true);
    498         }
     475    if(!document.isUpToDate()) {
     476        // First purge any old references.
     477        document.cleanUpMetadataRefs();
     478        // If there is no metadata in this document then don't write out a file. In fact delete any file that already exists.
     479        boolean has_metadata = document.hasMetadata();
     480        if (has_metadata) {
     481        // Now write the xml
     482        Utility.export(document.getDocument(), file);
     483        }
     484        else if(file.exists()) {
     485        file.delete();
     486        }
     487        document.setUpToDate(true);
     488    }
    499489    }
    500490
     
    523513    private Metadata checkCache(Metadata metadata) {
    524514    if(metadata != null) {
    525         ///ystem.err.println("Search for " + metadata.toString());
    526         if(metadata_cache.contains(metadata.getElement(), metadata.getValueNode())) {
    527         metadata = (Metadata) metadata_cache.get(metadata.getElement(), metadata.getValueNode());
     515        //if(metadata_cache.contains(metadata.getElement(), metadata.getValueNode())) {
     516        //  metadata = (Metadata) metadata_cache.get(metadata.getElement(), metadata.getValueNode());
     517        //  System.err.println("cache contains teh value");
     518        // }
     519        // the element name was used as the key
     520        if(metadata_cache.contains(metadata.getElement().getElement().getAttribute("name"), metadata.getValue())) {
     521        metadata = (Metadata) metadata_cache.get(metadata.getElement().getElement().getAttribute("name"), metadata.getValue());
    528522        }
    529523    }
Note: See TracChangeset for help on using the changeset viewer.