Ignore:
Timestamp:
2004-01-20T10:17:50+13:00 (20 years ago)
Author:
jmt12
Message:

Extended the metadata set handling code to allow for a newer style of MDS which is optimized for multilingual data. By this I mean that deferred node expansion is used to prevent large language-specific subtrees of the DOM model from being considered when they are not in the current interface language. The old style of MDS format caused a considerable slowdown when opening/closing collections which use the Dublin Core metadata set - which now contains several translations.

Location:
trunk/gli/src/org/greenstone/gatherer/msm
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/msm/ElementWrapper.java

    r6537 r6549  
    205205
    206206    /** Removes an Attribute node from the element. */
    207     public boolean removeAttribute(String name, String language, String value)
    208     {
    209     // Find the attribute to remove
    210     for (Node n = element.getFirstChild(); n != null; n = n.getNextSibling()) {
    211         if (n.getNodeName().equals("Attribute")) {
    212         Element e = (Element) n;
    213         if (e.getAttribute("name").equals(name) &&
    214             e.getAttribute("language").equalsIgnoreCase(language) &&
    215             MSMUtils.getValue(e).equals(value)) {
    216             // Match found, so remove the attribute node and return
    217             element.removeChild(n);
    218             return true;
    219         }
    220         }
    221     }
    222 
    223     // No match found
    224     return false;
     207    public boolean removeAttribute(String name, String language, String value) {
     208    return MSMUtils.removeElementAttribute(element, name, language, value);
    225209    }
    226210
     
    231215    public String toString() {
    232216    return getNamespace() + MSMUtils.NS_SEP + getIdentity();
    233     /*
    234     String element_name = getName();
    235     String element_identifier = getIdentity();
    236 
    237     // Generate the element name without the namespace
    238     String element_name_no_namespace = element_name;
    239     int namespace_end = element_name.indexOf(MSMUtils.NS_SEP);
    240     if (namespace_end != -1) {
    241         element_name_no_namespace = element_name.substring(namespace_end + 1);
    242     }
    243 
    244     // Return just the element name, unless the element identifier differs
    245     //if (element_name_no_namespace.equals(element_identifier)) {
    246         return element_name;
    247     //}
    248     //else {
    249     //    return element_name + " (" + element_identifier + ")";
    250     //}
    251     */
    252217    }
    253218}
  • trunk/gli/src/org/greenstone/gatherer/msm/MSMUtils.java

    r6537 r6549  
    5656    static public MetadataComparator METADATA_COMPARATOR = new MetadataComparator();
    5757    /** An element of the enumeration of type filter. */
    58     static public final int NONE = 0;
     58    static public int NONE = 0;
    5959    /** An element of the enumeration of type filter. */
    60     static public final int VALUES = 1;
     60    static public int VALUES = 1;
    6161    /** An element of the enumeration of type filter. */
    62     static public final int ALIASES = 2;
     62    static public int ALIASES = 2;
    6363    /** An element of the enumeration of type filter. */
    64     static public final int BOTH = 3;
     64    static public int BOTH = 3;
    6565    /** The character used to separate name space from metadata element. */
    66     static public final char NS_SEP= '.';
     66    static public char NS_SEP= '.';
    6767    /** The character used to separate subfields from metadata element. */
    68     static public final String SF_SEP= "#";
     68    static public String SF_SEP= "#";
    6969    /** Method to add one node as a child of another, after migrating into the target document.
    7070     * @param parent The <strong>Node</strong> we are inserting into.
    7171     * @param child The original <strong>Node</strong> we are inserting. Must first be cloned into the parents document.
    7272     */
    73     static final public void add(Node parent, Node child) {
     73    static public void add(Node parent, Node child) {
    7474    Document document = parent.getOwnerDocument();
    7575    Node new_child = document.importNode(child, true);
     
    7777    }
    7878
    79     static final public void addElementAttribute(Node node, String name, String language, String value) {
    80     Document document = node.getOwnerDocument();
    81     Element attribute_node = document.createElementNS("", "Attribute");
    82     attribute_node.setAttribute("name", name);
    83     attribute_node.setAttribute("language", language);
    84     node.appendChild(attribute_node);
    85     Node attribute_text = document.createTextNode(value);
    86     attribute_node.appendChild(attribute_text);
     79    /** Method to add an attribute element to the given element. This method makes use of the language_dependant attribute of the document to not only determine if the attribute is language dependent, but also to see whether a Language element should be created if it doesn't already exist.
     80     * @param element_element the Element to add the attribute element to
     81     * @param attribute_name_str the name of the new attribute to add as a String
     82     * @param language_code_str the two letter code String of the language this attribute is to be added as
     83     * @param value_str the String to be assigned as the attribute elements value
     84     * @see org.greenstone.gatherer.msm.MSMUtils#isAttributeLanguageDependant
     85     * @see org.greenstone.gatherer.msm.MSMUtils#setValue(Element, String)
     86     * @see org.greenstone.gatherer.util.StaticStrings#ATTRIBUTE_ELEMENT
     87     * @see org.greenstone.gatherer.util.StaticStrings#CODE_ATTRIBUTE
     88     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ATTRIBUTE
     89     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ELEMENT
     90     * @see org.greenstone.gatherer.util.StaticStrings#NAME_ATTRIBUTE
     91     */
     92    static public void addElementAttribute(Element element_element, String attribute_name_str, String language_code_str, String value_str) {
     93    Document document = element_element.getOwnerDocument();
     94    // Create the basic new attribute (everything except language attribute)
     95    Element attribute_element = document.createElement(StaticStrings.ATTRIBUTE_ELEMENT);
     96    attribute_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, attribute_name_str);
     97    MSMUtils.setValue(attribute_element, value_str);
     98    // Start off by determining if we have to add this node in the new multilingual optimized way
     99    if(isAttributeLanguageDependant(document, attribute_name_str)) {
     100        boolean found = false;
     101        // Try to retrieve a language element for the given language code
     102        NodeList language_elements = element_element.getElementsByTagName(StaticStrings.LANGUAGE_ELEMENT);
     103        for(int i = 0; i < language_elements.getLength(); i++) {
     104        Element language_element = (Element) language_elements.item(i);
     105        if(language_element.getAttribute(StaticStrings.CODE_ATTRIBUTE).equals(language_code_str)) {
     106            found = true;
     107            // Add attribute
     108            language_element.appendChild(attribute_element);
     109        }
     110        language_element = null;
     111        }
     112        language_elements = null;
     113        // If it still hasn't been found, then add it
     114        if(!found) {
     115        Element language_element = document.createElement(StaticStrings.LANGUAGE_ELEMENT);
     116        language_element.setAttribute(StaticStrings.CODE_ATTRIBUTE, language_code_str);
     117        element_element.appendChild(language_element);
     118        // Add attribute
     119        language_element.appendChild(attribute_element);
     120        language_element = null;
     121        }
     122    }
     123    // Just add the attribute the old fashioned way
     124    else {
     125        attribute_element.setAttribute(StaticStrings.LANGUAGE_ATTRIBUTE, language_code_str);
     126        element_element.appendChild(attribute_element);
     127    }
     128    // Clean up
     129    attribute_element = null;
     130    document = null;
    87131    }
    88132
     
    280324    }
    281325
    282     static final public TreeSet getAttributes(Element element) {
    283     TreeSet attributes = new TreeSet();
    284     for(Node n = element.getFirstChild(); n != null; n = n.getNextSibling()) {
    285         if(n.getNodeName().equals("Attribute")) {
    286         Element e = (Element)n;
    287         attributes.add(new Attribute(e.getAttribute("name"), e.getAttribute("language"), getValue(e)));
    288         }
    289     }
    290     return attributes;
     326    /** Retrieve all of the attributes for the given element as a tree set. Note that this requires significant manipulation if the source is a multilingual optimized metadata set.
     327     * @param element the Element whose attributes we wish to catalog
     328     * @return a TreeSet of the attributes sorted by their natural ordering
     329     * @see org.greenstone.gatherer.msm.MSMUtils#getValue(Element)
     330     * @see org.greenstone.gatherer.util.StaticStrings#ATTRIBUTE_ELEMENT
     331     * @see org.greenstone.gatherer.util.StaticStrings#CODE_ATTRIBUTE
     332     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ATTRIBUTE
     333     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ELEMENT
     334     * @see org.greenstone.gatherer.util.StaticStrings#NAME_ATTRIBUTE
     335     */
     336    static public TreeSet getAttributes(Element element) {
     337    TreeSet attribute_tree = new TreeSet();
     338    for(Node node = element.getFirstChild(); node != null; node = node.getNextSibling()) {
     339        if(node instanceof Element) {
     340        Element some_element = (Element) node;
     341        String some_element_name = some_element.getNodeName();
     342        if(some_element_name.equals(StaticStrings.ATTRIBUTE_ELEMENT)) {
     343            attribute_tree.add(new Attribute(some_element.getAttribute(StaticStrings.NAME_ATTRIBUTE), some_element.getAttribute(StaticStrings.LANGUAGE_ATTRIBUTE), MSMUtils.getValue(some_element)));
     344        }
     345        else if(some_element_name.equals(StaticStrings.LANGUAGE_ELEMENT)) {
     346            String language_code = some_element.getAttribute(StaticStrings.CODE_ATTRIBUTE);
     347            NodeList attribute_elements = some_element.getElementsByTagName(StaticStrings.ATTRIBUTE_ELEMENT);
     348            for(int i = 0; i < attribute_elements.getLength(); i++) {
     349            Element attribute_element = (Element) attribute_elements.item(i);
     350            attribute_tree.add(new Attribute(attribute_element.getAttribute(StaticStrings.NAME_ATTRIBUTE), language_code, MSMUtils.getValue(element)));
     351            attribute_element = null;
     352            }
     353            attribute_elements = null;
     354            language_code = null;
     355        }
     356        some_element_name = null;
     357        some_element = null;
     358        }
     359    }
     360    return attribute_tree;
    291361    }
    292362
     
    365435
    366436    /** Method to construct an elements description by retrieving the correct attribute.
    367      * @param element The <strong>Element</strong> whose name we wish to retrieve.
    368      * @return A <strong>String</strong> which is the elements description, or an empty string if no description exists.
    369      */
    370     static final public String getDescription(Node element) {
    371     String definition = "";
    372     Element definition_node = getAttributeNodeNamed(element, "definition");
    373     if(definition_node != null) {
    374         definition = getValue(definition_node);
    375     }
    376     String comment = "";
    377     Element comment_node = getAttributeNodeNamed(element, "comment");
    378     if(comment_node != null) {
    379         comment = getValue(comment_node);
    380     }
    381     if(!definition.endsWith(StaticStrings.SPACE_CHARACTER) && !comment.startsWith(StaticStrings.SPACE_CHARACTER)) {
    382         comment = StaticStrings.SPACE_CHARACTER + comment;
    383     }
    384     //String description = definition + comment;
    385     return definition + comment; //Utility.stripNL(description.trim());
    386     }
    387 
    388     /** Extracts the file name pattern from within a fileset of a Greenstone Directory Metadata model.
    389      * @param fileset The fileset Node in question.
    390      * @return The pattern as a String.
    391      */
    392     /* static final private String getFileNamePattern(Node fileset) {
    393     // Locate the child node called filename
    394     for(Node child = fileset.getFirstChild(); child != null; child = child.getNextSibling()) {
    395         if(child.getNodeName().equalsIgnoreCase("FileName")) {
    396         // Find the file string.
    397         return MSMUtils.getValue(child);
    398         }
    399     }         
    400     return null;
    401     } */
    402 
    403     /*************************************************************************/
    404     /** Method to create the fully namespace quantified identifier for this element.
    405      * @param element The <strong>Node</strong> in question.
    406      * @return A fully qualified identifier as a <strong>String</strong>
    407      */
    408     /* static final private String getFullIdentifier(Node element, String namespace) {
    409     StringBuffer identifier_buffer = new StringBuffer();
    410     if(element == null) {
    411         return "Error";
    412     }
    413     // First get the root node.
    414     Document document = element.getOwnerDocument();
    415     Element root = document.getDocumentElement();
    416     document = null;
    417     // Retrieve this elements identifier
    418     identifier_buffer.append(getIdentifier(element));
    419     // Now we check if element has a parent node, other than root. If so we begin building up the full identifier
    420     Element parent_element = (Element) element.getParentNode();
    421     while(parent_element != null && parent_element != root) {
    422         identifier_buffer.insert(0, SF_SEP);
    423         identifier_buffer.insert(0, getIdentifier(parent_element));
    424         parent_element = (Element)parent_element.getParentNode();
    425     }
    426     parent_element = null;
    427     // Finally insert the namespace and we are all done.
    428     if(root != null) {
    429         namespace = root.getAttribute("namespace");
    430     }
    431     root = null;
    432     // If no root, or no namespace found, assume its extracted (at least then they can't edit it)
    433     if(namespace == null || namespace.equals("")) {
    434         namespace = Utility.EXTRACTED_METADATA_NAMESPACE;
    435     }
    436     identifier_buffer.insert(0, NS_SEP);
    437     identifier_buffer.insert(0, namespace);
    438     namespace = null;
    439     return identifier_buffer.toString();
    440     } */
     437     * @param element the Element whose name we wish to retrieve
     438     * @return a String which is the elements description, or an empty string if no description exists
     439     * @see org.greenstone.gatherer.msm.MSMUtils#getElementAttribute
     440     * @see org.greenstone.gatherer.util.StaticStrings#COMMENT_VALUE
     441     * @see org.greenstone.gatherer.util.StaticStrings#DEFINITION_VALUE
     442     * @see org.greenstone.gatherer.util.StaticStrings#EMPTY_STR
     443     * @see org.greenstone.gatherer.util.StaticStrings#SPACE_CHARACTER
     444     */
     445    static public String getDescription(Element element) {
     446    String language_code_str = Gatherer.config.getLanguage();
     447    StringBuffer description = new StringBuffer(StaticStrings.EMPTY_STR);
     448    description.append(getElementAttribute(element, StaticStrings.DEFINITION_VALUE, language_code_str, false));
     449    if(description.length() > 0) {
     450        description.append(StaticStrings.SPACE_CHARACTER);
     451    }
     452    description.append(getElementAttribute(element, StaticStrings.COMMENT_VALUE, language_code_str, false));
     453    language_code_str = null;
     454    return description.toString();
     455    }
     456
     457    /** Retrieve the value for the requested attribute in the required language. Once again this method must be aware of the differences between the old metadata sets and the new multilingual optimized ones.
     458     * @param element_element the Element whose attributes we are searching through
     459     * @param attribute_name_str the name of the desired attribute as a String
     460     * @param language_code_str the two letter code String indicating the desired language
     461     * @param first_match true to allow the first match to be the default value in the absence of a closer match, false for the empty string instead. This argument only has an effect when dealing with legacy metadata sets
     462     * @see org.greenstone.gatherer.msm.MSMUtils#getValue
     463     * @see org.greenstone.gatherer.msm.MSMUtils#isAttributeLanguageDependant
     464     * @see org.greenstone.gatherer.util.StaticStrings#ATTRIBUTE_ELEMENT
     465     * @see org.greenstone.gatherer.util.StaticStrings#CODE_ATTRIBUTE
     466     * @see org.greenstone.gatherer.util.StaticStrings#EMPTY_STR
     467     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ATTRIBUTE
     468     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ELEMENT
     469     * @see org.greenstone.gatherer.util.StaticStrings#NAME_ATTRIBUTE
     470     */
     471    static public String getElementAttribute(Element element_element, String attribute_name_str, String language_code_str, boolean first_match) {
     472    boolean found = false;
     473    String result = StaticStrings.EMPTY_STR;
     474    // Determine if the attribute is language specific
     475    if(isAttributeLanguageDependant(element_element.getOwnerDocument(), attribute_name_str)) {
     476        NodeList language_elements = element_element.getElementsByTagName(StaticStrings.LANGUAGE_ELEMENT);
     477        for(int i = 0; !found && i < language_elements.getLength(); i++) {
     478        Element language_element = (Element) language_elements.item(i);
     479        if(language_element.getAttribute(StaticStrings.CODE_ATTRIBUTE).equals(language_code_str)) {
     480            NodeList attribute_elements = language_element.getElementsByTagName(StaticStrings.ATTRIBUTE_ELEMENT);
     481            for(int j = 0; !found && j < attribute_elements.getLength(); j++) {
     482            Element attribute_element = (Element) attribute_elements.item(j);
     483            if(attribute_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(attribute_name_str)) {
     484                found = true;
     485                result = MSMUtils.getValue(attribute_element);
     486            }
     487            attribute_element = null;
     488            }
     489            attribute_elements = null;
     490        }
     491        language_element = null;
     492        }
     493        language_elements = null;
     494    }
     495    else {
     496        NodeList attribute_elements = element_element.getElementsByTagName(StaticStrings.ATTRIBUTE_ELEMENT);
     497        for(int k = 0; !found && k < attribute_elements.getLength(); k++) {
     498        Element attribute_element = (Element) attribute_elements.item(k);
     499        // We don't want to consider those attributes found inside language elements
     500        if(attribute_element.getParentNode() == element_element) {
     501            String target_name_str = attribute_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
     502            String target_language_str = attribute_element.getAttribute(StaticStrings.LANGUAGE_ATTRIBUTE);
     503            if(attribute_name_str.equals(target_name_str)) {
     504            if(language_code_str.equals(target_language_str)) {
     505                found = true;
     506                result = MSMUtils.getValue(attribute_element);
     507            }
     508            else if(first_match) {
     509                first_match = false;
     510                result = MSMUtils.getValue(attribute_element);
     511            }
     512            }
     513            target_language_str = null;
     514            target_name_str = null;
     515        }
     516        attribute_element = null;
     517        }
     518        attribute_elements = null;
     519
     520    }
     521    return result;
     522    }
    441523
    442524    /*************************************************************************/
     
    488570    } // static public String getFullName(Element element)
    489571
    490     /** Method to construct an elements identifier by retrieving the correct attribute. Language specific, based on default Locale.
    491      * @param element The <strong>Element</strong> whose name we wish to retrieve.
    492      * @return A <strong>String</strong> which is the elements identifier, or an empty string if no identifier exists.
    493      */
    494     static final public String getIdentifier(Node element) {
    495     String identifier = null;
    496     // Determine locale code.
    497     String language_code = Gatherer.config.getLanguage();
    498     // Get the 'identifier' Element with the correct locale
    499     for(Node node = element.getFirstChild(); node != null;
    500         node = node.getNextSibling()) {
    501         if(node.getNodeName().equals("Attribute")) {
    502         Element target = (Element)node;
    503         if(target.getAttribute("name").equals("identifier")) {
    504             Node text = target.getFirstChild();
    505             if(target.getAttribute("language").equalsIgnoreCase(language_code)) {
    506             return text.getNodeValue();
    507             }
    508             else if(target.getAttribute("language").equalsIgnoreCase("en")) {
    509             identifier = text.getNodeValue();
    510             }
    511             else if(identifier == null) {
    512             identifier = text.getNodeValue();
    513             }
    514             text = null;
    515         }
    516         target = null;
    517         }
    518     }
    519     language_code = null;
    520     // We may have harvested some identifier from the file.
    521     if(identifier != null) {
    522         return identifier;
    523     }
     572    /** Method to construct an element's name (i.e. its identifier) by retrieving the correct, language-specific attribute.
     573     * @param element the Element whose name we wish to retrieve
     574     * @return a String which is the elements identifier, or an empty string if no identifier exists
     575     * @see org.greenstone.gatherer.msm.MSMUtils#getElementAttribute
     576     * @see org.greenstone.gatherer.util.StaticStrings#IDENTIFIER_VALUE
     577     * @see org.greenstone.gatherer.util.StaticStrings#NAME_ATTRIBUTE
     578     */
     579    static final public String getIdentifier(Element element) {
     580    String identifier = getElementAttribute(element, StaticStrings.IDENTIFIER_VALUE, Gatherer.config.getLanguage(), false);
    524581    // Failing the above we return the nodes name instead.
    525     return ((Element)element).getAttribute("name");
    526     }
    527 
    528     /** Retrieve the metadata description element from this fileset node.
    529      * @param fileset The fileset in question.
    530      * @return The description node or null if no such node.
    531      */
    532     /* static final private Node getMetadataDescription(Node fileset) {
    533     // Locate the child node called filename
    534     for(Node child = fileset.getLastChild(); child != null; child = child.getPreviousSibling()) {
    535         if(child.getNodeName().equalsIgnoreCase("Description")) {
    536         return child;
    537         }
    538     }         
    539     return null;
    540     } */
     582    if(identifier == null || identifier.length() == 0) {
     583        identifier = element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
     584    }
     585    return identifier;
     586    }
    541587
    542588    /** Method to retrieve from the node given, a certain child node with the specified name.
     
    690736    return Utility.parse(Utility.METADATA_VALUE_TEMPLATE, true);
    691737    }
     738
     739    /** Determine if the named attribute is language specific for this collection. This information is found in a DOM attribute of the document element, as a comma separated list of attribute names.
     740     * @param document the Document for which we wish to check the language requirements
     741     * @param attribute_name_str the name of the attribute we are testing as a String
     742     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGEDEPENDANT_ATTRIBUTE
     743     */
     744    static public boolean isAttributeLanguageDependant(Document document, String attribute_name_str) {
     745    String language_specific_attributes = document.getDocumentElement().getAttribute(StaticStrings.LANGUAGEDEPENDANT_ATTRIBUTE).toLowerCase();
     746    return language_specific_attributes.indexOf(attribute_name_str) != -1;
     747    }
     748
    692749    /** Method to compare two OptionsLists for equality.
    693750     * @param al A <strong>Node</strong> which represents an OptionList.
     
    721778    }
    722779    return true;
     780    }
     781
     782    /** A method to remove a specific attribute element from an element. This attribute must match in name, language and in value before being removed. Note that this method supports both legacy and multilingual optimized versions of the mds.
     783     * @param element_element the Element which represents the metadata element we are altering
     784     * @param attribute_name_str the name of the attribute to remove as a String
     785     * @param language_code_str the language code we must match as a String
     786     * @param value_str the value String which also must match before we remove anything
     787     * @return true if the desired attribute was successfully found and removed, false otherwise
     788     * @see org.greenstone.gatherer.msm.MSMUtils#isAttributeLanguageDependant
     789     * @see org.greenstone.gatherer.msm.MSMUtils#getValue(Element)
     790     * @see org.greenstone.gatherer.util.StaticStrings#ATTRIBUTE_ELEMENT
     791     * @see org.greenstone.gatherer.util.StaticStrings#CODE_ATTRIBUTE
     792     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ATTRIBUTE
     793     * @see org.greenstone.gatherer.util.StaticStrings#LANGUAGE_ELEMENT
     794     * @see org.greenstone.gatherer.util.StaticStrings#NAME_ATTRIBUTE
     795     */
     796    static public boolean removeElementAttribute(Element element_element, String attribute_name_str, String language_code_str, String value_str) {
     797    // Multilingual Optimized version
     798    // 1. Determine if this is one of the language-specific attributes
     799    if(isAttributeLanguageDependant(element_element.getOwnerDocument(), attribute_name_str)) {
     800        // Retrieve the language elements, and determine the correct one
     801        NodeList language_elements = element_element.getElementsByTagName(StaticStrings.LANGUAGE_ELEMENT);
     802        for(int i = 0; i < language_elements.getLength(); i++) {
     803        Element language_element = (Element) language_elements.item(i);
     804        if(language_element.getAttribute(StaticStrings.CODE_ATTRIBUTE).equalsIgnoreCase(language_code_str)) {
     805            NodeList attribute_elements = language_element.getElementsByTagName(StaticStrings.ATTRIBUTE_ELEMENT);
     806            for(int j = 0; j < attribute_elements.getLength(); j++) {
     807            Element attribute_element = (Element) attribute_elements.item(j);
     808            String target_name_str = attribute_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
     809            String target_value_str = MSMUtils.getValue(attribute_element);
     810            if(attribute_name_str.equals(target_name_str) && value_str.equals(target_value_str)) {
     811                language_element.removeChild(attribute_element);
     812                if(attribute_elements.getLength() == 0) {
     813                element_element.removeChild(language_element);
     814                }
     815                target_value_str = null;
     816                target_name_str = null;
     817                attribute_element = null;
     818                attribute_elements = null;
     819                language_element = null;
     820                language_elements = null;
     821                return true;
     822            }
     823            target_value_str = null;
     824            target_name_str = null;
     825            attribute_element = null;           
     826            }
     827            attribute_elements = null;
     828        }
     829        language_element = null;
     830        }
     831        language_elements = null;
     832        // Not found
     833        return false;
     834    }
     835    // Otherwise just use the old method
     836
     837    // Find the attribute to remove
     838    NodeList attribute_elements = element_element.getElementsByTagName(StaticStrings.ATTRIBUTE_ELEMENT);
     839    for (int k = 0; k < attribute_elements.getLength(); k++) {
     840        Element attribute_element = (Element) attribute_elements.item(k);
     841        // Remember to ignore any attributes that live within nested language elements
     842        if (attribute_element.getParentNode() == element_element && attribute_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(attribute_name_str) && attribute_element.getAttribute(StaticStrings.LANGUAGE_ATTRIBUTE).equalsIgnoreCase(language_code_str) && MSMUtils.getValue(attribute_element).equals(value_str)) {
     843        // Match found, so remove the attribute node and return
     844        element_element.removeChild(attribute_element);
     845        attribute_element = null;
     846        attribute_elements = null;
     847        return true;
     848        }
     849        attribute_element = null;
     850    }
     851    attribute_elements = null;
     852    // No match found
     853    return false;
    723854    }
    724855
  • trunk/gli/src/org/greenstone/gatherer/msm/MetadataSet.java

    r6537 r6549  
    4545import org.greenstone.gatherer.valuetree.GValueModel;
    4646import org.greenstone.gatherer.valuetree.GValueNode;
     47import org.greenstone.gatherer.util.StaticStrings;
    4748import org.greenstone.gatherer.util.Utility;
    4849import org.w3c.dom.*;
     
    152153    switch(condition) {
    153154    case ALL_VALUES:
    154                 // Do nothing.
     155        // Do nothing.
    155156        break;
    156157    case SUBJECTS_ONLY:
    157                 // For each element retrieve its AssignedValues element.
     158        // For each element retrieve its AssignedValues element.
    158159        for(Enumeration keys = value_trees.keys(); keys.hasMoreElements(); ) {
    159160        ElementWrapper value_element = (ElementWrapper)keys.nextElement();
     
    166167        break;
    167168    case NO_VALUES:
    168                 // Remove assigned values trees.
     169        // Remove assigned values trees.
    169170        value_trees.clear();
    170171        break;
     
    316317     */
    317318    public String getDescription() {
     319    if(current_language_code != null && !Gatherer.config.getLanguage().equals(current_language_code)) {
     320        description = null;
     321    }
    318322    if(description == null) {
    319                 // Determine the code.
    320         String language_code = Gatherer.dictionary.getLanguage();
    321                 // Recover all Description elements
    322         NodeList descriptions = document.getElementsByTagName("Description");
    323                 // Iterate through the available descriptions looking for the appropriate one. Also make note of the first description, then overwrite it with any english one.
    324         boolean found = false;
    325         for(int i = 0; !found && i < descriptions.getLength(); i++) {
    326         Element pos_description = (Element) descriptions.item(i);
    327         String pos_description_code = pos_description.getAttribute("language");
    328         if(pos_description_code.equalsIgnoreCase(language_code)) {
    329             description = MSMUtils.getValue(pos_description);
    330             found = true;
    331         }
    332         else if(pos_description_code.equalsIgnoreCase("en")) {
    333             description = MSMUtils.getValue(pos_description);
    334         }
    335         else if(description == null) {
    336             description = MSMUtils.getValue(pos_description);
    337         }
    338         pos_description_code = null;
    339         pos_description = null;
    340         }
    341         descriptions = null;
    342         language_code = null;
    343                 // Failing all that set an error message
    344         if(description == null) {
    345         description = Dictionary.get("MSM.No_Description");
    346         }
     323        description = getAttribute(StaticStrings.DESCRIPTION_ELEMENT, Dictionary.get("MSM.No_Description"));
    347324    }
    348325    return description;
     
    440417    }
    441418    if(name == null) {
    442         // Determine the code.
    443         current_language_code = Gatherer.config.getLanguage();
    444         // Recover all Name elements
    445         NodeList names = document.getElementsByTagName("Name");
    446         // Iterate through the available names looking for the appropriate one. Also make note of the first name, then overwrite it with any english one.
    447         boolean found = false;
    448         for(int i = 0; !found && i < names.getLength(); i++) {
    449         Element pos_name = (Element) names.item(i);
    450         String pos_name_code = pos_name.getAttribute("language");
    451         if(pos_name_code.equalsIgnoreCase(current_language_code)) {
    452             name = MSMUtils.getValue(pos_name);
    453             found = true;
    454         }
    455         else if(pos_name_code.equalsIgnoreCase("en")) {
    456             name = MSMUtils.getValue(pos_name);
    457         }
    458         else if(name == null) {
    459             name = MSMUtils.getValue(pos_name);
    460         }
    461         pos_name_code = null;
    462         pos_name = null;
    463         }
    464         names = null;
    465         // Failing all that set an error message
    466         if(name == null) {
    467         name = Dictionary.get("MSM.No_Name");
    468         }
     419        name = getAttribute(StaticStrings.NAME_ELEMENT, Dictionary.get("MSM.No_Name"));
    469420    }
    470421    return name;
     
    595546    }
    596547
     548    /** This method retrieves the required attribute from the Metadata Set, typically it's name or it's description. Note that this method is language dependant, and moreover supports both legacy metadata sets and the new sets optimized for multiple languages.
     549     * @param element_name the name of the type of element the required information is in as a String
     550     * @param default_string the value to return in no such element is found also as a String
     551     * @see org.greenstone.gatherer.Configuration#getLanguage()
     552     * @see org.greenstone.gatherer.Gatherer#config
     553     * @see org.greenstone.gatherer.msm.MSMUtils#getValue(Element)
     554     * @see org.greenstone.gatherer.util.StaticStrings#CODE_ATTRIBUTE
     555     * @see org.greenstone.gatherer.util.StaticStrings#SETLANGUAGE_ELEMENT
     556     */
     557    private String getAttribute(String element_name, String default_string) {
     558    String result = null;
     559    // Determine the language code.
     560    current_language_code = Gatherer.config.getLanguage();
     561
     562    // New Metadata Set Format makes use of deferred-node-expansion to save memory - rather than create nodes for a name and description in each language, nodes which have potentially huge strings, we instead create simplier SETLANGUAGE nodes, and then only expand the one in the desired language. Of course if a user happens to change to every available language slightly more memory will be used than in the old method. For instance consider the DLS with 25 languages, each with a name node of 50 bytes and an descriptions of 500. Thus old style > 13750 bytes while new style < 600.
     563    NodeList set_language_elements = document.getElementsByTagName(StaticStrings.SETLANGUAGE_ELEMENT);
     564    for(int b = 0; b < set_language_elements.getLength(); b++) {
     565        Element set_language_element = (Element) set_language_elements.item(b);
     566        String code = set_language_element.getAttribute(StaticStrings.CODE_ATTRIBUTE).toLowerCase();
     567        if(code.equals(current_language_code) || name == null) {
     568        NodeList specific_elements = set_language_element.getElementsByTagName(element_name);
     569        if(specific_elements.getLength() > 0) {
     570            Element specific_element = (Element) specific_elements.item(0);
     571            result = MSMUtils.getValue(specific_element);
     572            specific_element = null;
     573        }
     574        specific_elements = null;
     575        }
     576        code = null;
     577        set_language_element = null;
     578    }
     579    set_language_elements = null;
     580    // And we may be all done
     581    if(result != null) {
     582        return result;
     583    }
     584
     585    // Failing that we move on to an older style search - start by recovering all Name elements
     586    NodeList possible_elements = document.getElementsByTagName(element_name);
     587    // Iterate through the available names looking for the appropriate one. Also make note of the first name, then overwrite it with any english one.
     588    boolean found = false;
     589    for(int i = 0; !found && i < possible_elements.getLength(); i++) {
     590        Element possible_element = (Element) possible_elements.item(i);
     591        String possible_element_code = possible_element.getAttribute("language").toLowerCase();
     592        if(possible_element_code.equals(current_language_code) || name == null) {
     593        result = MSMUtils.getValue(possible_element);
     594        found = true;
     595        }
     596        possible_element_code = null;
     597        possible_element = null;
     598    }
     599    possible_elements = null;
     600    // Failing all that set an error message
     601    if(result == null) {
     602        result = default_string;
     603    }
     604    return result;
     605    }
     606
    597607    private void init(File file) {
    598608    this.file = file;
     
    602612        this.elements = document.getElementsByTagName("Element");
    603613        this.root = document.getDocumentElement();
    604                 // Now for each element read in its value tree if present.
     614        // Now for each element read in its value tree if present.
    605615        for(int i = elements.getLength() - 1; i >= 0; i--) {
    606616        ElementWrapper value_element = new ElementWrapper((Element)elements.item(i));
Note: See TracChangeset for help on using the changeset viewer.