Changeset 6827


Ignore:
Timestamp:
2004-02-18T13:11:14+13:00 (20 years ago)
Author:
mdewsnip
Message:

Filenames written out to metadata.xml files are now transformed so that they are safe for regular-expression matching, i.e. files with parentheses in their names now work.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/msm/GDMDocument.java

    r6539 r6827  
    4545 */
    4646public class GDMDocument {
    47     /** Record if the document this object is based on is up to date. */
    48     private boolean up_to_date = true;
    49     /** The document this class sources its data from. */
    50     private Document base_document;
    51     static final private String ACCUMULATE = "accumulate";
    52     /** The pattern to match when searching for directory level assignments. */
    53     static final private String DIRECTORY_FILENAME = ".*";
    54     static final private String DESCRIPTION_ELEMENT = "Description";
    55     static final private String FILENAME_ELEMENT = "FileName";
    56     static final private String FILESET_ELEMENT = "FileSet";
    57     static final private String HVALUE_ATTRIBUTE = "hvalue";
    58     static final private String MODE_ATTRIBUTE = "mode";
    59     static final private String OVERWRITE = "overwrite";
    60     static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};
     47    /** Record if the document this object is based on is up to date. */
     48    private boolean up_to_date = true;
     49    /** The document this class sources its data from. */
     50    private Document base_document;
     51    static final private String ACCUMULATE = "accumulate";
     52    /** The pattern to match when searching for directory level assignments. */
     53    static final private String DIRECTORY_FILENAME = ".*";
     54    static final private String DESCRIPTION_ELEMENT = "Description";
     55    static final private String FILENAME_ELEMENT = "FileName";
     56    static final private String FILESET_ELEMENT = "FileSet";
     57    static final private String HVALUE_ATTRIBUTE = "hvalue";
     58    static final private String MODE_ATTRIBUTE = "mode";
     59    static final private String OVERWRITE = "overwrite";
     60    static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};
    6161
    6262    /** Constructor which creates a brand new metadata.xml document. */
    63     public GDMDocument() {
     63    public GDMDocument() {
    6464    // Create new document. We do this by loading a copy of the template. */
    6565    this.base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
    66     }
    67 
    68     /** Constructor which parses an existing metadata.xml document. */
    69     public GDMDocument(File file) {
     66    }
     67
     68    /** Constructor which parses an existing metadata.xml document. */
     69    public GDMDocument(File file) {
    7070    try {
    71         this.base_document = Utility.parse(file.getAbsolutePath(), false);
     71        this.base_document = Utility.parse(file.getAbsolutePath(), false);
    7272    }
    7373    catch (Exception error) {
    74         // Poorly formed, or completely invalid metadata.xml file!
    75     }
    76     }
    77 
    78     /** Constructor which wraps around an existing metadata.xml document. */
    79     public GDMDocument(Document base_document) {
     74        // Poorly formed, or completely invalid metadata.xml file!
     75    }
     76    }
     77
     78    /** Constructor which wraps around an existing metadata.xml document. */
     79    public GDMDocument(Document base_document) {
    8080    this.base_document = base_document;
    81     }
     81    }
    8282
    8383    /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */
    84     public void addMetadata(String filename, Metadata metadata, boolean force_accumulate) {
    85     ///atherer.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
     84    public void addMetadata(String filename, Metadata metadata, boolean force_accumulate)
     85    {
     86    if (filename != null) {
     87        filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP);
     88    }
     89    /// System.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
    8690    try {
    8791        // Retrieve the document element.
     
    9195        boolean found = false;
    9296        NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
    93         for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
     97        for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
    9498        fileset_element = (Element) fileset_elements.item(i);
    9599        NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
    96100        for(int j = 0; !found && j < filename_elements.getLength(); j++) {
    97             Element filename_element = (Element) filename_elements.item(j);
    98             String filename_pattern = MSMUtils.getValue(filename_element);
    99             // Have we found a match. If so break out of for loop.
    100             if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) {
     101            Element filename_element = (Element) filename_elements.item(j);
     102            String filename_pattern = MSMUtils.getValue(filename_element);
     103            filename_pattern = Codec.transform(filename_pattern, Codec.TEXT_TO_REGEXP);
     104            // System.err.println("Checking " + filename + " against " + filename_pattern + "|");
     105            // Have we found a match. If so break out of for loop.
     106            if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) {
    101107            ///ystem.err.println("Adding to existing file fileset!");
    102108            found = true;
    103             }
    104             else if(filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
     109            }
     110            else if(filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
    105111            ///ystem.err.println("Adding to existing folder fileset!");
    106112            ///ystem.err.println("filename_pattern = '" + filename_pattern + "'");
    107113            found = true;
    108             }
    109             // No match. On to the next one.
    110             else {
     114            }
     115            // No match. On to the next one.
     116            else {
    111117            fileset_element = null;
    112             }
    113             filename_pattern = null;
    114             filename_element = null;
    115         }
    116         }
    117         fileset_elements = null;
    118         // If we still haven't found an existing fileset, then its time to create one.
    119         if(fileset_element == null) {
     118            }
     119            filename_pattern = null;
     120            filename_element = null;
     121        }
     122        }
     123        fileset_elements = null;
     124        // If we still haven't found an existing fileset, then its time to create one.
     125        if(fileset_element == null) {
    120126        ///ystem.err.println("Creating a new fileset.");
    121127        fileset_element = base_document.createElement(FILESET_ELEMENT);
     
    127133        // If the filename is null then we add a directory metadata set as directorymetadata_element's first child
    128134        if(filename == null) {
    129             filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
    130             if(directorymetadata_element.hasChildNodes()) {
     135            filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
     136            if(directorymetadata_element.hasChildNodes()) {
    131137            directorymetadata_element.insertBefore(fileset_element, directorymetadata_element.getFirstChild());
    132             }
    133             else {
     138            }
     139            else {
    134140            directorymetadata_element.appendChild(fileset_element);
    135             }
     141            }
    136142        }
    137143        // Otherwise we just append the new fileset to directorymetadata_element's children.
    138144        else {
    139             filename_text = base_document.createTextNode(filename);
    140             directorymetadata_element.appendChild(fileset_element);
     145            filename_text = base_document.createTextNode(filename);
     146            directorymetadata_element.appendChild(fileset_element);
    141147        }
    142148        filename_element.appendChild(filename_text);
     
    144150        description_element = null;
    145151        filename_element = null;
    146         }
    147         // Now, finally, we can add the metadata.
    148         Element metadata_element = null;
    149         String name = metadata.getElement().getName();
    150         // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
    151         if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) {
    152               metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]);
    153               name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1);
    154         }
    155         else {
    156               metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]);
    157         }
    158         metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
    159 
    160         // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
    161         boolean will_accumulate = false;
    162         NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
    163         for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) {
     152        }
     153        // Now, finally, we can add the metadata.
     154        Element metadata_element = null;
     155        String name = metadata.getElement().getName();
     156        // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
     157        if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) {
     158        metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]);
     159        name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1);
     160        }
     161        else {
     162        metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]);
     163        }
     164        metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
     165
     166        // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
     167        boolean will_accumulate = false;
     168        NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
     169        for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) {
    164170        Element sibling_description_element = (Element) sibling_description_elements.item(k);
    165171        // We have to do this for each type of metadata
    166172        for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
    167             NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
    168             for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) {
    169                 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
     173            NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
     174            for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) {
     175            Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
    170176                // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice.
    171                 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
    172                     // Check the values and return if they are the same.
    173                    if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) {
    174                       return;
    175                    }
    176                    will_accumulate = true;
    177                 }
    178                 sibling_metadata_element = null;
     177            if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
     178                // Check the values and return if they are the same.
     179                if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) {
     180                return;
     181                }
     182                will_accumulate = true;
    179183            }
    180             sibling_metadata_elements = null;
     184            sibling_metadata_element = null;
     185            }
     186            sibling_metadata_elements = null;
    181187        }
    182188        sibling_description_element = null;
    183         }
    184         sibling_description_elements = null;
    185         if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
    186             metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
    187         }
    188         // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
    189         GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
    190         String node_value = null;
    191         if(model != null && model.isHierarchy()) {
    192             //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
    193             node_value = metadata.getValueNode().getFullPath(false);
    194         }
    195         else {
    196             node_value = metadata.getAbsoluteValue();
    197         }
    198         ///ystem.err.println("Creating node in GDMDocument: '" + node_value + "'");
    199         metadata_element.appendChild(base_document.createTextNode(node_value));
    200         // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
    201         NodeList description_elements = fileset_element.getElementsByTagName("Description");
    202         Element description_element = (Element) description_elements.item(0);
    203         description_element.appendChild(metadata_element);
    204         description_element = null;
    205         metadata_element = null;
     189        }
     190        sibling_description_elements = null;
     191        if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
     192        metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
     193        }
     194        // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
     195        GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
     196        String node_value = null;
     197        if(model != null && model.isHierarchy()) {
     198        //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
     199        node_value = metadata.getValueNode().getFullPath(false);
     200        }
     201        else {
     202        node_value = metadata.getAbsoluteValue();
     203        }
     204        ///ystem.err.println("Creating node in GDMDocument: '" + node_value + "'");
     205        metadata_element.appendChild(base_document.createTextNode(node_value));
     206        // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
     207        NodeList description_elements = fileset_element.getElementsByTagName("Description");
     208        Element description_element = (Element) description_elements.item(0);
     209        description_element.appendChild(metadata_element);
     210        description_element = null;
     211        metadata_element = null;
    206212                //mode = null;
    207         fileset_element = null;
    208         directorymetadata_element = null;
    209         up_to_date = false;
     213        fileset_element = null;
     214        directorymetadata_element = null;
     215        up_to_date = false;
    210216    }
    211217    catch (Exception error) {
    212         Gatherer.printStackTrace(error);
    213     }
    214     }
    215 
    216     public int countMetadata() {
     218        Gatherer.printStackTrace(error);
     219    }
     220    }
     221
     222    public int countMetadata() {
    217223    int count = 0;
    218224    try {
    219         // Retrieve the document element.
    220         Element directorymetadata_element = base_document.getDocumentElement();
    221         // Iterate through the filesets, checking the FileName child element against    the target file's name using regular expression matching.
    222         NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
    223         for(int i = 0; i < fileset_elements.getLength(); i++) {
     225        // Retrieve the document element.
     226        Element directorymetadata_element = base_document.getDocumentElement();
     227        // Iterate through the filesets, checking the FileName child element against    the target file's name using regular expression matching.
     228        NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
     229        for(int i = 0; i < fileset_elements.getLength(); i++) {
    224230        Element fileset_element = (Element) fileset_elements.item(i);
    225231        NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
    226232        for(int k = 0; k < description_elements.getLength(); k++) {
    227             Element description_element = (Element) description_elements.item(k);
    228             // We have to do this for each type of metadata
    229             for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
    230                 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
    231                 count = count + metadata_elements.getLength();
    232                 metadata_elements = null;
    233             }
    234             description_element = null;
     233            Element description_element = (Element) description_elements.item(k);
     234            // We have to do this for each type of metadata
     235            for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
     236            NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
     237            count = count + metadata_elements.getLength();
     238            metadata_elements = null;
     239            }
     240            description_element = null;
    235241        }
    236242        description_elements = null;
    237243        fileset_element = null;
    238         }
    239         fileset_elements = null;
    240         directorymetadata_element = null;
     244        }
     245        fileset_elements = null;
     246        directorymetadata_element = null;
    241247    }
    242248    catch (Exception error) {
    243         Gatherer.printStackTrace(error);
     249        Gatherer.printStackTrace(error);
    244250    }
    245251    return count;
    246     }
    247 
    248     /** Retrieve the document this class is wrapping. */
    249     public Document getDocument() {
     252    }
     253
     254    /** Retrieve the document this class is wrapping. */
     255    public Document getDocument() {
    250256    return base_document;
    251     }
    252 
    253     /** Get all of the metadata, including directory level, associated with this file. */
    254     public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
     257    }
     258
     259    /** Get all of the metadata, including directory level, associated with this file. */
     260    public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
    255261    return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false);
    256     }
     262    }
    257263    /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */
    258264    public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) {
     
    450456    }
    451457   
    452     /** Determine if this document has been saved recently, and thus xml file version is up to date. */
    453     public boolean isUpToDate() {
     458    /** Determine if this document has been saved recently, and thus xml file version is up to date. */
     459    public boolean isUpToDate() {
    454460    return false;
    455     }
    456 
    457     /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
    458     public boolean isValid() {
     461    }
     462
     463    /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
     464    public boolean isValid() {
    459465    // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
    460466    String doctype_name = base_document.getDoctype().getName();
    461467    String root_name = base_document.getDocumentElement().getTagName();
    462468    return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
    463     }
     469    }
    464470
    465471    /** Remove all of the extracted metadata (XMetadata) from this document. */
Note: See TracChangeset for help on using the changeset viewer.