Changeset 5040


Ignore:
Timestamp:
2003-07-25T16:44:43+12:00 (21 years ago)
Author:
jmt12
Message:

Modified searching process so that a valid metadata.xml can be read in, even if it isn't part of a current collection. This functionality was an unintentional side-effect of searching for required hierarchy files. Note that if reading in a so-called 'wild metadata' file, then the profile actions generated will be matched against that file's parent folders only (whereas they would be matched against a particular collection's name in the normal case).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/msm/parsers/GreenstoneMetadataParser.java

    r4619 r5040  
    1 package org.greenstone.gatherer.msm.parsers;
    21/**
    32 *#########################################################################
     
    76 * University of Waikato, New Zealand.
    87 *
    9  * <BR><BR>
    10  *
    118 * Author: John Thompson, Greenstone Digital Library, University of Waikato
    129 *
    13  * <BR><BR>
    14  *
    1510 * Copyright (C) 1999 New Zealand Digital Library Project
    16  *
    17  * <BR><BR>
    1811 *
    1912 * This program is free software; you can redistribute it and/or modify
     
    2215 * (at your option) any later version.
    2316 *
    24  * <BR><BR>
    25  *
    2617 * This program is distributed in the hope that it will be useful,
    2718 * but WITHOUT ANY WARRANTY; without even the implied warranty of
    2819 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    2920 * GNU General Public License for more details.
    30  *
    31  * <BR><BR>
    3221 *
    3322 * You should have received a copy of the GNU General Public License
     
    3625 *########################################################################
    3726 */
     27package org.greenstone.gatherer.msm.parsers;
     28/**************************************************************************************
     29 * Written:      ??/??/02
     30 * Revised:      ??/??/02 - Commented
     31 *               25/07/03 - Fix to allow any valid greenstone metadata.xml to be imported from, not just those that occur within a collection. This functionality is accidental and caused by GLI attempting to find a collect.cfg to extract hierarchy file information from, and failing.
     32 **************************************************************************************/
    3833import java.io.*;
    3934import java.lang.ref.*;
     
    4136import java.util.*;
    4237import java.util.regex.*;
    43 import javax.swing.JOptionPane;
     38import javax.swing.*;
    4439import javax.swing.tree.*;
    4540import org.greenstone.gatherer.Gatherer;
     
    5550import org.greenstone.gatherer.valuetree.GValueModel;
    5651import org.greenstone.gatherer.valuetree.GValueNode;
    57 import org.w3c.dom.Document;
    58 import org.w3c.dom.Element;
    59 import org.w3c.dom.Node;
    60 import org.w3c.dom.NodeList;
     52import org.w3c.dom.*;
    6153/** Provides a metadata parser implementation that knows how to locate, prepare for, then import metadata from a previous Greenstone collection. Is aware of such factors as the presence of Metadata Set files and hierarchy files. Updates the profiler where possible to allow for faster subsequent imports from a certain collection. Caches all the information about encountered collections in CollectCFG objects which are softly cached (ie are cached, but are reclaimed before an OutOfMemory exception would be thrown).
    6254 * @author John Thompson, Greenstone Digital Library, University of Waikato
     
    137129    }
    138130
    139     // Continue only if we are sure this is a greenstone collection
    140     if(collection_dir != null && collect_cfg != null) {
    141         // 2. Attempt to merge in any mdses and make note of those that are successfully imported (by removing reference from collect.cfg).
    142         ///ystem.err.print("2 ");
     131    // 2. If a collection configuration file was found, attempt to merge in any mdses and make note of those that are successfully imported (by removing reference from collect.cfg).
     132    ///ystem.err.print("2 ");
     133    if(collect_cfg != null) {
    143134        ArrayList mdses = collect_cfg.getMetadataSets();
    144135        for(int i = 0; i < mdses.size(); i++) {
     
    148139        mdses.clear();
    149140        mdses = null;
    150 
    151         // 3. Locate all of the metadata.xml files that may have an affect on the origin file. Make sure the metadata.xml closest to the origin files directory is last (to ensure property inheritance regarding accumulate/overwrite).
    152         ///ystem.err.print("3 ");
    153         ArrayList search_files = new ArrayList();
    154         File file = origin.getFile();
    155         String filename = null;
    156         boolean file_level;
    157         if(file.isFile()) {
    158         file_level = false;
     141    }
     142
     143    // 3. Locate all of the metadata.xml files that may have an affect on the origin file. Make sure the metadata.xml closest to the origin files directory is last (to ensure property inheritance regarding accumulate/overwrite).
     144    ///ystem.err.print("3 ");
     145    ArrayList search_files = new ArrayList();
     146    File file = origin.getFile();
     147    String filename = null;
     148    boolean file_level;
     149    if(file.isFile()) {
     150        file_level = false;
     151        filename = file.getName();
     152        file = file.getParentFile();
     153    }
     154    else {
     155        file_level = true;
     156    }
     157    while(file != null && (collection_dir == null || !file.equals(collection_dir))) {
     158        File test_file = new File(file, Utility.METADATA_XML);
     159        if(test_file.exists()) {
     160        search_files.add(0, new MetadataXMLFileSearch(test_file, filename));
     161        }
     162        if(filename != null) {
     163        filename = file.getName() + SEPARATOR + filename;
     164        }
     165        else {
    159166        filename = file.getName();
    160         file = file.getParentFile();
    161         }
    162         else {
    163         file_level = true;
    164         }
    165         while(!file.equals(collection_dir)) {
    166         File test_file = new File(file, Utility.METADATA_XML);
    167         if(test_file.exists()) {
    168             search_files.add(0, new MetadataXMLFileSearch(test_file, filename));
    169         }
    170         if(filename != null) {
    171             filename = file.getName() + SEPARATOR + filename;
     167        }
     168        file = file.getParentFile();
     169    }
     170    filename = null;
     171    file = null;
     172    // Start with an initially empty ArrayList of metadata
     173    ArrayList metadata = new ArrayList();
     174    // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along.
     175    for(int i = 0; i < search_files.size(); i++) {
     176        MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i);
     177        ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + " for " + (a_search.filename != null ? a_search.filename : ".*"));
     178        // Retrieve the document
     179        BasicGDMDocument document = getDocument(a_search.file);
     180        if(document != null) {
     181        // If this is a dummy run, our original source file is actually the metadata.xml file and we retrieve all metadata for this collection, as if accumulated!
     182        if(dummy_run) {
     183            metadata = document.getAllMetadata();
    172184        }
    173185        else {
    174             filename = file.getName();
    175         }
    176         file = file.getParentFile();
    177         }
    178         filename = null;
    179         file = null;
    180         // Start with an initially empty ArrayList of metadata
    181         ArrayList metadatum = new ArrayList();
    182         // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along.
    183         for(int i = 0; i < search_files.size(); i++) {
    184         MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i);
    185         ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + " for " + (a_search.filename != null ? a_search.filename : ".*"));
    186         // Retrieve the document
    187         BasicGDMDocument document = getDocument(a_search.file);
    188         if(document != null) {
    189             // If this is a dummy run, our original source file is actually the metadata.xml file and we retrieve all metadata for this collection, as if accumulated!
    190             if(dummy_run) {
    191             metadatum = document.getAllMetadata();
    192             }
    193             else {
    194             metadatum = document.getMetadata(a_search.filename, metadatum, folder_level);
    195             }
    196             document = null;
    197         }
    198         a_search = null;
    199         }
    200         search_files = null;
    201         // Finally assign the metadata
    202         ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata for " + destination);
    203         if(metadatum.size() > 0) {
    204         addMetadata(destination, metadatum, collection_dir, collect_cfg, dummy_run);
    205         }
    206     }
    207     else {
    208         ///ystem.err.println("Not a greenstone collection (no collect.cfg found).");
     186            metadata = document.getMetadata(a_search.filename, metadata, folder_level);
     187        }
     188        document = null;
     189        }
     190        a_search = null;
     191    }
     192    search_files = null;
     193    // Finally assign the metadata
     194    ///ystem.err.println("Found " + metadata.size() + " pieces of metadata for " + destination);
     195    if(metadata.size() > 0) {
     196        addMetadata(origin, destination, metadata, collection_dir, collect_cfg, dummy_run);
    209197    }
    210198    return dialog_cancelled;
     
    215203    }
    216204
    217     private void addMetadata(FileNode destination, ArrayList metadatum, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) {
     205    private void addMetadata(FileNode origin, FileNode destination, ArrayList metadata, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) {
    218206    // before we try to addMetadata, we need to check that there are some metadata sets for the collection - otherwise we cant add or import
    219207    Vector meta_sets = Gatherer.c_man.getCollection().msm.getSets(false);
     
    230218    ///ystem.err.print("6 ");
    231219    // Used in a complicated test later on.
    232     for(int i = 0; !dialog_cancelled && i < metadatum.size(); i++) {
    233         BasicMetadata basic_metadata = (BasicMetadata) metadatum.get(i);
    234         BasicMetadata metadata = (BasicMetadata) metadatum.get(i);
    235         metadata.collection = collection_dir;
     220    for(int i = 0; !dialog_cancelled && i < metadata.size(); i++) {
     221        BasicMetadata basic_metadata = ((BasicMetadata) metadata.get(i)).copy();
     222        BasicMetadata metadatum = (BasicMetadata) metadata.get(i);
     223        metadatum.collection = collection_dir; // May be null. Doesn't matter.
    236224        Metadata final_metadata = null;
    237225        // If this BasicMetadata already exists in the transform cache then we can save ourselves a lot of work.
     
    242230        if(final_metadata == null) {
    243231        ///ystem.err.println("No existing Metadata object for BasicMetadata: " + basic_metadata);
    244         // 6a. Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index.
    245         HFile h_file = collect_cfg.getHFile(metadata.element);
    246         if(h_file != null && !dummy_run) {
    247             ///ystem.err.print(metadata.value + " maps to ");
    248             metadata.value = h_file.getValue(metadata.value);
    249             ///ystem.err.println(metadata.value);
    250         }
    251         h_file = null;
    252         // 6b. Check if there is a profile regarding the current metadata.
    253         ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadata.element);
    254         if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadata.element)) {
    255             String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadata.element);
    256             ///ystem.err.println("Profile result = " + new_element_name);
    257             if(new_element_name == null) {
    258             metadata = null;
    259             }
    260             else {
    261             metadata.element = new_element_name;
    262             }
    263             new_element_name = null;
    264         }
    265         ///atherer.println("Assigning metadata.");
    266         if(metadata != null) {
     232        // 6a. Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index. Of course we can only do this if a collection configuration file was found in the first place.
     233        if(collect_cfg != null) {
     234            HFile h_file = collect_cfg.getHFile(metadatum.element);
     235            if(h_file != null && !dummy_run) {
     236            ///ystem.err.print(metadata.value + " maps to ");
     237            metadatum.value = h_file.getValue(metadatum.value);
     238            ///ystem.err.println(metadatum.value);
     239            }
     240            h_file = null;
     241        }
     242        // 6b. Check if there is a profile regarding the current metadata. The profile may be stored for the collection directory, or if no such directory is available, then try the ancestor folders of the origin file.
     243        ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element);
     244        if(collection_dir != null) {
     245            // Note that the first test is whether a profile action exist, while the 'getAction' can return null as the profile action.
     246            if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadatum.element)) {
     247            String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadatum.element);
     248            ///ystem.err.println("Profile result = " + new_element_name);
     249            if(new_element_name == null) {
     250                metadatum = null;
     251            }
     252            else {
     253                metadatum.element = new_element_name;
     254            }
     255            new_element_name = null;
     256            }
     257        }
     258        else {
     259            boolean found = false;
     260            File current_folder = origin.getFile().getParentFile();
     261            while(!found && metadatum != null && current_folder != null) {
     262            if(Gatherer.c_man.getCollection().msm.profiler.containsAction(current_folder.getAbsolutePath(), metadatum.element)) {
     263                found = true;
     264                String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(current_folder.getAbsolutePath(), metadatum.element);
     265                ///ystem.err.println("Profile result = " + new_element_name);
     266                if(new_element_name == null) {
     267                metadatum = null;
     268                }
     269                else {
     270                metadatum.element = new_element_name;
     271                }
     272                new_element_name = null;
     273            }
     274            else {
     275                current_folder = current_folder.getParentFile();
     276            }
     277            }
     278            current_folder = null;
     279        }
     280        ///atherer.println("Assigning metadatum.");
     281        if(metadatum != null) {
    267282            // 6c. Try to add metadata. If there is no matching metadata element:
    268             ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadata.element);
     283            ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadatum.element);
    269284            // Arg. The element returned may come from the Greenstone dls, which of course should never be involved during importing. To solve check the namespace isn't "" and if it is nullify the element. Nullify. NULLIFY, Bwuhahahaha...
    270285            if(element != null && element.getNamespace().equals("")) {
     
    273288            // 6ci. If no match exists, prompt the user to add/merge with specific metadata element. The user can also choose to ignore this metadata.
    274289            if(element == null) {
    275             element = selectElement(metadata.element);
     290            element = selectElement(metadatum.element);
    276291            if(!dialog_cancelled) {
    277292                // 6ciii. If either of the above work, remember to add to profile.
    278293                if(element == null) {
    279                 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", null");
    280                 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, null);
     294                ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element + ", null");
     295                if(collection_dir != null) {
     296                    Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadatum.element, null);
     297                }
     298                else {
     299                    Gatherer.c_man.getCollection().msm.profiler.addAction(origin.getFile().getParentFile().getAbsolutePath(), metadatum.element, null);
     300                }
    281301                }
    282302                else {
    283                 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", " + element.getName());
    284                 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, element.getName());
     303                ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element + ", " + element.getName());
     304                if(collection_dir != null) {
     305                    Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadatum.element, null);
     306}
     307                else {
     308                    Gatherer.c_man.getCollection().msm.profiler.addAction(origin.getFile().getParentFile().getAbsolutePath(), metadatum.element, null);
     309                }
    285310                }
    286311            }
     
    292317            if(model != null) {
    293318                // One little 'fix' for importing from the demo or dls files. The Title metadata found in the metadata.xml isn't used in preference for the automatically extracted Titles. However we want to use them, so we should remove '.*(<filename>)$' for a certain file <filename>.
    294                 String raw_value = metadata.value.trim();
     319                String raw_value = metadatum.value.trim();
    295320                String filename_munged = destination.getFile().getName();
    296321                int index = -1;
     
    299324                }
    300325                filename_munged = "(" + filename_munged + ")";
    301                 ///atherer.println("Hack: filename = " + destination.getFile().getName() + ", munged = " + filename_munged + ", raw_value = " + raw_value);
    302326                if(raw_value.endsWith(filename_munged)) {
    303327                raw_value = (raw_value.substring(0, raw_value.length() - filename_munged.length())).trim();
     
    305329                GValueNode node = model.addValue(raw_value);
    306330                final_metadata = new Metadata(element, node);
    307                 ///ystem.err.println("Adding final metadata: " + metadata.toString());
     331                ///ystem.err.println("Adding final metadata: " + metadatum.toString());
    308332                node = null;
    309333            }
     
    322346        }
    323347        if(!dummy_run && final_metadata != null && !dialog_cancelled) {
    324         final_metadata.setAccumulate(metadata.accumulates);
     348        final_metadata.setAccumulate(metadatum.accumulates);
    325349        // Now we can finally add the metadata.
    326350        ///ystem.err.println("Adding Metadata: " + final_metadata);
     
    329353                // Otherwise there is no way to add this metadata. No value model no metadata value.
    330354        final_metadata = null;
    331         metadata = null;
     355        metadatum = null;
    332356    }
    333357    }
     
    623647    }
    624648
     649    public BasicMetadata copy() {
     650        return new BasicMetadata(element, value, accumulates);
     651    }
     652
    625653    public int compareTo(Object other) {
    626654        return toString().compareTo(other.toString());
     
    632660    public boolean equals(Object object) {
    633661        BasicMetadata other = (BasicMetadata) object;
    634         if(collection != null) {
     662        if(collection != null && other.collection != null) {
    635663        return (collection.equals(other.collection) && element.equals(other.element) && value.equals(other.value));
    636664        }
Note: See TracChangeset for help on using the changeset viewer.