Changeset 8131


Ignore:
Timestamp:
2004-09-22T11:53:21+12:00 (20 years ago)
Author:
mdewsnip
Message:

More improvements to the new metadata code, including language-specific metadata element display and a 5x speed-up in the skimming of the doc.xml files.

Location:
trunk/gli/src/org/greenstone/gatherer/metadata
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java

    r8123 r8131  
    2525    super(doc_xml_file_path);
    2626
    27     // Parse the doc.xml file
    28     System.err.println("Loading doc.xml file " + doc_xml_file_path + "...");
    29     Document document = XMLTools.parseXMLFile(this);
    30     if (document == null) {
    31         System.err.println("Error: Could not parse doc.xml file " + getAbsolutePath());
    32         return;
    33     }
    34 
    3527    MetadataSet extracted_metadata_set = MetadataSetManager.getMetadataSet(MetadataSetManager.EXTRACTED_METADATA_NAMESPACE);
    3628
    37     // Read all the Archive elements in the file
    38     NodeList archive_elements_nodelist = document.getElementsByTagName(ARCHIVE_ELEMENT);
    39     for (int i = 0; i < archive_elements_nodelist.getLength(); i++) {
    40         Element current_archive_element = (Element) archive_elements_nodelist.item(i);
     29    // Skim the doc.xml file as quickly as possible (don't parse as XML), looking at the Metadata elements
     30    System.err.println("Skimming doc.xml file " + this + "...");
     31    try {
     32        BufferedReader buffered_reader = new BufferedReader(new FileReader(this));
     33        String line = null;
     34        while ((line = buffered_reader.readLine()) != null) {
     35        // This line doesn't contain a metadata element
     36        if (line.indexOf("<Metadata ") == -1) {
     37            continue;
     38        }
    4139
    42         // Read the child Section elements of the archive (but not all descendants)
    43         ArrayList child_section_elements = XMLTools.getChildElementsByTagName(current_archive_element, SECTION_ELEMENT);
    44         for (int j = 0; j < child_section_elements.size(); j++) {
    45         Element current_section_element = (Element) child_section_elements.get(j);
     40        // Extract the metadata element name
     41        int name_index = line.indexOf(" name=\"") + " name=\"".length();
     42        String metadata_element_name_full = line.substring(name_index, line.indexOf("\"", name_index));
    4643
    47         // Read the Description elements of this section only (not child sections as well)
    48         ArrayList child_description_elements = XMLTools.getChildElementsByTagName(current_section_element, DESCRIPTION_ELEMENT);
    49         for (int k = 0; k < child_description_elements.size(); k++) {
    50             Element current_description_element = (Element) child_description_elements.get(k);
     44        // If the metadata has a namespace it isn't extracted metadata, so we're not interested
     45        String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
     46        if (!metadata_set_namespace.equals("")) {
     47            continue;
     48        }
    5149
    52             String gsdlsourcefilename_value = null;
    53             boolean bibliographic_data = false;
     50        // Extracted metadata!
     51        String metadata_element_name = metadata_element_name_full;
    5452
    55             // Read all the Metadata elements in this description element
    56             NodeList metadata_elements_nodelist = current_description_element.getElementsByTagName(METADATA_ELEMENT);
    57             for (int l = 0; l < metadata_elements_nodelist.getLength(); l++) {
    58             Element current_metadata_element = (Element) metadata_elements_nodelist.item(l);
    59             String metadata_element_name_full = current_metadata_element.getAttribute("name");
     53        // Note which file this doc.xml is for
     54        if (metadata_element_name.equals("gsdlsourcefilename")) {
     55            // Extract the gsdlsourcefilename element value
     56            int value_index = line.indexOf(">", name_index) + ">".length();
     57            String gsdlsourcefilename_value = line.substring(value_index, line.indexOf("<", value_index));
    6058
    61             // If the metadata has a namespace it isn't extracted metadata, so we're not interested
    62             String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
    63             if (!metadata_set_namespace.equals("")) {
    64                 continue;
     59            // We're only interested in the path relative to the import folder
     60            int import_index = gsdlsourcefilename_value.indexOf("import");
     61            if (import_index != -1) {
     62            gsdlsourcefilename_value = gsdlsourcefilename_value.substring(import_index + "import".length());
     63
     64            boolean is_unix_path = gsdlsourcefilename_value.startsWith("/");
     65            gsdlsourcefilename_value = gsdlsourcefilename_value.substring(1);
     66
     67            // Make sure the path matches the OS that is running
     68            if (is_unix_path && Utility.isWindows()) {
     69                // Convert path from Unix to Windows
     70                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("/", File.separator);
     71            }
     72            if (!is_unix_path && !Utility.isWindows()) {
     73                // Convert path from Windows to Unix
     74                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", File.separator);
    6575            }
    6676
    67             // Extracted metadata!
    68             String metadata_element_name = metadata_element_name_full;
    69 
    70             // Note which file this Section is for
    71             if (metadata_element_name.equals("gsdlsourcefilename")) {
    72                 gsdlsourcefilename_value = XMLTools.getElementTextValue(current_metadata_element);
    73 
    74                 // We're only interested in the path relative to the import folder
    75                 int import_index = gsdlsourcefilename_value.indexOf("import");
    76                 if (import_index != -1) {
    77                 gsdlsourcefilename_value = gsdlsourcefilename_value.substring(import_index + "import".length());
    78 
    79                 boolean is_unix_path = gsdlsourcefilename_value.startsWith("/");
    80                 gsdlsourcefilename_value = gsdlsourcefilename_value.substring(1);
    81 
    82                 // Make sure the path matches the OS that is running
    83                 if (is_unix_path && Utility.isWindows()) {
    84                     // Convert path from Unix to Windows
    85                     gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("/", File.separator);
    86                 }
    87                 if (!is_unix_path && !Utility.isWindows()) {
    88                     // Convert path from Windows to Unix
    89                     gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", File.separator);
    90                 }
    91                 }
    92                 else {
    93                 // We don't really know what is going on...
    94                 System.err.println("Warning: Could not understand gsdlsourcefilename " + gsdlsourcefilename_value);
    95                 }
    96             }
    97 
    98             // We don't do much with bibliographic data
    99             if (metadata_element_name.equals("SourceSegment")) {
    100                 bibliographic_data = true;
    101             }
    102 
    103             // Ignore lower-case metadata elements (gsdlsourcefilename, gsdlassocfile etc.)
    104             //   and those starting with '/' (/srclink)
    105             char first_character = metadata_element_name.charAt(0);
    106             if (Character.isLowerCase(first_character) || first_character == '/') {
    107                 continue;
    108             }
    109 
    110             MetadataElement metadata_element = extracted_metadata_set.getMetadataElement(metadata_element_name);
    111             if (metadata_element == null) {
    112                 // This element isn't defined in ex.mds, so create it for this session
    113                 System.err.println("Extracted metadata element not defined: " + metadata_element_name);
    114                 extracted_metadata_set.addMetadataElementForThisSession(metadata_element_name);
     77            // Remember this for quick access later
     78            if (gsdlsourcefilename_value != null) {
     79                files_in_doc_xml_file.add(gsdlsourcefilename_value);
    11580            }
    11681            }
    117 
    118             // Remember this for quick access later
    119             if (gsdlsourcefilename_value != null && !bibliographic_data) {
    120             files_in_doc_xml_file.add(gsdlsourcefilename_value);
     82            else {
     83            // We don't really know what is going on...
     84            System.err.println("Warning: Could not understand gsdlsourcefilename " + gsdlsourcefilename_value);
    12185            }
    12286        }
     87
     88        // Ignore lower-case metadata elements (gsdlsourcefilename, gsdlassocfile etc.)
     89        //   and those starting with '/' (/srclink)
     90        char first_character = metadata_element_name.charAt(0);
     91        if (Character.isLowerCase(first_character) || first_character == '/') {
     92            continue;
     93        }
     94
     95        MetadataElement metadata_element = extracted_metadata_set.getMetadataElement(metadata_element_name);
     96        if (metadata_element == null) {
     97            // This element isn't defined in ex.mds, so create it for this session
     98            System.err.println("Extracted metadata element not defined: " + metadata_element_name);
     99            extracted_metadata_set.addMetadataElementForThisSession(metadata_element_name);
     100        }
    123101        }
     102    }
     103    catch (Exception ex) {
     104        System.err.println("Exception: " + ex);
     105        ex.printStackTrace();
    124106    }
    125107    }
  • trunk/gli/src/org/greenstone/gatherer/metadata/MetadataElement.java

    r8123 r8131  
    33
    44import java.io.File;
     5import org.greenstone.gatherer.Gatherer;
    56import org.greenstone.gatherer.util.XMLTools;
    67import org.w3c.dom.*;
     
    7273
    7374
     75    public String getDisplayName()
     76    {
     77    String metadata_element_display_name = MetadataTools.getMetadataElementAttribute(this, "identifier", Gatherer.config.getLanguage(), "en");
     78    if (metadata_element_display_name != null) {
     79        return getNamespace() + "." + metadata_element_display_name;
     80    }
     81
     82    return metadata_element_name_full;
     83    }
     84
     85
    7486    public MetadataValueTreeModel getMetadataValueTreeModel()
    7587    {
     
    111123    public String toString()
    112124    {
    113     return metadata_element_name_full;
     125    return getDisplayName();
    114126    }
    115127
  • trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r8128 r8131  
    2727
    2828    // Parse the metadata.xml file
     29    System.err.println("Loading metadata.xml file " + metadata_xml_file_path + "...");
    2930    Document document = XMLTools.parseXMLFile(this);
    3031    if (document == null) {
     
    4546        // Check if we have an import mapping for this metadata element
    4647        String target_metadata_element_name_full = ProfileXMLFileManager.getMetadataElementFor(metadata_element_name_full);
    47         if (target_metadata_element_name_full == null) {
     48        if (target_metadata_element_name_full == null && MetadataSetManager.getMetadataSets().size() > 1) {
    4849            // No, so ask the user how they want to deal with this element
    4950            MetadataImportMappingPrompt metadata_import_mapping_prompt = new MetadataImportMappingPrompt(metadata_element_name_full);
  • trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFileManager.java

    r8123 r8131  
    3636        MetadataXMLFile metadata_xml_file = (MetadataXMLFile) metadata_xml_files.get(j);
    3737
    38         // This metadata.xml file is only applicable if it is above or at the same level as the file
    39         // if (current_file_directory.getAbsolutePath().startsWith(metadata_xml_file.getParentFile().getAbsolutePath())) {
    4038        // This metadata.xml file is only applicable if it is at the same level as the file
    4139        if (current_file_directory.getAbsolutePath().equals(metadata_xml_file.getParentFile().getAbsolutePath())) {
Note: See TracChangeset for help on using the changeset viewer.