Changeset 9195


Ignore:
Timestamp:
2005-02-25T15:16:56+13:00 (19 years ago)
Author:
mdewsnip
Message:

Now rips everything except the metadata out of doc.xml files before sending them back. By Matthew Whyte.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/util/Utility.java

    r9166 r9195  
    4545import javax.swing.tree.*;
    4646import org.apache.xerces.parsers.*;
     47import org.apache.xerces.dom.DocumentImpl;
    4748import org.apache.xml.serialize.*;
    4849import org.greenstone.gatherer.Configuration;
     
    7071    static final public int BUFFER_SIZE = FACTOR * 1024;
    7172    /** Definition of an important directory name, in this case the base dir, or the working directory of the Gatherer. */
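     73    /** The regular expression that indicates we only want doc.xml files. Will this work on Windows? --Matthew (NOTE(review): probably not as written -- File.separator is a backslash there, which a regular expression treats as an escape character; confirm.) */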
     74    static final public String METADATA_ONLY_EXPR = "\".*" + File.separator + "doc\\.xml\"";
     75
    7276    static public String BASE_DIR  = System.getProperty("user.dir") + File.separator;
    7377    static final public String BUILD_CFG_FILENAME = "build.cfg";
     
    619623    static protected boolean zipFunc (ZipOutputStream zos, String file_path, int prefix_strip, GShell source, boolean encountered_file, String accept_expr, String reject_expr)
    620624    {
    621    
     625    String new_file_path = file_path;
     626
    622627    if((reject_expr != "") && (file_path.matches(reject_expr))) {
    623628        // matches reject expression
     
    630635        DebugStream.println("File \'" + file_path + "\' doesn't match accept expression \'" + accept_expr + "\'");
    631636        return encountered_file;
     637    }
     638
     639    //Special case: we only want the metadata, so parse the XML file and remove its Content elements.
     640    if(accept_expr.compareTo(".*doc.xml") == 0) {
     641        System.err.println("Only want to get metadata"); //debug
     642        Document old_document = XMLTools.parseXMLFile(new File(file_path));
     643        NodeList content = old_document.getElementsByTagName("Content");
     644
     645        try {
     646        for(int i=content.getLength()-1; i >= 0; i--) {
     647            //Remove all content - just leave the metadata.
     648            content.item(i).getParentNode().removeChild(content.item(i));
     649        }
     650        }
     651        catch (Exception ex) {
     652        ex.printStackTrace();
     653        }
     654
     655        new_file_path = file_path.substring(0, file_path.lastIndexOf(File.separator) + 1) + "new.xml";
     656        //Is there any better way than writing out as a new file, then reading back in??
     657        XMLTools.writeXMLFile(new File(new_file_path), old_document);
    632658    }
    633659
     
    650676
    651677        // Create a file input stream and a buffered input stream.
    652         FileInputStream fis = new FileInputStream(file_path);
     678        FileInputStream fis = new FileInputStream(new_file_path);
    653679        BufferedInputStream bis = new BufferedInputStream(fis);
    654680       
     
    783809        else {
    784810            String zip_full_file = zip_dir_or_file;
    785 
    786811            String zip_path = zip_full_file.substring(prefix_strip);
    787812
Note: See TracChangeset for help on using the changeset viewer.