Changeset 5946


Ignore:
Timestamp:
2003-11-24T14:27:42+13:00 (20 years ago)
Author:
cs025
Message:

Extensions and new IndexExtractor

Location:
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor/ExtractorManager.java

    r5800 r5946  
    11package org.greenstone.gsdl3.gs3build.extractor;
     2
     3import java.util.Iterator;
    24
    35import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
     
    79public class ExtractorManager
    810{
    9     DocumentList          documents;
    10     ExtractorInterface [] list;
    11     int                   size;
    12     int                   used;
     11  DocumentList          documents;
     12  ExtractorInterface [] list;
     13  int                   size;
     14  int                   used;
    1315
    14     public ExtractorManager(DocumentList documentList)
    15     { this.list = new ExtractorInterface[10];
    16       this.size = 10;
    17       this.used = 0;
    18       this.documents = documentList;
     16  public ExtractorManager(DocumentList documentList)
     17  { this.list = new ExtractorInterface[10];
     18    this.size = 10;
     19    this.used = 0;
     20    this.documents = documentList;
     21  }
     22
     23  public void addExtractor(ExtractorInterface extractor)
     24  { this.ensureSize(this.used + 1);
     25    this.list[this.used] = extractor;
     26    this.used ++;
     27  }
     28
     29  public void extractDocument(DocumentID docId, DocumentInterface document)
     30  { for (int i = 0; i < this.used; i ++)
     31    { this.list[i].extractDocument(docId, document);
    1932    }
     33  }
    2034
    21     public void addExtractor(ExtractorInterface extractor)
    22     {   this.ensureSize(this.used + 1);
    23         this.list[this.used] = extractor;
    24         this.used ++;
     35  public void extractDocuments()
     36  { for (int i = 0; i < this.used; i ++)
     37    { for (int p = 0; p < this.list[i].getNumberOfPasses(); p ++)
     38      { this.list[i].startPass(p);
     39
     40        Iterator iterator = documents.iterator();
     41    while (iterator.hasNext()) {
     42      DocumentInterface document = (DocumentInterface) iterator.next();
     43      this.list[i].extractDocument(document.getID(), document);
    2544    }
     45    this.list[i].endPass(p);
     46      }
     47    }
     48  }
    2649
    27     public void extractDocument(DocumentID docId, DocumentInterface document)
    28     {   for (int i = 0; i < this.used; i ++)
    29         {   this.list[i].extractDocument(docId, document);
    30         }
    31     }
    32 
    33     public void extractDocuments()
    34     {   for (int i = 0; i < this.used; i ++)
    35         {   for (int p = 0; p < this.list[i].getNumberOfPasses(); p ++)
    36             {   this.list[i].startPass(p);
    37                 for (int d = 0; d < this.documents.size(); d ++)
    38                 {   this.list[i].extractDocument(null, documents.getDocument(d));
    39                 }
    40                 this.list[i].endPass(p);
    41             }
    42         }
    43     }
    44 
    45     public void ensureSize(int size)
    46     {   while (size >= this.size)
    47         {   ExtractorInterface newList [] = new ExtractorInterface[this.size*2];
    48             this.size *= 2;
    49             System.arraycopy(this.list, 0, newList, 0, this.size);
    50             this.list = newList;
    51         }
    52     }
     50  public void ensureSize(int size)
     51  { while (size >= this.size)
     52    { ExtractorInterface newList [] = new ExtractorInterface[this.size*2];
     53      this.size *= 2;
     54      System.arraycopy(this.list, 0, newList, 0, this.size);
     55      this.list = newList;
     56    }
     57  }
    5358}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor/GMLExtractor.java

    r5822 r5946  
    1616public class GMLExtractor implements ExtractorInterface
    1717{
    18     /**
    19      *  An inner class to handle GML files
    20      */
    21     class GMLHandler extends DefaultHandler
    22     { String file;
    23       String label;
    24       StringBuffer value;
    25       boolean inElement;
     18  /**
     19   *  An inner class to handle GML files
     20   */
     21  class GMLHandler extends DefaultHandler
     22  { String file;
     23    String label;
     24    StringBuffer value;
     25    boolean inElement;
    2626
    27       GMLHandler()
    28       { super();
     27    GMLHandler()
     28    { super();
     29   
     30      this.file = null;
     31      this.label = null;
     32      this.value = null;
     33    }
    2934
    30         this.file = null;
    31     this.label = null;
    32     this.value = null;
     35    public void startElement(String URI, String localName, String qName, Attributes attributes)
     36    { if (localName.equals("Filename"))
     37      { this.value = new StringBuffer();
    3338      }
    34 
    35         public void startElement(String URI, String localName, String qName, Attributes attributes)
    36         {   if (localName.equals("Filename"))
    37             {   this.value = new StringBuffer();
    38             }
    39             else if (localName.equals("Metadata"))
    40             {   this.label = attributes.getValue("name");
    41                 this.value = new StringBuffer();
    42             }
    43         }
    44 
    45         public void endElement(String URI, String localName, String qName)
    46         {   if (localName.equals("Filename"))
    47             {   this.file = this.value.toString();
    48                 this.value = null;
    49             }
    50             else if (localName.equals("Metadata"))
    51             {   GMLExtractor.postMetadata(this.file, this.label, this.value.toString());
    52                 this.value = null;
    53                 this.label = null;
    54             }
    55         }
    56 
    57         public void characters(char c[], int start, int length)
    58         {   if (this.label != null)
    59             { String string = new String(c, start, length);
    60                 this.value.append(string);
    61             }
    62         }
    63     }
    64 
    65     /**
    66      *  Construct of extractor
    67      */
    68     public GMLExtractor()
    69     { // Intentionally left blank
    70     }
    71 
    72     /**
    73      *  This extractor doesn't need to do any preparation/completion work,
    74      *  so this member function is empty.
    75      */
    76     public void configure(String outputDir)
    77     { // Intentionally left blank
    78     }
    79 
    80     /**
    81      *  This extractor doesn't need to do any preparation/completion work,
    82      *  so this member function is empty.
    83      */
    84     public void startPass(int passNo)
    85     { // Intentionally left blank
    86     }
    87 
    88     /**
    89      *  Process the document - for a GML document, this results in the
    90      *  decoration of other files, for other documents, it does nothing.
    91      */
    92     public void extractDocument(DocumentID docID, DocumentInterface document)
    93     {   
    94       if (document.getDocumentType().equals(GMLDocument.GML_DOCUMENT_TYPE))
    95       { // Extract the content from the GML file
    96     try {
    97       XMLReader reader = XMLReaderFactory.createXMLReader();
    98       GMLHandler handler = new GMLHandler();
    99       reader.setContentHandler(handler);
    100       reader.setErrorHandler(handler);
    101 
    102       // A GML document consists of one file only - get it from the 'default'
    103       // file group
    104       FileReader fileReader = new FileReader(document.getDocumentFiles().getFile(0).toString());
    105       reader.parse(new InputSource(fileReader));
    106     }
    107     catch (SAXException saxException)
    108     { // TODO: log error
    109     }
    110     catch (java.io.FileNotFoundException fileException)
    111     {
    112     }
    113     catch (java.io.IOException ioException)
    114     {
    115     }
    116     // for each document post it to the corresponding document
     39      else if (localName.equals("Metadata"))
     40      { this.label = attributes.getValue("name");
     41        this.value = new StringBuffer();
    11742      }
    11843    }
    11944
    120     protected static void postMetadata(String file, String value, String label)
    121     {
    122     }
     45    public void endElement(String URI, String localName, String qName)
     46    { if (localName.equals("Filename"))
     47      { this.file = this.value.toString();
     48        this.value = null;
     49      }
     50      else if (localName.equals("Metadata"))
     51      { GMLExtractor.postMetadata(this.file, this.label, this.value.toString());
     52        this.value = null;
     53    this.label = null;
     54      }
     55    }
    12356
    124     /**
    125      *  This extractor doesn't need to do any preparation/completion work,
    126      *  so this member function is empty.
    127      */
    128     public void endPass(int passNo)
    129     { // Intentionally left blank
    130     }
     57    public void characters(char c[], int start, int length)
     58    { if (this.label != null)
     59      { String string = new String(c, start, length);
     60        this.value.append(string);
     61      }
     62    }
     63  }
    13164
    132     /**
    133      *  This extractor is a simple, single-pass extractor
    134      *
    135      *  @see: org.greenstone.gsdl3.gs3build.extractor.ExtractorInterface:getNumberOfPasses
    136      */
    137     public int getNumberOfPasses()
    138     {   return 1;
    139     }
     65  /**
     66   *  Constructor for the extractor
     67   */
     68  public GMLExtractor()
     69  { // Intentionally left blank
     70  }
     71
     72  /**
     73   *  This extractor doesn't need to do any preparation/completion work,
     74   *  so this member function is empty.
     75   */
     76  public void configure(String outputDir)
     77  { // Intentionally left blank
     78  }
     79
     80  /**
     81   *  This extractor doesn't need to do any preparation/completion work,
     82   *  so this member function is empty.
     83   */
     84  public void startPass(int passNo)
     85  { // Intentionally left blank
     86  }
     87
     88  /**
     89   *  Process the document - for a GML document, this results in the
     90   *  decoration of other files, for other documents, it does nothing.
     91   */
     92  public void extractDocument(DocumentID docID, DocumentInterface document)
     93  {
     94    if (document.getDocumentType().equals(GMLDocument.GML_DOCUMENT_TYPE))
     95    { // Extract the content from the GML file
     96      try {
     97    XMLReader reader = XMLReaderFactory.createXMLReader();
     98    GMLHandler handler = new GMLHandler();
     99    reader.setContentHandler(handler);
     100    reader.setErrorHandler(handler);
     101     
     102    // A GML document consists of one file only - get it from the 'default'
     103    // file group
     104    FileReader fileReader = new FileReader(document.getDocumentFiles().getFile(0).toString());
     105    reader.parse(new InputSource(fileReader));
     106      }
     107      catch (SAXException saxException)
     108      { // TODO: log error
     109      }
     110      catch (java.io.FileNotFoundException fileException)
     111      {
     112      }
     113      catch (java.io.IOException ioException)
     114      {
     115      }
     116      // for each document post it to the corresponding document
     117    }
     118  }
     119
     120  protected static void postMetadata(String file, String value, String label)
     121  {
     122  }
     123
     124  /**
     125   *  This extractor doesn't need to do any preparation/completion work,
     126   *  so this member function is empty.
     127   */
     128  public void endPass(int passNo)
     129  { // Intentionally left blank
     130  }
     131
     132  /**
     133   *  This extractor is a simple, single-pass extractor
     134   *
     135   *  @see org.greenstone.gsdl3.gs3build.extractor.ExtractorInterface#getNumberOfPasses()
     136   */
     137  public int getNumberOfPasses()
     138  { return 1;
     139  }
    140140}
Note: See TracChangeset for help on using the changeset viewer.