Changeset 6101


Ignore:
Timestamp:
2003-12-03T09:38:39+13:00 (20 years ago)
Author:
cs025
Message:

Added getSectionText member function to documents

Location:
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/AbstractDocument.java

    r6018 r6101  
    143143  public abstract String getDocumentText();
    144144
     145  public abstract String getSectionText(String sectionId);
     146
    145147  public String getMETSType()
    146148  { return "document";
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentFactory.java

    r6018 r6101  
    2424  public static DocumentInterface readSQLDocument(GS3SQLConnection connection, DocumentID id)
    2525  {
    26     String query = "SELECT * FROM document WHERE DocID="+id.toString()+";";
     26    String query = "SELECT * FROM document WHERE DocID=\""+id.toString()+"\";";
    2727    connection.execute(query);
    2828
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentInterface.java

    r5944 r6101  
    5757   */
    5858  public String getDocumentText();
     59
     60  /**
     61   *  Get the text of a section of this document
     62   *
     63   *  @return <code>String</code> the text as a string - for sections
     64   *          that have no textual components, this value may be
     65   *          <code>null</code>
     66   */
     67  public String getSectionText(String sectionId);
    5968   
    6069  /**
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java

    r6018 r6101  
    101101  }
    102102
     103  /**
     104   *  Get a list of the ids of documents that match any of the given file references
     105   */
     106  public List findDocumentIdsUsingFiles(List fileRefs, String withinNode)
     107  {
     108    StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation ");
     109
     110    Iterator files = fileRefs.iterator();
     111    while (files.hasNext()) {
     112      String file = files.next().toString();
     113
     114      if (withinNode != null) {
     115    queryBuffer.append("REGEXP \"^");
     116    queryBuffer.append(withinNode);
     117    queryBuffer.append(".*");
     118      }
     119      else {
     120    queryBuffer.append("REGEXP \"");
     121      }
     122      queryBuffer.append(file);
     123      queryBuffer.append("\"");
     124
     125      if (files.hasNext()) {
     126    queryBuffer.append(" OR ");
     127      }
     128    }
     129    queryBuffer.append(";");
     130    return this.findDocumentIdsUsingFileQuery(queryBuffer.toString());
     131  }
     132
     133  public List findDocumentIdsUsingFile(String fileRef, String withinNode)
     134  { String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"^"+withinNode+".*"+fileRef+"\";";
     135    return this.findDocumentIdsUsingFileQuery(query);
     136  }
     137
    103138  public List findDocumentIdsUsingFile(String fileRef)
    104139  {
    105140    // Get the simple list of file objects & their file group reference
    106141    String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"" + fileRef +"\";";
    107    
    108     this.connection.execute(query);
     142
     143    return this.findDocumentIdsUsingFileQuery(query);
     144  }
     145
     146  private List findDocumentIdsUsingFileQuery(String query)
     147  { this.connection.execute(query);
    109148
    110149    try {
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/GMLDocument.java

    r5800 r6101  
    2121public class GMLDocument extends AbstractDocument
    2222{
    23     public static final String GML_DOCUMENT_TYPE = "GML";
     23  public static final String GML_DOCUMENT_TYPE = "GML";
    2424
    25     public GMLDocument(URL url)
    26     {   super(url);
    27     }
     25  public GMLDocument(URL url)
     26  { super(url);
     27  }
    2828
    29     public String getDocumentType()
    30     {   return GML_DOCUMENT_TYPE;
    31     }
     29  public String getDocumentType()
     30  { return GML_DOCUMENT_TYPE;
     31  }
    3232
    33     /**
    34     *  A pretty minimal and lazy document text extraction process.
    35     */
    36     public String getDocumentText()
    37     { return "";
    38     }
     33  /**
     34  *  A pretty minimal and lazy document text extraction process.
     35  */
     36  public String getDocumentText()
     37  { return "";
     38  }
    3939
    40     public METSDescriptiveSet getDocumentMetadata()
    41     { return null;
    42     }
     40  public String getSectionText(String sectionId)
     41  { return "";
     42  }
     43
     44  public METSDescriptiveSet getDocumentMetadata()
     45  { return null;
     46  }
    4347}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLDocument.java

    r5944 r6101  
    143143    return htmlDoc.getContent();
    144144  }
     145
     146  public String getSectionText(String document)
     147  { return "";
     148  }
    145149}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/IndexDocument.java

    r6010 r6101  
    4242  }
    4343
     44  public String getSectionText(String sectionId)
     45  { return "";
     46  }
     47
    4448  /**
    4549   *  Indicate whether this document is indexed.
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/JPEGDocument.java

    r5800 r6101  
    1313public class JPEGDocument extends AbstractDocument
    1414{
    15     public static final String DOCUMENT_JPEG_TYPE = "JPEG";
     15  public static final String DOCUMENT_JPEG_TYPE = "JPEG";
     16 
     17  public JPEGDocument(URL url)
     18  { super(url);
     19  }
    1620
    17     public JPEGDocument(URL url)
    18     {   super(url);
    19     }
     21  public String getDocumentType()
     22  { return DOCUMENT_JPEG_TYPE;
     23  }
    2024
    21     public String getDocumentType()
    22     {   return DOCUMENT_JPEG_TYPE;
    23     }
     25  public String getDocumentText()
     26  { // Just a dummy function for JPEGs
     27    return "";
     28  }
    2429
    25     public String getDocumentText()
    26     { // Just a dummy function for JPEGs
    27         return "";
    28     }
     30  public String getSectionText(String sectionId)
     31  { // Just a dummy function for JPEGs
     32    return "";
     33  }
    2934
    30     public boolean isMETSCompatible()
    31     {   return true;
    32     }
    33 
    34     public DocumentWriter getMETSWriter()
    35     {   return null;
    36     }
     35  public boolean isMETSCompatible()
     36  { return true;
     37  }
     38 
     39  public DocumentWriter getMETSWriter()
     40  { return null;
     41  }
    3742}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/METSDocument.java

    r5800 r6101  
    2121public class METSDocument extends AbstractDocument
    2222{
    23     public static final String METS_DOCUMENT_TYPE = "METS";
     23  public static final String METS_DOCUMENT_TYPE = "METS";
    2424
    25     public METSDocument(URL url)
    26     {   super(url);
     25  public METSDocument(URL url)
     26  { super(url);
     27 
     28    if (url.toString().startsWith("file://"))
     29    { this._parseFile(new File(url.toString().substring(7)));
     30    }
     31  }
     32 
     33  private void _parseFile(File file)
     34  {
     35    try {
     36      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
     37      DocumentBuilder builder = factory.newDocumentBuilder();
     38      Document document = builder.parse(file);
    2739
    28         if (url.toString().startsWith("file://"))
    29         {   this._parseFile(new File(url.toString().substring(7)));
    30         }
     40      // TODO: get all the types in the tree
     41     
     42      // TODO: do a traverse, and thus cope with elements-within-elements if needsbe, but
     43      // this shouldn't happen except in a directly defined situation - actually doing the
     44      // parsing in part inside each node would work well provided one checked for a node
     45      // having already been done...
     46      System.out.println("Read");
     47     
     48      NodeList fileSecs = document.getElementsByTagName("mets:fileSec");
     49     
     50      for (int g = 0; g < fileSecs.getLength(); g ++) {
     51    //              Schema schema = new Schema(schemas.item(s));
     52    this._parseFileSec(fileSecs.item(g));
     53      }
     54     
     55      // Get document metadata sections
     56      NodeList dmdSecs = document.getElementsByTagName("mets:dmdSec");
     57     
     58      //                Schema schema = new Schema(schemas.item(s));
     59      this.metadata = METSDescriptiveSet.parseXML(fileSecs);
     60    }
     61    catch (FactoryConfigurationError e) {
     62      System.out.println(e);
     63    }
     64    catch (ParserConfigurationException ex) {
     65      System.out.println(ex);
     66    }
     67    catch (SAXException ex) {
     68      System.out.println(ex);
     69    }
     70    catch (IOException ex) {
     71      System.out.println(ex);
     72    }
     73  }
     74
     75  private void _parseFileSec(Node fileSec)
     76  { // this is in effect a group without a sense of 'self'...
     77    this._parseFileGroup((Element) fileSec, null);
     78  }
     79
     80  private void _parseFileGroup(Element groupTag, METSFileGroup group)
     81  { NodeList children = groupTag.getChildNodes();
     82
     83    for (int c = 0; c < children.getLength(); c ++)
     84    { if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
     85        continue;
     86      }
     87
     88      System.out.println(children.item(c));
     89   
     90      Element element = (Element) children.item(c);
     91
     92      if (element.getNodeName().equals("mets:File"))
     93      { if (group != null)
     94    { METSFile file = METSFile.parseXML(element, group);
    3195    }
     96        else
     97    { // TODO: error
     98    }
     99      }
     100      else if (element.getNodeName().equals("mets:fileGrp"))
     101      { // recurse
     102    METSFileGroup childGroup = new METSFileGroup(element.getAttribute("ID"));
    32103
    33     private void _parseFile(File file)
    34     {
    35       try {
    36             DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    37             DocumentBuilder builder = factory.newDocumentBuilder();
    38             Document document = builder.parse(file);
     104    this._parseFileGroup(element, childGroup);
     105    if (group != null)
     106    { group.addGroup(childGroup);
     107    }
     108    else
     109    { this.fileSet.addGroup(childGroup);
     110    }
     111      }
     112    }
     113  }
    39114
    40             // TODO: get all the types in the tree
     115  public String getDocumentType()
     116  { return METS_DOCUMENT_TYPE;
     117  }
    41118
    42             // TODO: do a traverse, and thus cope with elements-within-elements if needsbe, but
    43             // this shouldn't happen except in a directly defined situation - actually doing the
    44             // parsing in part inside each node would work well provided one checked for a node
    45             // having already been done...
    46             System.out.println("Read");
     119  public String getDocumentText()
     120  { // TODO: make this more than a dummy function!
     121    return null;
     122  }
    47123
    48             NodeList fileSecs = document.getElementsByTagName("mets:fileSec");
     124  public String getSectionText(String sectionId)
     125  { return null;
     126  }
    49127
    50             for (int g = 0; g < fileSecs.getLength(); g ++) {
    51 //              Schema schema = new Schema(schemas.item(s));
    52                 this._parseFileSec(fileSecs.item(g));
    53             }
    54 
    55             // Get document metadata sections
    56             NodeList dmdSecs = document.getElementsByTagName("mets:dmdSec");
    57 
    58 //              Schema schema = new Schema(schemas.item(s));
    59             this.metadata = METSDescriptiveSet.parseXML(fileSecs);
    60         }
    61         catch (FactoryConfigurationError e) {
    62             System.out.println(e);
    63         }
    64         catch (ParserConfigurationException ex) {
    65             System.out.println(ex);
    66         }
    67         catch (SAXException ex) {
    68             System.out.println(ex);
    69         }
    70         catch (IOException ex) {
    71             System.out.println(ex);
    72         }
    73     }
    74 
    75     private void _parseFileSec(Node fileSec)
    76     {   // this is in effect a group without a sense of 'self'...
    77         this._parseFileGroup((Element) fileSec, null);
    78     }
    79 
    80     private void _parseFileGroup(Element groupTag, METSFileGroup group)
    81     {   NodeList children = groupTag.getChildNodes();
    82 
    83         for (int c = 0; c < children.getLength(); c ++)
    84         { if (children.item(c).getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
    85                 continue;
    86             }
    87 
    88             System.out.println(children.item(c));
    89 
    90             Element element = (Element) children.item(c);
    91 
    92             if (element.getNodeName().equals("mets:File"))
    93             {   if (group != null)
    94                 { METSFile file = METSFile.parseXML(element, group);
    95                 }
    96                 else
    97                 { // TODO: error
    98                 }
    99             }
    100             else if (element.getNodeName().equals("mets:fileGrp"))
    101             {   // recurse
    102               METSFileGroup childGroup = new METSFileGroup(element.getAttribute("ID"));
    103 
    104                 this._parseFileGroup(element, childGroup);
    105                 if (group != null)
    106                 { group.addGroup(childGroup);
    107                 }
    108                 else
    109                 { this.fileSet.addGroup(childGroup);
    110                 }
    111             }
    112         }
    113     }
    114 
    115     public String getDocumentType()
    116     {   return METS_DOCUMENT_TYPE;
    117     }
    118 
    119     public String getDocumentText()
    120     { // TODO: make this more than a dummy function!
    121         return null;
    122     }   
    123128}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/TextDocument.java

    r5944 r6101  
    3535  { return DocumentLoader.getAsString((URL) this.fileSet.getFile(0).getLocation());
    3636  }
     37
     38  /**
     39   *  Text documents don't actually have more than one section (at present), so
     40   *  this is a dummy function...
     41   */
     42  public String getSectionText(String sectionId)
     43  { return null;
     44  }
    3745}
Note: See TracChangeset for help on using the changeset viewer.