Changeset 8495


Ignore:
Timestamp:
2004-11-09T11:32:37+13:00 (19 years ago)
Author:
kjdon
Message:

New AbstractRecogniser class - a skeleton to aid with implementing new recognisers. All current recognisers now extend this. The constructors no longer take the documentList as a parameter - this must be set using setListRepository.
New subclasses should set the class variables in the constructor (preferred_mime_type, filename_extensions, document_type).

Location:
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes
Files:
1 added
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/GMLRecogniser.java

    r8484 r8495  
    44import java.io.*;
    55import java.net.*;
     6import java.util.ArrayList;
    67
    78import org.greenstone.gsdl3.gs3build.metadata.*;
    89
    9 public class GMLRecogniser implements RecogniserInterface
     10public class GMLRecogniser extends  AbstractRecogniser
    1011{
    11     DocumentList listRepository;
    1212   
    13     public GMLRecogniser(DocumentList listRepository)
    14     {
    15     this.listRepository = listRepository;
     13    public GMLRecogniser()   {
     14    this.preferred_mime_type = "text/xml";
     15    this.filename_extensions = new ArrayList();
     16    this.filename_extensions.add(".gml");
     17    this.document_type = GMLDocument.GML_DOCUMENT_TYPE;
    1618    }
    1719   
    18     public boolean parseDocument(METSFile file)
    19     {
    20     String MIMEType = file.getMIMEType();
    21     if (MIMEType == null ||
    22         MIMEType.equals("text/xml")) {
    23         URL location = file.getLocation();
    24         return this.parseDocument(location);
    25     }
    26     return false;
    27   }
    28 
     20   
    2921    public boolean parseDocument(URL url)
    3022    {
     23    String filename = null;
    3124    if (url.getProtocol().equals("file")) {
    32         String fileName = url.getPath();
    33         if (fileName.endsWith(".gml")) {
     25        filename = url.getPath();
     26    }
     27    if (filename != null) {
     28        if (isAcceptedFilename(filename)) {
    3429       
    35         System.out.println("Posting GML Document " + fileName);
     30        System.out.println("Posting GML Document " + filename);
    3631        GMLDocument doc = new GMLDocument(url);
    37         this.listRepository.addDocument(doc);
     32        this.list_repository.addDocument(doc);
    3833        // TODO: spawn knowledge of children too...
    3934        // System.out.println(doc.getDocumentText());
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/GS2METSRecogniser.java

    r8458 r8495  
    11package org.greenstone.gsdl3.gs3build.doctypes;
    22
    3 import java.io.*;
    4 import java.net.*;
     3import java.util.ArrayList;
    54
    6 import org.greenstone.gsdl3.gs3build.metadata.*;
    7 import org.greenstone.gsdl3.gs3build.util.HTTPTools;
    8 
    9 public class GS2METSRecogniser implements RecogniserInterface
     5public class GS2METSRecogniser extends AbstractRecogniser
    106{
    11     DocumentList listRepository;
    12 
    13     public GS2METSRecogniser(DocumentList listRepository)
     7    public GS2METSRecogniser()
    148    {
    15     this.listRepository = listRepository;
    16     }
    17 
    18     public boolean parseDocument(METSFile file)
    19     {
    20     String MIMEType = file.getMIMEType();
    21     if (MIMEType == null ||
    22         MIMEType.equals("text/xml")) {
    23         URL location = file.getLocation();
    24         return this.parseDocument(location);
    25     }
    26     return false;
     9    this.preferred_mime_type = "text/xml";
     10    this.filename_extensions = new ArrayList();
     11    this.filename_extensions.add("docmets.xml");
     12    this.document_type = METSDocument.METS_DOCUMENT_TYPE;
    2713    }
    2814   
    29     public boolean parseDocument(URL url)
    30     {
    31     String fileName = null;
    32    
    33     if (url.getProtocol().equals("file")) {
    34         fileName = url.getPath();
    35     }
    36 
    37     if (fileName != null) {
    38         if (fileName.endsWith("docmets.xml")) {
    39         System.out.println("Posting METS Document " + fileName);
    40         METSDocument doc = new METSDocument(url);
    41         this.listRepository.addDocument(doc);
    42         return true;
    43         }
    44     } else {
    45         // Get Mime type remotely, and then proceed if required
    46         String mimeType = HTTPTools.getMIMEType(url);
    47        
    48         if (mimeType == "text/html") {
    49         System.out.println("Posting METS Document " + url.toString());
    50         METSDocument doc = new METSDocument(url);
    51         this.listRepository.addDocument(doc);
    52         return true;
    53         }
    54     }
    55     return false;
    56     }
    5715}
    5816
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLRecogniser.java

    r8484 r8495  
    11package org.greenstone.gsdl3.gs3build.doctypes;
    22
    3 import java.io.*;
    4 import java.net.*;
     3import java.util.ArrayList;
    54
    6 import org.greenstone.gsdl3.gs3build.metadata.*;
    7 import org.greenstone.gsdl3.gs3build.util.HTTPTools;
    8 
    9 public class HTMLRecogniser implements RecogniserInterface
     5public class HTMLRecogniser extends AbstractRecogniser
    106{
    11     DocumentList listRepository;
    127   
    13     public HTMLRecogniser(DocumentList listRepository)
     8    public HTMLRecogniser()
    149    {
    15     this.listRepository = listRepository;
     10    this.preferred_mime_type = "text/html";
     11    this.filename_extensions = new ArrayList();
     12    this.filename_extensions.add(".htm");
     13    this.filename_extensions.add(".html");
     14    this.document_type = HTMLDocument.HTML_DOCUMENT_TYPE;
     15   
    1616    }
    1717   
    18     public boolean parseDocument(METSFile file)
    19     {
    20     String MIMEType = file.getMIMEType();
    21     if (MIMEType == null ||
    22         MIMEType.equals("text/html")) {
    23         URL location = file.getLocation();
    24         return this.parseDocument(location);
    25     }
    26     return false;
    27     }
    2818
    29     public boolean parseDocument(URL url)
    30     {
    31     String fileName = null;
    32 
    33     if (url.getProtocol().equals("file")) {
    34         fileName = url.getPath();
    35     }
    36 
    37     if (fileName != null) {
    38         if (fileName.endsWith(".htm") ||
    39         fileName.endsWith(".html")) {
    40        
    41         System.out.println("Posting HTML Document " + fileName);
    42        
    43         HTMLDocument doc = new HTMLDocument(url);
    44         this.listRepository.addDocument(doc);
    45         return true;
    46         }
    47     } else {
    48         // Get Mime type remotely, and then proceed if required
    49         String mimeType = HTTPTools.getMIMEType(url);
    50        
    51         if (mimeType == "text/html") {
    52         System.out.println("Posting HTML Document " + url.toString());
    53        
    54         HTMLDocument doc = new HTMLDocument(url);
    55         this.listRepository.addDocument(doc);
    56         return true;
    57         }
    58     }
    59     return false;
    60     }
    6119}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/IndexRecogniser.java

    r8484 r8495  
    33import java.io.*;
    44import java.net.*;
     5import java.util.ArrayList;
    56
    67import org.greenstone.gsdl3.gs3build.metadata.*;
    78
    8 public class IndexRecogniser implements RecogniserInterface
     9public class IndexRecogniser extends AbstractRecogniser
    910{
    10     DocumentList listRepository;
    1111   
    1212    public IndexRecogniser(DocumentList listRepository)
    1313    {
    14     this.listRepository = listRepository;
     14    this.preferred_mime_type = "text/plain";
     15    this.filename_extensions = new ArrayList();
     16    this.filename_extensions.add("index.txt");
     17    this.document_type = IndexDocument.INDEX_DOCUMENT_TYPE;
    1518    }
    1619   
    17     public boolean parseDocument(METSFile file)
    18     {
    19     String MIMEType = file.getMIMEType();
    20     if (MIMEType == null ||
    21         MIMEType.equals("text/plain")) {
    22         URL location = file.getLocation();
    23         return this.parseDocument(location);
    24     }
    25     return false;
    26     }
    2720
    2821    public boolean parseDocument(URL url)
    2922    {
    30     String fileName = null;
     23    String filename = null;
    3124
    3225    if (url.getProtocol().equals("file")) {
    33         fileName = url.getPath();
     26        filename = url.getPath();
    3427    }
    3528   
    36     if (fileName != null) {
     29    if (filename != null) {
    3730        String leafName;
    38         int leafAt = fileName.lastIndexOf(File.separator);
     31        int leafAt = filename.lastIndexOf(File.separator);
    3932        if (leafAt >= 0) {
    40         leafName = fileName.substring(leafAt+1);
     33        leafName = filename.substring(leafAt+1);
    4134        } else {
    42         leafName = fileName;
     35        leafName = filename;
    4336        }
    4437
    4538        if (leafName.equals("index.txt")) {
    4639       
    47         System.out.println("Posting Index Document " + fileName);
     40        System.out.println("Posting Index Document " + filename);
    4841        IndexDocument doc = new IndexDocument(url);
    49         this.listRepository.addDocument(doc);
     42        this.list_repository.addDocument(doc);
    5043        // TODO: spawn knowledge of children too...
    5144        //              System.out.println(doc.getDocumentText());
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/JPEGRecogniser.java

    r8484 r8495  
    11package org.greenstone.gsdl3.gs3build.doctypes;
    22
    3 import java.io.*;
    4 import java.net.*;
     3import java.util.ArrayList;
    54
    6 import org.greenstone.gsdl3.gs3build.metadata.*;
    7 
    8 public class JPEGRecogniser implements RecogniserInterface
     5public class JPEGRecogniser extends AbstractRecogniser
    96{
    10     DocumentList listRepository;
    117   
    12     public JPEGRecogniser(DocumentList listRepository)
     8    public JPEGRecogniser()
    139    {
    14     this.listRepository = listRepository;
    15     }
    16 
    17     public boolean parseDocument(METSFile file)
    18     {
    19     String MIMEType = file.getMIMEType();
    20     if (MIMEType == null ||
    21         MIMEType.equals("image/jpeg")) {
    22         URL location = file.getLocation();
    23         return this.parseDocument(location);
    24     }
    25     return false;
    26     }
    27 
    28     public boolean parseDocument(URL url)
    29     {
    30     if (url.getProtocol().equals("file")) {
    31         String fileName = url.getPath();
    32        
    33         if (fileName != null &&
    34         (fileName.endsWith(".jpg") ||
    35          fileName.endsWith(".jpeg"))) {
    36         System.out.println("Posting jpeg document " + url.toString());
    37         this.listRepository.addDocument(new JPEGDocument(url));
    38         // TODO: spawn knowledge of children too...
    39         return true;
    40         }
    41     } else {
    42         // TODO: get Mime type remotely, and then proceed if required
    43     }
    44    
    45     return false;
     10    this.preferred_mime_type = "image/jpeg";
     11    this.filename_extensions = new ArrayList();
     12    this.filename_extensions.add(".jpg");
     13    this.filename_extensions.add(".jpeg");
     14    this.document_type = JPEGDocument.JPEG_DOCUMENT_TYPE;
    4615    }
    4716   
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/MetadataRecogniser.java

    r8484 r8495  
    11package org.greenstone.gsdl3.gs3build.doctypes;
    22
    3 import java.io.*;
    4 import java.net.*;
     3//import java.io.*;
     4import java.net.URL;
     5import java.util.ArrayList;
    56
    6 import org.greenstone.gsdl3.gs3build.metadata.*;
    77
    8 public class MetadataRecogniser implements RecogniserInterface
     8public class MetadataRecogniser extends AbstractRecogniser
    99{
    10     DocumentList listRepository;
    11    
    12     public MetadataRecogniser(DocumentList listRepository)
     10
     11    public MetadataRecogniser()
    1312    {
    14     this.listRepository = listRepository;
    15     }
    16 
    17     public boolean parseDocument(METSFile file)
    18     {
    19     String MIMEType = file.getMIMEType();
    20     if (MIMEType == null ||
    21         MIMEType.equals("text/xml")) {
    22         URL location = file.getLocation();
    23         return this.parseDocument(location);
    24     }
    25     return false;
     13    this.preferred_mime_type = "text/xml";
     14    this.filename_extensions = new ArrayList();
     15    this.filename_extensions.add("metadata.xml");
     16    this.document_type = MetadataDocument.METADATA_DOCUMENT_TYPE;
    2617    }
    2718
    2819    public boolean parseDocument(URL url)
    2920    {
    30     String fileName = null;
     21    String filename = null;
    3122   
    3223    if (url.getProtocol().equals("file")) {
    33         fileName = url.getPath();
     24        filename = url.getPath();
    3425    }
    3526   
    36     if (fileName != null) {
    37         if (fileName.endsWith(File.separatorChar + "metadata.xml")) {
     27    if (filename != null) {
     28        if (isAcceptedFilename(filename)) {
    3829       
    39         System.out.println("Posting Metadata Document " + fileName);
     30        System.out.println("Posting Metadata Document " + filename);
    4031        MetadataDocument doc = new MetadataDocument(url);
    41         this.listRepository.addDocument(doc);
     32        this.list_repository.addDocument(doc);
    4233        // TODO: spawn knowledge of children too...
    4334        // System.out.println(doc.getDocumentText());
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/RecogniserInterface.java

    r5800 r8495  
    55import org.greenstone.gsdl3.gs3build.metadata.*;
    66
     7/** Interface for Recogniser Objects
     8 *
     9 * A skeleton class is provided for simple Recognisers (AbstractRecogniser)
     10 *
     11*/
    712public interface RecogniserInterface
    813{
    9     public boolean parseDocument(METSFile fileReference);
    10     public boolean parseDocument(URL fileReference);
     14    public void setListRepository(DocumentList docList);
     15    public boolean parseDocument(METSFile fileReference);
     16    public boolean parseDocument(URL fileReference);
    1117}
  • trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/TextRecogniser.java

    r8484 r8495  
    33import java.io.*;
    44import java.net.*;
     5import java.util.ArrayList;
    56
    67import org.greenstone.gsdl3.gs3build.metadata.*;
    78import org.greenstone.gsdl3.gs3build.util.HTTPTools;
    89
    9 public class TextRecogniser implements RecogniserInterface
     10public class TextRecogniser extends AbstractRecogniser
    1011{
    11     DocumentList listRepository;
    1212   
    13     public TextRecogniser(DocumentList listRepository)
     13    public TextRecogniser()
    1414    {
    15     this.listRepository = listRepository;
     15    this.preferred_mime_type = "text/plain";
     16    this.filename_extensions = new ArrayList();
     17    this.filename_extensions.add(".txt");
     18    this.filename_extensions.add(".text");
     19    this.document_type = TextDocument.TEXT_DOCUMENT_TYPE;
    1620    }
    1721   
    18     public boolean parseDocument(METSFile file)
    19     {
    20     String MIMEType = file.getMIMEType();
    21     if (MIMEType == null ||
    22         MIMEType.equals("text/plain")) {
    23         URL location = file.getLocation();
    24         return this.parseDocument(location);
    25     }
    26     return false;
    27     }
    28    
    29     public boolean parseDocument(URL url)
    30     {
    31     String fileName = null;
    32 
    33     if (url.getProtocol().equals("file")) {
    34         fileName = url.getPath();
    35     }
    36    
    37     if (fileName != null) {
    38         if (fileName.endsWith(".txt") || fileName.endsWith(".text")) {
    39         this.listRepository.addDocument(new TextDocument(url));
    40         // TODO: spawn knowledge of children too...
    41         System.out.println("Posting text document " + fileName);
    42         return true;
    43         }
    44     } else { // Check MIME type
    45         String mimeType = HTTPTools.getMIMEType(url);
    46        
    47         if (mimeType == "text/plain") {
    48         System.out.println("Posting Text document " + url.toString());
    49        
    50         TextDocument doc = new TextDocument(url);
    51         this.listRepository.addDocument(doc);
    52         return true;
    53         }
    54     }
    55    
    56     return false;
    57     }
    5822}
Note: See TracChangeset for help on using the changeset viewer.