Changeset 8495
- Timestamp:
- 2004-11-09T11:32:37+13:00 (19 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes
- Files:
-
- 1 added
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/GMLRecogniser.java
r8484 r8495 4 4 import java.io.*; 5 5 import java.net.*; 6 import java.util.ArrayList; 6 7 7 8 import org.greenstone.gsdl3.gs3build.metadata.*; 8 9 9 public class GMLRecogniser implements RecogniserInterface10 public class GMLRecogniser extends AbstractRecogniser 10 11 { 11 DocumentList listRepository;12 12 13 public GMLRecogniser(DocumentList listRepository) 14 { 15 this.listRepository = listRepository; 13 public GMLRecogniser() { 14 this.preferred_mime_type = "text/xml"; 15 this.filename_extensions = new ArrayList(); 16 this.filename_extensions.add(".gml"); 17 this.document_type = GMLDocument.GML_DOCUMENT_TYPE; 16 18 } 17 19 18 public boolean parseDocument(METSFile file) 19 { 20 String MIMEType = file.getMIMEType(); 21 if (MIMEType == null || 22 MIMEType.equals("text/xml")) { 23 URL location = file.getLocation(); 24 return this.parseDocument(location); 25 } 26 return false; 27 } 28 20 29 21 public boolean parseDocument(URL url) 30 22 { 23 String filename = null; 31 24 if (url.getProtocol().equals("file")) { 32 String fileName = url.getPath(); 33 if (fileName.endsWith(".gml")) { 25 filename = url.getPath(); 26 } 27 if (filename != null) { 28 if (isAcceptedFilename(filename)) { 34 29 35 System.out.println("Posting GML Document " + file Name);30 System.out.println("Posting GML Document " + filename); 36 31 GMLDocument doc = new GMLDocument(url); 37 this.list Repository.addDocument(doc);32 this.list_repository.addDocument(doc); 38 33 // TODO: spawn knowledge of children too... 39 34 // System.out.println(doc.getDocumentText()); -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/GS2METSRecogniser.java
r8458 r8495 1 1 package org.greenstone.gsdl3.gs3build.doctypes; 2 2 3 import java.io.*; 4 import java.net.*; 3 import java.util.ArrayList; 5 4 6 import org.greenstone.gsdl3.gs3build.metadata.*; 7 import org.greenstone.gsdl3.gs3build.util.HTTPTools; 8 9 public class GS2METSRecogniser implements RecogniserInterface 5 public class GS2METSRecogniser extends AbstractRecogniser 10 6 { 11 DocumentList listRepository; 12 13 public GS2METSRecogniser(DocumentList listRepository) 7 public GS2METSRecogniser() 14 8 { 15 this.listRepository = listRepository; 16 } 17 18 public boolean parseDocument(METSFile file) 19 { 20 String MIMEType = file.getMIMEType(); 21 if (MIMEType == null || 22 MIMEType.equals("text/xml")) { 23 URL location = file.getLocation(); 24 return this.parseDocument(location); 25 } 26 return false; 9 this.preferred_mime_type = "text/xml"; 10 this.filename_extensions = new ArrayList(); 11 this.filename_extensions.add("docmets.xml"); 12 this.document_type = METSDocument.METS_DOCUMENT_TYPE; 27 13 } 28 14 29 public boolean parseDocument(URL url)30 {31 String fileName = null;32 33 if (url.getProtocol().equals("file")) {34 fileName = url.getPath();35 }36 37 if (fileName != null) {38 if (fileName.endsWith("docmets.xml")) {39 System.out.println("Posting METS Document " + fileName);40 METSDocument doc = new METSDocument(url);41 this.listRepository.addDocument(doc);42 return true;43 }44 } else {45 // Get Mime type remotely, and then proceed if required46 String mimeType = HTTPTools.getMIMEType(url);47 48 if (mimeType == "text/html") {49 System.out.println("Posting METS Document " + url.toString());50 METSDocument doc = new METSDocument(url);51 this.listRepository.addDocument(doc);52 return true;53 }54 }55 return false;56 }57 15 } 58 16 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLRecogniser.java
r8484 r8495 1 1 package org.greenstone.gsdl3.gs3build.doctypes; 2 2 3 import java.io.*; 4 import java.net.*; 3 import java.util.ArrayList; 5 4 6 import org.greenstone.gsdl3.gs3build.metadata.*; 7 import org.greenstone.gsdl3.gs3build.util.HTTPTools; 8 9 public class HTMLRecogniser implements RecogniserInterface 5 public class HTMLRecogniser extends AbstractRecogniser 10 6 { 11 DocumentList listRepository;12 7 13 public HTMLRecogniser( DocumentList listRepository)8 public HTMLRecogniser() 14 9 { 15 this.listRepository = listRepository; 10 this.preferred_mime_type = "text/html"; 11 this.filename_extensions = new ArrayList(); 12 this.filename_extensions.add(".htm"); 13 this.filename_extensions.add(".html"); 14 this.document_type = HTMLDocument.HTML_DOCUMENT_TYPE; 15 16 16 } 17 17 18 public boolean parseDocument(METSFile file)19 {20 String MIMEType = file.getMIMEType();21 if (MIMEType == null ||22 MIMEType.equals("text/html")) {23 URL location = file.getLocation();24 return this.parseDocument(location);25 }26 return false;27 }28 18 29 public boolean parseDocument(URL url)30 {31 String fileName = null;32 33 if (url.getProtocol().equals("file")) {34 fileName = url.getPath();35 }36 37 if (fileName != null) {38 if (fileName.endsWith(".htm") ||39 fileName.endsWith(".html")) {40 41 System.out.println("Posting HTML Document " + fileName);42 43 HTMLDocument doc = new HTMLDocument(url);44 this.listRepository.addDocument(doc);45 return true;46 }47 } else {48 // Get Mime type remotely, and then proceed if required49 String mimeType = HTTPTools.getMIMEType(url);50 51 if (mimeType == "text/html") {52 System.out.println("Posting HTML Document " + url.toString());53 54 HTMLDocument doc = new HTMLDocument(url);55 this.listRepository.addDocument(doc);56 return true;57 }58 }59 return false;60 }61 19 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/IndexRecogniser.java
r8484 r8495 3 3 import java.io.*; 4 4 import java.net.*; 5 import java.util.ArrayList; 5 6 6 7 import org.greenstone.gsdl3.gs3build.metadata.*; 7 8 8 public class IndexRecogniser implements RecogniserInterface9 public class IndexRecogniser extends AbstractRecogniser 9 10 { 10 DocumentList listRepository;11 11 12 12 public IndexRecogniser(DocumentList listRepository) 13 13 { 14 this.listRepository = listRepository; 14 this.preferred_mime_type = "text/plain"; 15 this.filename_extensions = new ArrayList(); 16 this.filename_extensions.add("index.txt"); 17 this.document_type = IndexDocument.INDEX_DOCUMENT_TYPE; 15 18 } 16 19 17 public boolean parseDocument(METSFile file)18 {19 String MIMEType = file.getMIMEType();20 if (MIMEType == null ||21 MIMEType.equals("text/plain")) {22 URL location = file.getLocation();23 return this.parseDocument(location);24 }25 return false;26 }27 20 28 21 public boolean parseDocument(URL url) 29 22 { 30 String file Name = null;23 String filename = null; 31 24 32 25 if (url.getProtocol().equals("file")) { 33 file Name = url.getPath();26 filename = url.getPath(); 34 27 } 35 28 36 if (file Name != null) {29 if (filename != null) { 37 30 String leafName; 38 int leafAt = file Name.lastIndexOf(File.separator);31 int leafAt = filename.lastIndexOf(File.separator); 39 32 if (leafAt >= 0) { 40 leafName = file Name.substring(leafAt+1);33 leafName = filename.substring(leafAt+1); 41 34 } else { 42 leafName = file Name;35 leafName = filename; 43 36 } 44 37 45 38 if (leafName.equals("index.txt")) { 46 39 47 System.out.println("Posting Index Document " + file Name);40 System.out.println("Posting Index Document " + filename); 48 41 IndexDocument doc = new IndexDocument(url); 49 this.list Repository.addDocument(doc);42 this.list_repository.addDocument(doc); 50 43 // TODO: spawn knowledge of children too... 51 44 // System.out.println(doc.getDocumentText()); -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/JPEGRecogniser.java
r8484 r8495 1 1 package org.greenstone.gsdl3.gs3build.doctypes; 2 2 3 import java.io.*; 4 import java.net.*; 3 import java.util.ArrayList; 5 4 6 import org.greenstone.gsdl3.gs3build.metadata.*; 7 8 public class JPEGRecogniser implements RecogniserInterface 5 public class JPEGRecogniser extends AbstractRecogniser 9 6 { 10 DocumentList listRepository;11 7 12 public JPEGRecogniser( DocumentList listRepository)8 public JPEGRecogniser() 13 9 { 14 this.listRepository = listRepository; 15 } 16 17 public boolean parseDocument(METSFile file) 18 { 19 String MIMEType = file.getMIMEType(); 20 if (MIMEType == null || 21 MIMEType.equals("image/jpeg")) { 22 URL location = file.getLocation(); 23 return this.parseDocument(location); 24 } 25 return false; 26 } 27 28 public boolean parseDocument(URL url) 29 { 30 if (url.getProtocol().equals("file")) { 31 String fileName = url.getPath(); 32 33 if (fileName != null && 34 (fileName.endsWith(".jpg") || 35 fileName.endsWith(".jpeg"))) { 36 System.out.println("Posting jpeg document " + url.toString()); 37 this.listRepository.addDocument(new JPEGDocument(url)); 38 // TODO: spawn knowledge of children too... 39 return true; 40 } 41 } else { 42 // TODO: get Mime type remotely, and then proceed if required 43 } 44 45 return false; 10 this.preferred_mime_type = "image/jpeg"; 11 this.filename_extensions = new ArrayList(); 12 this.filename_extensions.add(".jpg"); 13 this.filename_extensions.add(".jpeg"); 14 this.document_type = JPEGDocument.JPEG_DOCUMENT_TYPE; 46 15 } 47 16 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/MetadataRecogniser.java
r8484 r8495 1 1 package org.greenstone.gsdl3.gs3build.doctypes; 2 2 3 import java.io.*; 4 import java.net.*; 3 //import java.io.*; 4 import java.net.URL; 5 import java.util.ArrayList; 5 6 6 import org.greenstone.gsdl3.gs3build.metadata.*;7 7 8 public class MetadataRecogniser implements RecogniserInterface8 public class MetadataRecogniser extends AbstractRecogniser 9 9 { 10 DocumentList listRepository; 11 12 public MetadataRecogniser(DocumentList listRepository) 10 11 public MetadataRecogniser() 13 12 { 14 this.listRepository = listRepository; 15 } 16 17 public boolean parseDocument(METSFile file) 18 { 19 String MIMEType = file.getMIMEType(); 20 if (MIMEType == null || 21 MIMEType.equals("text/xml")) { 22 URL location = file.getLocation(); 23 return this.parseDocument(location); 24 } 25 return false; 13 this.preferred_mime_type = "text/xml"; 14 this.filename_extensions = new ArrayList(); 15 this.filename_extensions.add("metadata.xml"); 16 this.document_type = MetadataDocument.METADATA_DOCUMENT_TYPE; 26 17 } 27 18 28 19 public boolean parseDocument(URL url) 29 20 { 30 String file Name = null;21 String filename = null; 31 22 32 23 if (url.getProtocol().equals("file")) { 33 file Name = url.getPath();24 filename = url.getPath(); 34 25 } 35 26 36 if (file Name != null) {37 if ( fileName.endsWith(File.separatorChar + "metadata.xml")) {27 if (filename != null) { 28 if (isAcceptedFilename(filename)) { 38 29 39 System.out.println("Posting Metadata Document " + file Name);30 System.out.println("Posting Metadata Document " + filename); 40 31 MetadataDocument doc = new MetadataDocument(url); 41 this.list Repository.addDocument(doc);32 this.list_repository.addDocument(doc); 42 33 // TODO: spawn knowledge of children too... 43 34 // System.out.println(doc.getDocumentText()); -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/RecogniserInterface.java
r5800 → r8495 (columns: old line, new line; "-" removed, "+" added)
   5   5    import org.greenstone.gsdl3.gs3build.metadata.*;
   6   6
       7  + /** Interface for Recogniser Objects
       8  +  *
       9  +  * A skeleton class is provided for simple Recognisers (AbstractRecogniser)
      10  +  *
      11  +  */
   7  12    public interface RecogniserInterface
   8  13    {
   9      -   public boolean parseDocument(METSFile fileReference);
  10      -   public boolean parseDocument(URL fileReference);
      14  +   public void setListRepository(DocumentList docList);
      15  +   public boolean parseDocument(METSFile fileReference);
      16  +   public boolean parseDocument(URL fileReference);
  11  17    }
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/TextRecogniser.java
r8484 r8495 3 3 import java.io.*; 4 4 import java.net.*; 5 import java.util.ArrayList; 5 6 6 7 import org.greenstone.gsdl3.gs3build.metadata.*; 7 8 import org.greenstone.gsdl3.gs3build.util.HTTPTools; 8 9 9 public class TextRecogniser implements RecogniserInterface10 public class TextRecogniser extends AbstractRecogniser 10 11 { 11 DocumentList listRepository;12 12 13 public TextRecogniser( DocumentList listRepository)13 public TextRecogniser() 14 14 { 15 this.listRepository = listRepository; 15 this.preferred_mime_type = "text/plain"; 16 this.filename_extensions = new ArrayList(); 17 this.filename_extensions.add(".txt"); 18 this.filename_extensions.add(".text"); 19 this.document_type = TextDocument.TEXT_DOCUMENT_TYPE; 16 20 } 17 21 18 public boolean parseDocument(METSFile file)19 {20 String MIMEType = file.getMIMEType();21 if (MIMEType == null ||22 MIMEType.equals("text/plain")) {23 URL location = file.getLocation();24 return this.parseDocument(location);25 }26 return false;27 }28 29 public boolean parseDocument(URL url)30 {31 String fileName = null;32 33 if (url.getProtocol().equals("file")) {34 fileName = url.getPath();35 }36 37 if (fileName != null) {38 if (fileName.endsWith(".txt") || fileName.endsWith(".text")) {39 this.listRepository.addDocument(new TextDocument(url));40 // TODO: spawn knowledge of children too...41 System.out.println("Posting text document " + fileName);42 return true;43 }44 } else { // Check MIME type45 String mimeType = HTTPTools.getMIMEType(url);46 47 if (mimeType == "text/plain") {48 System.out.println("Posting Text document " + url.toString());49 50 TextDocument doc = new TextDocument(url);51 this.listRepository.addDocument(doc);52 return true;53 }54 }55 56 return false;57 }58 22 }
Note: See TracChangeset for help on using the changeset viewer.